diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index e8c972f89357d2dc47bbe57000e503f46d964f37..1b5042f134a8679bf0d6f0e707a839cd76bfce38 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -112,6 +112,12 @@ OPTIONS --objdump=:: Path to objdump binary. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --skip-missing:: Skip symbols that cannot be annotated. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 0921a3c673815c6a031278be3107f80011eb5c46..bad16512c48d7b38846fff58a1749c9ec54a9f08 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -61,6 +61,9 @@ SUBSYSTEM 'epoll':: Eventpoll (epoll) stressing benchmarks. +'internals':: + Benchmark internal perf functionality. + 'all':: All benchmark subsystems. @@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls. *ctl*:: Suite for evaluating multiple epoll_ctl calls. +SUITES FOR 'internals' +~~~~~~~~~~~~~~~~~~~~~~ +*synthesize*:: +Suite for evaluating perf's event synthesis performance. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index c87180764829c069c27c7c4cfaec4f43248ac29d..417bf17e265c2030d45021ae966fe2a8d0663f65 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -17,7 +17,7 @@ Data file related processing. COMMANDS -------- convert:: - Converts perf data file into another format (only CTF [1] format is support by now). + Converts perf data file into another format. It's possible to set data-convert debug variable to get debug messages from conversion, like: perf --debug data-convert data convert ... @@ -27,6 +27,12 @@ OPTIONS for 'convert' --to-ctf:: Triggers the CTF conversion, specify the path of CTF data directory. +--to-json:: + Triggers JSON conversion. Specify the JSON filename to output. + +--tod:: + Convert time to wall clock time. + -i:: Specify input perf data file path. diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index a64d6588470e605845e1eb4589e8e9a7a8a6f049..c34eb5b8c762b677cc1386e71a6179d4c6c0ab33 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -24,8 +24,12 @@ information could make use of this facility. OPTIONS ------- -b:: ---build-ids=:: +--build-ids:: Inject build-ids into the output stream + +--buildid-all: + Inject build-ids of all DSOs into the output stream + -v:: --verbose:: Be more verbose. @@ -64,6 +68,16 @@ include::itrace.txt[] --force:: Don't complain, do it. +--vm-time-correlation[=OPTIONS]:: + Some architectures may capture AUX area data which contains timestamps + affected by virtualization. This option will update those timestamps + in place, to correlate with host timestamps. The in-place update means + that an output file is not specified, and instead the input file is + modified. The options are architecture specific, except that they may + start with "dry-run" which will cause the file to be processed but + without updating it. Currently this option is supported only by + Intel PT, refer linkperf:perf-intel-pt[1] + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 7315f155803f753ea9603ed001f1daf36a09a64e..f627a962b80eebd31fc1b6bf08470c03b32f947d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -367,6 +367,12 @@ OPTIONS --objdump=:: Path to objdump binary. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --group:: Show event group information together. It forces group output also if there are no groups defined in data file. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 5596129a71cf5d5136e2fba62114ebe9af411ea7..324b6b53c86b65d325dd6726bb35d82b3a03f823 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -158,6 +158,12 @@ Default is to monitor all CPUS. -M:: --disassembler-style=:: Set disassembler style for objdump. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --source:: Interleave source code with assembly code. Enabled by default, disable with --no-source. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index b0152e1095c58af089b8cd41bbc9264bbcdd8aa3..99866b5ea02844b3045abacad2ac5d6fe8a53893 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -373,6 +373,70 @@ struct { Indicates that trace contains records of PERF_RECORD_COMPRESSED type that have perf_events records in compressed form. + HEADER_CPU_PMU_CAPS = 28, + + A list of cpu PMU capabilities. The format of data is as below. + +struct { + u32 nr_cpu_pmu_caps; + { + char name[]; + char value[]; + } [nr_cpu_pmu_caps] +}; + + +Example: + cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake + + HEADER_CLOCK_DATA = 29, + + Contains clock id and its reference time together with wall clock + time taken at the 'same time', both values are in nanoseconds. + The format of data is as below. + +struct { + u32 version; /* version = 1 */ + u32 clockid; + u64 wall_clock_ns; + u64 clockid_time_ns; +}; + + HEADER_HYBRID_TOPOLOGY = 30, + +Indicate the hybrid CPUs. The format of data is as below. + +struct { + u32 nr; + struct { + char pmu_name[]; + char cpus[]; + } [nr]; /* Variable length records */ +}; + +Example: + hybrid cpu system: + cpu_core cpu list : 0-15 + cpu_atom cpu list : 16-23 + + HEADER_PMU_CAPS = 31, + + List of pmu capabilities (except cpu pmu which is already + covered by HEADER_CPU_PMU_CAPS). Note that hybrid cpu pmu + capabilities are also stored here. + +struct { + u32 nr_pmu; + struct { + u32 nr_caps; + { + char name[]; + char value[]; + } [nr_caps]; + char pmu_name[]; + } [nr_pmu]; +}; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index e4e321b6f8835e88a6d3fe28e18459aeed9ccb9a..075132693375c4c9a15aa97a67b67eb34f2db6ab 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -6,9 +6,10 @@ perf-y += futex-wake.o perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o - perf-y += epoll-wait.o perf-y += epoll-ctl.o +perf-y += synthesize.o +perf-y += inject-buildid.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index d9329ae84e178993042534d5e89d219dffa3fe34..02b93a7a6f4a35184a45348303a8349f3a9e620e 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -29,9 +29,10 @@ int bench_futex_wake_parallel(int argc, const char **argv); int bench_futex_requeue(int argc, const char **argv); /* pi futexes */ int bench_futex_lock_pi(int argc, const char **argv); - int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); +int bench_synthesize(int argc, const char **argv); +int bench_inject_build_id(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c new file mode 100644 index 0000000000000000000000000000000000000000..0fccf2a9e95b29b96e2e7bab664bdcbbcdb217dd --- /dev/null +++ b/tools/perf/bench/inject-buildid.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bench.h" +#include "util/data.h" +#include "util/stat.h" +#include "util/debug.h" +#include "util/event.h" +#include "util/symbol.h" +#include "util/session.h" +#include "util/build-id.h" +#include "util/synthetic-events.h" + +#define MMAP_DEV_MAJOR 8 +#define DSO_MMAP_RATIO 4 + +static unsigned int iterations = 100; +static unsigned int nr_mmaps = 100; +static unsigned int nr_samples = 100; /* samples per mmap */ + +static u64 bench_sample_type; +static u16 bench_id_hdr_size; + +struct bench_data { + int pid; + int input_pipe[2]; + int output_pipe[2]; + pthread_t th; +}; + +struct bench_dso { + struct list_head list; + char *name; + int ino; +}; + +static int nr_dsos; +static struct bench_dso *dsos; + +extern int cmd_inject(int argc, const char *argv[]); + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average (default: 100)"), + OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps, + "Number of mmap events for each iteration (default: 100)"), + OPT_UINTEGER('n', "nr-samples", &nr_samples, + "Number of sample events per mmap event (default: 100)"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show iteration count, DSO name, etc)"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals inject-build-id ", + NULL +}; + +/* + * Helper for collect_dso that adds the given file as a dso to dso_list + * if it contains a build-id. Stops after collecting 4 times more than + * we need (for MMAP2 events). + */ +static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag, struct FTW *ftwbuf __maybe_unused) +{ + struct bench_dso *dso = &dsos[nr_dsos]; + unsigned char build_id[BUILD_ID_SIZE]; + + if (typeflag == FTW_D || typeflag == FTW_SL) + return 0; + + if (filename__read_build_id(fpath, build_id, BUILD_ID_SIZE) < 0) + return 0; + + dso->name = realpath(fpath, NULL); + if (dso->name == NULL) + return -1; + + dso->ino = nr_dsos++; + pr_debug2(" Adding DSO: %s\n", fpath); + + /* stop if we collected enough DSOs */ + if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps) + return 1; + + return 0; +} + +static void collect_dso(void) +{ + dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos)); + if (dsos == NULL) { + printf(" Memory allocation failed\n"); + exit(1); + } + + if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0) + return; + + pr_debug(" Collected %d DSOs\n", nr_dsos); +} + +static void release_dso(void) +{ + int i; + + for (i = 0; i < nr_dsos; i++) { + struct bench_dso *dso = &dsos[i]; + + free(dso->name); + } + free(dsos); +} + +/* Fake address used by mmap and sample events */ +static u64 dso_map_addr(struct bench_dso *dso) +{ + return 0x400000ULL + dso->ino * 8192ULL; +} + +static u32 synthesize_attr(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.attr) + sizeof(u64)); + + event.header.type = PERF_RECORD_HEADER_ATTR; + event.header.size = sizeof(event.attr) + sizeof(u64); + + event.attr.attr.type = PERF_TYPE_SOFTWARE; + event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK; + event.attr.attr.exclude_kernel = 1; + event.attr.attr.sample_id_all = 1; + event.attr.attr.sample_type = bench_sample_type; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_fork(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size); + + event.header.type = PERF_RECORD_FORK; + event.header.misc = PERF_RECORD_MISC_FORK_EXEC; + event.header.size = sizeof(event.fork) + bench_id_hdr_size; + + event.fork.ppid = 1; + event.fork.ptid = 1; + event.fork.pid = data->pid; + event.fork.tid = data->pid; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + size_t len = offsetof(struct perf_record_mmap2, filename); + u64 *id_hdr_ptr = (void *)&event; + int ts_idx; + + len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size; + + memset(&event, 0, min(len, sizeof(event.mmap2))); + + event.header.type = PERF_RECORD_MMAP2; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = len; + + event.mmap2.pid = data->pid; + event.mmap2.tid = data->pid; + event.mmap2.maj = MMAP_DEV_MAJOR; + event.mmap2.ino = dso->ino; + + strcpy(event.mmap2.filename, dso->name); + + event.mmap2.start = dso_map_addr(dso); + event.mmap2.len = 4096; + event.mmap2.prot = PROT_EXEC; + + if (len > sizeof(event.mmap2)) { + /* write mmap2 event first */ + writen(data->input_pipe[1], &event, len - bench_id_hdr_size); + /* zero-fill sample id header */ + memset(id_hdr_ptr, 0, bench_id_hdr_size); + /* put timestamp in the right position */ + ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size); + } else { + ts_idx = (len / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], &event, len); + } + return len; +} + +static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + struct perf_sample sample = { + .tid = data->pid, + .pid = data->pid, + .ip = dso_map_addr(dso), + .time = timestamp, + }; + + event.header.type = PERF_RECORD_SAMPLE; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0); + + perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample); + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_flush(struct bench_data *data) +{ + struct perf_event_header header = { + .size = sizeof(header), + .type = PERF_RECORD_FINISHED_ROUND, + }; + + return writen(data->input_pipe[1], &header, header.size); +} + +static void *data_reader(void *arg) +{ + struct bench_data *data = arg; + char buf[8192]; + int flag; + int n; + + flag = fcntl(data->output_pipe[0], F_GETFL); + fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK); + + /* read out data from child */ + while (true) { + n = read(data->output_pipe[0], buf, sizeof(buf)); + if (n > 0) + continue; + if (n == 0) + break; + + if (errno != EINTR && errno != EAGAIN) + break; + + usleep(100); + } + + close(data->output_pipe[0]); + return NULL; +} + +static int setup_injection(struct bench_data *data) +{ + int ready_pipe[2]; + int dev_null_fd; + char buf; + + if (pipe(ready_pipe) < 0) + return -1; + + if (pipe(data->input_pipe) < 0) + return -1; + + if (pipe(data->output_pipe) < 0) + return -1; + + data->pid = fork(); + if (data->pid < 0) + return -1; + + if (data->pid == 0) { + const char **inject_argv; + + close(data->input_pipe[1]); + close(data->output_pipe[0]); + close(ready_pipe[0]); + + dup2(data->input_pipe[0], STDIN_FILENO); + close(data->input_pipe[0]); + dup2(data->output_pipe[1], STDOUT_FILENO); + close(data->output_pipe[1]); + + dev_null_fd = open("/dev/null", O_WRONLY); + if (dev_null_fd < 0) + exit(1); + + dup2(dev_null_fd, STDERR_FILENO); + + inject_argv = calloc(3, sizeof(*inject_argv)); + if (inject_argv == NULL) + exit(1); + + inject_argv[0] = strdup("inject"); + inject_argv[1] = strdup("-b"); + + /* signal that we're ready to go */ + close(ready_pipe[1]); + + cmd_inject(2, inject_argv); + + exit(0); + } + + pthread_create(&data->th, NULL, data_reader, data); + + close(ready_pipe[1]); + close(data->input_pipe[0]); + close(data->output_pipe[1]); + + /* wait for child ready */ + if (read(ready_pipe[0], &buf, 1) < 0) + return -1; + close(ready_pipe[0]); + + return 0; +} + +static int inject_build_id(struct bench_data *data, u64 *max_rss) +{ + int status; + unsigned int i, k; + struct rusage rusage; + u64 len = 0; + + /* this makes the child to run */ + if (perf_header__write_pipe(data->input_pipe[1]) < 0) + return -1; + + len += synthesize_attr(data); + len += synthesize_fork(data); + + for (i = 0; i < nr_mmaps; i++) { + int idx = rand() % (nr_dsos - 1); + struct bench_dso *dso = &dsos[idx]; + u64 timestamp = rand() % 1000000; + + pr_debug2(" [%d] injecting: %s\n", i+1, dso->name); + len += synthesize_mmap(data, dso, timestamp); + + for (k = 0; k < nr_samples; k++) + len += synthesize_sample(data, dso, timestamp + k * 1000); + + if ((i + 1) % 10 == 0) + len += synthesize_flush(data); + } + + /* tihs makes the child to finish */ + close(data->input_pipe[1]); + + wait4(data->pid, &status, 0, &rusage); + *max_rss = rusage.ru_maxrss; + + pr_debug(" Child %d exited with %d\n", data->pid, status); + + return 0; +} + +static int do_inject_loop(struct bench_data *data) +{ + unsigned int i; + struct stats time_stats, mem_stats; + double time_average, time_stddev; + double mem_average, mem_stddev; + + srand(time(NULL)); + init_stats(&time_stats); + init_stats(&mem_stats); + symbol__init(NULL); + + bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; + bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; + bench_id_hdr_size = 32; + + collect_dso(); + if (nr_dsos == 0) { + printf(" Cannot collect DSOs for injection\n"); + return -1; + } + + for (i = 0; i < iterations; i++) { + struct timeval start, end, diff; + u64 runtime_us, max_rss; + + pr_debug(" Iteration #%d\n", i+1); + + if (setup_injection(data) < 0) { + printf(" Build-id injection setup failed\n"); + break; + } + + gettimeofday(&start, NULL); + if (inject_build_id(data, &max_rss) < 0) { + printf(" Build-id injection failed\n"); + break; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&mem_stats, max_rss); + + pthread_join(data->th, NULL); + } + + time_average = avg_stats(&time_stats) / USEC_PER_MSEC; + time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; + printf(" Average build-id injection took: %.3f msec (+- %.3f msec)\n", + time_average, time_stddev); + + /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */ + time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + printf(" Average time per event: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + mem_average = avg_stats(&mem_stats); + mem_stddev = stddev_stats(&mem_stats); + printf(" Average memory usage: %.0f KB (+- %.0f KB)\n", + mem_average, mem_stddev); + + release_dso(); + return 0; +} + +int bench_inject_build_id(int argc, const char **argv) +{ + struct bench_data data; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + return do_inject_loop(&data); +} + diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c new file mode 100644 index 0000000000000000000000000000000000000000..05f7c923c745b4e8d2acca73c4c50e0cc7be8ae3 --- /dev/null +++ b/tools/perf/bench/synthesize.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Benchmark synthesis of perf events such as at the start of a 'perf + * record'. Synthesis is done on the current process and the 'dummy' event + * handlers are invoked that support dump_trace but otherwise do nothing. + * + * Copyright 2019 Google LLC. + */ +#include +#include "bench.h" +#include "../util/debug.h" +#include "../util/session.h" +#include "../util/stat.h" +#include "../util/synthetic-events.h" +#include "../util/target.h" +#include "../util/thread_map.h" +#include "../util/tool.h" +#include "../util/util.h" +#include +#include +#include +#include + +static unsigned int min_threads = 1; +static unsigned int max_threads = UINT_MAX; +static unsigned int single_iterations = 10000; +static unsigned int multi_iterations = 10; +static bool run_st; +static bool run_mt; + +static const struct option options[] = { + OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"), + OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"), + OPT_UINTEGER('m', "min-threads", &min_threads, + "Minimum number of threads in multithreaded bench"), + OPT_UINTEGER('M', "max-threads", &max_threads, + "Maximum number of threads in multithreaded bench"), + OPT_UINTEGER('i', "single-iterations", &single_iterations, + "Number of iterations used to compute single-threaded average"), + OPT_UINTEGER('I', "multi-iterations", &multi_iterations, + "Number of iterations used to compute multi-threaded average"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals synthesize ", + NULL +}; + +static atomic_t event_count; + +static int process_synthesized_event(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + atomic_inc(&event_count); + return 0; +} + +static int do_run_single_threaded(struct perf_session *session, + struct perf_thread_map *threads, + struct target *target, bool data_mmap) +{ + const unsigned int nr_threads_synthesize = 1; + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev, event_average, event_stddev; + int err; + struct stats time_stats, event_stats; + + init_stats(&time_stats); + init_stats(&event_stats); + + for (i = 0; i < single_iterations; i++) { + atomic_set(&event_count, 0); + gettimeofday(&start, NULL); + err = __machine__synthesize_threads(&session->machines.host, + NULL, + target, threads, + process_synthesized_event, + data_mmap, + nr_threads_synthesize); + if (err) + return err; + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&event_stats, atomic_read(&event_count)); + } + + time_average = avg_stats(&time_stats); + time_stddev = stddev_stats(&time_stats); + printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n", + data_mmap ? "data " : "", time_average, time_stddev); + + event_average = avg_stats(&event_stats); + event_stddev = stddev_stats(&event_stats); + printf(" Average num. events: %.3f (+- %.3f)\n", + event_average, event_stddev); + + printf(" Average time per event %.3f usec\n", + time_average / event_average); + return 0; +} + +static int run_single_threaded(void) +{ + struct perf_session *session; + struct target target = { + .pid = "self", + }; + struct perf_thread_map *threads; + int err; + + perf_set_singlethreaded(); + session = perf_session__new(NULL, NULL); + if (IS_ERR(session)) { + pr_err("Session creation failed.\n"); + return PTR_ERR(session); + } + threads = thread_map__new_by_pid(getpid()); + if (!threads) { + pr_err("Thread map creation failed.\n"); + err = -ENOMEM; + goto err_out; + } + + puts( +"Computing performance of single threaded perf event synthesis by\n" +"synthesizing events on the perf process itself:"); + + err = do_run_single_threaded(session, threads, &target, false); + if (err) + goto err_out; + + err = do_run_single_threaded(session, threads, &target, true); + +err_out: + if (threads) + perf_thread_map__put(threads); + + perf_session__delete(session); + return err; +} + +static int do_run_multi_threaded(struct target *target, + unsigned int nr_threads_synthesize) +{ + struct timeval start, end, diff; + u64 runtime_us; + unsigned int i; + double time_average, time_stddev, event_average, event_stddev; + int err; + struct stats time_stats, event_stats; + struct perf_session *session; + + init_stats(&time_stats); + init_stats(&event_stats); + for (i = 0; i < multi_iterations; i++) { + session = perf_session__new(NULL, NULL); + if (IS_ERR(session)) + return PTR_ERR(session); + + atomic_set(&event_count, 0); + gettimeofday(&start, NULL); + err = __machine__synthesize_threads(&session->machines.host, + NULL, + target, NULL, + process_synthesized_event, + false, + nr_threads_synthesize); + if (err) { + perf_session__delete(session); + return err; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&event_stats, atomic_read(&event_count)); + perf_session__delete(session); + } + + time_average = avg_stats(&time_stats); + time_stddev = stddev_stats(&time_stats); + printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + event_average = avg_stats(&event_stats); + event_stddev = stddev_stats(&event_stats); + printf(" Average num. events: %.3f (+- %.3f)\n", + event_average, event_stddev); + + printf(" Average time per event %.3f usec\n", + time_average / event_average); + return 0; +} + +static int run_multi_threaded(void) +{ + struct target target = { + .cpu_list = "0" + }; + unsigned int nr_threads_synthesize; + int err; + + if (max_threads == UINT_MAX) + max_threads = sysconf(_SC_NPROCESSORS_ONLN); + + puts( +"Computing performance of multi threaded perf event synthesis by\n" +"synthesizing events on CPU 0:"); + + for (nr_threads_synthesize = min_threads; + nr_threads_synthesize <= max_threads; + nr_threads_synthesize++) { + if (nr_threads_synthesize == 1) + perf_set_singlethreaded(); + else + perf_set_multithreaded(); + + printf(" Number of synthesis threads: %u\n", + nr_threads_synthesize); + + err = do_run_multi_threaded(&target, nr_threads_synthesize); + if (err) + return err; + } + perf_set_singlethreaded(); + return 0; +} + +int bench_synthesize(int argc, const char **argv) +{ + int err = 0; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + /* + * If neither single threaded or multi-threaded are specified, default + * to running just single threaded. + */ + if (!run_st && !run_mt) + run_st = true; + + if (run_st) + err = run_single_threaded(); + + if (!err && run_mt) + err = run_multi_threaded(); + + return err; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2cfb6e5da3815220070eef796193080775a93182..e26125c7c1068d3da1f6dcc18caf7e0e2ad03b30 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -535,6 +535,10 @@ int cmd_annotate(int argc, const char **argv) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix", + "Add prefix to source file path names in programs (with --prefix-strip)"), + OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N", + "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, @@ -574,6 +578,9 @@ int cmd_annotate(int argc, const char **argv) annotate.sym_hist_filter = argv[0]; } + if (annotate_check_args(&annotate.opts) < 0) + return -EINVAL; + if (symbol_conf.show_nr_samples && annotate.use_gtk) { pr_err("--show-nr-samples is not available in --gtk mode at this time\n"); return ret; @@ -584,7 +591,7 @@ int cmd_annotate(int argc, const char **argv) data.path = input_name; - annotate.session = perf_session__new(&data, false, &annotate.tool); + annotate.session = perf_session__new(&data, &annotate.tool); if (IS_ERR(annotate.session)) return PTR_ERR(annotate.session); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index c06fe21c86134423fc1da3dd6bd4091a0f9cd2e5..e78e8196ccac4d18660fe8c4a608a515a886c73d 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -76,6 +76,12 @@ static struct bench epoll_benchmarks[] = { }; #endif // HAVE_EVENTFD +static struct bench internals_benchmarks[] = { + { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, + { "inject-build-id", "Benchmark build-id injection", bench_inject_build_id }, + { NULL, NULL, NULL } +}; + struct collection { const char *name; const char *summary; @@ -92,6 +98,7 @@ static struct collection collections[] = { #ifdef HAVE_EVENTFD {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, #endif + { "internals", "Perf-internals benchmarks", internals_benchmarks }, { "all", "All benchmarks", NULL }, { NULL, NULL, NULL } }; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 39efa51d7fb3d0884b97c1a0f59b969332c69cd7..18821556b6031efc830a04506cd5d44190e38216 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -422,7 +422,7 @@ int cmd_buildid_cache(int argc, const char **argv) data.path = missing_filename; data.force = force; - session = perf_session__new(&data, false, NULL); + session = perf_session__new(&data, NULL); if (IS_ERR(session)) return PTR_ERR(session); } diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index e3ef75583514b61ffe571f4258332a5ca937e7d8..27b0ab5d03ef4739fde6469ac94bd5fd3e062890 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -65,7 +65,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) if (filename__fprintf_build_id(input_name, stdout) > 0) goto out; - session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops); + session = perf_session__new(&data, &build_id__mark_dso_hit_ops); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 29d460c30176709f708dd5a3a6be60e39690e589..cd25ba49195c1a927b82ab87a70a1294a3b47b6c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2789,7 +2789,7 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } - session = perf_session__new(&data, 0, &c2c.tool); + session = perf_session__new(&data, &c2c.tool); if (IS_ERR(session)) { err = PTR_ERR(session); pr_debug("Error creating perf session\n"); diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index ca2fb44874e4b1503140af6edc8a744a142eed39..15ca23675ef028104ff031f6e3f79cb2c47dea51 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -7,7 +7,6 @@ #include "debug.h" #include #include "data-convert.h" -#include "data-convert-bt.h" typedef int (*data_cmd_fn_t)(int argc, const char **argv); @@ -55,7 +54,8 @@ static const char * const data_convert_usage[] = { static int cmd_data_convert(int argc, const char **argv) { - const char *to_ctf = NULL; + const char *to_json = NULL; + const char *to_ctf = NULL; struct perf_data_convert_opts opts = { .force = false, .all = false, @@ -63,19 +63,16 @@ static int cmd_data_convert(int argc, const char **argv) const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"), #ifdef HAVE_LIBBABELTRACE_SUPPORT OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), + OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"), #endif OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"), OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"), OPT_END() }; -#ifndef HAVE_LIBBABELTRACE_SUPPORT - pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); - return -1; -#endif - argc = parse_options(argc, argv, options, data_convert_usage, 0); if (argc) { @@ -83,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv) return -1; } + if (to_json && to_ctf) { + pr_err("You cannot specify both --to-ctf and --to-json.\n"); + return -1; + } + if (!to_json && !to_ctf) { + pr_err("You must specify one of --to-ctf or --to-json.\n"); + return -1; + } + + if (to_json) + return bt_convert__perf2json(input_name, to_json, &opts); + if (to_ctf) { #ifdef HAVE_LIBBABELTRACE_SUPPORT return bt_convert__perf2ctf(input_name, to_ctf, &opts); #else - pr_err("The libbabeltrace support is not compiled in.\n"); + pr_err("The libbabeltrace support is not compiled in. perf should be " + "compiled with environment variables LIBBABELTRACE=1 and " + "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); return -1; #endif } diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 48ce1f99c1ed6cc982df439d4a368aedc757f33d..f679bf7c04b403bf6808c364690d0de6685bcad9 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1154,7 +1154,7 @@ static int check_file_brstack(void) int i; data__for_each_file(i, d) { - d->session = perf_session__new(&d->data, false, &pdiff.tool); + d->session = perf_session__new(&d->data, &pdiff.tool); if (IS_ERR(d->session)) { pr_err("Failed to open %s\n", d->data.path); return PTR_ERR(d->session); @@ -1186,7 +1186,7 @@ static int __cmd_diff(void) ret = -EINVAL; data__for_each_file(i, d) { - d->session = perf_session__new(&d->data, false, &pdiff.tool); + d->session = perf_session__new(&d->data, &pdiff.tool); if (IS_ERR(d->session)) { ret = PTR_ERR(d->session); pr_err("Failed to open %s\n", d->data.path); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 440501994931dcb50beb3991c9065a6616311a98..d1d7a8370c095540fe93d8f5b13be72c2d11a041 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -17,6 +17,14 @@ #include "util/data.h" #include "util/debug.h" #include +#include "util/tool.h" + +static int process_header_feature(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + session_done = 1; + return 0; +} static int __cmd_evlist(const char *file_name, struct perf_attr_details *details) { @@ -27,12 +35,20 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details .mode = PERF_DATA_MODE_READ, .force = details->force, }; + struct perf_tool tool = { + /* only needed for pipe mode */ + .attr = perf_event__process_attr, + .feature = process_header_feature, + }; bool has_tracepoint = false; - session = perf_session__new(&data, 0, NULL); + session = perf_session__new(&data, &tool); if (IS_ERR(session)) return PTR_ERR(session); + if (data.is_pipe) + perf_session__process_events(session); + evlist__for_each_entry(session->evlist, pos) { perf_evsel__fprintf(pos, details, stdout); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6b6bb86d62d3598d3193dd1a6ae59f8e8dae3e15..a45fa511d4e85995841220c07cddde02cee1c972 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -10,6 +10,7 @@ #include "util/color.h" #include "util/dso.h" +#include "util/vdso.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/map.h" @@ -23,11 +24,16 @@ #include "util/symbol.h" #include "util/synthetic-events.h" #include "util/thread.h" -#include +#include "util/namespaces.h" + +#include +#include #include +#include /* To get things like MAP_HUGETLB even on older libc headers */ #include +#include #include #include @@ -35,16 +41,21 @@ struct perf_inject { struct perf_tool tool; struct perf_session *session; bool build_ids; + bool build_id_all; bool sched_stat; bool have_auxtrace; bool strip; bool jit_mode; + bool in_place_update; + bool in_place_update_dry_run; + bool is_pipe; const char *input_name; struct perf_data output; u64 bytes_written; u64 aux_id; struct list_head samples; struct itrace_synth_opts itrace_synth_opts; + struct perf_file_section secs[HEADER_FEAT_BITS]; }; struct event_entry { @@ -53,6 +64,9 @@ struct event_entry { union perf_event event[0]; }; +static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, + struct machine *machine, u8 cpumode, u32 flags); + static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) { ssize_t size; @@ -108,7 +122,7 @@ static int perf_event__repipe_attr(struct perf_tool *tool, if (ret) return ret; - if (!inject->output.is_pipe) + if (!inject->is_pipe) return 0; return perf_event__repipe_synth(tool, event); @@ -274,6 +288,68 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, } #endif +static struct dso *findnew_dso(int pid, int tid, const char *filename, + struct dso_id *id, struct machine *machine) +{ + struct thread *thread; + struct nsinfo *nsi = NULL; + struct nsinfo *nnsi; + struct dso *dso; + bool vdso; + + thread = machine__findnew_thread(machine, pid, tid); + if (thread == NULL) { + pr_err("cannot find or create a task %d/%d.\n", tid, pid); + return NULL; + } + + vdso = is_vdso_map(filename); + nsi = nsinfo__get(thread->nsinfo); + + if (vdso) { + /* The vdso maps are always on the host and not the + * container. Ensure that we don't use setns to look + * them up. + */ + nnsi = nsinfo__copy(nsi); + if (nnsi) { + nsinfo__put(nsi); + nnsi->need_setns = false; + nsi = nnsi; + } + dso = machine__findnew_vdso(machine, thread); + } else { + dso = machine__findnew_dso_id(machine, filename, id); + } + + if (dso) + dso->nsinfo = nsi; + else + nsinfo__put(nsi); + + thread__put(thread); + return dso; +} + +static int perf_event__repipe_buildid_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct dso *dso; + + dso = findnew_dso(event->mmap.pid, event->mmap.tid, + event->mmap.filename, NULL, machine); + + if (dso && !dso->hit) { + dso->hit = 1; + dso__inject_build_id(dso, tool, machine, sample->cpumode, 0); + dso__put(dso); + } + + return perf_event__repipe(tool, event, sample, machine); +} + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -312,6 +388,34 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, } #endif +static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct dso_id dso_id = { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }; + struct dso *dso; + + dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, + event->mmap2.filename, &dso_id, machine); + + if (dso && !dso->hit) { + dso->hit = 1; + dso__inject_build_id(dso, tool, machine, sample->cpumode, + event->mmap2.flags); + dso__put(dso); + } + + perf_event__repipe(tool, event, sample, machine); + + return 0; +} + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -387,34 +491,38 @@ static int perf_event__repipe_id_index(struct perf_session *session, static int dso__read_build_id(struct dso *dso) { + struct nscookie nsc; + if (dso->has_build_id) return 0; + nsinfo__mountns_enter(dso->nsinfo, &nsc); if (filename__read_build_id(dso->long_name, dso->build_id, sizeof(dso->build_id)) > 0) { dso->has_build_id = true; - return 0; } + nsinfo__mountns_exit(&nsc); - return -1; + return dso->has_build_id ? 0 : -1; } static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, - struct machine *machine) + struct machine *machine, u8 cpumode, u32 flags) { - u16 misc = PERF_RECORD_MISC_USER; int err; + if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB) + return 0; + if (is_no_dso_memory(dso->long_name)) + return 0; + if (dso__read_build_id(dso) < 0) { pr_debug("no build_id found for %s\n", dso->long_name); return -1; } - if (dso->kernel) - misc = PERF_RECORD_MISC_KERNEL; - - err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe, - machine); + err = perf_event__synthesize_build_id(tool, dso, cpumode, + perf_event__repipe, machine); if (err) { pr_err("Can't synthesize build_id event for %s\n", dso->long_name); return -1; @@ -423,11 +531,10 @@ static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, return 0; } -static int perf_event__inject_buildid(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct evsel *evsel __maybe_unused, - struct machine *machine) +int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel __maybe_unused, + struct machine *machine) { struct addr_location al; struct thread *thread; @@ -442,19 +549,8 @@ static int perf_event__inject_buildid(struct perf_tool *tool, if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { if (!al.map->dso->hit) { al.map->dso->hit = 1; - if (map__load(al.map) >= 0) { - dso__inject_build_id(al.map->dso, tool, machine); - /* - * If this fails, too bad, let the other side - * account this as unresolved. - */ - } else { -#ifdef HAVE_LIBELF_SUPPORT - pr_warning("no symbols found in %s, maybe " - "install a debug package?\n", - al.map->dso->long_name); -#endif - } + dso__inject_build_id(al.map->dso, tool, machine, + sample->cpumode, al.map->flags); } } @@ -630,18 +726,166 @@ static void strip_fini(struct perf_inject *inject) } } +static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) +{ + struct perf_inject *inject = opt->value; + const char *args; + char *dry_run; + + if (unset) + return 0; + + inject->itrace_synth_opts.set = true; + inject->itrace_synth_opts.vm_time_correlation = true; + inject->in_place_update = true; + + if (!str) + return 0; + + dry_run = skip_spaces(str); + if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { + inject->itrace_synth_opts.vm_tm_corr_dry_run = true; + inject->in_place_update_dry_run = true; + args = dry_run + strlen("dry-run"); + } else { + args = str; + } + + inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); + + return inject->itrace_synth_opts.vm_tm_corr_args ? 0 : -ENOMEM; +} + +static int save_section_info_cb(struct perf_file_section *section, + struct perf_header *ph __maybe_unused, + int feat, int fd __maybe_unused, void *data) +{ + struct perf_inject *inject = data; + + inject->secs[feat] = *section; + return 0; +} + +static int save_section_info(struct perf_inject *inject) +{ + struct perf_header *header = &inject->session->header; + int fd = perf_data__fd(inject->session->data); + + return perf_header__process_sections(header, fd, inject, save_section_info_cb); +} + +static bool keep_feat(int feat) +{ + switch (feat) { + /* Keep original information that describes the machine or software */ + case HEADER_TRACING_DATA: + case HEADER_HOSTNAME: + case HEADER_OSRELEASE: + case HEADER_VERSION: + case HEADER_ARCH: + case HEADER_NRCPUS: + case HEADER_CPUDESC: + case HEADER_CPUID: + case HEADER_TOTAL_MEM: + case HEADER_CPU_TOPOLOGY: + case HEADER_NUMA_TOPOLOGY: + case HEADER_PMU_MAPPINGS: + case HEADER_CACHE: + case HEADER_MEM_TOPOLOGY: + case HEADER_CLOCKID: + case HEADER_BPF_PROG_INFO: + case HEADER_BPF_BTF: + case HEADER_CPU_PMU_CAPS: + case HEADER_CLOCK_DATA: + case HEADER_HYBRID_TOPOLOGY: + case HEADER_PMU_CAPS: + return true; + /* Information that can be updated */ + case HEADER_BUILD_ID: + case HEADER_CMDLINE: + case HEADER_EVENT_DESC: + case HEADER_BRANCH_STACK: + case HEADER_GROUP_DESC: + case HEADER_AUXTRACE: + case HEADER_STAT: + case HEADER_SAMPLE_TIME: + case HEADER_DIR_FORMAT: + case HEADER_COMPRESSED: + default: + return false; + }; +} + +static int read_file(int fd, u64 offs, void *buf, size_t sz) +{ + ssize_t ret = preadn(fd, buf, sz, offs); + + if (ret < 0) + return -errno; + if ((size_t)ret != sz) + return -EINVAL; + return 0; +} + +static int feat_copy(struct perf_inject *inject, int feat, struct feat_writer *fw) +{ + int fd = perf_data__fd(inject->session->data); + u64 offs = inject->secs[feat].offset; + size_t sz = inject->secs[feat].size; + void *buf = malloc(sz); + int ret; + + if (!buf) + return -ENOMEM; + + ret = read_file(fd, offs, buf, sz); + if (ret) + goto out_free; + + ret = fw->write(fw, buf, sz); +out_free: + free(buf); + return ret; +} + +struct inject_fc { + struct feat_copier fc; + struct perf_inject *inject; +}; + +static int feat_copy_cb(struct feat_copier *fc, int feat, struct feat_writer *fw) +{ + struct inject_fc *inj_fc = container_of(fc, struct inject_fc, fc); + struct perf_inject *inject = inj_fc->inject; + int ret; + + if (!inject->secs[feat].offset || + !keep_feat(feat)) + return 0; + + ret = feat_copy(inject, feat, fw); + if (ret < 0) + return ret; + + return 1; /* Feature section copied */ +} + +static int output_fd(struct perf_inject *inject) +{ + return inject->in_place_update ? -1 : perf_data__fd(&inject->output); +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; - struct perf_data *data_out = &inject->output; - int fd = perf_data__fd(data_out); + int fd = output_fd(inject); u64 output_data_offset; signal(SIGINT, sig_handler); if (inject->build_ids || inject->sched_stat || - inject->itrace_synth_opts.set) { + inject->itrace_synth_opts.set || inject->build_id_all) { inject->tool.mmap = perf_event__repipe_mmap; inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; @@ -650,7 +894,10 @@ static int __cmd_inject(struct perf_inject *inject) output_data_offset = session->header.data_offset; - if (inject->build_ids) { + if (inject->build_id_all) { + inject->tool.mmap = perf_event__repipe_buildid_mmap; + inject->tool.mmap2 = perf_event__repipe_buildid_mmap2; + } else if (inject->build_ids) { inject->tool.sample = perf_event__inject_buildid; } else if (inject->sched_stat) { struct evsel *evsel; @@ -668,6 +915,15 @@ static int __cmd_inject(struct perf_inject *inject) else if (!strncmp(name, "sched:sched_stat_", 17)) evsel->handler = perf_inject__sched_stat; } + } else if (inject->itrace_synth_opts.vm_time_correlation) { + session->itrace_synth_opts = &inject->itrace_synth_opts; + memset(&inject->tool, 0, sizeof(inject->tool)); + inject->tool.id_index = perf_event__process_id_index; + inject->tool.auxtrace_info = perf_event__process_auxtrace_info; + inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.auxtrace_error = perf_event__process_auxtrace_error; + inject->tool.ordered_events = true; + inject->tool.ordering_requires_timestamps = true; } else if (inject->itrace_synth_opts.set) { session->itrace_synth_opts = &inject->itrace_synth_opts; inject->itrace_synth_opts.inject = true; @@ -690,14 +946,19 @@ static int __cmd_inject(struct perf_inject *inject) if (!inject->itrace_synth_opts.set) auxtrace_index__free(&session->auxtrace_index); - if (!data_out->is_pipe) + if (!inject->is_pipe && !inject->in_place_update) lseek(fd, output_data_offset, SEEK_SET); ret = perf_session__process_events(session); if (ret) return ret; - if (!data_out->is_pipe) { + if (!inject->is_pipe && !inject->in_place_update) { + struct inject_fc inj_fc = { + .fc.copy = feat_copy_cb, + .inject = inject, + }; + if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); @@ -734,7 +995,7 @@ static int __cmd_inject(struct perf_inject *inject) } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; - perf_session__write_header(session, session->evlist, fd, true); + perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc); } return ret; @@ -774,16 +1035,21 @@ int cmd_inject(int argc, const char **argv) .output = { .path = "-", .mode = PERF_DATA_MODE_WRITE, + .use_stdio = true, }, }; struct perf_data data = { .mode = PERF_DATA_MODE_READ, + .use_stdio = true, }; int ret; + bool repipe = true; struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), + OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all, + "Inject build-ids of all DSOs into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", "input file name"), OPT_STRING('o', "output", &inject.output.path, "file", @@ -805,6 +1071,9 @@ int cmd_inject(int argc, const char **argv) itrace_parse_synth_opts), OPT_BOOLEAN(0, "strip", &inject.strip, "strip non-synthesized events (use with --itrace)"), + OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", + "correlate time between VM guests and the host", + parse_vm_time_correlation), OPT_END() }; const char * const inject_usage[] = { @@ -827,15 +1096,42 @@ int cmd_inject(int argc, const char **argv) return -1; } - if (perf_data__open(&inject.output)) { + if (inject.in_place_update) { + if (!strcmp(inject.input_name, "-")) { + pr_err("Input file name required for in-place updating\n"); + return -1; + } + if (strcmp(inject.output.path, "-")) { + pr_err("Output file name must not be specified for in-place updating\n"); + return -1; + } + if (!data.force && !inject.in_place_update_dry_run) { + pr_err("The input file would be updated in place, " + "the --force option is required.\n"); + return -1; + } + if (!inject.in_place_update_dry_run) + data.in_place_update = true; + } else if (perf_data__open(&inject.output)) { perror("failed to create output file"); return -1; } - inject.tool.ordered_events = inject.sched_stat; - data.path = inject.input_name; - inject.session = perf_session__new(&data, inject.output.is_pipe, &inject.tool); + if (!strcmp(inject.input_name, "-") || inject.output.is_pipe) { + inject.is_pipe = true; + /* + * Do not repipe header when input is a regular file + * since either it can rewrite the header at the end + * or write a new pipe header. + */ + if (strcmp(inject.input_name, "-")) + repipe = false; + } + + inject.session = __perf_session__new(&data, repipe, + output_fd(&inject), + &inject.tool); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); goto out_close_output; @@ -844,7 +1140,27 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); - if (inject.build_ids) { + /* Save original section info before feature bits change */ + ret = save_section_info(&inject); + if (ret) + goto out_delete; + + if (!data.is_pipe && inject.output.is_pipe) { + ret = perf_header__write_pipe(perf_data__fd(&inject.output)); + if (ret < 0) { + pr_err("Couldn't write a new pipe header.\n"); + goto out_delete; + } + + ret = perf_event__synthesize_for_pipe(&inject.tool, + inject.session, + &inject.output, + perf_event__repipe); + if (ret < 0) + goto out_delete; + } + + if (inject.build_ids && !inject.build_id_all) { /* * to make sure the mmap records are ordered correctly * and so that the correct especially due to jitted code @@ -854,6 +1170,11 @@ int cmd_inject(int argc, const char **argv) inject.tool.ordered_events = true; inject.tool.ordering_requires_timestamps = true; } + + if (inject.sched_stat) { + inject.tool.ordered_events = true; + } + #ifdef HAVE_JITDUMP if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; @@ -876,6 +1197,7 @@ int cmd_inject(int argc, const char **argv) out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + free(inject.itrace_synth_opts.vm_tm_corr_args); out_close_output: perf_data__close(&inject.output); return ret; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9661671cc26ec29029b895aa3a4c630a31e65cd0..afa36cbd34e351bb9b4c69c83db6e598dd2b31ed 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1957,7 +1957,7 @@ int cmd_kmem(int argc, const char **argv) data.path = input_name; - kmem_session = session = perf_session__new(&data, false, &perf_kmem); + kmem_session = session = perf_session__new(&data, &perf_kmem); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index a7ee11cd57c0ef4c771e20e2080cc71a89e8375e..1b49ae6001d64ebf956dfceac05df62a9d26fed6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1094,7 +1094,7 @@ static int read_events(struct perf_kvm_stat *kvm) }; kvm->tool = eops; - kvm->session = perf_session__new(&file, false, &kvm->tool); + kvm->session = perf_session__new(&file, &kvm->tool); if (IS_ERR(kvm->session)) { pr_err("Initializing perf session failed\n"); return PTR_ERR(kvm->session); @@ -1449,7 +1449,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * perf session */ - kvm->session = perf_session__new(&data, false, &kvm->tool); + kvm->session = perf_session__new(&data, &kvm->tool); if (IS_ERR(kvm->session)) { err = PTR_ERR(kvm->session); goto out; diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index d06665077dfe0289f1fb6e28a7e69cc6f37b6fe7..0b29babb38bc4b03565f571918ee5e011f2ebcc6 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -872,7 +872,7 @@ static int __cmd_report(bool display_info) .force = force, }; - session = perf_session__new(&data, false, &eops); + session = perf_session__new(&data, &eops); if (IS_ERR(session)) { pr_err("Initializing perf session failed\n"); return PTR_ERR(session); diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index a13f5817d6fca4abac7cdfa80f5b90b1a8c3a097..a0d93d7e0f63a1fb639bf0ac441bfa2bed1bb6c3 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -247,8 +247,7 @@ static int report_raw_events(struct perf_mem *mem) .force = mem->force, }; int ret; - struct perf_session *session = perf_session__new(&data, false, - &mem->tool); + struct perf_session *session = perf_session__new(&data, &mem->tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4f30f7c1e079c618bd51c030818f536ba4340f2f..2af9b868581789b312bd0803eab4e98bb7a271ba 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -46,6 +46,7 @@ #include "util/bpf-event.h" #include "util/pmu-hybrid.h" #include "util/evlist-hybrid.h" +#include "util/clockid.h" #include "asm/bug.h" #include "perf.h" @@ -62,6 +63,7 @@ #include #include #include +#include struct switch_output { bool enabled; @@ -1074,6 +1076,9 @@ static void record__init_features(struct record *rec) if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) perf_header__clear_feat(&session->header, HEADER_CLOCKID); + if (!rec->opts.use_clockid) + perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); if (!record__comp_enabled(rec)) perf_header__clear_feat(&session->header, HEADER_COMPRESSED); @@ -1244,48 +1249,18 @@ static int record__synthesize(struct record *rec, bool tail) struct perf_data *data = &rec->data; struct record_opts *opts = &rec->opts; struct perf_tool *tool = &rec->tool; - int fd = perf_data__fd(data); int err = 0; if (rec->opts.tail_synthesize != tail) return 0; if (data->is_pipe) { - /* - * We need to synthesize events first, because some - * features works on top of them (on report side). - */ - err = perf_event__synthesize_attrs(tool, rec->evlist, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - goto out; - } - - err = perf_event__synthesize_features(tool, session, rec->evlist, + err = perf_event__synthesize_for_pipe(tool, session, data, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize features.\n"); - return err; - } + if (err < 0) + goto out; - if (have_tracepoints(&rec->evlist->core.entries)) { - /* - * FIXME err <= 0 here actually means that - * there were no tracepoints so its not really - * an error, just that we don't need to - * synthesize anything. We really have to - * return this more properly and also - * propagate errors that now are calling die() - */ - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, - process_synthesized_event); - if (err <= 0) { - pr_err("Couldn't record tracing data.\n"); - goto out; - } - rec->bytes_written += err; - } + rec->bytes_written += err; } err = perf_event__synth_time_conv(record__pick_pc(rec), tool, @@ -1362,6 +1337,40 @@ static int record__synthesize(struct record *rec, bool tail) return err; } +static int record__init_clock(struct record *rec) +{ + struct perf_session *session = rec->session; + struct timespec ref_clockid; + struct timeval ref_tod; + u64 ref; + + if (!rec->opts.use_clockid) + return 0; + + session->header.env.clock.clockid = rec->opts.clockid; + + if (gettimeofday(&ref_tod, NULL) != 0) { + pr_err("gettimeofday failed, cannot set reference time.\n"); + return -1; + } + + if (clock_gettime(rec->opts.clockid, &ref_clockid)) { + pr_err("clock_gettime failed, cannot set reference time.\n"); + return -1; + } + + ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC + + (u64) ref_tod.tv_usec * NSEC_PER_USEC; + + session->header.env.clock.tod_ns = ref; + + ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC + + (u64) ref_clockid.tv_nsec; + + session->header.env.clock.clockid_ns = ref; + return 0; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1404,7 +1413,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGUSR2, SIG_IGN); } - session = perf_session__new(data, false, tool); + session = perf_session__new(data, tool); if (IS_ERR(session)) { pr_err("Perf session creation failed.\n"); return PTR_ERR(session); @@ -1421,6 +1430,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) session->header.env.comp_type = PERF_COMP_ZSTD; session->header.env.comp_level = rec->opts.comp_level; + if (record__init_clock(rec)) + return -1; + record__init_features(rec); if (rec->opts.use_clockid && rec->opts.clockid_res_ns) @@ -1872,103 +1884,6 @@ static int perf_record_config(const char *var, const char *value, void *cb) return 0; } -struct clockid_map { - const char *name; - int clockid; -}; - -#define CLOCKID_MAP(n, c) \ - { .name = n, .clockid = (c), } - -#define CLOCKID_END { .name = NULL, } - - -/* - * Add the missing ones, we need to build on many distros... - */ -#ifndef CLOCK_MONOTONIC_RAW -#define CLOCK_MONOTONIC_RAW 4 -#endif -#ifndef CLOCK_BOOTTIME -#define CLOCK_BOOTTIME 7 -#endif -#ifndef CLOCK_TAI -#define CLOCK_TAI 11 -#endif - -static const struct clockid_map clockids[] = { - /* available for all events, NMI safe */ - CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), - CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), - - /* available for some events */ - CLOCKID_MAP("realtime", CLOCK_REALTIME), - CLOCKID_MAP("boottime", CLOCK_BOOTTIME), - CLOCKID_MAP("tai", CLOCK_TAI), - - /* available for the lazy */ - CLOCKID_MAP("mono", CLOCK_MONOTONIC), - CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), - CLOCKID_MAP("real", CLOCK_REALTIME), - CLOCKID_MAP("boot", CLOCK_BOOTTIME), - - CLOCKID_END, -}; - -static int get_clockid_res(clockid_t clk_id, u64 *res_ns) -{ - struct timespec res; - - *res_ns = 0; - if (!clock_getres(clk_id, &res)) - *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; - else - pr_warning("WARNING: Failed to determine specified clock resolution.\n"); - - return 0; -} - -static int parse_clockid(const struct option *opt, const char *str, int unset) -{ - struct record_opts *opts = (struct record_opts *)opt->value; - const struct clockid_map *cm; - const char *ostr = str; - - if (unset) { - opts->use_clockid = 0; - return 0; - } - - /* no arg passed */ - if (!str) - return 0; - - /* no setting it twice */ - if (opts->use_clockid) - return -1; - - opts->use_clockid = true; - - /* if its a number, we're done */ - if (sscanf(str, "%d", &opts->clockid) == 1) - return get_clockid_res(opts->clockid, &opts->clockid_res_ns); - - /* allow a "CLOCK_" prefix to the name */ - if (!strncasecmp(str, "CLOCK_", 6)) - str += 6; - - for (cm = clockids; cm->name; cm++) { - if (!strcasecmp(str, cm->name)) { - opts->clockid = cm->clockid; - return get_clockid_res(opts->clockid, - &opts->clockid_res_ns); - } - } - - opts->use_clockid = false; - ui__warning("unknown clockid %s, check man page\n", ostr); - return -1; -} static int record__parse_affinity(const struct option *opt, const char *str, int unset) { diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f5c5e0cdfcb3fc351fee09b8d1edcd73d7dc472e..137a2476c6824e5d0894e3b866d1eb25d982fb9b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -740,7 +740,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) map->prot & PROT_EXEC ? 'x' : '-', map->flags & MAP_SHARED ? 's' : 'p', map->pgoff, - map->ino, map->dso->name); + map->dso->id.ino, map->dso->name); } return printed; @@ -1071,6 +1071,8 @@ int cmd_report(int argc, const char **argv) .socket_filter = -1, .annotation_opts = annotation__default_options, }; + char *sort_order_help = sort_help("sort by key(s):"); + char *field_order_help = sort_help("output field(s): overhead period sample "); const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), @@ -1105,9 +1107,9 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "header-only", &report.header_only, "Show only data header."), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - sort_help("sort by key(s):")), + sort_order_help), OPT_STRING('F', "fields", &field_order, "key[,keys...]", - sort_help("output field(s): overhead period sample ")), + field_order_help), OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, @@ -1164,6 +1166,10 @@ int cmd_report(int argc, const char **argv) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix", + "Add prefix to source file path names in programs (with --prefix-strip)"), + OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N", + "Strip first N entries of source file path name in programs (with --prefix)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, @@ -1223,11 +1229,11 @@ int cmd_report(int argc, const char **argv) char sort_tmp[128]; if (ret < 0) - return ret; + goto exit; ret = perf_config(report__config, &report); if (ret) - return ret; + goto exit; argc = parse_options(argc, argv, options, report_usage, 0); if (argc) { @@ -1241,6 +1247,11 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } + if (annotate_check_args(&report.annotation_opts) < 0) { + ret = -EINVAL; + goto exit; + } + if (report.mmaps_mode) report.tasks_mode = true; @@ -1250,12 +1261,14 @@ int cmd_report(int argc, const char **argv) if (symbol_conf.vmlinux_name && access(symbol_conf.vmlinux_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (symbol_conf.kallsyms_name && access(symbol_conf.kallsyms_name, R_OK)) { pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); - return -EINVAL; + ret = -EINVAL; + goto exit; } if (report.inverted_callchain) @@ -1278,13 +1291,15 @@ int cmd_report(int argc, const char **argv) data.force = symbol_conf.force; repeat: - session = perf_session__new(&data, false, &report.tool); - if (IS_ERR(session)) - return PTR_ERR(session); + session = perf_session__new(&data, &report.tool); + if (IS_ERR(session)) { + ret = PTR_ERR(session); + goto exit; + } ret = evswitch__init(&report.evswitch, session->evlist, stderr); if (ret) - return ret; + goto exit; if (zstd_init(&(session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed. Reported data may be incomplete.\n"); @@ -1491,5 +1506,8 @@ int cmd_report(int argc, const char **argv) } zstd_fini(&(session->zstd_data)); perf_session__delete(session); +exit: + free(sort_order_help); + free(field_order_help); return ret; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5cacc4f84c8d9b6d76ca4203533187a739238811..817a3efd22b4dea410db741e1da830739c376c0e 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1798,7 +1798,7 @@ static int perf_sched__read_events(struct perf_sched *sched) }; int rc = -1; - session = perf_session__new(&data, false, &sched->tool); + session = perf_session__new(&data, &sched->tool); if (IS_ERR(session)) { pr_debug("Error creating perf session"); return PTR_ERR(session); @@ -2990,7 +2990,7 @@ static int perf_sched__timehist(struct perf_sched *sched) symbol_conf.use_callchain = sched->show_callchain; - session = perf_session__new(&data, false, &sched->tool); + session = perf_session__new(&data, &sched->tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb64dbfe043a539524eb4f4bb39545af2c71ca80..e67a534836a76e3efb6ad857a82bac156f13ed90 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3100,7 +3100,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array, int num, char *temp; int i = 0; - session = perf_session__new(&data, false, NULL); + session = perf_session__new(&data, NULL); if (IS_ERR(session)) return PTR_ERR(session); @@ -3766,7 +3766,7 @@ int cmd_script(int argc, const char **argv) use_browser = 0; } - session = perf_session__new(&data, false, &script.tool); + session = perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 40c6c2d822e19a0edf0fa019a1ed3489fdf0943b..748be8cf5a7383d7f9fe858dfefae29f0324e3cc 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1537,7 +1537,7 @@ static int __cmd_record(int argc, const char **argv) return -1; } - session = perf_session__new(data, false, NULL); + session = perf_session__new(data, NULL); if (IS_ERR(session)) { pr_err("Perf session creation failed\n"); return PTR_ERR(session); @@ -1736,7 +1736,7 @@ static int __cmd_report(int argc, const char **argv) perf_stat.data.path = input_name; perf_stat.data.mode = PERF_DATA_MODE_READ; - session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); + session = perf_session__new(&perf_stat.data, &perf_stat.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 9e84fae9b096c9863585a9a35c175eea03946705..eda2892aa261c508be68f0549e531b3336cb0bfc 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1598,8 +1598,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) .force = tchart->force, }; - struct perf_session *session = perf_session__new(&data, false, - &tchart->tool); + struct perf_session *session = perf_session__new(&data, &tchart->tool); int ret = -EINVAL; if (IS_ERR(session)) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0fec22232daf60045344e2dd7a49d02941cd8463..3892e56e72483d81f6786c9b04434caf9fef8185 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1513,6 +1513,10 @@ int cmd_top(int argc, const char **argv) "objdump binary to use for disassembly and annotations"), OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix", + "Add prefix to source file path names in programs (with --prefix-strip)"), + OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N", + "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", "Don't show entries under that percent", parse_percent_limit), @@ -1567,6 +1571,9 @@ int cmd_top(int argc, const char **argv) if (argc) usage_with_options(top_usage, options); + if (annotate_check_args(&top.annotation_opts) < 0) + goto out_delete_evlist; + if (!top.evlist->core.nr_entries && evlist__add_default(top.evlist) < 0) { pr_err("Not enough memory for event selector list\n"); @@ -1676,7 +1683,7 @@ int cmd_top(int argc, const char **argv) signal(SIGWINCH, winch_sig); } - top.session = perf_session__new(NULL, false, NULL); + top.session = perf_session__new(NULL, NULL); if (IS_ERR(top.session)) { status = PTR_ERR(top.session); goto out_delete_evlist; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 1f40c683bba8296ea4252692b4e5b367b7ba6459..06edb52090d4663d4235ce1429545689c838bd48 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3636,7 +3636,7 @@ static int trace__replay(struct trace *trace) /* add tid to output */ trace->multiple_threads = true; - session = perf_session__new(&data, false, &trace->tool); + session = perf_session__new(&data, &trace->tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/lib/include/internal/lib.h b/tools/perf/lib/include/internal/lib.h index 5175d491b2d4962bb030fc5afdd0477286f5768e..85471a4b900f73aa5095d9941d67025d53880612 100644 --- a/tools/perf/lib/include/internal/lib.h +++ b/tools/perf/lib/include/internal/lib.h @@ -9,4 +9,6 @@ extern unsigned int page_size; ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); +ssize_t preadn(int fd, void *buf, size_t n, off_t offs); + #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/perf/lib/lib.c b/tools/perf/lib/lib.c index 18658931fc7145f9788ca27279a381591c68e12e..696fb0ea67c6e31e35b3bd9ab8055f121051f707 100644 --- a/tools/perf/lib/lib.c +++ b/tools/perf/lib/lib.c @@ -38,6 +38,26 @@ ssize_t readn(int fd, void *buf, size_t n) return ion(true, fd, buf, n); } +ssize_t preadn(int fd, void *buf, size_t n, off_t offs) +{ + size_t left = n; + + while (left) { + ssize_t ret = pread(fd, buf, left, offs); + + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + return ret; + + left -= ret; + buf += ret; + offs += ret; + } + + return n; +} + /* * Write exactly 'n' bytes or return an error. */ diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index f4a2c0df09549e4cccf1f478ac8bd1fccc38043c..c8af1310bd7c7b7073bfdae9d3c80aaf2399eaa8 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -37,7 +37,7 @@ static int session_write_header(char *path) .mode = PERF_DATA_MODE_WRITE, }; - session = perf_session__new(&data, false, NULL); + session = perf_session__new(&data, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); session->evlist = perf_evlist__new_default(); @@ -67,7 +67,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) }; int i; - session = perf_session__new(&data, false, NULL); + session = perf_session__new(&data, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); /* On platforms with large numbers of CPUs process_cpu_topology() diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d9df8a2a83cded5dbd2dc745cccf4ebcd00e4e20..ae0dafa75a697eedd7e1afa01c207caa3adabc59 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -128,6 +128,7 @@ perf-y += time-utils.o perf-y += expr-bison.o perf-y += branch.o perf-y += mem2node.o +perf-y += clockid.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o @@ -155,6 +156,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +perf-y += data-convert-json.o perf-y += scripting-engines/ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0344cb86fa9554efbb9361380dad7ccc1a0913b6..ac7a5de062ccd14603e6a65634daa39d4242bf84 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1486,44 +1486,26 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. */ -static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, +static int symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, - int *line_nr) + char *parsed_line, int *line_nr) { struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; - char *line = NULL, *parsed_line, *tmp, *tmp2; - size_t line_len; + char *tmp; s64 line_ip, offset = -1; regmatch_t match[2]; - if (getline(&line, &line_len, file) < 0) - return -1; - - if (!line) - return -1; - - line_ip = -1; - parsed_line = strim(line); - /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); return 0; } - tmp = skip_spaces(parsed_line); - if (*tmp) { - /* - * Parse hexa addresses followed by ':' - */ - line_ip = strtoull(tmp, &tmp2, 16); - if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0') - line_ip = -1; - } - - if (line_ip != -1) { + /* Process hex address followed by ':'. */ + line_ip = strtoull(parsed_line, &tmp, 16); + if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') { u64 start = map__rip_2objdump(map, sym->start), end = map__rip_2objdump(map, sym->end); @@ -1531,7 +1513,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, if ((u64)line_ip < start || (u64)line_ip >= end) offset = -1; else - parsed_line = tmp2 + 1; + parsed_line = tmp + 1; } args->offset = offset; @@ -1540,7 +1522,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, args->ms.sym = sym; dl = disasm_line__new(args); - free(line); (*line_nr)++; if (dl == NULL) @@ -1858,6 +1839,67 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, } #endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +/* + * Possibly create a new version of line with tabs expanded. Returns the + * existing or new line, storage is updated if a new line is allocated. If + * allocation fails then NULL is returned. + */ +static char *expand_tabs(char *line, char **storage, size_t *storage_len) +{ + size_t i, src, dst, len, new_storage_len, num_tabs; + char *new_line; + size_t line_len = strlen(line); + + for (num_tabs = 0, i = 0; i < line_len; i++) + if (line[i] == '\t') + num_tabs++; + + if (num_tabs == 0) + return line; + + /* + * Space for the line and '\0', less the leading and trailing + * spaces. Each tab may introduce 7 additional spaces. + */ + new_storage_len = line_len + 1 + (num_tabs * 7); + + new_line = malloc(new_storage_len); + if (new_line == NULL) { + pr_err("Failure allocating memory for tab expansion\n"); + return NULL; + } + + /* + * Copy regions starting at src and expand tabs. If there are two + * adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces + * are inserted. + */ + for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) { + if (line[i] == '\t') { + len = i - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst++] = ' '; + while (dst % 8 != 0) + new_line[dst++] = ' '; + src = i + 1; + num_tabs--; + } + } + + /* Expand the last region. */ + len = line_len + 1 - src; + memcpy(&new_line[dst], &line[src], len); + dst += len; + new_line[dst] = '\0'; + + free(*storage); + *storage = new_line; + *storage_len = new_storage_len; + return new_line; + +} + static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *opts = args->options; @@ -1873,6 +1915,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) int lineno = 0; int nline; pid_t pid; + char *line; + size_t line_len; int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); if (err) @@ -1911,14 +1955,20 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C \"$1\" 2>/dev/null|grep -v \"$1:\"|expand", + " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), - opts->show_asm_raw ? "" : "--no-show-raw", - opts->annotate_src ? "-S" : ""); + opts->show_asm_raw ? "" : "--no-show-raw-insn", + opts->annotate_src ? "-S" : "", + opts->prefix ? "--prefix " : "", + opts->prefix ? '"' : ' ', + opts->prefix ?: "", + opts->prefix ? '"' : ' ', + opts->prefix_strip ? "--prefix-strip=" : "", + opts->prefix_strip ?: ""); if (err < 0) { pr_err("Failure allocating memory for the command to run\n"); @@ -1961,18 +2011,40 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) goto out_free_command; } + /* Storage for getline. */ + line = NULL; + line_len = 0; + nline = 0; while (!feof(file)) { + const char *match; + char *expanded_line; + + if (getline(&line, &line_len, file) < 0 || !line) + break; + + /* Skip lines containing "filename:" */ + match = strstr(line, symfs_filename); + if (match && match[strlen(symfs_filename)] == ':') + continue; + + expanded_line = strim(line); + expanded_line = expand_tabs(expanded_line, &line, &line_len); + if (!expanded_line) + break; + /* * The source code line number (lineno) needs to be kept in * across calls to symbol__parse_objdump_line(), so that it * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) + if (symbol__parse_objdump_line(sym, args, expanded_line, + &lineno) < 0) break; nline++; } + free(line); if (nline == 0) pr_err("No output from %s\n", command); @@ -3134,3 +3206,12 @@ int annotate_parse_percent_type(const struct option *opt, const char *_str, free(str1); return err; } + +int annotate_check_args(struct annotation_options *args) +{ + if (args->prefix_strip && !args->prefix) { + pr_err("--prefix-strip requires --prefix\n"); + return -1; + } + return 0; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d76fd0e81f4658ed2b1f3e23874d031b094bdaf6..47f3d2fd20ced6016480463ab7d05f72b4c1fa56 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -93,6 +93,8 @@ struct annotation_options { int context; const char *objdump_path; const char *disassembler_style; + const char *prefix; + const char *prefix_strip; unsigned int percent_type; }; @@ -419,4 +421,7 @@ void annotation_config__init(void); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); + +int annotate_check_args(struct annotation_options *args); + #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 39ef333ddb6c01e5b2e7566f95909f2d09b36bb9..15d24882ce400a6e020e5636d337691536aa2cc9 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -82,6 +82,9 @@ enum itrace_period_type { * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events + * @vm_time_correlation: perform VM Time Correlation + * @vm_tm_corr_dry_run: VM Time Correlation dry-run + * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -113,6 +116,9 @@ struct itrace_synth_opts { bool llc; bool tlb; bool remote_access; + bool vm_time_correlation; + bool vm_tm_corr_dry_run; + char *vm_tm_corr_args; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index aad419bb165cd9a3b38db2de85f4934d63f238d1..949f7e54c9cb0045dacb4c07cc24692c6596243c 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -29,6 +29,10 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, int dsos__hit_all(struct perf_session *session); +int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct evsel *evsel, + struct machine *machine); + bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, struct feat_fd *fd); diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c new file mode 100644 index 0000000000000000000000000000000000000000..74365a5d99c1f17af2beba84eeaf4fe7181f7eb7 --- /dev/null +++ b/tools/perf/util/clockid.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include "debug.h" +#include "clockid.h" +#include "record.h" + +struct clockid_map { + const char *name; + int clockid; +}; + +#define CLOCKID_MAP(n, c) \ + { .name = n, .clockid = (c), } + +#define CLOCKID_END { .name = NULL, } + + +/* + * Add the missing ones, we need to build on many distros... + */ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#ifndef CLOCK_BOOTTIME +#define CLOCK_BOOTTIME 7 +#endif +#ifndef CLOCK_TAI +#define CLOCK_TAI 11 +#endif + +static const struct clockid_map clockids[] = { + /* available for all events, NMI safe */ + CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), + CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), + + /* available for some events */ + CLOCKID_MAP("realtime", CLOCK_REALTIME), + CLOCKID_MAP("boottime", CLOCK_BOOTTIME), + CLOCKID_MAP("tai", CLOCK_TAI), + + /* available for the lazy */ + CLOCKID_MAP("mono", CLOCK_MONOTONIC), + CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), + CLOCKID_MAP("real", CLOCK_REALTIME), + CLOCKID_MAP("boot", CLOCK_BOOTTIME), + + CLOCKID_END, +}; + +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + +int parse_clockid(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + const struct clockid_map *cm; + const char *ostr = str; + + if (unset) { + opts->use_clockid = 0; + return 0; + } + + /* no arg passed */ + if (!str) + return 0; + + /* no setting it twice */ + if (opts->use_clockid) + return -1; + + opts->use_clockid = true; + + /* if its a number, we're done */ + if (sscanf(str, "%d", &opts->clockid) == 1) + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); + + /* allow a "CLOCK_" prefix to the name */ + if (!strncasecmp(str, "CLOCK_", 6)) + str += 6; + + for (cm = clockids; cm->name; cm++) { + if (!strcasecmp(str, cm->name)) { + opts->clockid = cm->clockid; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); + } + } + + opts->use_clockid = false; + ui__warning("unknown clockid %s, check man page\n", ostr); + return -1; +} + +const char *clockid_name(clockid_t clk_id) +{ + const struct clockid_map *cm; + + for (cm = clockids; cm->name; cm++) { + if (cm->clockid == clk_id) + return cm->name; + } + return "(not found)"; +} diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h new file mode 100644 index 0000000000000000000000000000000000000000..9b49b4711c76837a08a13d6379b49c117493d459 --- /dev/null +++ b/tools/perf/util/clockid.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __PERF_CLOCKID_H +#define __PERF_CLOCKID_H + +struct option; +int parse_clockid(const struct option *opt, const char *str, int unset); + +const char *clockid_name(clockid_t clk_id); + +#endif diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 1b52402a892307d9bb32188580f4e84faf938fb2..ec77e2a7b3ca1875ef610d0f6ce3be17fc75a266 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -12,6 +12,7 @@ #include "cpumap.h" #include "debug.h" #include "env.h" +#include "pmu-hybrid.h" #define CORE_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" @@ -351,3 +352,82 @@ void numa_topology__delete(struct numa_topology *tp) free(tp); } + +static int load_hybrid_node(struct hybrid_topology_node *node, + struct perf_pmu *pmu) +{ + const char *sysfs; + char path[PATH_MAX]; + char *buf = NULL, *p; + FILE *fp; + size_t len = 0; + + node->pmu_name = strdup(pmu->name); + if (!node->pmu_name) + return -1; + + sysfs = sysfs__mountpoint(); + if (!sysfs) + goto err; + + snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name); + fp = fopen(path, "r"); + if (!fp) + goto err; + + if (getline(&buf, &len, fp) <= 0) { + fclose(fp); + goto err; + } + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + fclose(fp); + node->cpus = buf; + return 0; + +err: + zfree(&node->pmu_name); + free(buf); + return -1; +} + +struct hybrid_topology *hybrid_topology__new(void) +{ + struct perf_pmu *pmu; + struct hybrid_topology *tp = NULL; + u32 nr, i = 0; + + nr = perf_pmu__hybrid_pmu_num(); + if (nr == 0) + return NULL; + + tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr); + if (!tp) + return NULL; + + tp->nr = nr; + perf_pmu__for_each_hybrid_pmu(pmu) { + if (load_hybrid_node(&tp->nodes[i], pmu)) { + hybrid_topology__delete(tp); + return NULL; + } + i++; + } + + return tp; +} + +void hybrid_topology__delete(struct hybrid_topology *tp) +{ + u32 i; + + for (i = 0; i < tp->nr; i++) { + zfree(&tp->nodes[i].pmu_name); + zfree(&tp->nodes[i].cpus); + } + + free(tp); +} diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 7bf6b811f715ea329759486168822cc00c391787..528b747152f328fbda8519007df84a2d740027d8 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -25,10 +25,23 @@ struct numa_topology { struct numa_topology_node nodes[0]; }; +struct hybrid_topology_node { + char *pmu_name; + char *cpus; +}; + +struct hybrid_topology { + u32 nr; + struct hybrid_topology_node nodes[]; +}; + struct cpu_topology *cpu_topology__new(void); void cpu_topology__delete(struct cpu_topology *tp); struct numa_topology *numa_topology__new(void); void numa_topology__delete(struct numa_topology *tp); +struct hybrid_topology *hybrid_topology__new(void); +void hybrid_topology__delete(struct hybrid_topology *tp); + #endif /* __PERF_CPUTOPO_H */ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index dbc772bfb04ecbd6183ef4ccb1519c311ac900bf..c764c6da33a2aaa2ffa52387fac80f5d3eeb6834 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -21,7 +21,7 @@ #include #include #include "asm/bug.h" -#include "data-convert-bt.h" +#include "data-convert.h" #include "session.h" #include "debug.h" #include "tool.h" @@ -31,6 +31,9 @@ #include "config.h" #include #include +#include +#include "util.h" +#include "clockid.h" #define pr_N(n, fmt, ...) \ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) @@ -1381,11 +1384,26 @@ do { \ return 0; } -static int ctf_writer__setup_clock(struct ctf_writer *cw) +static int ctf_writer__setup_clock(struct ctf_writer *cw, + struct perf_session *session, + bool tod) { struct bt_ctf_clock *clock = cw->clock; + const char *desc = "perf clock"; + int64_t offset = 0; - bt_ctf_clock_set_description(clock, "perf clock"); + if (tod) { + struct perf_env *env = &session->header.env; + + if (!env->clock.enabled) { + pr_err("Can't provide --tod time, missing clock data. " + "Please record with -k/--clockid option.\n"); + return -1; + } + + desc = clockid_name(env->clock.clockid); + offset = env->clock.tod_ns - env->clock.clockid_ns; + } #define SET(__n, __v) \ do { \ @@ -1394,8 +1412,8 @@ do { \ } while (0) SET(frequency, 1000000000); - SET(offset_s, 0); - SET(offset, 0); + SET(offset, offset); + SET(description, desc); SET(precision, 10); SET(is_absolute, 0); @@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw) memset(cw, 0, sizeof(*cw)); } -static int ctf_writer__init(struct ctf_writer *cw, const char *path) +static int ctf_writer__init(struct ctf_writer *cw, const char *path, + struct perf_session *session, bool tod) { struct bt_ctf_writer *writer; struct bt_ctf_stream_class *stream_class; @@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path) cw->clock = clock; - if (ctf_writer__setup_clock(cw)) { + if (ctf_writer__setup_clock(cw, session, tod)) { pr("Failed to setup CTF clock.\n"); goto err_cleanup; } @@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, if (err) return err; - /* CTF writer */ - if (ctf_writer__init(cw, path)) - return -1; - err = -1; /* perf.data session */ - session = perf_session__new(&data, 0, &c.tool); - if (IS_ERR(session)) { - err = PTR_ERR(session); - goto free_writer; - } + session = perf_session__new(&data, &c.tool); + if (IS_ERR(session)) + return PTR_ERR(session); + + /* CTF writer */ + if (ctf_writer__init(cw, path, session, opts->tod)) + goto free_session; if (c.queue_size) { ordered_events__set_alloc_size(&session->ordered_events, @@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path, /* CTF writer env/clock setup */ if (ctf_writer__setup_env(cw, session)) - goto free_session; + goto free_writer; /* CTF events setup */ if (setup_events(cw, session)) - goto free_session; + goto free_writer; if (opts->all && setup_non_sample_events(cw, session)) - goto free_session; + goto free_writer; if (setup_streams(cw, session)) - goto free_session; + goto free_writer; err = perf_session__process_events(session); if (!err) @@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path, return err; -free_session: - perf_session__delete(session); free_writer: ctf_writer__cleanup(cw); +free_session: + perf_session__delete(session); pr_err("Error during conversion setup.\n"); return err; } diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h deleted file mode 100644 index 821674d63c4ed8f208efc1f859a34b374275065a..0000000000000000000000000000000000000000 --- a/tools/perf/util/data-convert-bt.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DATA_CONVERT_BT_H -#define __DATA_CONVERT_BT_H -#include "data-convert.h" -#ifdef HAVE_LIBBABELTRACE_SUPPORT - -int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, - struct perf_data_convert_opts *opts); - -#endif /* HAVE_LIBBABELTRACE_SUPPORT */ -#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c new file mode 100644 index 0000000000000000000000000000000000000000..f1ab6edba446bb6159971abefc8f78379b309e10 --- /dev/null +++ b/tools/perf/util/data-convert-json.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * JSON export. + * + * Copyright (C) 2021, CodeWeavers Inc. + */ + +#include "data-convert.h" + +#include +#include +#include +#include + +#include "linux/compiler.h" +#include "linux/err.h" +#include "util/auxtrace.h" +#include "util/debug.h" +#include "util/dso.h" +#include "util/event.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/header.h" +#include "util/map.h" +#include "util/session.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/tool.h" + +struct convert_json { + struct perf_tool tool; + FILE *out; + bool first; + u64 events_count; +}; + +// Outputs a JSON-encoded string surrounded by quotes with characters escaped. +static void output_json_string(FILE *out, const char *s) +{ + fputc('"', out); + while (*s) { + switch (*s) { + + // required escapes with special forms as per RFC 8259 + case '"': fputs("\\\"", out); break; + case '\\': fputs("\\\\", out); break; + case '\b': fputs("\\b", out); break; + case '\f': fputs("\\f", out); break; + case '\n': fputs("\\n", out); break; + case '\r': fputs("\\r", out); break; + case '\t': fputs("\\t", out); break; + + default: + // all other control characters must be escaped by hex code + if (*s <= 0x1f) + fprintf(out, "\\u%04x", *s); + else + fputc(*s, out); + break; + } + + ++s; + } + fputc('"', out); +} + +// Outputs an optional comma, newline and indentation to delimit a new value +// from the previous one in a JSON object or array. +static void output_json_delimiters(FILE *out, bool comma, int depth) +{ + int i; + + if (comma) + fputc(',', out); + fputc('\n', out); + for (i = 0; i < depth; ++i) + fputc('\t', out); +} + +// Outputs a printf format string (with delimiter) as a JSON value. +__printf(4, 5) +static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...) +{ + va_list args; + + output_json_delimiters(out, comma, depth); + va_start(args, format); + vfprintf(out, format, args); + va_end(args); +} + +// Outputs a JSON key-value pair where the value is a string. +static void output_json_key_string(FILE *out, bool comma, int depth, + const char *key, const char *value) +{ + output_json_delimiters(out, comma, depth); + output_json_string(out, key); + fputs(": ", out); + output_json_string(out, value); +} + +// Outputs a JSON key-value pair where the value is a printf format string. +__printf(5, 6) +static void output_json_key_format(FILE *out, bool comma, int depth, + const char *key, const char *format, ...) +{ + va_list args; + + output_json_delimiters(out, comma, depth); + output_json_string(out, key); + fputs(": ", out); + va_start(args, format); + vfprintf(out, format, args); + va_end(args); +} + +static void output_sample_callchain_entry(struct perf_tool *tool, + u64 ip, struct addr_location *al) +{ + struct convert_json *c = container_of(tool, struct convert_json, tool); + FILE *out = c->out; + + output_json_format(out, false, 4, "{"); + output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip); + + if (al && al->sym && al->sym->namelen) { + fputc(',', out); + output_json_key_string(out, false, 5, "symbol", al->sym->name); + + if (al->map && al->map->dso) { + const char *dso = al->map->dso->short_name; + + if (dso && strlen(dso) > 0) { + fputc(',', out); + output_json_key_string(out, false, 5, "dso", dso); + } + } + } + + output_json_format(out, false, 4, "}"); +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct evsel *evsel __maybe_unused, + struct machine *machine) +{ + struct convert_json *c = container_of(tool, struct convert_json, tool); + FILE *out = c->out; + struct addr_location al, tal; + u8 cpumode = PERF_RECORD_MISC_USER; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("Sample resolution failed!\n"); + return -1; + } + + ++c->events_count; + + if (c->first) + c->first = false; + else + fputc(',', out); + output_json_format(out, false, 2, "{"); + + output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time); + output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_); + output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid); + + if (al.thread->cpu >= 0) + output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu); + + output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread)); + + output_json_key_format(out, true, 3, "callchain", "["); + if (sample->callchain) { + unsigned int i; + bool ok; + bool first_callchain = true; + + for (i = 0; i < sample->callchain->nr; ++i) { + u64 ip = sample->callchain->ips[i]; + + if (ip >= PERF_CONTEXT_MAX) { + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %" + PRId64 "\n", (s64) ip); + break; + } + continue; + } + + if (first_callchain) + first_callchain = false; + else + fputc(',', out); + + ok = thread__find_symbol(al.thread, cpumode, ip, &tal); + output_sample_callchain_entry(tool, ip, ok ? &tal : NULL); + } + } else { + output_sample_callchain_entry(tool, sample->ip, &al); + } + output_json_format(out, false, 3, "]"); + + output_json_format(out, false, 2, "}"); + return 0; +} + +static void output_headers(struct perf_session *session, struct convert_json *c) +{ + struct stat st; + struct perf_header *header = &session->header; + int ret; + int fd = perf_data__fd(session->data); + int i; + FILE *out = c->out; + + output_json_key_format(out, false, 2, "header-version", "%u", header->version); + + ret = fstat(fd, &st); + if (ret >= 0) { + time_t stctime = st.st_mtime; + char buf[256]; + + strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime)); + output_json_key_string(out, true, 2, "captured-on", buf); + } else { + pr_debug("Failed to get mtime of source file, not writing captured-on"); + } + + output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset); + output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size); + output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset); + + output_json_key_string(out, true, 2, "hostname", header->env.hostname); + output_json_key_string(out, true, 2, "os-release", header->env.os_release); + output_json_key_string(out, true, 2, "arch", header->env.arch); + + output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc); + output_json_key_string(out, true, 2, "cpuid", header->env.cpuid); + output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online); + output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail); + + if (header->env.clock.enabled) { + output_json_key_format(out, true, 2, "clockid", + "%u", header->env.clock.clockid); + output_json_key_format(out, true, 2, "clock-time", + "%" PRIu64, header->env.clock.clockid_ns); + output_json_key_format(out, true, 2, "real-time", + "%" PRIu64, header->env.clock.tod_ns); + } + + output_json_key_string(out, true, 2, "perf-version", header->env.version); + + output_json_key_format(out, true, 2, "cmdline", "["); + for (i = 0; i < header->env.nr_cmdline; i++) { + output_json_delimiters(out, i != 0, 3); + output_json_string(c->out, header->env.cmdline_argv[i]); + } + output_json_format(out, false, 2, "]"); +} + +int bt_convert__perf2json(const char *input_name, const char *output_name, + struct perf_data_convert_opts *opts __maybe_unused) +{ + struct perf_session *session; + int fd; + int ret = -1; + + struct convert_json c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .id_index = perf_event__process_id_index, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + .first = true, + .events_count = 0, + }; + + struct perf_data data = { + .mode = PERF_DATA_MODE_READ, + .path = input_name, + .force = opts->force, + }; + + if (opts->all) { + pr_err("--all is currently unsupported for JSON output.\n"); + goto err; + } + if (opts->tod) { + pr_err("--tod is currently unsupported for JSON output.\n"); + goto err; + } + + fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666); + if (fd == -1) { + if (errno == EEXIST) + pr_err("Output file exists. Use --force to overwrite it.\n"); + else + pr_err("Error opening output file!\n"); + goto err; + } + + c.out = fdopen(fd, "w"); + if (!c.out) { + fprintf(stderr, "Error opening output file!\n"); + close(fd); + goto err; + } + + session = perf_session__new(&data, &c.tool); + if (IS_ERR(session)) { + fprintf(stderr, "Error creating perf session!\n"); + goto err_fclose; + } + + if (symbol__init(&session->header.env) < 0) { + fprintf(stderr, "Symbol init error!\n"); + goto err_session_delete; + } + + // The opening brace is printed manually because it isn't delimited from a + // previous value (i.e. we don't want a leading newline) + fputc('{', c.out); + + // Version number for future-proofing. Most additions should be able to be + // done in a backwards-compatible way so this should only need to be bumped + // if some major breaking change must be made. + output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1"); + + // Output headers + output_json_format(c.out, true, 1, "\"headers\": {"); + output_headers(session, &c); + output_json_format(c.out, false, 1, "}"); + + // Output samples + output_json_format(c.out, true, 1, "\"samples\": ["); + perf_session__process_events(session); + output_json_format(c.out, false, 1, "]"); + output_json_format(c.out, false, 0, "}"); + fputc('\n', c.out); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into JSON data '%s' ]\n", + data.path, output_name); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (ftell(c.out)) / 1024.0 / 1024.0, c.events_count); + + ret = 0; +err_session_delete: + perf_session__delete(session); +err_fclose: + fclose(c.out); +err: + return ret; +} diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h index af90b6076c0616b31351a2d803839c16871ad678..1b4c5f598415d34a01e31150dde72a377f6b1c19 100644 --- a/tools/perf/util/data-convert.h +++ b/tools/perf/util/data-convert.h @@ -2,9 +2,20 @@ #ifndef __DATA_CONVERT_H #define __DATA_CONVERT_H +#include + struct perf_data_convert_opts { bool force; bool all; + bool tod; }; +#ifdef HAVE_LIBBABELTRACE_SUPPORT +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, + struct perf_data_convert_opts *opts); +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ + +int bt_convert__perf2json(const char *input_name, const char *to_ctf, + struct perf_data_convert_opts *opts); + #endif /* __DATA_CONVERT_H */ diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 4da900bdb2f18123e90048a69cf327145233fd81..4f37475ad515ac3e71a38d079658a10a65011875 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -169,8 +169,21 @@ static bool check_pipe(struct perf_data *data) is_pipe = true; } - if (is_pipe) - data->file.fd = fd; + if (is_pipe) { + if (data->use_stdio) { + const char *mode; + + mode = perf_data__is_read(data) ? "r" : "w"; + data->file.fptr = fdopen(fd, mode); + + if (data->file.fptr == NULL) { + data->file.fd = fd; + data->use_stdio = false; + } + } else { + data->file.fd = fd; + } + } return data->is_pipe = is_pipe; } @@ -221,11 +234,12 @@ static bool is_dir(struct perf_data *data) static int open_file_read(struct perf_data *data) { + int flags = data->in_place_update ? O_RDWR : O_RDONLY; struct stat st; int fd; char sbuf[STRERR_BUFSIZE]; - fd = open(data->file.path, O_RDONLY); + fd = open(data->file.path, flags); if (fd < 0) { int err = errno; @@ -329,6 +343,9 @@ int perf_data__open(struct perf_data *data) if (check_pipe(data)) return 0; + /* currently it allows stdio for pipe only */ + data->use_stdio = false; + if (!data->path) data->path = "perf.data"; @@ -348,7 +365,21 @@ void perf_data__close(struct perf_data *data) perf_data__close_dir(data); zfree(&data->file.path); - close(data->file.fd); + + if (data->use_stdio) + fclose(data->file.fptr); + else + close(data->file.fd); +} + +ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size) +{ + if (data->use_stdio) { + if (fread(buf, size, 1, data->file.fptr) == 1) + return size; + return feof(data->file.fptr) ? 0 : -1; + } + return readn(data->file.fd, buf, size); } ssize_t perf_data_file__write(struct perf_data_file *file, @@ -360,6 +391,11 @@ ssize_t perf_data_file__write(struct perf_data_file *file, ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size) { + if (data->use_stdio) { + if (fwrite(buf, size, 1, data->file.fptr) == 1) + return size; + return -1; + } return perf_data_file__write(&data->file, buf, size); } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 252d9907124964f00f3cc1b99bcd111994e3ad34..10fbadebae81688d7c5d54e886ac6bb1767e99a5 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -2,6 +2,7 @@ #ifndef __PERF_DATA_H #define __PERF_DATA_H +#include #include #include @@ -12,7 +13,10 @@ enum perf_data_mode { struct perf_data_file { char *path; - int fd; + union { + int fd; + FILE *fptr; + }; unsigned long size; }; @@ -22,6 +26,8 @@ struct perf_data { bool is_pipe; bool is_dir; bool force; + bool in_place_update; + bool use_stdio; enum perf_data_mode mode; struct { @@ -53,11 +59,15 @@ static inline bool perf_data__is_dir(struct perf_data *data) static inline int perf_data__fd(struct perf_data *data) { + if (data->use_stdio) + return fileno(data->file.fptr); + return data->file.fd; } int perf_data__open(struct perf_data *data); void perf_data__close(struct perf_data *data); +ssize_t perf_data__read(struct perf_data *data, void *buf, size_t size); ssize_t perf_data__write(struct perf_data *data, void *buf, size_t size); ssize_t perf_data_file__write(struct perf_data_file *file, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 7f07a5dc555f8b23dc4ddf49b66bdafbe0495af2..d12921a903f99a29316354d6d77d372f9681b5fe 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1114,7 +1114,7 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, return dso; } -void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) +static void dso__set_long_name_id(struct dso *dso, const char *name, struct dso_id *id, bool name_allocated) { struct rb_root *root = dso->root; @@ -1127,8 +1127,8 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) if (root) { rb_erase(&dso->rb_node, root); /* - * __dsos__findnew_link_by_longname() isn't guaranteed to add it - * back, so a clean removal is required here. + * __dsos__findnew_link_by_longname_id() isn't guaranteed to + * add it back, so a clean removal is required here. */ RB_CLEAR_NODE(&dso->rb_node); dso->root = NULL; @@ -1139,7 +1139,12 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) dso->long_name_allocated = name_allocated; if (root) - __dsos__findnew_link_by_longname(root, dso, NULL); + __dsos__findnew_link_by_longname_id(root, dso, NULL, id); +} + +void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) +{ + dso__set_long_name_id(dso, name, NULL, name_allocated); } void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) @@ -1180,13 +1185,15 @@ void dso__set_sorted_by_name(struct dso *dso) dso->sorted_by_name = true; } -struct dso *dso__new(const char *name) +struct dso *dso__new_id(const char *name, struct dso_id *id) { struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); if (dso != NULL) { strcpy(dso->name, name); - dso__set_long_name(dso, dso->name, false); + if (id) + dso->id = *id; + dso__set_long_name_id(dso, dso->name, id, false); dso__set_short_name(dso, dso->name, false); dso->symbols = dso->symbol_names = RB_ROOT_CACHED; dso->data.cache = RB_ROOT; @@ -1217,6 +1224,11 @@ struct dso *dso__new(const char *name) return dso; } +struct dso *dso__new(const char *name) +{ + return dso__new_id(name, NULL); +} + void dso__delete(struct dso *dso) { if (!RB_EMPTY_NODE(&dso->rb_node)) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 69bb77d191644dea563463bc15b081e5564afa71..90843e2213a1c13341587435339449c7d29be71c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -123,6 +123,16 @@ enum dso_load_errno { #define DSO__DATA_CACHE_SIZE 4096 #define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1) +/* + * Data about backing storage DSO, comes from PERF_RECORD_MMAP2 meta events + */ +struct dso_id { + u32 maj; + u32 min; + u64 ino; + u64 ino_generation; +}; + struct dso_cache { struct rb_node rb_node; u64 offset; @@ -197,6 +207,7 @@ struct dso { u64 db_id; }; struct nsinfo *nsinfo; + struct dso_id id; refcount_t refcnt; char name[0]; }; @@ -215,9 +226,11 @@ static inline void dso__set_loaded(struct dso *dso) dso->loaded = true; } +struct dso *dso__new_id(const char *name, struct dso_id *id); struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); +int dso__cmp_id(struct dso *a, struct dso *b); void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated); void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 3ea80d203587a281d6d7626d1555790c08530099..f62e9100091b446d46b37bd4640aed1f1f0d189d 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -9,6 +9,40 @@ #include #include // filename__read_build_id +static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) +{ + if (a->maj > b->maj) return -1; + if (a->maj < b->maj) return 1; + + if (a->min > b->min) return -1; + if (a->min < b->min) return 1; + + if (a->ino > b->ino) return -1; + if (a->ino < b->ino) return 1; + + if (a->ino_generation > b->ino_generation) return -1; + if (a->ino_generation < b->ino_generation) return 1; + + return 0; +} + +static int dso_id__cmp(struct dso_id *a, struct dso_id *b) +{ + /* + * The second is always dso->id, so zeroes if not set, assume passing + * NULL for a means a zeroed id + */ + if (a == NULL) + return 0; + + return __dso_id__cmp(a, b); +} + +int dso__cmp_id(struct dso *a, struct dso *b) +{ + return __dso_id__cmp(&a->id, &b->id); +} + bool __dsos__read_build_ids(struct list_head *head, bool with_hits) { bool have_build_id = false; @@ -34,12 +68,30 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } +static int __dso__cmp_long_name(const char *long_name, struct dso_id *id, struct dso *b) +{ + int rc = strcmp(long_name, b->long_name); + return rc ?: dso_id__cmp(id, &b->id); +} + +static int __dso__cmp_short_name(const char *short_name, struct dso_id *id, struct dso *b) +{ + int rc = strcmp(short_name, b->short_name); + return rc ?: dso_id__cmp(id, &b->id); +} + +static int dso__cmp_short_name(struct dso *a, struct dso *b) +{ + return __dso__cmp_short_name(a->short_name, &a->id, b); +} + /* * Find a matching entry and/or link current entry to RB tree. * Either one of the dso or name parameter must be non-NULL or the * function will not work. */ -struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *dso, const char *name) +struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, + const char *name, struct dso_id *id) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -51,7 +103,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d */ while (*p) { struct dso *this = rb_entry(*p, struct dso, rb_node); - int rc = strcmp(name, this->long_name); + int rc = __dso__cmp_long_name(name, id, this); parent = *p; if (rc == 0) { @@ -67,7 +119,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d * In this case, the short name should be different. * Comparing the short names to differentiate the DSOs. */ - rc = strcmp(dso->short_name, this->short_name); + rc = dso__cmp_short_name(dso, this); if (rc == 0) { pr_err("Duplicated dso name: %s\n", name); return NULL; @@ -90,7 +142,7 @@ struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *d void __dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); - __dsos__findnew_link_by_longname(&dsos->root, dso, NULL); + __dsos__findnew_link_by_longname_id(&dsos->root, dso, NULL, &dso->id); /* * It is now in the linked list, grab a reference, then garbage collect * this when needing memory, by looking at LRU dso instances in the @@ -121,17 +173,27 @@ void dsos__add(struct dsos *dsos, struct dso *dso) up_write(&dsos->lock); } -struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +static struct dso *__dsos__findnew_by_longname_id(struct rb_root *root, const char *name, struct dso_id *id) +{ + return __dsos__findnew_link_by_longname_id(root, NULL, name, id); +} + +static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, struct dso_id *id, bool cmp_short) { struct dso *pos; if (cmp_short) { list_for_each_entry(pos, &dsos->head, node) - if (strcmp(pos->short_name, name) == 0) + if (__dso__cmp_short_name(name, id, pos) == 0) return pos; return NULL; } - return __dsos__findnew_by_longname(&dsos->root, name); + return __dsos__findnew_by_longname_id(&dsos->root, name, id); +} + +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) +{ + return __dsos__find_id(dsos, name, NULL, cmp_short); } struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) @@ -175,9 +237,9 @@ static void dso__set_basename(struct dso *dso) dso__set_short_name(dso, base, true); } -struct dso *__dsos__addnew(struct dsos *dsos, const char *name) +static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { - struct dso *dso = dso__new(name); + struct dso *dso = dso__new_id(name, id); if (dso != NULL) { __dsos__add(dsos, dso); @@ -188,18 +250,22 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) return dso; } -struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +struct dso *__dsos__addnew(struct dsos *dsos, const char *name) { - struct dso *dso = __dsos__find(dsos, name, false); + return __dsos__addnew_id(dsos, name, NULL); +} - return dso ? dso : __dsos__addnew(dsos, name); +static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) +{ + struct dso *dso = __dsos__find_id(dsos, name, id, false); + return dso ? dso : __dsos__addnew_id(dsos, name, id); } -struct dso *dsos__findnew(struct dsos *dsos, const char *name) +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id) { struct dso *dso; down_write(&dsos->lock); - dso = dso__get(__dsos__findnew(dsos, name)); + dso = dso__get(__dsos__findnew_id(dsos, name, id)); up_write(&dsos->lock); return dso; } diff --git a/tools/perf/util/dsos.h b/tools/perf/util/dsos.h index 32f1fbee0feb22841088f6986fd5c7302f17c784..a9b552af1d34688f75dc190d0c00550d2130cc5f 100644 --- a/tools/perf/util/dsos.h +++ b/tools/perf/util/dsos.h @@ -9,6 +9,7 @@ #include "rwsem.h" struct dso; +struct dso_id; /* * DSOs are put into both a list for fast iteration and rbtree for fast @@ -25,15 +26,11 @@ void dsos__add(struct dsos *dsos, struct dso *dso); struct dso *__dsos__addnew(struct dsos *dsos, const char *name); struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); -struct dso *__dsos__findnew(struct dsos *dsos, const char *name); -struct dso *dsos__findnew(struct dsos *dsos, const char *name); -struct dso *__dsos__findnew_link_by_longname(struct rb_root *root, struct dso *dso, const char *name); - -static inline struct dso *__dsos__findnew_by_longname(struct rb_root *root, const char *name) -{ - return __dsos__findnew_link_by_longname(root, NULL, name); -} +struct dso *dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id); + +struct dso *__dsos__findnew_link_by_longname_id(struct rb_root *root, struct dso *dso, + const char *name, struct dso_id *id); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6b8252dd42c1368034615fec00fab4b6a4460642..9a2e9c8a1ea09037c4d88a2e6a482d267b749103 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -169,7 +169,7 @@ static void perf_env__purge_bpf(struct perf_env *env) void perf_env__exit(struct perf_env *env) { - int i; + int i, j; perf_env__purge_bpf(env); zfree(&env->hostname); @@ -185,6 +185,8 @@ void perf_env__exit(struct perf_env *env) zfree(&env->sibling_threads); zfree(&env->pmu_mappings); zfree(&env->cpu); + for (i = 0; i < env->nr_cpu_pmu_caps; i++) + zfree(&env->cpu_pmu_caps[i]); zfree(&env->numa_map); for (i = 0; i < env->nr_numa_nodes; i++) @@ -198,6 +200,20 @@ void perf_env__exit(struct perf_env *env) for (i = 0; i < env->nr_memory_nodes; i++) zfree(&env->memory_nodes[i].set); zfree(&env->memory_nodes); + + for (i = 0; i < env->nr_hybrid_nodes; i++) { + zfree(&env->hybrid_nodes[i].pmu_name); + zfree(&env->hybrid_nodes[i].cpus); + } + zfree(&env->hybrid_nodes); + + for (i = 0; i < env->nr_pmus_with_caps; i++) { + for (j = 0; j < env->pmu_caps[i].nr_caps; j++) + zfree(&env->pmu_caps[i].caps[j]); + zfree(&env->pmu_caps[i].caps); + zfree(&env->pmu_caps[i].pmu_name); + } + zfree(&env->pmu_caps); } void perf_env__init(struct perf_env *env) @@ -476,3 +492,51 @@ int perf_env__numa_node(struct perf_env *env, int cpu) return cpu >= 0 && cpu < env->nr_numa_map ? env->numa_map[cpu] : -1; } + +char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, + const char *cap) +{ + char *cap_eq; + int cap_size; + char **ptr; + int i, j; + + if (!pmu_name || !cap) + return NULL; + + cap_size = strlen(cap); + cap_eq = zalloc(cap_size + 2); + if (!cap_eq) + return NULL; + + memcpy(cap_eq, cap, cap_size); + cap_eq[cap_size] = '='; + + if (!strcmp(pmu_name, "cpu")) { + for (i = 0; i < env->nr_cpu_pmu_caps; i++) { + if (!strncmp(env->cpu_pmu_caps[i], cap_eq, cap_size + 1)) { + free(cap_eq); + return &env->cpu_pmu_caps[i][cap_size + 1]; + } + } + goto out; + } + + for (i = 0; i < env->nr_pmus_with_caps; i++) { + if (strcmp(env->pmu_caps[i].pmu_name, pmu_name)) + continue; + + ptr = env->pmu_caps[i].caps; + + for (j = 0; j < env->pmu_caps[i].nr_caps; j++) { + if (!strncmp(ptr[j], cap_eq, cap_size + 1)) { + free(cap_eq); + return &ptr[j][cap_size + 1]; + } + } + } + +out: + free(cap_eq); + return NULL; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 0b862b4a01e3659c2090feac909d0d0f3cb811b2..3bc7ee843df4d1f8266f92d10a742ff211b45c61 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -37,6 +37,18 @@ struct memory_node { unsigned long *set; }; +struct hybrid_node { + char *pmu_name; + char *cpus; +}; + +struct pmu_caps { + int nr_caps; + unsigned int max_branches; + char **caps; + char *pmu_name; +}; + struct perf_env { char *hostname; char *os_release; @@ -48,6 +60,7 @@ struct perf_env { char *cpuid; unsigned long long total_mem; unsigned int msr_pmu_type; + unsigned int max_branches; int nr_cmdline; int nr_sibling_cores; @@ -57,12 +70,16 @@ struct perf_env { int nr_memory_nodes; int nr_pmu_mappings; int nr_groups; + int nr_cpu_pmu_caps; + int nr_hybrid_nodes; + int nr_pmus_with_caps; char *cmdline; const char **cmdline_argv; char *sibling_cores; char *sibling_dies; char *sibling_threads; char *pmu_mappings; + char **cpu_pmu_caps; struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; @@ -76,6 +93,8 @@ struct perf_env { unsigned long long memory_bsize; u64 clockid_res_ns; + struct hybrid_node *hybrid_nodes; + struct pmu_caps *pmu_caps; /* * bpf_info_lock protects bpf rbtrees. This is needed because the * trees are accessed by different threads in perf-top @@ -91,6 +110,18 @@ struct perf_env { /* For fast cpu to numa node lookup via perf_env__numa_node */ int *numa_map; int nr_numa_map; + + /* For real clock time reference. */ + struct { + u64 tod_ns; + u64 clockid_ns; + int clockid; + /* + * enabled is valid for report mode, and is true if above + * values are set, it's set in process_clock_data + */ + bool enabled; + } clock; }; enum perf_compress_type { @@ -131,4 +162,6 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, int cpu); +char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, + const char *cap); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d3412f2c0d18eee2b9daed474cf77b28452abde0..3b04ddc2f1244fade39be852cf52520b6be8ddcc 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -46,6 +46,8 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "clockid.h" +#include "pmu-hybrid.h" #include #include @@ -896,6 +898,74 @@ static int write_clockid(struct feat_fd *ff, sizeof(ff->ph->env.clockid_res_ns)); } +static int write_clock_data(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + u64 *data64; + u32 data32; + int ret; + + /* version */ + data32 = 1; + + ret = do_write(ff, &data32, sizeof(data32)); + if (ret < 0) + return ret; + + /* clockid */ + data32 = ff->ph->env.clock.clockid; + + ret = do_write(ff, &data32, sizeof(data32)); + if (ret < 0) + return ret; + + /* TOD ref time */ + data64 = &ff->ph->env.clock.tod_ns; + + ret = do_write(ff, data64, sizeof(*data64)); + if (ret < 0) + return ret; + + /* clockid ref time */ + data64 = &ff->ph->env.clock.clockid_ns; + + return do_write(ff, data64, sizeof(*data64)); +} + +static int write_hybrid_topology(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct hybrid_topology *tp; + int ret; + u32 i; + + tp = hybrid_topology__new(); + if (!tp) + return -ENOENT; + + ret = do_write(ff, &tp->nr, sizeof(u32)); + if (ret < 0) + goto err; + + for (i = 0; i < tp->nr; i++) { + struct hybrid_topology_node *n = &tp->nodes[i]; + + ret = do_write_string(ff, n->pmu_name); + if (ret < 0) + goto err; + + ret = do_write_string(ff, n->cpus); + if (ret < 0) + goto err; + } + + ret = 0; + +err: + hybrid_topology__delete(tp); + return ret; +} + static int write_dir_format(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { @@ -1395,6 +1465,96 @@ static int write_compressed(struct feat_fd *ff __maybe_unused, return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); } +static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, + bool write_pmu) +{ + struct perf_pmu_caps *caps = NULL; + int ret; + + ret = do_write(ff, &pmu->nr_caps, sizeof(pmu->nr_caps)); + if (ret < 0) + return ret; + + list_for_each_entry(caps, &pmu->caps, list) { + ret = do_write_string(ff, caps->name); + if (ret < 0) + return ret; + + ret = do_write_string(ff, caps->value); + if (ret < 0) + return ret; + } + + if (write_pmu) { + ret = do_write_string(ff, pmu->name); + if (ret < 0) + return ret; + } + + return ret; +} + +static int write_cpu_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + int ret; + + if (!cpu_pmu) + return -ENOENT; + + ret = perf_pmu__caps_parse(cpu_pmu); + if (ret < 0) + return ret; + + return __write_pmu_caps(ff, cpu_pmu, false); +} + +static int write_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct perf_pmu *pmu = NULL; + int nr_pmu = 0; + int ret; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name || !strcmp(pmu->name, "cpu") || + perf_pmu__caps_parse(pmu) <= 0) + continue; + nr_pmu++; + } + + ret = do_write(ff, &nr_pmu, sizeof(nr_pmu)); + if (ret < 0) + return ret; + + if (!nr_pmu) + return 0; + + /* + * Write hybrid pmu caps first to maintain compatibility with + * older perf tool. + */ + pmu = NULL; + perf_pmu__for_each_hybrid_pmu(pmu) { + ret = __write_pmu_caps(ff, pmu, true); + if (ret < 0) + return ret; + } + + pmu = NULL; + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name || !strcmp(pmu->name, "cpu") || + !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name)) + continue; + + ret = __write_pmu_caps(ff, pmu, true); + if (ret < 0) + return ret; + } + return 0; +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1518,6 +1678,61 @@ static void print_clockid(struct feat_fd *ff, FILE *fp) ff->ph->env.clockid_res_ns * 1000); } +static void print_clock_data(struct feat_fd *ff, FILE *fp) +{ + struct timespec clockid_ns; + char tstr[64], date[64]; + struct timeval tod_ns; + clockid_t clockid; + struct tm ltime; + u64 ref; + + if (!ff->ph->env.clock.enabled) { + fprintf(fp, "# reference time disabled\n"); + return; + } + + /* Compute TOD time. */ + ref = ff->ph->env.clock.tod_ns; + tod_ns.tv_sec = ref / NSEC_PER_SEC; + ref -= tod_ns.tv_sec * NSEC_PER_SEC; + tod_ns.tv_usec = ref / NSEC_PER_USEC; + + /* Compute clockid time. */ + ref = ff->ph->env.clock.clockid_ns; + clockid_ns.tv_sec = ref / NSEC_PER_SEC; + ref -= clockid_ns.tv_sec * NSEC_PER_SEC; + clockid_ns.tv_nsec = ref; + + clockid = ff->ph->env.clock.clockid; + + if (localtime_r(&tod_ns.tv_sec, <ime) == NULL) + snprintf(tstr, sizeof(tstr), ""); + else { + strftime(date, sizeof(date), "%F %T", <ime); + scnprintf(tstr, sizeof(tstr), "%s.%06d", + date, (int) tod_ns.tv_usec); + } + + fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); + fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", + tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, + clockid_ns.tv_sec, clockid_ns.tv_nsec, + clockid_name(clockid)); +} + +static void print_hybrid_topology(struct feat_fd *ff, FILE *fp) +{ + int i; + struct hybrid_node *n; + + fprintf(fp, "# hybrid cpu system:\n"); + for (i = 0; i < ff->ph->env.nr_hybrid_nodes; i++) { + n = &ff->ph->env.hybrid_nodes[i]; + fprintf(fp, "# %s cpu list : %s\n", n->pmu_name, n->cpus); + } +} + static void print_dir_format(struct feat_fd *ff, FILE *fp) { struct perf_session *session; @@ -1772,6 +1987,42 @@ static void print_compressed(struct feat_fd *ff, FILE *fp) ff->ph->env.comp_level, ff->ph->env.comp_ratio); } +static void __print_pmu_caps(FILE *fp, int nr_caps, char **caps, char *pmu_name) +{ + const char *delimiter = ""; + int i; + + if (!nr_caps) { + fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name); + return; + } + + fprintf(fp, "# %s pmu capabilities: ", pmu_name); + for (i = 0; i < nr_caps; i++) { + fprintf(fp, "%s%s", delimiter, caps[i]); + delimiter = ", "; + } + + fprintf(fp, "\n"); +} + +static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + __print_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps, + ff->ph->env.cpu_pmu_caps, (char *)"cpu"); +} + +static void print_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + struct pmu_caps *pmu_caps; + + for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) { + pmu_caps = &ff->ph->env.pmu_caps[i]; + __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, + pmu_caps->pmu_name); + } +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -2651,6 +2902,80 @@ static int process_clockid(struct feat_fd *ff, return 0; } +static int process_clock_data(struct feat_fd *ff, + void *_data __maybe_unused) +{ + u32 data32; + u64 data64; + + /* version */ + if (do_read_u32(ff, &data32)) + return -1; + + if (data32 != 1) + return -1; + + /* clockid */ + if (do_read_u32(ff, &data32)) + return -1; + + ff->ph->env.clock.clockid = data32; + + /* TOD ref time */ + if (do_read_u64(ff, &data64)) + return -1; + + ff->ph->env.clock.tod_ns = data64; + + /* clockid ref time */ + if (do_read_u64(ff, &data64)) + return -1; + + ff->ph->env.clock.clockid_ns = data64; + ff->ph->env.clock.enabled = true; + return 0; +} + +static int process_hybrid_topology(struct feat_fd *ff, + void *data __maybe_unused) +{ + struct hybrid_node *nodes, *n; + u32 nr, i; + + /* nr nodes */ + if (do_read_u32(ff, &nr)) + return -1; + + nodes = zalloc(sizeof(*nodes) * nr); + if (!nodes) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + n = &nodes[i]; + + n->pmu_name = do_read_string(ff); + if (!n->pmu_name) + goto error; + + n->cpus = do_read_string(ff); + if (!n->cpus) + goto error; + } + + ff->ph->env.nr_hybrid_nodes = nr; + ff->ph->env.hybrid_nodes = nodes; + return 0; + +error: + for (i = 0; i < nr; i++) { + free(nodes[i].pmu_name); + free(nodes[i].cpus); + } + + free(nodes); + return -1; +} + static int process_dir_format(struct feat_fd *ff, void *_data __maybe_unused) { @@ -2809,6 +3134,126 @@ static int process_compressed(struct feat_fd *ff, return 0; } +static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, + char ***caps, unsigned int *max_branches) +{ + char *name, *value, *ptr; + u32 nr_pmu_caps, i; + + *nr_caps = 0; + *caps = NULL; + + if (do_read_u32(ff, &nr_pmu_caps)) + return -1; + + if (!nr_pmu_caps) + return 0; + + *caps = zalloc(sizeof(char *) * nr_pmu_caps); + if (!*caps) + return -1; + + for (i = 0; i < nr_pmu_caps; i++) { + name = do_read_string(ff); + if (!name) + goto error; + + value = do_read_string(ff); + if (!value) + goto free_name; + + if (asprintf(&ptr, "%s=%s", name, value) < 0) + goto free_value; + + (*caps)[i] = ptr; + + if (!strcmp(name, "branches")) + *max_branches = atoi(value); + + free(value); + free(name); + } + *nr_caps = nr_pmu_caps; + return 0; + +free_value: + free(value); +free_name: + free(name); +error: + for (; i > 0; i--) + free((*caps)[i - 1]); + free(*caps); + *caps = NULL; + *nr_caps = 0; + return -1; +} + +static int process_cpu_pmu_caps(struct feat_fd *ff, + void *data __maybe_unused) +{ + int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, + &ff->ph->env.cpu_pmu_caps, + &ff->ph->env.max_branches); + + if (!ret && !ff->ph->env.cpu_pmu_caps) + pr_debug("cpu pmu capabilities not available\n"); + return ret; +} + +static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) +{ + struct pmu_caps *pmu_caps; + u32 nr_pmu, i; + int ret; + int j; + + if (do_read_u32(ff, &nr_pmu)) + return -1; + + if (!nr_pmu) { + pr_debug("pmu capabilities not available\n"); + return 0; + } + + pmu_caps = zalloc(sizeof(*pmu_caps) * nr_pmu); + if (!pmu_caps) + return -ENOMEM; + + for (i = 0; i < nr_pmu; i++) { + ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, + &pmu_caps[i].caps, + &pmu_caps[i].max_branches); + if (ret) + goto err; + + pmu_caps[i].pmu_name = do_read_string(ff); + if (!pmu_caps[i].pmu_name) { + ret = -1; + goto err; + } + if (!pmu_caps[i].nr_caps) { + pr_debug("%s pmu capabilities not available\n", + pmu_caps[i].pmu_name); + } + } + + ff->ph->env.nr_pmus_with_caps = nr_pmu; + ff->ph->env.pmu_caps = pmu_caps; + return 0; + +err: + for (i = 0; i < nr_pmu; i++) { + for (j = 0; j < pmu_caps[i].nr_caps; j++) + free(pmu_caps[i].caps[j]); + free(pmu_caps[i].caps); + free(pmu_caps[i].pmu_name); + } + + free(pmu_caps); + return ret; +} + #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -2866,6 +3311,10 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), FEAT_OPR(COMPRESSED, compressed, false), + FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), + FEAT_OPR(CLOCK_DATA, clock_data, false), + FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true), + FEAT_OPR(PMU_CAPS, pmu_caps, false), }; struct header_print_data { @@ -2946,9 +3395,22 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) return 0; } +struct header_fw { + struct feat_writer fw; + struct feat_fd *ff; +}; + +static int feat_writer_cb(struct feat_writer *fw, void *buf, size_t sz) +{ + struct header_fw *h = container_of(fw, struct header_fw, fw); + + return do_write(h->ff, buf, sz); +} + static int do_write_feat(struct feat_fd *ff, int type, struct perf_file_section **p, - struct evlist *evlist) + struct evlist *evlist, + struct feat_copier *fc) { int err; int ret = 0; @@ -2962,7 +3424,23 @@ static int do_write_feat(struct feat_fd *ff, int type, (*p)->offset = lseek(ff->fd, 0, SEEK_CUR); - err = feat_ops[type].write(ff, evlist); + /* + * Hook to let perf inject copy features sections from the input + * file. + */ + if (fc && fc->copy) { + struct header_fw h = { + .fw.write = feat_writer_cb, + .ff = ff, + }; + + /* ->copy() returns 0 if the feature was not copied */ + err = fc->copy(fc, type, &h.fw); + } else { + err = 0; + } + if (!err) + err = feat_ops[type].write(ff, evlist); if (err < 0) { pr_debug("failed to write feature %s\n", feat_ops[type].name); @@ -2978,7 +3456,8 @@ static int do_write_feat(struct feat_fd *ff, int type, } static int perf_header__adds_write(struct perf_header *header, - struct evlist *evlist, int fd) + struct evlist *evlist, int fd, + struct feat_copier *fc) { int nr_sections; struct feat_fd ff; @@ -3007,7 +3486,7 @@ static int perf_header__adds_write(struct perf_header *header, lseek(fd, sec_start + sec_size, SEEK_SET); for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) { - if (do_write_feat(&ff, feat, &p, evlist)) + if (do_write_feat(&ff, feat, &p, evlist, fc)) perf_header__clear_feat(header, feat); } @@ -3045,9 +3524,10 @@ int perf_header__write_pipe(int fd) return 0; } -int perf_session__write_header(struct perf_session *session, - struct evlist *evlist, - int fd, bool at_exit) +static int perf_session__do_write_header(struct perf_session *session, + struct evlist *evlist, + int fd, bool at_exit, + struct feat_copier *fc) { struct perf_file_header f_header; struct perf_file_attr f_attr; @@ -3091,7 +3571,7 @@ int perf_session__write_header(struct perf_session *session, header->feat_offset = header->data_offset + header->data_size; if (at_exit) { - err = perf_header__adds_write(header, evlist, fd); + err = perf_header__adds_write(header, evlist, fd, fc); if (err < 0) return err; } @@ -3124,6 +3604,21 @@ int perf_session__write_header(struct perf_session *session, return 0; } +int perf_session__write_header(struct perf_session *session, + struct evlist *evlist, + int fd, bool at_exit) +{ + return perf_session__do_write_header(session, evlist, fd, at_exit, NULL); +} + +int perf_session__inject_header(struct perf_session *session, + struct evlist *evlist, + int fd, + struct feat_copier *fc) +{ + return perf_session__do_write_header(session, evlist, fd, true, fc); +} + static int perf_header__getbuffer64(struct perf_header *header, int fd, void *buf, size_t size) { @@ -3393,16 +3888,17 @@ static int perf_file_section__process(struct perf_file_section *section, } static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, - struct perf_header *ph, int fd, - bool repipe) + struct perf_header *ph, + struct perf_data* data, + bool repipe, int repipe_fd) { struct feat_fd ff = { - .fd = STDOUT_FILENO, + .fd = repipe_fd, .ph = ph, }; ssize_t ret; - ret = readn(fd, header, sizeof(*header)); + ret = perf_data__read(data, header, sizeof(*header)); if (ret <= 0) return -1; @@ -3420,19 +3916,18 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, return 0; } -static int perf_header__read_pipe(struct perf_session *session) +static int perf_header__read_pipe(struct perf_session *session, int repipe_fd) { struct perf_header *header = &session->header; struct perf_pipe_file_header f_header; - if (perf_file_header__read_pipe(&f_header, header, - perf_data__fd(session->data), - session->repipe) < 0) { + if (perf_file_header__read_pipe(&f_header, header, session->data, + session->repipe, repipe_fd) < 0) { pr_debug("incompatible file format\n"); return -EINVAL; } - return 0; + return f_header.size == sizeof(f_header) ? 0 : -1; } static int read_attr(int fd, struct perf_header *ph, @@ -3527,14 +4022,14 @@ static int perf_evlist__prepare_tracepoint_events(struct evlist *evlist, return 0; } -int perf_session__read_header(struct perf_session *session) +int perf_session__read_header(struct perf_session *session, int repipe_fd) { struct perf_data *data = session->data; struct perf_header *header = &session->header; struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; - int nr_attrs, nr_ids, i, j; + int nr_attrs, nr_ids, i, j, err; int fd = perf_data__fd(data); session->evlist = evlist__new(); @@ -3543,12 +4038,25 @@ int perf_session__read_header(struct perf_session *session) session->evlist->env = &header->env; session->machines.host.env = &header->env; - if (perf_data__is_pipe(data)) - return perf_header__read_pipe(session); + + /* + * We can read 'pipe' data event from regular file, + * check for the pipe header regardless of source. + */ + err = perf_header__read_pipe(session, repipe_fd); + if (!err || perf_data__is_pipe(data)) { + data->is_pipe = true; + return err; + } if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; + if (header->needs_swap && data->in_place_update) { + pr_err("In-place update not supported when byte-swapping is required\n"); + return -EINVAL; + } + /* * Sanity check that perf.data was written cleanly; data size is * initialized to 0 and updated only if the on_exit function is run. diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index ca53a929e9fdd5f2e1bb69e388974707e8324ef7..5db0f8fc074c3b18cb86143c32526c53e4fb41ad 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -43,6 +43,10 @@ enum { HEADER_BPF_PROG_INFO, HEADER_BPF_BTF, HEADER_COMPRESSED, + HEADER_CPU_PMU_CAPS, + HEADER_CLOCK_DATA, + HEADER_HYBRID_TOPOLOGY, + HEADER_PMU_CAPS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -115,12 +119,27 @@ struct perf_session; struct perf_tool; union perf_event; -int perf_session__read_header(struct perf_session *session); +int perf_session__read_header(struct perf_session *session, int repipe_fd); int perf_session__write_header(struct perf_session *session, struct evlist *evlist, int fd, bool at_exit); int perf_header__write_pipe(int fd); +/* feat_writer writes a feature section to output */ +struct feat_writer { + int (*write)(struct feat_writer *fw, void *buf, size_t sz); +}; + +/* feat_copier copies a feature section using feat_writer to output */ +struct feat_copier { + int (*copy)(struct feat_copier *fc, int feat, struct feat_writer *fw); +}; + +int perf_session__inject_header(struct perf_session *session, + struct evlist *evlist, + int fd, + struct feat_copier *fc); + void perf_header__set_feat(struct perf_header *header, int feat); void perf_header__clear_feat(struct perf_header *header, int feat); bool perf_header__has_feat(const struct perf_header *header, int feat); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7e395760288527c695919576c24e551fe18afb85..0b51c453bfda860511e99ef53e97d024adc9eaf0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1661,6 +1661,12 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; + struct dso_id dso_id = { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }; int ret = 0; if (dump_trace) @@ -1681,10 +1687,7 @@ int machine__process_mmap2_event(struct machine *machine, map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, - event->mmap2.maj, - event->mmap2.min, event->mmap2.ino, - event->mmap2.ino_generation, - event->mmap2.prot, + &dso_id, event->mmap2.prot, event->mmap2.flags, event->mmap2.filename, thread); @@ -1737,9 +1740,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - 0, 0, 0, 0, prot, 0, - event->mmap.filename, - thread); + NULL, prot, 0, event->mmap.filename, thread); if (map == NULL) goto out_problem_map; @@ -2714,9 +2715,14 @@ u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr) return addr_cpumode; } +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id) +{ + return dsos__findnew_id(&machine->dsos, filename, id); +} + struct dso *machine__findnew_dso(struct machine *machine, const char *filename) { - return dsos__findnew(&machine->dsos, filename); + return machine__findnew_dso_id(machine, filename, NULL); } char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 4f734337661a464ac44752beb90bad85f5d16fd6..fbb13c07f07e9255a5ac8a64b99a117dd3d08c55 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -11,6 +11,7 @@ struct addr_location; struct branch_stack; struct dso; +struct dso_id; struct evsel; struct perf_sample; struct symbol; @@ -205,6 +206,7 @@ int machine__nr_cpus_avail(struct machine *machine); struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); +struct dso *machine__findnew_dso_id(struct machine *machine, const char *filename, struct dso_id *id); struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 571e99c908a0e165d72d8c70706872e8dc53c5d1..2028922982ba9da7af9b2222d622c4af28ae9a84 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -28,21 +28,6 @@ static void __maps__insert(struct maps *maps, struct map *map); static void __maps__insert_name(struct maps *maps, struct map *map); -static inline int is_anon_memory(const char *filename, u32 flags) -{ - return flags & MAP_HUGETLB || - !strcmp(filename, "//anon") || - !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || - !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); -} - -static inline int is_no_dso_memory(const char *filename) -{ - return !strncmp(filename, "[stack", 6) || - !strncmp(filename, "/SYSV",5) || - !strcmp(filename, "[heap]"); -} - static inline int is_android_lib(const char *filename) { return !strncmp(filename, "/data/app-lib", 13) || @@ -145,8 +130,8 @@ void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) } struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, u32 prot, u32 flags, char *filename, + u64 pgoff, struct dso_id *id, + u32 prot, u32 flags, char *filename, struct thread *thread) { struct map *map = malloc(sizeof(*map)); @@ -159,14 +144,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, int anon, no_dso, vdso, android; android = is_android_lib(filename); - anon = is_anon_memory(filename, flags); + anon = is_anon_memory(filename) || flags & MAP_HUGETLB; vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); - - map->maj = d_maj; - map->min = d_min; - map->ino = ino; - map->ino_generation = ino_gen; map->prot = prot; map->flags = flags; nsi = nsinfo__get(thread->nsinfo); @@ -196,7 +176,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, pgoff = 0; dso = machine__findnew_vdso(machine, thread); } else - dso = machine__findnew_dso(machine, filename); + dso = machine__findnew_dso_id(machine, filename, id); if (dso == NULL) goto out_delete; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index c3614195ddc7ee29e3834af1280658dde4fe7d0f..0be5e970b9a8e51dfbb8d9027f040ccf86c992c7 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -32,9 +32,6 @@ struct map { u32 flags; u64 pgoff; u64 reloc; - u32 maj, min; /* only valid for MMAP2 record */ - u64 ino; /* only valid for MMAP2 record */ - u64 ino_generation;/* only valid for MMAP2 record */ /* ip -> dso rip */ u64 (*map_ip)(struct map *, u64); @@ -110,9 +107,11 @@ struct thread; void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); + +struct dso_id; + struct map *map__new(struct machine *machine, u64 start, u64 len, - u64 pgoff, u32 d_maj, u32 d_min, u64 ino, - u64 ino_gen, u32 prot, u32 flags, + u64 pgoff, struct dso_id *id, u32 prot, u32 flags, char *filename, struct thread *thread); struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); @@ -176,4 +175,18 @@ static inline bool is_entry_trampoline(const char *name) return !strcmp(name, ENTRY_TRAMPOLINE_NAME); } + +static inline int is_anon_memory(const char *filename) +{ + return !strcmp(filename, "//anon") || + !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || + !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); +} + +static inline int is_no_dso_memory(const char *filename) +{ + return !strncmp(filename, "[stack", 6) || + !strncmp(filename, "/SYSV", 5) || + !strcmp(filename, "[heap]"); +} #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h index d0fa7bc50a76f67b9ba5cbbe374ea819e04f2a6e..2b186c26a43eaef4f55f6b2339d5a2d8ac40cb42 100644 --- a/tools/perf/util/pmu-hybrid.h +++ b/tools/perf/util/pmu-hybrid.h @@ -19,4 +19,15 @@ struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); bool perf_pmu__is_hybrid(const char *name); char *perf_pmu__hybrid_type_to_pmu(const char *type); +static inline int perf_pmu__hybrid_pmu_num(void) +{ + struct perf_pmu *pmu; + int num = 0; + + perf_pmu__for_each_hybrid_pmu(pmu) + num++; + + return num; +} + #endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2677ea6b9344417696da3908b774cfe9be282ec3..708cbd322d613a32f1ecfdbb5a84723241958ce4 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1651,7 +1651,11 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) const char *sysfs = sysfs__mountpoint(); DIR *caps_dir; struct dirent *evt_ent; - int nr_caps = 0; + + if (pmu->caps_initialized) + return pmu->nr_caps; + + pmu->nr_caps = 0; if (!sysfs) return -1; @@ -1659,8 +1663,10 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) snprintf(caps_path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); - if (stat(caps_path, &st) < 0) + if (stat(caps_path, &st) < 0) { + pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ + } caps_dir = opendir(caps_path); if (!caps_dir) @@ -1687,13 +1693,14 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) continue; } - nr_caps++; + pmu->nr_caps++; fclose(file); } closedir(caps_dir); - return nr_caps; + pmu->caps_initialized = true; + return pmu->nr_caps; } void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 57c7bf476350f8efa0c4b4865d3107d56f9f5dc3..e0cf540acac2a43d29e76adc4a2d4492e60d6088 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -42,6 +42,8 @@ struct perf_pmu { struct perf_cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + bool caps_initialized; + u32 nr_caps; struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7710481ab01ad6408723bff6c25abe0483a702c4..782896153efeea802a88439ee0c9810e8f106121 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -101,11 +101,11 @@ static int perf_session__deliver_event(struct perf_session *session, struct perf_tool *tool, u64 file_offset); -static int perf_session__open(struct perf_session *session) +static int perf_session__open(struct perf_session *session, int repipe_fd) { struct perf_data *data = session->data; - if (perf_session__read_header(session) < 0) { + if (perf_session__read_header(session, repipe_fd) < 0) { pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } @@ -184,8 +184,9 @@ static int ordered_events__deliver_event(struct ordered_events *oe, session->tool, event->file_offset); } -struct perf_session *perf_session__new(struct perf_data *data, - bool repipe, struct perf_tool *tool) +struct perf_session *__perf_session__new(struct perf_data *data, + bool repipe, int repipe_fd, + struct perf_tool *tool) { int ret = -ENOMEM; struct perf_session *session = zalloc(sizeof(*session)); @@ -209,7 +210,7 @@ struct perf_session *perf_session__new(struct perf_data *data, session->data = data; if (perf_data__is_read(data)) { - ret = perf_session__open(session); + ret = perf_session__open(session, repipe_fd); if (ret < 0) goto out_delete; @@ -1862,7 +1863,6 @@ static int __perf_session__process_pipe_events(struct perf_session *session) { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; - int fd = perf_data__fd(session->data); union perf_event *event; uint32_t size, cur_size = 0; void *buf = NULL; @@ -1882,7 +1882,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) ordered_events__set_copy_on_queue(oe, true); more: event = buf; - err = readn(fd, event, sizeof(struct perf_event_header)); + err = perf_data__read(session->data, event, + sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done; @@ -1914,7 +1915,8 @@ static int __perf_session__process_pipe_events(struct perf_session *session) p += sizeof(struct perf_event_header); if (size - sizeof(struct perf_event_header)) { - err = readn(fd, p, size - sizeof(struct perf_event_header)); + err = perf_data__read(session->data, p, + size - sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) { pr_err("unexpected end of event stream\n"); @@ -2070,6 +2072,7 @@ struct reader { u64 data_size; u64 data_offset; reader_cb_t process; + bool in_place_update; }; static int @@ -2103,7 +2106,9 @@ reader__process_events(struct reader *rd, struct perf_session *session, mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; - if (session->header.needs_swap) { + if (rd->in_place_update) { + mmap_prot |= PROT_WRITE; + } else if (session->header.needs_swap) { mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } @@ -2189,6 +2194,7 @@ static int __perf_session__process_events(struct perf_session *session) .data_size = session->header.data_size, .data_offset = session->header.data_offset, .process = process_simple, + .in_place_update = session->data->in_place_update, }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index b4c9428c18f0728445e462526d52c6bb380cdeac..f68c33562e73ee8ee586a437db57f0fa0b245fa9 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -54,8 +54,16 @@ struct decomp { struct perf_tool; -struct perf_session *perf_session__new(struct perf_data *data, - bool repipe, struct perf_tool *tool); +struct perf_session *__perf_session__new(struct perf_data *data, + bool repipe, int repipe_fd, + struct perf_tool *tool); + +static inline struct perf_session *perf_session__new(struct perf_data *data, + struct perf_tool *tool) +{ + return __perf_session__new(data, false, -1, tool); +} + void perf_session__delete(struct perf_session *session); void perf_event_header__bswap(struct perf_event_header *hdr); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4027906fd3e38eb356ebf335cfa6e48b3fe93a29..90b0da006e7a13a18a5a9ae92fbc5ed716f96d6d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1200,6 +1200,7 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { u64 l, r; struct map *l_map, *r_map; + int rc; if (!left->mem_info) return -1; if (!right->mem_info) return 1; @@ -1218,18 +1219,9 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if (!l_map) return -1; if (!r_map) return 1; - if (l_map->maj > r_map->maj) return -1; - if (l_map->maj < r_map->maj) return 1; - - if (l_map->min > r_map->min) return -1; - if (l_map->min < r_map->min) return 1; - - if (l_map->ino > r_map->ino) return -1; - if (l_map->ino < r_map->ino) return 1; - - if (l_map->ino_generation > r_map->ino_generation) return -1; - if (l_map->ino_generation < r_map->ino_generation) return 1; - + rc = dso__cmp_id(l_map->dso, r_map->dso); + if (rc) + return rc; /* * Addresses with no major/minor numbers are assumed to be * anonymous in userspace. Sort those on pid then address. @@ -1240,8 +1232,8 @@ sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) if ((left->cpumode != PERF_RECORD_MISC_KERNEL) && (!(l_map->flags & MAP_SHARED)) && - !l_map->maj && !l_map->min && !l_map->ino && - !l_map->ino_generation) { + !l_map->dso->id.maj && !l_map->dso->id.min && + !l_map->dso->id.ino && !l_map->dso->id.ino_generation) { /* userspace anonymous */ if (left->thread->pid_ > right->thread->pid_) return -1; @@ -1277,8 +1269,8 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && map && !(map->prot & PROT_EXEC) && (map->flags & MAP_SHARED) && - (map->maj || map->min || map->ino || - map->ino_generation)) + (map->dso->id.maj || map->dso->id.min || + map->dso->id.ino || map->dso->id.ino_generation)) level = 's'; else if (!map) level = 'X'; @@ -3142,7 +3134,7 @@ static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int add_key(sb, s[i].name, llen); } -const char *sort_help(const char *prefix) +char *sort_help(const char *prefix) { struct strbuf sb; char *s; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7b93f34ac1f4c34328f2dc1378ccc2663ef4fe43..c047b4591b7055de3deb0e5525c9ef3ab5550240 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -285,7 +285,7 @@ void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); -const char *sort_help(const char *prefix); +char *sort_help(const char *prefix); int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index b7d0f6614dafeb5742db2991f0cab0b9437c1536..96e2adca27191bd3b2de8c77a2d9e4820b0bd852 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include "util/cgroup.h" +#include "util/data.h" #include "util/debug.h" #include "util/dso.h" #include "util/event.h" @@ -1922,3 +1924,53 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session free(ff.buf); return ret; } + +int perf_event__synthesize_for_pipe(struct perf_tool *tool, + struct perf_session *session, + struct perf_data *data, + perf_event__handler_t process) +{ + int err; + int ret = 0; + struct evlist *evlist = session->evlist; + + /* + * We need to synthesize events first, because some + * features works on top of them (on report side). + */ + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + ret += err; + + err = perf_event__synthesize_features(tool, session, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize features.\n"); + return err; + } + ret += err; + + if (have_tracepoints(&evlist->core.entries)) { + int fd = perf_data__fd(data); + + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so its not really + * an error, just that we don't need to + * synthesize anything. We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, evlist, + process); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + return err; + } + ret += err; + } + + return ret; +} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index baead0cdc381033b2568b2f5098605c51bf2b417..96e11c7eed9d67c2ff98e5963d66e0d307ce6413 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -14,6 +14,7 @@ struct evsel; struct machine; struct perf_counts_values; struct perf_cpu_map; +struct perf_data; struct perf_event_attr; struct perf_event_mmap_page; struct perf_sample; @@ -100,4 +101,9 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session } #endif // HAVE_LIBBPF_SUPPORT +int perf_event__synthesize_for_pipe(struct perf_tool *tool, + struct perf_session *session, + struct perf_data *data, + perf_event__handler_t process); + #endif // __PERF_SYNTHETIC_EVENTS_H