From 8363150c0c267673744e486fd7269997854f8492 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Fri, 25 Nov 2022 17:50:06 +0800 Subject: [PATCH 01/42] perf cs-etm: Update ETM metadata format mainline inclusion from mainline-v5.11 commit 42b2b570b34a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=42b2b570b34afb5fb9dc16ac77cb332194136a85 -------------------------------------------------------------------------- The current fixed metadata version format (version 0), means that adding metadata parameter items renders files from a previous version of perf unreadable. Per CPU parameters appear in a fixed order, but there is no field to indicate the number of ETM parameters per CPU. This patch updates the per CPU parameter blocks to include a NR_PARAMs value which indicates the number of parameters in the block. The header version is incremented to 1. Fixed ordering is retained, new ETM parameters are added to the end of the list. The reader code is updated to be able to read current version 0 files, For version 1, the reader will read the number of parameters in the per CPU block. This allows the reader to process older or newer files that may have different numbers of parameters than in use at the time perf was built. Signed-off-by: Mike Leach Reviewed-by: Leo Yan Tested-by: Leo Yan Link: https://lore.kernel.org/r/20210202214040.32349-1-mike.leach@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 7 +- tools/perf/util/cs-etm.c | 235 ++++++++++++++++++++++++------ tools/perf/util/cs-etm.h | 30 +++- 3 files changed, 223 insertions(+), 49 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cad7bf783413..e7fcfe5a36a2 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -572,7 +572,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) { - u32 increment; + u32 increment, nr_trc_params; u64 magic; struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); @@ -607,6 +607,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; + nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR; } else { magic = __perf_cs_etmv3_magic; /* Get configuration register */ @@ -624,11 +625,13 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETM_PRIV_MAX; + nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR; } /* Build generic header portion */ info->priv[*offset + CS_ETM_MAGIC] = magic; info->priv[*offset + CS_ETM_CPU] = cpu; + info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params; /* Where the next CPU entry should start from */ *offset += increment; } @@ -674,7 +677,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, /* First fill out the session header */ info->type = PERF_AUXTRACE_CS_ETM; - info->priv[CS_HEADER_VERSION_0] = 0; + info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION; info->priv[CS_PMU_TYPE_CPUS] = type << 32; info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a2a369e2fbb6..ee32d023e9bd 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2435,7 +2435,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) } static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_HEADER_VERSION] = " Header version %llx\n", [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", }; @@ -2443,6 +2443,7 @@ static const char * const cs_etm_global_header_fmts[] = { static const char * const cs_etm_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETM_ETMCR] = " ETMCR %llx\n", [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", [CS_ETM_ETMCCER] = " ETMCCER %llx\n", @@ -2452,6 +2453,7 @@ static const char * const cs_etm_priv_fmts[] = { static const char * const cs_etmv4_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", @@ -2461,26 +2463,167 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", }; -static void cs_etm__print_auxtrace_info(__u64 *val, int num) +static const char * const param_unk_fmt = + " Unknown parameter [%d] %llx\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %llx\n"; + +static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) { - int i, j, cpu = 0; + int i = *offset, j, nr_params = 0, fmt_offset; + __u64 magic; - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} + +static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + __u64 magic; + + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { - if (val[i] == __perf_cs_etmv3_magic) - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - else if (val[i] == __perf_cs_etmv4_magic) - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + } + } else if (magic == __perf_cs_etmv4_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETMV4_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - else - /* failure.. return */ + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(__u64 *val, int num) +{ + int i, cpu = 0, version, err; + + /* bail out early on bad header version */ + version = val[0]; + if (version > CS_HEADER_CURRENT_VERSION) { + /* failure.. return */ + fprintf(stdout, " Unknown Header Version = %x, ", version); + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return; + } + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + else if (version == 1) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) return; } } +/* + * Read a single cpu parameter block from the auxtrace_info priv block. + * + * For version 1 there is a per cpu nr_params entry. If we are handling + * version 1 file, then there may be less, the same, or more params + * indicated by this value than the compile time number we understand. + * + * For a version 0 info block, there are a fixed number, and we need to + * fill out the nr_param value in the metadata we create. + */ +static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, + int out_blk_size, int nr_params_v0) +{ + u64 *metadata = NULL; + int hdr_version; + int nr_in_params, nr_out_params, nr_cmn_params; + int i, k; + + metadata = zalloc(sizeof(*metadata) * out_blk_size); + if (!metadata) + return NULL; + + /* read block current index & version */ + i = *buff_in_offset; + hdr_version = buff_in[CS_HEADER_VERSION]; + + if (!hdr_version) { + /* read version 0 info block into a version 1 metadata block */ + nr_in_params = nr_params_v0; + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; + /* remaining block params at offset +1 from source */ + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) + metadata[k + 1] = buff_in[i + k]; + /* version 0 has 2 common params */ + nr_cmn_params = 2; + } else { + /* read version 1 info block - input and output nr_params may differ */ + /* version 1 has 3 common params */ + nr_cmn_params = 3; + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; + + /* if input has more params than output - skip excess */ + nr_out_params = nr_in_params + nr_cmn_params; + if (nr_out_params > out_blk_size) + nr_out_params = out_blk_size; + + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) + metadata[k] = buff_in[i + k]; + + /* record the actual nr params we copied */ + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; + } + + /* adjust in offset by number of in params used */ + i += nr_in_params + nr_cmn_params; + *buff_in_offset = i; + return metadata; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2492,11 +2635,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; - int err = 0, idx = -1; - int i, j, k; + int num_cpu, trcidr_idx; + int err = 0; + int i, j; u64 *ptr, *hdr = NULL; u64 **metadata = NULL; + u64 hdr_version; /* * sizeof(auxtrace_info_event::type) + @@ -2512,16 +2656,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* First the global part */ ptr = (u64 *) auxtrace_info->priv; - /* Look for version '0' of the header */ - if (ptr[0] != 0) + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + /* print routine will print an error on bad version */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); return -EINVAL; + } - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); if (!hdr) return -ENOMEM; /* Extract header information - see cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) hdr[i] = ptr[i]; num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & @@ -2552,35 +2701,31 @@ int cs_etm__process_auxtrace_info(union perf_event *event, */ for (j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETM_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETM_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETM_PRIV_MAX, + CS_ETM_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETM_ETMTRACEIDR]; - i += CS_ETM_PRIV_MAX; + trcidr_idx = CS_ETM_ETMTRACEIDR; + } else if (ptr[i] == __perf_cs_etmv4_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETMV4_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETMV4_PRIV_MAX, + CS_ETMV4_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; - i += CS_ETMV4_PRIV_MAX; + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } + + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; } /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, idx); + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); /* Something went wrong, no need to continue */ if (!inode) { @@ -2601,7 +2746,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } /* - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and * CS_ETMV4_PRIV_MAX mark how many double words are in the * global metadata, and each cpu's metadata respectively. * The following tests if the correct number of double words was @@ -2703,6 +2848,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, intlist__delete(traceid_list); err_free_hdr: zfree(&hdr); - + /* + * At this point, as a minimum we have valid header. Dump the rest of + * the info section - the print routines will error out on structural + * issues. + */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 4ad925d6d799..e153d02df0de 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -17,23 +17,37 @@ struct perf_session; */ enum { /* Starting with 0x0 */ - CS_HEADER_VERSION_0, + CS_HEADER_VERSION, /* PMU->type (32 bit), total # of CPUs (32 bit) */ CS_PMU_TYPE_CPUS, CS_ETM_SNAPSHOT, - CS_HEADER_VERSION_0_MAX, + CS_HEADER_VERSION_MAX, }; +/* + * Update the version for new format. + * + * New version 1 format adds a param count to the per cpu metadata. + * This allows easy adding of new metadata parameters. + * Requires that new params always added after current ones. + * Also allows client reader to handle file versions that are different by + * checking the number of params in the file vs the number expected. + */ +#define CS_HEADER_CURRENT_VERSION 1 + /* Beginning of header common to both ETMv3 and V4 */ enum { CS_ETM_MAGIC, CS_ETM_CPU, + /* Number of trace config params in following ETM specific block */ + CS_ETM_NR_TRC_PARAMS, + CS_ETM_COMMON_BLK_MAX_V1, }; /* ETMv3/PTM metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETM_ETMTRACEIDR, /* RO, taken from sysFS */ CS_ETM_ETMCCER, @@ -41,10 +55,13 @@ enum { CS_ETM_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1) + /* ETMv4 metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETMV4_TRCTRACEIDR, /* RO, taken from sysFS */ CS_ETMV4_TRCIDR0, @@ -55,6 +72,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) + /* * ETMv3 exception encoding number: * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) @@ -162,7 +182,7 @@ struct cs_etm_packet_queue { #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) -#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL -- Gitee From b1c8cdb6a77d39215c3f9407dfa54a5f3029d43a Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 25 Nov 2022 17:50:07 +0800 Subject: [PATCH 02/42] perf cs-etm: Fix bitmap for option mainline inclusion from mainline-v5.12 commit 8c559e8d6863 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=8c559e8d68630d64d932bada633705f6551427df -------------------------------------------------------------------------- When set option with macros ETM_OPT_CTXTID and ETM_OPT_TS, it wrongly takes these two values (14 and 28 prespectively) as bit masks, but actually both are the offset for bits. But this doesn't lead to further failure due to the AND logic operation will be always true for ETM_OPT_CTXTID / ETM_OPT_TS. This patch uses the BIT() macro for option bits, thus it can request the correct bitmaps for "contextid" and "timestamp" when calling cs_etm_set_option(). Signed-off-by: Suzuki K Poulose Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-3-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-4-mathieu.poirier@linaro.org [Extract the change as a separate patch for easier review] Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index e7fcfe5a36a2..f3fa9dbe191c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -169,17 +169,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_OPT_CTXTID) { + if (option & BIT(ETM_OPT_CTXTID)) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_OPT_TS) { + if (option & BIT(ETM_OPT_TS)) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS))) /* Nothing else is currently supported */ goto out; } @@ -406,7 +406,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_OPT_CTXTID | ETM_OPT_TS); + BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) goto out; } -- Gitee From fa00b92fc6369df76463d071fefe29a094a8ddf1 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 25 Nov 2022 17:50:08 +0800 Subject: [PATCH 03/42] perf cs-etm: Support PID tracing in config mainline inclusion from mainline-v5.11 commit 30cb76aabfb4 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=30cb76aabfb4deab4ffef54882f86df319b4d862 -------------------------------------------------------------------------- If the kernel is running at EL2, the pid of a task is exposed via VMID instead of the CONTEXTID. Add support for this in the perf tool. This patch respects user setting if user has specified any configs from "contextid", "contextid1" or "contextid2"; otherwise, it dynamically sets config based on PMU format "contextid". Signed-off-by: Suzuki K Poulose Co-developed-by: Leo Yan Signed-off-by: Leo Yan Reviewed-by: Mike Leach Reviewed-by: Mathieu Poirier Cc: Al Grant Link: https://lore.kernel.org/r/20210213113220.292229-4-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-5-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/include/linux/coresight-pmu.h | 3 ++ tools/perf/arch/arm/util/cs-etm.c | 61 +++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 0c2cee05c4e3..301991bec285 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -14,14 +14,17 @@ /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 #define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 static inline int coresight_get_trace_id(int cpu) { diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index f3fa9dbe191c..c8e966d794b4 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -67,6 +67,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, char path[PATH_MAX]; int err = -EINVAL; u32 val; + u64 contextid; ptr = container_of(itr, struct cs_etm_recording, itr); cs_etm_pmu = ptr->cs_etm_pmu; @@ -86,25 +87,59 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, goto out; } + /* User has configured for PID tracing, respects it. */ + contextid = evsel->core.attr.config & + (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2)); + /* - * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing - * is supported: - * 0b00000 Context ID tracing is not supported. - * 0b00100 Maximum of 32-bit Context ID size. - * All other values are reserved. + * If user doesn't configure the contextid format, parse PMU format and + * enable PID tracing according to the "contextid" format bits: + * + * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1; + * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2. */ - val = BMVAL(val, 5, 9); - if (!val || val != 0x4) { - err = -EINVAL; - goto out; + if (!contextid) + contextid = perf_pmu__format_bits(&cs_etm_pmu->format, + "contextid"); + + if (contextid & BIT(ETM_OPT_CTXTID)) { + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID + * tracing is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. + */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + pr_err("%s: CONTEXTIDR_EL1 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } + } + + if (contextid & BIT(ETM_OPT_CTXTID2)) { + /* + * TRCIDR2.VMIDOPT[30:29] != 0 and + * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid) + * We can't support CONTEXTIDR in VMID if the size of the + * virtual context id is < 32bit. + * Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us. + */ + if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) { + pr_err("%s: CONTEXTIDR_EL2 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } } /* All good, let the kernel know */ - evsel->core.attr.config |= (1 << ETM_OPT_CTXTID); + evsel->core.attr.config |= contextid; err = 0; out: - return err; } @@ -485,7 +520,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & BIT(ETM_OPT_RETSTK)) config |= BIT(ETM4_CFG_BIT_RETSTK); - + if (config_opts & BIT(ETM_OPT_CTXTID2)) + config |= BIT(ETM4_CFG_BIT_VMID) | + BIT(ETM4_CFG_BIT_VMID_OPT); return config; } -- Gitee From 1f74c7a695bad2103fe3a0256bc5efe793608675 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 25 Nov 2022 17:50:09 +0800 Subject: [PATCH 04/42] perf cs-etm: Add helper cs_etm__get_pid_fmt() mainline inclusion from mainline-v5.11 commit 47f0d94c2037 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=47f0d94c203751ddcfdb296fcf15df20fffcef0c -------------------------------------------------------------------------- This patch adds helper function cs_etm__get_pid_fmt(), by passing parameter "traceID", it returns the PID format. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Cc: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-5-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-6-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 42 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cs-etm.h | 1 + 2 files changed, 43 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ee32d023e9bd..9ac80fc23c58 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -156,6 +157,47 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +/* + * The returned PID format is presented by two bits: + * + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * are enabled at the same time when the session runs on an EL2 kernel. + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be + * recorded in the trace data, the tool will selectively use + * CONTEXTIDR_EL2 as PID. + */ +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +{ + struct int_node *inode; + u64 *metadata, val; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + val = metadata[CS_ETM_ETMCR]; + /* CONTEXTIDR is traced */ + if (val & BIT(ETM_OPT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } else { + val = metadata[CS_ETMV4_TRCCONFIGR]; + /* CONTEXTIDR_EL2 is traced */ + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + *pid_fmt = BIT(ETM_OPT_CTXTID2); + /* CONTEXTIDR_EL1 is traced */ + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index e153d02df0de..85ed11e9d2a7 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -193,6 +193,7 @@ struct cs_etm_packet_queue { int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); -- Gitee From 3dc9a9d82ba647571bfa16927741899a7d1cb3e9 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Fri, 25 Nov 2022 17:50:10 +0800 Subject: [PATCH 05/42] perf cs-etm: Detect pid in VMID for kernel running at EL2 mainline inclusion from mainline-v5.11 commit 8e1488a46dcf category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=8e1488a46dcf73b1f1916d95421e303dbf773fb4 -------------------------------------------------------------------------- The PID of the task could be traced as VMID when the kernel is running at EL2. Teach the decoder to look for VMID when the CONTEXTIDR (Arm32) or CONTEXTIDR_EL1 (Arm64) is invalid but we have a valid VMID. Signed-off-by: Suzuki K Poulose Co-developed-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Al Grant Cc: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-6-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-7-mathieu.poirier@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3f4bc4050477..4052c9ce6e2f 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,6 +6,7 @@ * Author: Mathieu Poirier */ +#include #include #include #include @@ -491,13 +492,42 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { - pid_t tid; + pid_t tid = -1; + static u64 pid_fmt; + int ret; - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ - if (!elem->context.ctxt_id_valid) + /* + * As all the ETMs run at the same exception level, the system should + * have the same PID format crossing CPUs. So cache the PID format + * and reuse it for sequential decoding. + */ + if (!pid_fmt) { + ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); + if (ret) + return OCSD_RESP_FATAL_SYS_ERR; + } + + /* + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + */ + switch (pid_fmt) { + case BIT(ETM_OPT_CTXTID): + if (elem->context.ctxt_id_valid) + tid = elem->context.context_id; + break; + case BIT(ETM_OPT_CTXTID2): + if (elem->context.vmid_valid) + tid = elem->context.vmid; + break; + default: + break; + } + + if (tid == -1) return OCSD_RESP_CONT; - tid = elem->context.context_id; if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) return OCSD_RESP_FATAL_SYS_ERR; -- Gitee From f895fd59f8576c48e6c9b81ffb1a3b2ba1280e56 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:11 +0800 Subject: [PATCH 06/42] perf cs-etm: Refactor timestamp variable names mainline inclusion from mainline-v5.13-rc1 commit aadd6ba40934 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=aadd6ba409344776fc155451684c728822bf4a24 -------------------------------------------------------------------------- Remove ambiguity in variable names relating to timestamps. A later commit will save the sample kernel timestamp in one of the etm structs, so name all elements appropriately to avoid confusion. This is also removes some ambiguity arising from the fact that the --timestamp argument to perf record refers to sample kernel timestamps, and the /timestamp/ event modifier refers to CS timestamps, so the term is overloaded. Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Cc: Alexander Shishkin Cc: Al Grant Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Link: https://lore.kernel.org/r/20210510143248.27423-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 18 ++++---- tools/perf/util/cs-etm.c | 42 +++++++++---------- tools/perf/util/cs-etm.h | 4 +- 3 files changed, 31 insertions(+), 33 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 4052c9ce6e2f..a4178a4b80be 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -276,13 +276,13 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, const uint8_t trace_chan_id) { /* No timestamp packet has been received, nothing to do */ - if (!packet_queue->timestamp) + if (!packet_queue->cs_timestamp) return OCSD_RESP_CONT; - packet_queue->timestamp = packet_queue->next_timestamp; + packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; /* Estimate the timestamp for the next range packet */ - packet_queue->next_timestamp += packet_queue->instr_count; + packet_queue->next_cs_timestamp += packet_queue->instr_count; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -308,8 +308,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * Function do_soft_timestamp() will report the value to the front end, * hence asking the decoder to keep decoding rather than stopping. */ - if (packet_queue->timestamp) { - packet_queue->next_timestamp = elem->timestamp; + if (packet_queue->cs_timestamp) { + packet_queue->next_cs_timestamp = elem->timestamp; return OCSD_RESP_CONT; } @@ -320,8 +320,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * which instructions started by substracting the number of instructions * executed to the timestamp. */ - packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; - packet_queue->next_timestamp = elem->timestamp; + packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + packet_queue->next_cs_timestamp = elem->timestamp; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -334,8 +334,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, static void cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) { - packet_queue->timestamp = 0; - packet_queue->next_timestamp = 0; + packet_queue->cs_timestamp = 0; + packet_queue->next_cs_timestamp = 0; packet_queue->instr_count = 0; } diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 9ac80fc23c58..a0e2af3593c1 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -38,8 +38,6 @@ #include #include "util/synthetic-events.h" -#define MAX_TIMESTAMP (~0ULL) - struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -86,7 +84,7 @@ struct cs_etm_queue { struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; - u8 pending_timestamp; + u8 pending_timestamp_chan_id; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -208,7 +206,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, * be more than one channel per cs_etm_queue, we need to specify * what traceID queue needs servicing. */ - etmq->pending_timestamp = trace_chan_id; + etmq->pending_timestamp_chan_id = trace_chan_id; } static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, @@ -216,22 +214,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, { struct cs_etm_packet_queue *packet_queue; - if (!etmq->pending_timestamp) + if (!etmq->pending_timestamp_chan_id) return 0; if (trace_chan_id) - *trace_chan_id = etmq->pending_timestamp; + *trace_chan_id = etmq->pending_timestamp_chan_id; packet_queue = cs_etm__etmq_get_packet_queue(etmq, - etmq->pending_timestamp); + etmq->pending_timestamp_chan_id); if (!packet_queue) return 0; /* Acknowledge pending status */ - etmq->pending_timestamp = 0; + etmq->pending_timestamp_chan_id = 0; /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ - return packet_queue->timestamp; + return packet_queue->cs_timestamp; } static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) @@ -814,7 +812,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, int ret = 0; unsigned int cs_queue_nr; u8 trace_chan_id; - u64 timestamp; + u64 cs_timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) @@ -854,7 +852,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, /* * Run decoder on the trace block. The decoder will stop when - * encountering a timestamp, a full packet queue or the end of + * encountering a CS timestamp, a full packet queue or the end of * trace for that block. */ ret = cs_etm__decode_data_block(etmq); @@ -865,10 +863,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all * the timestamp calculation for us. */ - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); /* We found a timestamp, no need to continue. */ - if (timestamp) + if (cs_timestamp) break; /* @@ -892,7 +890,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * queue and will be processed in cs_etm__process_queues(). */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); out: return ret; } @@ -2221,7 +2219,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) int ret = 0; unsigned int cs_queue_nr, queue_nr; u8 trace_chan_id; - u64 timestamp; + u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq; @@ -2283,9 +2281,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) if (ret) goto out; - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); - if (!timestamp) { + if (!cs_timestamp) { /* * Function cs_etm__decode_data_block() returns when * there is no more traces to decode in the current @@ -2308,7 +2306,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) * this queue/traceID. */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } out: @@ -2380,7 +2378,7 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_tool *tool) { int err = 0; - u64 timestamp; + u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -2394,11 +2392,11 @@ static int cs_etm__process_event(struct perf_session *session, } if (sample->time && (sample->time != (u64) -1)) - timestamp = sample->time; + sample_kernel_timestamp = sample->time; else - timestamp = 0; + sample_kernel_timestamp = 0; - if (timestamp || etm->timeless_decoding) { + if (sample_kernel_timestamp || etm->timeless_decoding) { err = cs_etm__update_queues(etm); if (err) return err; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 85ed11e9d2a7..e2841ac9676d 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -170,8 +170,8 @@ struct cs_etm_packet_queue { u32 head; u32 tail; u32 instr_count; - u64 timestamp; - u64 next_timestamp; + u64 cs_timestamp; + u64 next_cs_timestamp; struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; }; -- Gitee From aa063d64f5c297c740deeb16748ccbb6ba5580a1 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:12 +0800 Subject: [PATCH 07/42] perf cs-etm: Set time on synthesised samples to preserve ordering mainline inclusion from mainline-v5.13-rc1 commit 1ac9e0b5731a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1ac9e0b5731ad732ddc045cfcfd6739e4e12f3b5 -------------------------------------------------------------------------- The following attribute is set when synthesising samples in timed decoding mode: attr.sample_type |= PERF_SAMPLE_TIME; This results in new samples that appear to have timestamps but because we don't assign any timestamps to the samples, when the resulting inject file is opened again, the synthesised samples will be on the wrong side of the MMAP or COMM events. For example, this results in the samples being associated with the perf binary, rather than the target of the record: perf record -e cs_etm/@tmc_etr0/u top perf inject -i perf.data -o perf.inject --itrace=i100il perf report -i perf.inject Where 'Command' == perf should show as 'top': # Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles # ........ ....... .................... ...................... ...................... .................. # 31.08% perf [unknown] [.] 0x000000000040c3f8 [.] 0x000000000040c3e8 - If the perf.data file is opened directly with perf, without the inject step, then this already works correctly because the events are synthesised after the COMM and MMAP events and no second sorting happens. Re-sorting only happens when opening the perf.inject file for the second time so timestamps are needed. Using the timestamp from the AUX record mirrors the current behaviour when opening directly with perf, because the events are generated on the call to cs_etm__process_queues(). The ETM trace could optionally contain time stamps, but there is no way to correlate this with the kernel time. So, the best available time value is that of the AUX_RECORD header. This patch uses the timestamp from the header for all the samples. The ordering of the samples are implicit in the trace and thus is fine with respect to relative ordering. Reviewed-by: Leo Yan Reviewed-by: Mathieu Poirier Co-developed-by: Al Grant Signed-off-by: Al Grant Signed-off-by: James Clark Acked-by: Suzuki K Poulos Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Link: https://lore.kernel.org/r/20210510143248.27423-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a0e2af3593c1..4e16d849b874 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -54,6 +54,7 @@ struct cs_etm_auxtrace { u8 sample_instructions; int num_cpu; + u64 latest_kernel_timestamp; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; @@ -1192,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = addr; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1248,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = ip; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -2412,9 +2417,15 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event); - if (!etm->timeless_decoding && - event->header.type == PERF_RECORD_AUX) + if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) { + /* + * Record the latest kernel timestamp available in the header + * for samples so that synthesised samples occur from this point + * onwards. + */ + etm->latest_kernel_timestamp = sample_kernel_timestamp; return cs_etm__process_queues(etm); + } return 0; } -- Gitee From ddfb93ea67a809b9a3f85e72f05dc864cd53232c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Nov 2022 17:50:13 +0800 Subject: [PATCH 08/42] perf auxtrace: Add Z itrace option for timeless decoding mainline inclusion from mainline-v5.13-rc1 commit 18f4949427dc category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=18f4949427dc80bd9027001d28118cd8d555d890 -------------------------------------------------------------------------- Issues correlating timestamps can be avoided with timeless decoding. Add an option for that, so that timeless decoding can be used even when timestamps are present. Signed-off-by: Adrian Hunter Reviewed-by: Andi Kleen Cc: Jiri Olsa Link: https://lore.kernel.org/r/20210430070309.17624-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.c | 3 +++ tools/perf/util/auxtrace.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 079cdfabb352..9d5a2a222fa0 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -20,6 +20,7 @@ L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + Z prefer to ignore timestamps (so-called "timeless" decoding) The default is all events i.e. the same as --itrace=ibxwpe, except for perf script where it is --itrace=ce diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ead536d02160..5df645f1ef13 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1561,6 +1561,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'q': synth_opts->quick += 1; break; + case 'Z': + synth_opts->timeless_decoding = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index ac94d3974e16..3a4e921f9d48 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -90,6 +90,7 @@ enum itrace_period_type { * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events + * @timeless_decoding: prefer "timeless" decoding i.e. ignore timestamps * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -129,6 +130,7 @@ struct itrace_synth_opts { bool tlb; bool remote_access; bool mem; + bool timeless_decoding; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; -- Gitee From 928f4346b5973aeabfa886969f103c4fe189c3eb Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:14 +0800 Subject: [PATCH 09/42] perf cs-etm: Move synth_opts initialisation mainline inclusion from mainline-v5.13-rc1 commit cac314186718 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=cac314186718f16f494b892de4dc67215ee05ef7 -------------------------------------------------------------------------- Move initialisation of synth_opts earlier in the function so that synth_opts can be used at an earlier stage in a later commit. Signed-off-by: James Clark Reviewed-by: Leo Yan Cc: Alexander Shishkin Cc: Al Grant Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210517131741.3027-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 4e16d849b874..afecbe8dc023 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2819,6 +2819,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_free_etm; + if (session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); + etm->synth_opts.callchain = false; + } + etm->session = session; etm->machine = &session->machines.host; @@ -2863,14 +2871,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return 0; } - if (session->itrace_synth_opts->set) { - etm->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&etm->synth_opts, - session->itrace_synth_opts->default_no_sample); - etm->synth_opts.callchain = false; - } - err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; -- Gitee From 46ce0170014c10af56b70465001f1cc32c97749b Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:15 +0800 Subject: [PATCH 10/42] perf cs-etm: Start reading 'Z' --itrace option mainline inclusion from mainline-v5.13-rc1 commit c36c1ef6f691 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=c36c1ef6f6912114c7fb0aa8f7c0af2634704de7 -------------------------------------------------------------------------- Recently the 'Z' --itrace option was added to override detection of timeless decoding. This is also useful in Coresight to work around issues with invalid timestamps on some hardware. When the 'Z' option is provided, the existing timeless decoding mode will be used, even if timestamps were recorded. Signed-off-by: James Clark Reviewed-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210517131741.3027-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index afecbe8dc023..3392f37ff152 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2473,6 +2473,10 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) struct evlist *evlist = etm->session->evlist; bool timeless_decoding = true; + /* Override timeless mode with user input from --itrace=Z */ + if (etm->synth_opts.timeless_decoding) + return true; + /* * Circle through the list of event and complain if we find one * with the time bit set. -- Gitee From 6297a5410df3ddb340ea2ad45df3de4879c29934 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:16 +0800 Subject: [PATCH 11/42] perf cs-etm: Prevent and warn on underflows during timestamp calculation. mainline inclusion from mainline-v5.13-rc1 commit c1a6165a639c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=c1a6165a639c2d85f06be7dd9c29ce1cfd994775 -------------------------------------------------------------------------- When a zero timestamp is encountered, warn once. This is to make hardware or configuration issues visible. Also suggest that the issue can be worked around with the --itrace=Z option. When an underflow with a non-zero timestamp occurs, warn every time. This is an unexpected scenario, and with increasing timestamps, it's unlikely that it would occur more than once, therefore it should be ok to warn every time. Only try to calculate the timestamp by subtracting the instruction count if neither of the above cases are true. This makes attempting to decode files with zero timestamps in non-timeless mode more consistent. Currently it can half work if the timestamp wraps around and becomes non-zero, although the behavior is undefined and unpredictable. Signed-off-by: James Clark Reviewed-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210517131741.3027-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 44 ++++++++++++++----- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index a4178a4b80be..662df00d7bee 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,6 +6,7 @@ * Author: Mathieu Poirier */ +#include #include #include #include @@ -17,6 +18,7 @@ #include "cs-etm.h" #include "cs-etm-decoder.h" +#include "debug.h" #include "intlist.h" /* use raw logging */ @@ -294,7 +296,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, - const uint8_t trace_chan_id) + const uint8_t trace_chan_id, + const ocsd_trc_index_t indx) { struct cs_etm_packet_queue *packet_queue; @@ -313,14 +316,32 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, return OCSD_RESP_CONT; } - /* - * This is the first timestamp we've seen since the beginning of traces - * or a discontinuity. Since timestamps packets are generated *after* - * range packets have been generated, we need to estimate the time at - * which instructions started by substracting the number of instructions - * executed to the timestamp. - */ - packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + if (!elem->timestamp) { + /* + * Zero timestamps can be seen due to misconfiguration or hardware bugs. + * Warn once, and don't try to subtract instr_count as it would result in an + * underflow. + */ + packet_queue->cs_timestamp = 0; + WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR + ". Decoding may be improved with --itrace=Z...\n", indx); + } else if (packet_queue->instr_count > elem->timestamp) { + /* + * Sanity check that the elem->timestamp - packet_queue->instr_count would not + * result in an underflow. Warn and clamp at 0 if it would. + */ + packet_queue->cs_timestamp = 0; + pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx); + } else { + /* + * This is the first timestamp we've seen since the beginning of traces + * or a discontinuity. Since timestamps packets are generated *after* + * range packets have been generated, we need to estimate the time at + * which instructions started by subtracting the number of instructions + * executed to the timestamp. + */ + packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + } packet_queue->next_cs_timestamp = elem->timestamp; packet_queue->instr_count = 0; @@ -542,7 +563,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, - const ocsd_trc_index_t indx __maybe_unused, + const ocsd_trc_index_t indx, const u8 trace_chan_id __maybe_unused, const ocsd_generic_trace_elem *elem) { @@ -579,7 +600,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( break; case OCSD_GEN_TRC_ELEM_TIMESTAMP: resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, - trace_chan_id); + trace_chan_id, + indx); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: resp = cs_etm_decoder__set_tid(etmq, packet_queue, -- Gitee From bbcbb13e710c0ec39fdce3a0f70e7f3ddce77c48 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 25 Nov 2022 17:50:17 +0800 Subject: [PATCH 12/42] perf cs-etm: Remove callback cs_etm_find_snapshot() mainline inclusion from mainline-v5.13-rc1 commit 2f01c200d440 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=2f01c200d4405c4562e45e8bb4de44a5ce37b217 -------------------------------------------------------------------------- The callback cs_etm_find_snapshot() is invoked for snapshot mode, its main purpose is to find the correct AUX trace data and returns "head" and "old" (we can call "old" as "old head") to the caller, the caller __auxtrace_mmap__read() uses these two pointers to decide the AUX trace data size. This patch removes cs_etm_find_snapshot() with below reasons: - The first thing in cs_etm_find_snapshot() is to check if the head has wrapped around, if it is not, directly bails out. The checking is pointless, this is because the "head" and "old" pointers both are monotonical increasing so they never wrap around. - cs_etm_find_snapshot() adjusts the "head" and "old" pointers and assumes the AUX ring buffer is fully filled with the hardware trace data, so it always subtracts the difference "mm->len" from "head" to get "old". Let's imagine the snapshot is taken in very short interval, the tracers only fill a small chunk of the trace data into the AUX ring buffer, in this case, it's wrongly to copy the whole the AUX ring buffer to perf file. - As the "head" and "old" pointers are monotonically increased, the function __auxtrace_mmap__read() handles these two pointers properly. It calculates the reminders for these two pointers, and the size is clamped to be never more than "snapshot_size". We can simply reply on the function __auxtrace_mmap__read() to calculate the correct result for data copying, it's not necessary to add Arm CoreSight specific callback. Signed-off-by: Leo Yan Reviewed-by: James Clark Tested-by: James Clark Cc: Alexander Shishkin Cc: Daniel Kiss Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Link: http://lore.kernel.org/lkml/20210701093537.90759-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 133 ------------------------------ 1 file changed, 133 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c8e966d794b4..435aaf7e7de1 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -38,8 +38,6 @@ struct cs_etm_recording { struct auxtrace_record itr; struct perf_pmu *cs_etm_pmu; struct evlist *evlist; - int wrapped_cnt; - bool *wrapped; bool snapshot_mode; size_t snapshot_size; }; @@ -730,135 +728,6 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return 0; } -static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx) -{ - bool *wrapped; - int cnt = ptr->wrapped_cnt; - - /* Make @ptr->wrapped as big as @idx */ - while (cnt <= idx) - cnt++; - - /* - * Free'ed in cs_etm_recording_free(). Using realloc() to avoid - * cross compilation problems where the host's system supports - * reallocarray() but not the target. - */ - wrapped = realloc(ptr->wrapped, cnt * sizeof(bool)); - if (!wrapped) - return -ENOMEM; - - wrapped[cnt - 1] = false; - ptr->wrapped_cnt = cnt; - ptr->wrapped = wrapped; - - return 0; -} - -static bool cs_etm_buffer_has_wrapped(unsigned char *buffer, - size_t buffer_size, u64 head) -{ - u64 i, watermark; - u64 *buf = (u64 *)buffer; - size_t buf_size = buffer_size; - - /* - * We want to look the very last 512 byte (chosen arbitrarily) in - * the ring buffer. - */ - watermark = buf_size - 512; - - /* - * @head is continuously increasing - if its value is equal or greater - * than the size of the ring buffer, it has wrapped around. - */ - if (head >= buffer_size) - return true; - - /* - * The value of @head is somewhere within the size of the ring buffer. - * This can be that there hasn't been enough data to fill the ring - * buffer yet or the trace time was so long that @head has numerically - * wrapped around. To find we need to check if we have data at the very - * end of the ring buffer. We can reliably do this because mmap'ed - * pages are zeroed out and there is a fresh mapping with every new - * session. - */ - - /* @head is less than 512 byte from the end of the ring buffer */ - if (head > watermark) - watermark = head; - - /* - * Speed things up by using 64 bit transactions (see "u64 *buf" above) - */ - watermark >>= 3; - buf_size >>= 3; - - /* - * If we find trace data at the end of the ring buffer, @head has - * been there and has numerically wrapped around at least once. - */ - for (i = watermark; i < buf_size; i++) - if (buf[i]) - return true; - - return false; -} - -static int cs_etm_find_snapshot(struct auxtrace_record *itr, - int idx, struct auxtrace_mmap *mm, - unsigned char *data, - u64 *head, u64 *old) -{ - int err; - bool wrapped; - struct cs_etm_recording *ptr = - container_of(itr, struct cs_etm_recording, itr); - - /* - * Allocate memory to keep track of wrapping if this is the first - * time we deal with this *mm. - */ - if (idx >= ptr->wrapped_cnt) { - err = cs_etm_alloc_wrapped_array(ptr, idx); - if (err) - return err; - } - - /* - * Check to see if *head has wrapped around. If it hasn't only the - * amount of data between *head and *old is snapshot'ed to avoid - * bloating the perf.data file with zeros. But as soon as *head has - * wrapped around the entire size of the AUX ring buffer it taken. - */ - wrapped = ptr->wrapped[idx]; - if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) { - wrapped = true; - ptr->wrapped[idx] = true; - } - - pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", - __func__, idx, (size_t)*old, (size_t)*head, mm->len); - - /* No wrap has occurred, we can just use *head and *old. */ - if (!wrapped) - return 0; - - /* - * *head has wrapped around - adjust *head and *old to pickup the - * entire content of the AUX buffer. - */ - if (*head >= mm->len) { - *old = *head - mm->len; - } else { - *head += mm->len; - *old = *head - mm->len; - } - - return 0; -} - static int cs_etm_snapshot_start(struct auxtrace_record *itr) { struct cs_etm_recording *ptr = @@ -896,7 +765,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr) struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); - zfree(&ptr->wrapped); free(ptr); } @@ -924,7 +792,6 @@ struct auxtrace_record *cs_etm_record_init(int *err) ptr->itr.recording_options = cs_etm_recording_options; ptr->itr.info_priv_size = cs_etm_info_priv_size; ptr->itr.info_fill = cs_etm_info_fill; - ptr->itr.find_snapshot = cs_etm_find_snapshot; ptr->itr.snapshot_start = cs_etm_snapshot_start; ptr->itr.snapshot_finish = cs_etm_snapshot_finish; ptr->itr.reference = cs_etm_reference; -- Gitee From 489bd2d946c5d3166fe786ce1d9d8d065b93577d Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:18 +0800 Subject: [PATCH 13/42] perf cs-etm: Delay decode of non-timeless data until cs_etm__flush_events() mainline inclusion from mainline-v5.13-rc1 commit 0323dea3185a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=0323dea3185a5ca347a5add7cae56ad37465338d -------------------------------------------------------------------------- Currently, timeless mode starts the decode on PERF_RECORD_EXIT, and non-timeless mode starts decoding on the fist PERF_RECORD_AUX record. This can cause the "data has no samples!" error if the first PERF_RECORD_AUX record comes before the first (or any relevant) PERF_RECORD_MMAP2 record because the mmaps are required by the decoder to access the binary data. This change pushes the start of non-timeless decoding to the very end of parsing the file. The PERF_RECORD_EXIT event can't be used because it might not exist in system-wide or snapshot modes. I have not been able to find the exact cause for the events to be intermittently in the wrong order in the basic scenario: perf record -e cs_etm/@tmc_etr0/u top But it can be made to happen every time with the --delay option. This is because "enable_on_exec" is disabled, which causes tracing to start before the process to be launched is exec'd. For example: perf record -e cs_etm/@tmc_etr0/u --delay=1 top perf report -D | grep 'AUX\|MAP' 0 16714475632740 0x520 [0x40]: PERF_RECORD_AUX offset: 0 size: 0x30 flags: 0 [] 0 16714476494960 0x5d0 [0x40]: PERF_RECORD_AUX offset: 0x30 size: 0x30 flags: 0 [] 0 16714478208900 0x660 [0x40]: PERF_RECORD_AUX offset: 0x60 size: 0x30 flags: 0 [] 4294967295 16714478293340 0x700 [0x70]: PERF_RECORD_MMAP2 8712/8712: [0x557a460000(0x54000) @ 0 00:17 5329258 0]: r-xp /usr/bin/top 4294967295 16714478353020 0x770 [0x88]: PERF_RECORD_MMAP2 8712/8712: [0x7f86f72000(0x34000) @ 0 00:17 5214354 0]: r-xp /usr/lib/aarch64-linux-gnu/ld-2.31.so Another scenario in which decoding from the first aux record fails is a workload that forks. Although the aux record comes after 'bash', it comes before 'top', which is what we are interested in. For example: perf record -e cs_etm/@tmc_etr0/u -- bash -c top perf report -D | grep 'AUX\|MAP' 4294967295 16853946421300 0x510 [0x70]: PERF_RECORD_MMAP2 8723/8723: [0x558f280000(0x142000) @ 0 00:17 5213953 0]: r-xp /usr/bin/bash 4294967295 16853946543560 0x580 [0x88]: PERF_RECORD_MMAP2 8723/8723: [0x7fbba6e000(0x34000) @ 0 00:17 5214354 0]: r-xp /usr/lib/aarch64-linux-gnu/ld-2.31.so 4294967295 16853946628420 0x608 [0x68]: PERF_RECORD_MMAP2 8723/8723: [0x7fbba9e000(0x1000) @ 0 00:00 0 0]: r-xp [vdso] 0 16853947067300 0x690 [0x40]: PERF_RECORD_AUX offset: 0 size: 0x3a60 flags: 0 [] ... 0 16853966602580 0x1758 [0x40]: PERF_RECORD_AUX offset: 0xc2470 size: 0x30 flags: 0 [] 4294967295 16853967119860 0x1818 [0x70]: PERF_RECORD_MMAP2 8723/8723: [0x5559e70000(0x54000) @ 0 00:17 5329258 0]: r-xp /usr/bin/top 4294967295 16853967181620 0x1888 [0x88]: PERF_RECORD_MMAP2 8723/8723: [0x7f9ed06000(0x34000) @ 0 00:17 5214354 0]: r-xp /usr/lib/aarch64-linux-gnu/ld-2.31.so 4294967295 16853967237180 0x1910 [0x68]: PERF_RECORD_MMAP2 8723/8723: [0x7f9ed36000(0x1000) @ 0 00:00 0 0]: r-xp [vdso] A third scenario is when the majority of time is spent in a shared library that is not loaded at startup. For example a dynamically loaded plugin. Testing ======= Testing was done by checking if any samples that are present in the old output are missing from the new output. Timestamps must be stripped out with awk because now they are set to the last AUX sample, rather than the first: ./perf script $4 | awk '!($4="")' > new.script ./perf-default script $4 | awk '!($4="")' > default.script comm -13 <(sort -u new.script) <(sort -u default.script) Testing showed that the new output is a superset of the old. When lines appear in the comm output, it is not because they are missing but because [unknown] is now resolved to sensible locations. For example last putp branch here now resolves to libtinfo, so it's not missing from the output, but is actually improved: Old: top 305 [001] 1 branches:uH: 402830 _init+0x30 (/usr/bin/top.procps) => 404a1c [unknown] (/usr/bin/top.procps) top 305 [001] 1 branches:uH: 404a20 [unknown] (/usr/bin/top.procps) => 402970 putp@plt+0x0 (/usr/bin/top.procps) top 305 [001] 1 branches:uH: 40297c putp@plt+0xc (/usr/bin/top.procps) => 0 [unknown] ([unknown]) New: top 305 [001] 1 branches:uH: 402830 _init+0x30 (/usr/bin/top.procps) => 404a1c [unknown] (/usr/bin/top.procps) top 305 [001] 1 branches:uH: 404a20 [unknown] (/usr/bin/top.procps) => 402970 putp@plt+0x0 (/usr/bin/top.procps) top 305 [001] 1 branches:uH: 40297c putp@plt+0xc (/usr/bin/top.procps) => 7f8ab39208 putp+0x0 (/lib/libtinfo.so.5.9) In the following two modes, decoding now works and the "data has no samples!" error is not displayed any more: perf record -e cs_etm/@tmc_etr0/u -- bash -c top perf record -e cs_etm/@tmc_etr0/u --delay=1 top In snapshot mode, there is also an improvement to decoding. Previously samples for the 'kill' process that was used to send SIGUSR2 were completely missing, because the process hadn't started yet. But now there are additional samples present: perf record -e cs_etm/@tmc_etr0/u --snapshot -a perf script stress 19380 [003] 161627.938153: 1000000 instructions:uH: aaaabb612fb4 [unknown] (/usr/bin/stress) kill 19644 [000] 161627.938153: 1000000 instructions:uH: ffffae0ef210 [unknown] (/lib/aarch64-linux-gnu/ld-2.27.so) stress 19380 [003] 161627.938153: 1000000 instructions:uH: ffff9e754d40 random_r+0x20 (/lib/aarch64-linux-gnu/libc-2.27.so) Also tested was the round trip of 'perf inject' followed by 'perf report' which has the same differences and improvements. Signed-off-by: James Clark Reviewed-by: Leo Yan Tested-by: Leo Yan Acked-by: Mathieu Poirier Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210609130421.13934-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3392f37ff152..3b3012f3a2f0 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2407,6 +2407,11 @@ static int cs_etm__process_event(struct perf_session *session, return err; } + /* + * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We + * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because + * ETM_OPT_CTXTID is not enabled. + */ if (etm->timeless_decoding && event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, @@ -2424,7 +2429,6 @@ static int cs_etm__process_event(struct perf_session *session, * onwards. */ etm->latest_kernel_timestamp = sample_kernel_timestamp; - return cs_etm__process_queues(etm); } return 0; -- Gitee From adc97cc29c9c6ee6bfeae2b03358f55e02fd0cdd Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:19 +0800 Subject: [PATCH 14/42] perf cs-etm: Split Coresight decode by aux records mainline inclusion from mainline-v5.13-rc1 commit 83d1fc92d4cd category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=83d1fc92d4cdd1ea2d229347ddf11ea2aa751059 -------------------------------------------------------------------------- Populate the auxtrace queues using AUX records rather than whole auxtrace buffers so that the decoder is reset between each aux record. This is similar to the auxtrace_queues__process_index() -> auxtrace_queues__add_indexed_event() flow where perf_session__peek_event() is used to read AUXTRACE events out of random positions in the file based on the auxtrace index. But now we loop over all PERF_RECORD_AUX events instead of AUXTRACE buffers. For each PERF_RECORD_AUX event, we find the corresponding AUXTRACE buffer using the index, and add a fragment of that buffer to the auxtrace queues. No other changes to decoding were made, apart from populating the auxtrace queues. The result of decoding is identical to before, except in cases where decoding failed completely, due to not resetting the decoder. The reason for this change is because AUX records are emitted any time tracing is disabled, for example when the process is scheduled out. Because ETM was disabled and enabled again, the decoder also needs to be reset to force the search for a sync packet. Otherwise there would be fatal decoding errors. Testing ======= Testing was done with the following script, to diff the decoding results between the patched and un-patched versions of perf: #!/bin/bash set -ex $1 script -i $3 $4 > split.script $2 script -i $3 $4 > default.script diff split.script default.script | head -n 20 And it was run like this, with various itrace options depending on the quantity of synthesised events: compare.sh ./perf-patched ./perf-default perf-per-cpu-2-threads.data --itrace=i100000ns No changes in output were observed in the following scenarios: * Simple per-cpu perf record -e cs_etm/@tmc_etr0/u top * Per-thread, single thread perf record -e cs_etm/@tmc_etr0/u --per-thread ./threads_C * Per-thread multiple threads (but only one thread collected data): perf record -e cs_etm/@tmc_etr0/u --per-thread --pid 4596,4597 * Per-thread multiple threads (both threads collected data): perf record -e cs_etm/@tmc_etr0/u --per-thread --pid 4596,4597 * Per-cpu explicit threads: perf record -e cs_etm/@tmc_etr0/u --pid 853,854 * System-wide (per-cpu): perf record -e cs_etm/@tmc_etr0/u -a * No data collected (no aux buffers) Can happen with any command when run for a short period * Containing truncated records Can happen with any command * Containing aux records with 0 size Can happen with any command * Snapshot mode (various files with and without buffer wrap) perf record -e cs_etm/@tmc_etr0/u -a --snapshot Some differences were observed in the following scenario: * Snapshot mode (with duplicate buffers) perf record -e cs_etm/@tmc_etr0/u -a --snapshot Fewer samples are generated in snapshot mode if duplicate buffers were gathered because buffers with the same offset are now only added once. This gives different, but more correct results and no duplicate data is decoded any more. Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Tested-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210624164303.28632-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 168 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3b3012f3a2f0..8907fcb150e3 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2683,6 +2683,172 @@ static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, return metadata; } +/** + * Puts a fragment of an auxtrace buffer into the auxtrace queues based + * on the bounds of aux_event, if it matches with the buffer that's at + * file_offset. + * + * Normally, whole auxtrace buffers would be added to the queue. But we + * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder + * is reset across each buffer, so splitting the buffers up in advance has + * the same effect. + */ +static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, + struct perf_record_aux *aux_event, struct perf_sample *sample) +{ + int err; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *auxtrace_event_union; + struct perf_record_auxtrace *auxtrace_event; + union perf_event auxtrace_fragment; + __u64 aux_offset, aux_size; + + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* + * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got + * from looping through the auxtrace index. + */ + err = perf_session__peek_event(session, file_offset, buf, + PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); + if (err) + return err; + auxtrace_event = &auxtrace_event_union->auxtrace; + if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) + return -EINVAL; + + if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || + auxtrace_event->header.size != sz) { + return -EINVAL; + } + + /* + * In per-thread mode, CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match. + */ + if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || + auxtrace_event->cpu != sample->cpu) + return 1; + + if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { + /* + * Clamp size in snapshot mode. The buffer size is clamped in + * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect + * the buffer size. + */ + aux_size = min(aux_event->aux_size, auxtrace_event->size); + + /* + * In this mode, the head also points to the end of the buffer so aux_offset + * needs to have the size subtracted so it points to the beginning as in normal mode + */ + aux_offset = aux_event->aux_offset - aux_size; + } else { + aux_size = aux_event->aux_size; + aux_offset = aux_event->aux_offset; + } + + if (aux_offset >= auxtrace_event->offset && + aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { + /* + * If this AUX event was inside this buffer somewhere, create a new auxtrace event + * based on the sizes of the aux event, and queue that fragment. + */ + auxtrace_fragment.auxtrace = *auxtrace_event; + auxtrace_fragment.auxtrace.size = aux_size; + auxtrace_fragment.auxtrace.offset = aux_offset; + file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; + + pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); + return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + } + + /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ + return 1; +} + +static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + struct perf_sample sample; + int ret; + struct auxtrace_index_entry *ent; + struct auxtrace_index *auxtrace_index; + struct evsel *evsel; + size_t i; + + /* Don't care about any other events, we're only queuing buffers for AUX events */ + if (event->header.type != PERF_RECORD_AUX) + return 0; + + if (event->header.size < sizeof(struct perf_record_aux)) + return -EINVAL; + + /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ + if (!event->aux.aux_size) + return 0; + + /* + * Parse the sample, we need the sample_id_all data that comes after the event so that the + * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. + */ + evsel = perf_evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + ret = evsel__parse_sample(evsel, event, &sample); + if (ret) + return ret; + + /* + * Loop through the auxtrace index to find the buffer that matches up with this aux event. + */ + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { + for (i = 0; i < auxtrace_index->nr; i++) { + ent = &auxtrace_index->entries[i]; + ret = cs_etm__queue_aux_fragment(session, ent->file_offset, + ent->sz, &event->aux, &sample); + /* + * Stop search on error or successful values. Continue search on + * 1 ('not found') + */ + if (ret != 1) + return ret; + } + } + + /* + * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but + * don't exit with an error because it will still be possible to decode other aux records. + */ + pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); + return 0; +} + +static int cs_etm__queue_aux_records(struct perf_session *session) +{ + struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, + struct auxtrace_index, list); + if (index && index->nr > 0) + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__queue_aux_records_cb, NULL); + + /* + * We would get here if there are no entries in the index (either no auxtrace + * buffers or no index at all). Fail silently as there is the possibility of + * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still + * false. + * + * In that scenario, buffers will not be split by AUX records. + */ + return 0; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2883,7 +3049,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_delete_thread; - err = auxtrace_queues__process_index(&etm->queues, session); + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; -- Gitee From 138bf868bb6cb90869a41c748578849a9dde67b4 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:20 +0800 Subject: [PATCH 15/42] perf cs-etm: Split --dump-raw-trace by AUX records mainline inclusion from mainline-v5.13-rc1 commit 48e8a7b5a551 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=48e8a7b5a551f956002b60d2095bdfb58db96e59 -------------------------------------------------------------------------- Currently --dump-raw-trace skips queueing and splitting buffers because of an early exit condition in cs_etm__process_auxtrace_info(). Once that is removed we can print the split data by using the queues and searching for split buffers with the same reference as the one that is currently being processed. This keeps the same behaviour of dumping in file order when an AUXTRACE event appears, rather than moving trace dump to where AUX records are in the file. There will be a newline and size printout for each fragment. For example this buffer is comprised of two AUX records, but was printed as one: 0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0 offset: 0 ref: 0x491a4dfc52fc0e6e idx: 0 t . ... CoreSight ETM Trace data: size 160 bytes Idx:0; ID:10; I_ASYNC : Alignment Synchronisation. Idx:12; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:17; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000; Idx:80; ID:10; I_ASYNC : Alignment Synchronisation. Idx:92; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:97; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4; But is now printed as two fragments: 0 0 0x8098 [0x30]: PERF_RECORD_AUXTRACE size: 0xa0 offset: 0 ref: 0x491a4dfc52fc0e6e idx: 0 t . ... CoreSight ETM Trace data: size 80 bytes Idx:0; ID:10; I_ASYNC : Alignment Synchronisation. Idx:12; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:17; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0x0000000000000000; . ... CoreSight ETM Trace data: size 80 bytes Idx:80; ID:10; I_ASYNC : Alignment Synchronisation. Idx:92; ID:10; I_TRACE_INFO : Trace Info.; INFO=0x0 { CC.0 } Idx:97; ID:10; I_ADDR_L_64IS0 : Address, Long, 64 bit, IS0.; Addr=0xFFFFDE2AD3FD76D4; Decoding errors that appeared in problematic files are now not present, for example: Idx:808; ID:1c; I_BAD_SEQUENCE : Invalid Sequence in packet.[I_ASYNC] ... PKTP_ETMV4I_0016 : 0x0014 (OCSD_ERR_INVALID_PCKT_HDR) [Invalid packet header]; TrcIdx=822 Signed-off-by: James Clark Reviewed-by: Mathieu Poirier Tested-by: Leo Yan Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Branislav Rankov Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210624164303.28632-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 8907fcb150e3..927f3ab2b485 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session, return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, cs_etm__dump_event(etm, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; } err = cs_etm__synth_events(etm, session); -- Gitee From 227762e6df11ac2eb026049d0356347dd09febc0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 25 Nov 2022 17:50:21 +0800 Subject: [PATCH 16/42] tools headers UAPI: Sync perf_event.h with the kernel sources mainline inclusion from mainline-v5.13-rc1 commit 71d7924b3e8a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=71d7924b3e8acaca6a3b0fc3261170031ada3b70 -------------------------------------------------------------------------- To pick up the changes in: 2b26f0aa004995f4 ("perf: Support only inheriting events if cloned with CLONE_THREAD") 2e498d0a74e5b88a ("perf: Add support for event removal on exec") 547b60988e631f74 ("perf: aux: Add flags for the buffer format") 55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE") 7dde51767ca5339e ("perf: aux: Add CoreSight PMU buffer formats") 97ba62b278674293 ("perf: Add support for SIGTRAP on perf events") d0d1dd628527c77d ("perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event") Also change the expected sizeof(struct perf_event_attr) from 120 to 128 due to fields being added for the SIGTRAP changes. Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Kan Liang Cc: Marco Elver Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki K Poulose Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/include/uapi/linux/perf_event.h | 74 +++++++++++++++++++++++-- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/base-stat | 2 +- tools/perf/tests/attr/system-wide-dummy | 2 +- 4 files changed, 71 insertions(+), 9 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b95d3c485d27..e0b41a42c524 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,12 +143,16 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -307,6 +311,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -888,7 +893,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -1101,10 +1123,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) @@ -1123,14 +1150,24 @@ union perf_mem_data_src { mem_lvl_num:4, /* memory hierarchy level number */ mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ +#ifdef __GENKSYMS__ mem_rsvd:24; +#else + mem_blk:3, /* access blocked */ + mem_rsvd:21; +#endif }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { +#ifdef __GENKSYMS__ __u64 mem_rsvd:24, +#else + __u64 mem_rsvd:21, + mem_blk:3, /* access blocked */ +#endif mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ mem_lvl_num:4, /* memory hierarchy level number */ @@ -1213,6 +1250,12 @@ union perf_mem_data_src { #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_SHIFT 26 +/* Access blocked */ +#define PERF_MEM_BLK_NA 0x01 /* not available */ +#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) @@ -1244,4 +1287,23 @@ struct perf_branch_entry { reserved:40; }; +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 645009c08b3c..4a7b8deef3fd 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=120 +size=128 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index b0f42c34882e..408164456530 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=120 +size=128 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy index eba723cc0d38..86a15dd359d9 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/attr/system-wide-dummy @@ -7,7 +7,7 @@ cpu=* pid=-1 flags=8 type=1 -size=120 +size=128 config=9 sample_period=4000 sample_type=455 -- Gitee From 44cbffd07745cf1fd3ac2982caecbf71d82c8910 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:22 +0800 Subject: [PATCH 17/42] perf cs-etm: Refactor initialisation of kernel start address mainline inclusion from mainline-v5.14-rc4 commit 6f38e1158bba category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=6f38e1158bba17c5e45236ac7eedb0a6cbbc2ded -------------------------------------------------------------------------- The kernel start address is already cached in the machine struct once it is initialised, so storing it in the cs_etm struct is unnecessary. It also depends on kernel maps being available to be initialised. Therefore cs_etm__setup_queues() isn't an appropriate place to call it because it could be called before processing starts. It would be better to initialise it at the point when it is needed, then we can be sure that all the necessary maps are available. Also by calling machine__kernel_start() multiple times it can be initialised at some point, even if it failed to initialise previously due to missing maps. In a later commit cs_etm__setup_queues() will be moved which is the motivation for this change. Reviewed-by: Mathieu Poirier Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Al Grant Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: coresight@lists.linaro.org Link: https://lore.kernel.org/r/20210721150202.32065-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 927f3ab2b485..127b8c734003 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -62,7 +62,6 @@ struct cs_etm_auxtrace { u64 instructions_sample_period; u64 instructions_id; u64 **metadata; - u64 kernel_start; unsigned int pmu_type; }; @@ -691,7 +690,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) { + if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else @@ -901,9 +900,6 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) unsigned int i; int ret; - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - for (i = 0; i < etm->queues.nr_queues; i++) { ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); if (ret) -- Gitee From 89430d9b2873cc7086821cade96129f4fe6d03a2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:23 +0800 Subject: [PATCH 18/42] perf cs-etm: Split setup and timestamp search functions mainline inclusion from mainline-v5.14-rc4 commit 9ac8afd500e4 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=9ac8afd500e4597edd52b9469368c0126bb42124 -------------------------------------------------------------------------- This refactoring has some benefits: * Decoding is done to find the timestamp. If we want to print errors when maps aren't available, then doing it from cs_etm__setup_queue() may cause warnings to be printed. * The cs_etm__setup_queue() flow is shared between timed and timeless modes, so it needs to be guarded by an if statement which can now be removed. * Allows moving the setup queues function earlier. * If data was piped in, then not all queues would be filled so it wouldn't have worked properly anyway. Now it waits for flush so data in all queues will be available. The motivation for this is to decouple setup functions with ones that involve decoding. That way we can move the setup function earlier when the formatted/unformatted trace information is available. Reviewed-by: Mathieu Poirier Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210721150202.32065-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 41 ++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 127b8c734003..3603606c8b44 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -809,29 +809,32 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, unsigned int queue_nr) { - int ret = 0; - unsigned int cs_queue_nr; - u8 trace_chan_id; - u64 cs_timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - goto out; + return 0; etmq = cs_etm__alloc_queue(etm); - if (!etmq) { - ret = -ENOMEM; - goto out; - } + if (!etmq) + return -ENOMEM; queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; etmq->offset = 0; - if (etm->timeless_decoding) - goto out; + return 0; +} + +static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, + struct cs_etm_queue *etmq, + unsigned int queue_nr) +{ + int ret = 0; + unsigned int cs_queue_nr; + u8 trace_chan_id; + u64 cs_timestamp; /* * We are under a CPU-wide trace scenario. As such we need to know @@ -2218,13 +2221,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0; - unsigned int cs_queue_nr, queue_nr; + unsigned int cs_queue_nr, queue_nr, i; u8 trace_chan_id; u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq; + /* + * Pre-populate the heap with one entry from each queue so that we can + * start processing in time order across all queues. + */ + for (i = 0; i < etm->queues.nr_queues; i++) { + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); + if (ret) + return ret; + } + while (1) { if (!etm->heap.heap_cnt) goto out; -- Gitee From 280b3a2663a53e022944ce458a9c33a88b34f238 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:24 +0800 Subject: [PATCH 19/42] perf cs-etm: Only setup queues when they are modified mainline inclusion from mainline-v5.14-rc4 commit ca50db5917cb category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=ca50db5917cbb963e87c925422bee5851f55bd7f -------------------------------------------------------------------------- Continually creating queues in cs_etm__process_event() is unnecessary. They only need to be created when a buffer for a new CPU or thread is encountered. This can be in two places, when building the queues in advance in cs_etm__process_auxtrace_info(), or in cs_etm__process_auxtrace_event() when data_queued is false and the index wasn't available (pipe mode). This change will allow the 'formatted' decoder setting to applied when iterating over aux records in a later commit. Reviewed-by: Mathieu Poirier Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210721150202.32065-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 54 +++++++++++----------------------------- 1 file changed, 14 insertions(+), 40 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3603606c8b44..dbea207eb596 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -96,7 +96,6 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list; -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -564,7 +563,6 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -574,11 +572,6 @@ static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL; - ret = cs_etm__update_queues(etm); - - if (ret < 0) - return ret; - if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1); @@ -898,30 +891,6 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, return ret; } -static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -{ - unsigned int i; - int ret; - - for (i = 0; i < etm->queues.nr_queues; i++) { - ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); - if (ret) - return ret; - } - - return 0; -} - -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) -{ - if (etm->queues.new_data) { - etm->queues.new_data = false; - return cs_etm__setup_queues(etm); - } - - return 0; -} - static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) @@ -2395,7 +2364,6 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - int err = 0; u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2414,12 +2382,6 @@ static int cs_etm__process_event(struct perf_session *session, else sample_kernel_timestamp = 0; - if (sample_kernel_timestamp || etm->timeless_decoding) { - err = cs_etm__update_queues(etm); - if (err) - return err; - } - /* * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because @@ -2476,6 +2438,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, int fd = perf_data__fd(session->data); bool is_pipe = perf_data__is_pipe(session->data); int err; + int idx = event->auxtrace.idx; if (is_pipe) data_offset = 0; @@ -2490,6 +2453,11 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err; + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx); + if (err) + return err; + if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { cs_etm__dump_event(etm, buffer); @@ -2732,6 +2700,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o struct perf_record_auxtrace *auxtrace_event; union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; + __u32 idx; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2793,8 +2762,13 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); - return auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, - file_offset, NULL); + err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + if (err) + return err; + + idx = auxtrace_event->idx; + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx); } /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ -- Gitee From da837ec4bbf4762a9bfbdeaf92db56117d93983f Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:25 +0800 Subject: [PATCH 20/42] perf cs-etm: Suppress printing when resetting decoder mainline inclusion from mainline-v5.14-rc4 commit b8324f490be8 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=b8324f490be886b0e9026f1258fc2db3c9556d86 -------------------------------------------------------------------------- The decoder is quite noisy when being reset. In a future commit, dump-raw-trace will use a code path that resets the decoder rather than creating a new one, so printing has to be suppressed to not flood the output. Reviewed-by: Mathieu Poirier Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210721150202.32065-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 662df00d7bee..f361091e5e98 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -35,6 +35,7 @@ struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); + bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; @@ -74,9 +75,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) ocsd_datapath_resp_t dp_ret; decoder->prev_return = OCSD_RESP_CONT; - + decoder->suppress_printing = true; dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); + decoder->suppress_printing = false; if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) return -1; @@ -146,8 +148,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) { - if (p_context && str_len) - ((struct cs_etm_decoder *)p_context)->packet_printer(msg); + const struct cs_etm_decoder *decoder = p_context; + + if (p_context && str_len && !decoder->suppress_printing) + decoder->packet_printer(msg); } static int -- Gitee From 775866ca1b104800f3fc895e4910822454e95253 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:26 +0800 Subject: [PATCH 21/42] perf cs-etm: Use existing decoder instead of resetting it mainline inclusion from mainline-v5.14-rc4 commit 04aaad262c9a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=04aaad262c9aae822c9f108bf6b0ac131e0ec690 -------------------------------------------------------------------------- When dumping trace, the decoder is continually deleted and recreated to decode each buffer. To support both formatted and unformatted trace in a later commit, the decoder will be configured in advance. This commit removes the deletion of the decoder and allows the formatted/unformatted setting to persist. Reviewed-by: Mathieu Poirier Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210721150202.32065-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index dbea207eb596..caa198be3102 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -508,14 +508,11 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, return ret; } -static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +static void cs_etm__dump_event(struct cs_etm_queue *etmq, struct auxtrace_buffer *buffer) { int ret; const char *color = PERF_COLOR_BLUE; - struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; - struct cs_etm_decoder *decoder; size_t buffer_used = 0; fprintf(stdout, "\n"); @@ -523,29 +520,11 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, ". ... CoreSight ETM Trace data: size %zu bytes\n", buffer->size); - /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); - - if (!t_params) - return; - - if (cs_etm__init_trace_params(t_params, etm)) - goto out_free; - - /* Set decoder parameters to simply print the trace packets */ - if (cs_etm__init_decoder_params(&d_params, NULL, - CS_ETM_OPERATION_PRINT)) - goto out_free; - - decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - - if (!decoder) - goto out_free; do { size_t consumed; ret = cs_etm_decoder__process_data_block( - decoder, buffer->offset, + etmq->decoder, buffer->offset, &((u8 *)buffer->data)[buffer_used], buffer->size - buffer_used, &consumed); if (ret) @@ -554,10 +533,7 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, buffer_used += consumed; } while (buffer_used < buffer->size); - cs_etm_decoder__free(decoder); - -out_free: - zfree(&t_params); + cs_etm_decoder__reset(etmq->decoder); } static int cs_etm__flush_events(struct perf_session *session, @@ -769,7 +745,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, - CS_ETM_OPERATION_DECODE)) + dump_trace ? CS_ETM_OPERATION_PRINT : + CS_ETM_OPERATION_DECODE)) goto out_free; etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); @@ -2422,7 +2399,7 @@ static void dump_queued_data(struct cs_etm_auxtrace *etm, for (i = 0; i < etm->queues.nr_queues; ++i) list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) if (buf->reference == event->reference) - cs_etm__dump_event(etm, buf); + cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); } static int cs_etm__process_auxtrace_event(struct perf_session *session, @@ -2460,7 +2437,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { - cs_etm__dump_event(etm, buffer); + cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); auxtrace_buffer__put_data(buffer); } } else if (dump_trace) -- Gitee From abc2922bfd61ecad579aa8cf69ff4b9f124ddf95 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:27 +0800 Subject: [PATCH 22/42] perf cs-etm: Pass unformatted flag to decoder mainline inclusion from mainline-v5.14-rc4 commit 9182f04a85b2 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=9182f04a85b2c2062bbf4f8e53bec9ce698d55f0 -------------------------------------------------------------------------- The TRBE (Trace Buffer Extension) feature allows a separate trace buffer for each trace source, therefore the trace wouldn't need to be formatted. The driver was introduced in commit 3fbf7f011f24 ("coresight: sink: Add TRBE driver"). The formatted/unformatted mode is encoded in one of the flags of the AUX record. The first AUX record encountered for each event is used to determine the mode, and this will persist for the remaining trace that is either decoded or dumped. Reviewed-by: Mathieu Poirier Signed-off-by: James Clark Cc: Al Grant Cc: Alexander Shishkin Cc: Anshuman Khandual Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210721150202.32065-7-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 +- tools/perf/util/cs-etm.c | 53 ++++++++++++++----- 2 files changed, 42 insertions(+), 15 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index f361091e5e98..1e5daf7250df 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -686,7 +686,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, } struct cs_etm_decoder * -cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, +cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) { struct cs_etm_decoder *decoder; @@ -731,7 +731,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, /* init raw frame logging if required */ cs_etm_decoder__init_raw_frame_logging(d_params, decoder); - for (i = 0; i < num_cpu; i++) { + for (i = 0; i < decoders; i++) { ret = cs_etm_decoder__create_etm_decoder(d_params, &t_params[i], decoder); diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index caa198be3102..d7c1421b7b72 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -461,13 +461,14 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, } static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm) + struct cs_etm_auxtrace *etm, + int decoders) { int i; u32 etmidr; u64 architecture; - for (i = 0; i < etm->num_cpu; i++) { + for (i = 0; i < decoders; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC]; switch (architecture) { @@ -488,7 +489,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, - enum cs_etm_decoder_operation mode) + enum cs_etm_decoder_operation mode, + bool formatted) { int ret = -EINVAL; @@ -498,7 +500,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq; - d_params->formatted = true; + d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true; @@ -720,11 +722,17 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + bool formatted) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; + /* + * Each queue can only contain data from one CPU when unformatted, so only one decoder is + * needed. + */ + int decoders = formatted ? etm->num_cpu : 1; etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -735,21 +743,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + t_params = zalloc(sizeof(*t_params) * decoders); if (!t_params) goto out_free; - if (cs_etm__init_trace_params(t_params, etm)) + if (cs_etm__init_trace_params(t_params, etm, decoders)) goto out_free; /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, dump_trace ? CS_ETM_OPERATION_PRINT : - CS_ETM_OPERATION_DECODE)) + CS_ETM_OPERATION_DECODE, + formatted)) goto out_free; - etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + etmq->decoder = cs_etm_decoder__new(decoders, &d_params, + t_params); if (!etmq->decoder) goto out_free; @@ -777,14 +787,15 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, - unsigned int queue_nr) + unsigned int queue_nr, + bool formatted) { struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) return 0; - etmq = cs_etm__alloc_queue(etm); + etmq = cs_etm__alloc_queue(etm, formatted); if (!etmq) return -ENOMEM; @@ -2430,8 +2441,14 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err; + /* + * Knowing if the trace is formatted or not requires a lookup of + * the aux record so only works in non-piped mode where data is + * queued in cs_etm__queue_aux_records(). Always assume + * formatted in piped mode (true). + */ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], - idx); + idx, true); if (err) return err; @@ -2678,6 +2695,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o union perf_event auxtrace_fragment; __u64 aux_offset, aux_size; __u32 idx; + bool formatted; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, @@ -2745,7 +2763,9 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return err; idx = auxtrace_event->idx; - return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx); + formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, formatted); } /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */ @@ -3034,6 +3054,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event, goto err_delete_thread; etm->data_queued = etm->queues.populated; + /* + * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and + * cs_etm__queue_aux_fragment() for details relating to limitations. + */ + if (!etm->data_queued) + pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n" + "Continuing with best effort decoding in piped mode.\n\n"); return 0; -- Gitee From f570289e95a9128193b0aec869bba40bf504a2f7 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:28 +0800 Subject: [PATCH 23/42] perf tools: Add flag for tracking warnings of missing DSOs mainline inclusion from mainline-v5.14-rc4 commit 115520495015 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=115520495015591d6a98a5965644cfffd218e399 -------------------------------------------------------------------------- Auxtrace support may need DSOs for decoding (for example Arm Coresight). If one of these is missing it would make sense to warn once for each one that's missing, but not flood the output with every address as there could be thousands of lookups. This flag will allow tracking whether a warning was shown for each DSO. Signed-off-by: James Clark Reviewed-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210729155805.2830-5-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/dso.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d8cb4f5680a4..b5cfd28d6fc4 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -170,6 +170,7 @@ struct dso { u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; + u8 auxtrace_warned:1; u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1; -- Gitee From eab6624fe1911d1b2c396331bfdee7155b0375bc Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:29 +0800 Subject: [PATCH 24/42] perf tools: Add WARN_ONCE equivalent for UI warnings mainline inclusion from mainline-v5.14-rc4 commit 1094795eb9f2 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1094795eb9f2b17678d944eb38d45af8f8f67362 -------------------------------------------------------------------------- Currently WARN_ONCE prints to stderr and corrupts the TUI. Add equivalent methods for UI warnings. Signed-off-by: James Clark Reviewed-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210729155805.2830-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/debug.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f1734abd98dd..5b04117fe0d3 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -21,6 +21,13 @@ extern int debug_data_convert; eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning_once(fmt, ...) ({ \ + static int __warned; \ + if (unlikely(!__warned)) { \ + pr_warning(fmt, ##__VA_ARGS__); \ + __warned = 1; \ + } \ +}) #define pr_info(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug(fmt, ...) \ @@ -54,6 +61,13 @@ void trace_event(union perf_event *event); int ui__error(const char *format, ...) __printf(1, 2); int ui__warning(const char *format, ...) __printf(1, 2); +#define ui__warning_once(format, ...) ({ \ + static int __warned; \ + if (unlikely(!__warned)) { \ + ui__warning(format, ##__VA_ARGS__); \ + __warned = 1; \ + } \ +}) void pr_stat(const char *fmt, ...); -- Gitee From 7bad5246009598588418343626c2fd194f978456 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:30 +0800 Subject: [PATCH 25/42] perf annotate: Re-add annotate_warned functionality mainline inclusion from mainline-v5.14-rc4 commit 3d8b92472ae7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=3d8b92472ae7ba91d759cadb4670bd492ef97d04 -------------------------------------------------------------------------- Setting annotate_warned to true on errors was removed in commit ee51d851392e ("perf annotate: Introduce strerror for handling symbol__disassemble() errors") which means when 'perf annotate --skip-missing' is used warnings are shown multiple times for the same DSO. Setting this again restores the original behavior of only one warning each. Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210729155805.2830-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/ui/browsers/annotate.c | 1 + tools/perf/ui/gtk/annotate.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index bd77825fd5a1..79649b2aed5d 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -919,6 +919,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, err = symbol__annotate2(ms, evsel, opts, &browser.arch); if (err) { char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); ui__error("Couldn't annotate %s:\n%s", sym->name, msg); goto out_free_offsets; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index a7dff77f2018..5a1d30d14c1c 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -177,6 +177,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, err = symbol__annotate(ms, evsel, &annotation__default_options, NULL); if (err) { char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); ui__error("Couldn't annotate %s: %s\n", sym->name, msg); return -1; -- Gitee From 72705c2060cab27ae7af74bb35cf4bd031ed8da6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:31 +0800 Subject: [PATCH 26/42] perf annotate: Add disassembly warnings for annotate --stdio mainline inclusion from mainline-v5.14-rc4 commit 243c3a3eb4e0 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=243c3a3eb4e03201dec36f2edfc4fe604d8b9078 -------------------------------------------------------------------------- Currently 'perf annotate --stdio' (and --stdio2) will exit without printing anything if there are disassembly errors. Apply the same error handler that's used for TUI and GTK modes. This makes comparing disassembly across the different modes more consistent. Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210729155805.2830-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/annotate.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e8f188c2de8b..264e5223ac40 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2778,9 +2778,17 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct rb_root source_line = RB_ROOT; struct hists *hists = evsel__hists(evsel); char buf[1024]; + int err; + + err = symbol__annotate2(ms, evsel, opts, NULL); + if (err) { + char msg[BUFSIZ]; - if (symbol__annotate2(ms, evsel, opts, NULL) < 0) + dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; + } if (opts->print_lines) { srcline_full_filename = opts->full_path; @@ -2804,9 +2812,17 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct dso *dso = ms->map->dso; struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; + int err; + + err = symbol__annotate(ms, evsel, opts, NULL); + if (err) { + char msg[BUFSIZ]; - if (symbol__annotate(ms, evsel, opts, NULL) < 0) + dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; + } symbol__calc_percent(sym, evsel); -- Gitee From ddce40427a4d8478bb60a05ecdab282ec1df36a6 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:32 +0800 Subject: [PATCH 27/42] perf cs-etm: Improve Coresight zero timestamp warning mainline inclusion from mainline-v5.14-rc4 commit f3c33cbd9221 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=f3c33cbd9221e543bb4dd3c9028aef758fa2aa0e -------------------------------------------------------------------------- Only show the warning if the user hasn't already set timeless mode and improve the text because there was ambiguity around the meaning of '...' Change the warning to a UI warning instead of printing straight to stderr because this corrupts the UI when perf report TUI is used. The UI warning function also handles printing to stderr when in perf script mode. Suggested-by: Leo Yan Signed-off-by: James Clark Reviewed-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210729155805.2830-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 1e5daf7250df..098ac4978790 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -327,8 +327,11 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * underflow. */ packet_queue->cs_timestamp = 0; - WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR - ". Decoding may be improved with --itrace=Z...\n", indx); + if (!cs_etm__etmq_is_timeless(etmq)) + pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR + ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n", + indx); + } else if (packet_queue->instr_count > elem->timestamp) { /* * Sanity check that the elem->timestamp - packet_queue->instr_count would not -- Gitee From 4f0091a36027e0d65080a9171bd1dcebae75ca66 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:33 +0800 Subject: [PATCH 28/42] perf cs-etm: Add warnings for missing DSOs mainline inclusion from mainline-v5.14-rc4 commit 9c38b671ebd5 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=9c38b671ebd5297d861522806e09cf9e639d0af6 -------------------------------------------------------------------------- Currently decode will silently fail if no binary data is available for the decode. This is made worse if only partial data is available because the decode will appear to work, but any trace from that missing DSO will silently not be generated. Add a UI popup once if there is any data missing, and then warn in the bottom left for each individual DSO that's missing. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: http //lore.kernel.org/lkml/20210805130354.878120-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index d7c1421b7b72..3322201a10f9 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -716,8 +716,17 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); - if (len <= 0) + if (len <= 0) { + ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" + " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); + if (!al.map->dso->auxtrace_warned) { + pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", + address, + al.map->dso->long_name ? al.map->dso->long_name : "Unknown"); + al.map->dso->auxtrace_warned = true; + } return 0; + } return len; } -- Gitee From 437fafa1291f386484c4ea98e5b07d32ae0926a9 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:34 +0800 Subject: [PATCH 29/42] perf cs-etm: Refactor initialisation of decoder params. mainline inclusion from mainline-v5.14-rc4 commit 991f69e9e0bb category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=991f69e9e0bb33e4917485bead62b51f0afefac0 -------------------------------------------------------------------------- The initialisation of the decoder params is duplicated between creation of the packet printer and packet decoder. Put them both into one function so that future changes only need to be made in one place. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 99 +++++-------------- 1 file changed, 25 insertions(+), 74 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 098ac4978790..40178bc10d0e 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -227,55 +227,6 @@ cs_etm_decoder__init_raw_frame_logging( } #endif -static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, - const char *decoder_name, - void *trace_config) -{ - u8 csid; - - if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, - OCSD_CREATE_FLG_PACKET_PROC, - trace_config, &csid)) - return -1; - - if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) - return -1; - - return 0; -} - -static int -cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - const char *decoder_name; - ocsd_etmv3_cfg config_etmv3; - ocsd_etmv4_cfg trace_config_etmv4; - void *trace_config; - - switch (t_params->protocol) { - case CS_ETM_PROTO_ETMV3: - case CS_ETM_PROTO_PTM: - cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? - OCSD_BUILTIN_DCD_ETMV3 : - OCSD_BUILTIN_DCD_PTM; - trace_config = &config_etmv3; - break; - case CS_ETM_PROTO_ETMV4i: - cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; - trace_config = &trace_config_etmv4; - break; - default: - return -1; - } - - return cs_etm_decoder__create_packet_printer(decoder, - decoder_name, - trace_config); -} - static ocsd_datapath_resp_t cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, struct cs_etm_packet_queue *packet_queue, @@ -631,9 +582,10 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( return resp; } -static int cs_etm_decoder__create_etm_packet_decoder( - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) { const char *decoder_name; ocsd_etmv3_cfg config_etmv3; @@ -659,31 +611,30 @@ static int cs_etm_decoder__create_etm_packet_decoder( return -1; } - if (ocsd_dt_create_decoder(decoder->dcd_tree, - decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, - trace_config, &csid)) - return -1; + if (d_params->operation == CS_ETM_OPERATION_DECODE) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; - if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, - cs_etm_decoder__gen_trace_elem_printer, - decoder)) - return -1; + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; - return 0; -} + return 0; + } else if (d_params->operation == CS_ETM_OPERATION_PRINT) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; -static int -cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - if (d_params->operation == CS_ETM_OPERATION_PRINT) - return cs_etm_decoder__create_etm_packet_printer(t_params, - decoder); - else if (d_params->operation == CS_ETM_OPERATION_DECODE) - return cs_etm_decoder__create_etm_packet_decoder(t_params, - decoder); + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; + } return -1; } -- Gitee From b3279e0670e8c690c5fbea4e040a20e241c998b2 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:35 +0800 Subject: [PATCH 30/42] perf cs-etm: Initialise architecture based on TRCIDR1 mainline inclusion from mainline-v5.14-rc4 commit f4aef1ea2663 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=f4aef1ea2663ac7efddd796970678911e56ea1f7 -------------------------------------------------------------------------- Currently the architecture is hard coded as ARCH_V8, but from ETMv4.4 onwards this should be ARCH_AA64. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 40178bc10d0e..48e32c5b68fa 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -126,6 +126,21 @@ static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, return 0; } +#define TRCIDR1_TRCARCHMIN_SHIFT 4 +#define TRCIDR1_TRCARCHMIN_MASK GENMASK(7, 4) +#define TRCIDR1_TRCARCHMIN(x) (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT) + +static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1) +{ + /* + * For ETMv4 if the trace minor version is 4 or more then we can assume + * the architecture is ARCH_AA64 rather than just V8. + * ARCH_V8 = V8 architecture + * ARCH_AA64 = Min v8r3 plus additional AA64 PE features + */ + return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { @@ -140,7 +155,7 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, config->reg_idr11 = 0; config->reg_idr12 = 0; config->reg_idr13 = 0; - config->arch_ver = ARCH_V8; + config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1); config->core_prof = profile_CortexA; } -- Gitee From 74b93285b06e6abb506642ab0ed25124b55884ef Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:36 +0800 Subject: [PATCH 31/42] perf cs-etm: Refactor out ETMv4 header saving mainline inclusion from mainline-v5.14-rc4 commit c9ccc96bf6f2 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=c9ccc96bf6f28d83ef497ee985b8f6ffd493de2a -------------------------------------------------------------------------- Extract a function for saving the ETMv4 header because this will be used for ETE in a later commit. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 46 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 435aaf7e7de1..d9305a850c0d 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -603,6 +603,28 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) return val; } +static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Get trace configuration register */ + data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr); + /* Get traceID from the framework */ + data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + data[CS_ETMV4_TRCIDR1] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); + data[CS_ETMV4_TRCIDR2] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + data[CS_ETMV4_TRCIDR8] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); + data[CS_ETMV4_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]); +} + static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) @@ -616,29 +638,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* first see what kind of tracer this cpu is affined to */ if (cs_etm_is_etmv4(itr, cpu)) { magic = __perf_cs_etmv4_magic; - /* Get trace configuration register */ - info->priv[*offset + CS_ETMV4_TRCCONFIGR] = - cs_etmv4_get_config(itr); - /* Get traceID from the framework */ - info->priv[*offset + CS_ETMV4_TRCTRACEIDR] = - coresight_get_trace_id(cpu); - /* Get read-only information from sysFS */ - info->priv[*offset + CS_ETMV4_TRCIDR0] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); - info->priv[*offset + CS_ETMV4_TRCIDR1] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); - info->priv[*offset + CS_ETMV4_TRCIDR2] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); - info->priv[*offset + CS_ETMV4_TRCIDR8] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); - info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro - [CS_ETMV4_TRCAUTHSTATUS]); + cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; -- Gitee From 35fea7218bca131db323e0f04d5817e21af11c46 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:37 +0800 Subject: [PATCH 32/42] perf cs-etm: Save TRCDEVARCH register mainline inclusion from mainline-v5.14-rc4 commit 51ba8811318a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=51ba8811318ac9a8f800c1d446af3f1ce81ec911 -------------------------------------------------------------------------- When ETE is present save the TRCDEVARCH register and set a new magic number. It will be used to configure the decoder in a later commit. Old versions of perf will not be able to open files with this new magic number, but old files will still work with newer versions of perf. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-5-james.clark@arm.com [ Addressed some cosmetic suggestions by Suzuki Poulouse ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 49 +++++++++++++++++++++++++++---- tools/perf/util/cs-etm.c | 15 ++++++++-- tools/perf/util/cs-etm.h | 11 +++++++ 3 files changed, 68 insertions(+), 7 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index d9305a850c0d..755c26488d5c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -47,15 +47,17 @@ static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { [CS_ETM_ETMIDR] = "mgmt/etmidr", }; -static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { +static const char * const metadata_etmv4_ro[] = { [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", + [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch" }; static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); +static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu); static int cs_etm_set_context_id(struct auxtrace_record *itr, struct evsel *evsel, int cpu) @@ -529,7 +531,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, struct evlist *evlist __maybe_unused) { int i; - int etmv3 = 0, etmv4 = 0; + int etmv3 = 0, etmv4 = 0, ete = 0; struct perf_cpu_map *event_cpus = evlist->core.cpus; struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); @@ -540,7 +542,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, !cpu_map__has(online_cpus, i)) continue; - if (cs_etm_is_etmv4(itr, i)) + if (cs_etm_is_ete(itr, i)) + ete++; + else if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -551,7 +555,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, if (!cpu_map__has(online_cpus, i)) continue; - if (cs_etm_is_etmv4(itr, i)) + if (cs_etm_is_ete(itr, i)) + ete++; + else if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -561,6 +567,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, perf_cpu_map__put(online_cpus); return (CS_ETM_HEADER_SIZE + + (ete * CS_ETE_PRIV_SIZE) + (etmv4 * CS_ETMV4_PRIV_SIZE) + (etmv3 * CS_ETMV3_PRIV_SIZE)); } @@ -603,6 +610,27 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) return val; } +#define TRCDEVARCH_ARCHPART_SHIFT 0 +#define TRCDEVARCH_ARCHPART_MASK GENMASK(11, 0) +#define TRCDEVARCH_ARCHPART(x) (((x) & TRCDEVARCH_ARCHPART_MASK) >> TRCDEVARCH_ARCHPART_SHIFT) + +#define TRCDEVARCH_ARCHVER_SHIFT 12 +#define TRCDEVARCH_ARCHVER_MASK GENMASK(15, 12) +#define TRCDEVARCH_ARCHVER(x) (((x) & TRCDEVARCH_ARCHVER_MASK) >> TRCDEVARCH_ARCHVER_SHIFT) + +static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + + /* + * ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13. + * See ETM_DEVARCH_ETE_ARCH in coresight-etm4x.h + */ + return TRCDEVARCH_ARCHVER(trcdevarch) == 5 && TRCDEVARCH_ARCHPART(trcdevarch) == 0xA13; +} + static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, int cpu) { struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); @@ -636,7 +664,18 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; /* first see what kind of tracer this cpu is affined to */ - if (cs_etm_is_etmv4(itr, cpu)) { + if (cs_etm_is_ete(itr, cpu)) { + magic = __perf_cs_ete_magic; + /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ + cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); + info->priv[*offset + CS_ETE_TRCDEVARCH] = + cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + + /* How much space was used */ + increment = CS_ETE_PRIV_MAX; + nr_trc_params = CS_ETE_PRIV_MAX - CS_ETM_COMMON_BLK_MAX_V1; + } else if (cs_etm_is_etmv4(itr, cpu)) { magic = __perf_cs_etmv4_magic; cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 3322201a10f9..927bedd4c83a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2521,6 +2521,7 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" }; static const char * const param_unk_fmt = @@ -2580,10 +2581,15 @@ static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); } - } else if (magic == __perf_cs_etmv4_magic) { + } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) { + /* + * ETE and ETMv4 can be printed in the same block because the number of parameters + * is saved and they share the list of parameter names. ETE is also only supported + * in V1 files. + */ for (j = 0; j < total_params; j++, i++) { /* if newer record - could be excess params */ - if (j >= CS_ETMV4_PRIV_MAX) + if (j >= CS_ETE_PRIV_MAX) fprintf(stdout, param_unk_fmt, j, val[i]); else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); @@ -2952,6 +2958,11 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* The traceID is our handle */ trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else if (ptr[i] == __perf_cs_ete_magic) { + metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); + + /* ETE shares first part of metadata with ETMv4 */ + trcidr_idx = CS_ETMV4_TRCTRACEIDR; } if (!metadata[j]) { diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index e2841ac9676d..f54834f6f9e3 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -75,6 +75,15 @@ enum { /* define fixed version 0 length - allow new format reader to read old files. */ #define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) +/* + * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was + * added in header V1 + */ +enum { + CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX, + CS_ETE_PRIV_MAX +}; + /* * ETMv3 exception encoding number: * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) @@ -186,8 +195,10 @@ struct cs_etm_packet_queue { #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL +#define __perf_cs_ete_magic 0x5050505050505050ULL #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64)) #ifdef HAVE_CSTRACE_SUPPORT int cs_etm__process_auxtrace_info(union perf_event *event, -- Gitee From f2977cfe1df01bbc06c5ebb0226db4521d35dd77 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:38 +0800 Subject: [PATCH 33/42] perf cs-etm: Fix typo mainline inclusion from mainline-v5.14-rc4 commit 050a0fc4edc7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=050a0fc4edc75a97b4dba7b834fd6bf243bf06ed -------------------------------------------------------------------------- TRCIRD2 should be TRCIDR2 Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-6-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 755c26488d5c..c80c34e71a82 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -75,7 +75,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, if (!cs_etm_is_etmv4(itr, cpu)) goto out; - /* Get a handle on TRCIRD2 */ + /* Get a handle on TRCIDR2 */ snprintf(path, PATH_MAX, "cpu%d/%s", cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); -- Gitee From 80a5f5bccba7678b0b4998707240da1b30fb4741 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:39 +0800 Subject: [PATCH 34/42] perf cs-etm: Update OpenCSD decoder for ETE mainline inclusion from mainline-v5.14-rc4 commit 212095f7ca4a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=212095f7ca4a5182487ae12b62a8616ed87d617e -------------------------------------------------------------------------- OpenCSD v1.1.1 has a bug fix for the installation of the ETE decoder headers. This also means that including headers separately for each decoder is unnecessary so remove these. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-7-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/build/feature/test-libopencsd.c | 4 ++-- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index 52c790b0317b..eb6303ff446e 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0)) +#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 1.0.0 is required" +#error "OpenCSD >= 1.1.1 is required" #endif int main(void) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 48e32c5b68fa..eae3ec6725c0 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include "cs-etm.h" #include "cs-etm-decoder.h" -- Gitee From e1e7319627583338db486604ff61ed158a7e2dcd Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:40 +0800 Subject: [PATCH 35/42] perf cs-etm: Create ETE decoder mainline inclusion from mainline-v5.14-rc4 commit 779f414a4849 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=779f414a4849fb3e650cad8525c84e76fdd3c0ea -------------------------------------------------------------------------- If the magic number indicates ETE instantiate a OCSD_BUILTIN_DCD_ETE decoder instead of OCSD_BUILTIN_DCD_ETMV4I. ETE is the new trace feature for Armv9. Testing performed ================= * Old files with v0 and v1 headers for ETMv4 still open correctly * New files with new magic number open on new versions of perf * New files with new magic number fail to open on old versions of perf * Decoding with the ETE decoder results in the same output as the ETMv4 decoder as long as there are no new ETE packet types Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Suzuki Poulouse Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-8-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 20 +++++++++++++++++++ .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 12 +++++++++++ tools/perf/util/cs-etm.c | 18 +++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index eae3ec6725c0..5cabe2e06d69 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -157,6 +157,20 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, config->core_prof = profile_CortexA; } +static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params, + ocsd_ete_cfg *config) +{ + config->reg_configr = params->ete.reg_configr; + config->reg_traceidr = params->ete.reg_traceidr; + config->reg_idr0 = params->ete.reg_idr0; + config->reg_idr1 = params->ete.reg_idr1; + config->reg_idr2 = params->ete.reg_idr2; + config->reg_idr8 = params->ete.reg_idr8; + config->reg_devarch = params->ete.reg_devarch; + config->arch_ver = ARCH_AA64; + config->core_prof = profile_CortexA; +} + static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) @@ -603,6 +617,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, const char *decoder_name; ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; + ocsd_ete_cfg trace_config_ete; void *trace_config; u8 csid; @@ -620,6 +635,11 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; + case CS_ETM_PROTO_ETE: + cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); + decoder_name = OCSD_BUILTIN_DCD_ETE; + trace_config = &trace_config_ete; + break; default: return -1; } diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 11f3391d06f2..0102ece5ca3e 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -37,11 +37,22 @@ struct cs_etmv4_trace_params { u32 reg_traceidr; }; +struct cs_ete_trace_params { + u32 reg_idr0; + u32 reg_idr1; + u32 reg_idr2; + u32 reg_idr8; + u32 reg_configr; + u32 reg_traceidr; + u32 reg_devarch; +}; + struct cs_etm_trace_params { int protocol; union { struct cs_etmv3_trace_params etmv3; struct cs_etmv4_trace_params etmv4; + struct cs_ete_trace_params ete; }; }; @@ -65,6 +76,7 @@ enum { CS_ETM_PROTO_ETMV4i, CS_ETM_PROTO_ETMV4d, CS_ETM_PROTO_PTM, + CS_ETM_PROTO_ETE }; enum cs_etm_decoder_operation { diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 927bedd4c83a..cb9cae0536bc 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -460,6 +460,21 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; } +static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETE; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; +} + static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, struct cs_etm_auxtrace *etm, int decoders) @@ -479,6 +494,9 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, case __perf_cs_etmv4_magic: cs_etm__set_trace_param_etmv4(t_params, etm, i); break; + case __perf_cs_ete_magic: + cs_etm__set_trace_param_ete(t_params, etm, i); + break; default: return -EINVAL; } -- Gitee From acf442a2f21bb36aa952b5d75883f52809df508f Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:41 +0800 Subject: [PATCH 36/42] perf cs-etm: Print the decoder name mainline inclusion from mainline-v5.14-rc4 commit 56c62f52b6f2 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=56c62f52b6f25802a2154243b6dacdc6ff4f75bd -------------------------------------------------------------------------- Use the real name of the decoder instead of hard-coding "ETM" to avoid confusion when the trace is ETE. This also now distinguishes between ETMv3 and ETMv4. Reviewed-by: Leo Yan Reviewed-by: Suzuki Poulouse Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-9-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 17 +++++++++++------ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 + tools/perf/util/cs-etm.c | 4 ++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 5cabe2e06d69..0b369969b26c 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -37,6 +37,7 @@ struct cs_etm_decoder { dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; + const char *decoder_name; }; static u32 @@ -614,7 +615,6 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params *t_params, struct cs_etm_decoder *decoder) { - const char *decoder_name; ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; ocsd_ete_cfg trace_config_ete; @@ -625,19 +625,19 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, case CS_ETM_PROTO_ETMV3: case CS_ETM_PROTO_PTM: cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? + decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? OCSD_BUILTIN_DCD_ETMV3 : OCSD_BUILTIN_DCD_PTM; trace_config = &config_etmv3; break; case CS_ETM_PROTO_ETMV4i: cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; case CS_ETM_PROTO_ETE: cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); - decoder_name = OCSD_BUILTIN_DCD_ETE; + decoder->decoder_name = OCSD_BUILTIN_DCD_ETE; trace_config = &trace_config_ete; break; default: @@ -646,7 +646,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, if (d_params->operation == CS_ETM_OPERATION_DECODE) { if (ocsd_dt_create_decoder(decoder->dcd_tree, - decoder_name, + decoder->decoder_name, OCSD_CREATE_FLG_FULL_DECODER, trace_config, &csid)) return -1; @@ -658,7 +658,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, return 0; } else if (d_params->operation == CS_ETM_OPERATION_PRINT) { - if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, OCSD_CREATE_FLG_PACKET_PROC, trace_config, &csid)) return -1; @@ -790,3 +790,8 @@ void cs_etm_decoder__free(struct cs_etm_decoder *decoder) decoder->dcd_tree = NULL; free(decoder); } + +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder) +{ + return decoder->decoder_name; +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 0102ece5ca3e..92a855fbe5b8 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -104,5 +104,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet); int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder); #endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index cb9cae0536bc..925c56e1bf9d 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -537,8 +537,8 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq, fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight ETM Trace data: size %zu bytes\n", - buffer->size); + ". ... CoreSight %s Trace data: size %zu bytes\n", + cs_etm_decoder__get_name(etmq->decoder), buffer->size); do { size_t consumed; -- Gitee From a944d97e87611410cdf7e4c4ed2734601ff73e72 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:42 +0800 Subject: [PATCH 37/42] perf cs-etm: Show a warning for an unknown magic number mainline inclusion from mainline-v5.14-rc4 commit a80aea64aa07 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=a80aea64aa07361a57f2245a0695878f2ae67ee7 -------------------------------------------------------------------------- Currently perf reports "Cannot allocate memory" which isn't very helpful for a potentially user facing issue. If we add a new magic number in the future, perf will be able to report unrecognised magic numbers. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https //lore.kernel.org/r/20210806134109.1182235-10-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 925c56e1bf9d..76e1b033fdde 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2981,6 +2981,11 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* ETE shares first part of metadata with ETMv4 */ trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else { + ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", + ptr[i]); + err = -EINVAL; + goto err_free_metadata; } if (!metadata[j]) { -- Gitee From e4cf0d62c9fd04d60b384440eefe76d21eca46fd Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Fri, 25 Nov 2022 17:50:43 +0800 Subject: [PATCH 38/42] perf cs-etm: Print size using consistent format mainline inclusion from mainline-v5.15 commit d54e50b7c9a4 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=d54e50b7c9a4a3d30eba5075528c7b0a187667bb -------------------------------------------------------------------------- Since the size is already printed earlier in hex, print the same data using the same format, in hex. Reviewed-by: James Clark Reviewed-by: Leo Yan Reviewed-by: Mathieu Poirier Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211109142153.56546-2-german.gomez@arm.com Signed-off-by: German Gomez Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 76e1b033fdde..d79ce6a5d858 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -537,7 +537,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq, fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight %s Trace data: size %zu bytes\n", + ". ... CoreSight %s Trace data: size %#zx bytes\n", cs_etm_decoder__get_name(etmq->decoder), buffer->size); do { -- Gitee From 84348fff604cafc2e8b949e3e4471129ff4913d5 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:44 +0800 Subject: [PATCH 39/42] perf cs-etm: Remove duplicate and incorrect aux size checks mainline inclusion from mainline-v5.16-rc5 commit 7cc9680c4be7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=7cc9680c4be7c1da7a3570711f01273b781b936b -------------------------------------------------------------------------- There are two checks, one is for size when running without admin, but this one is covered by the driver and reported on in more detail here (builtin-record.c): pr_err("Permission error mapping pages.\n" "Consider increasing " "/proc/sys/kernel/perf_event_mlock_kb,\n" "or try again with a smaller value of -m/--mmap_pages.\n" "(current value: %u,%u)\n", This had the effect of artificially limiting the aux buffer size to a value smaller than what was allowed because perf_event_mlock_kb wasn't taken into account. The second is to check for a power of two, but this is covered here (evlist.c): pr_info("rounding mmap pages size to %s (%lu pages)\n", buf, pages); Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211208115435.610101-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/arch/arm/util/cs-etm.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c80c34e71a82..6348f14b5ee2 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -403,25 +403,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, } - /* Validate auxtrace_mmap_pages provided by user */ - if (opts->auxtrace_mmap_pages) { - unsigned int max_page = (KiB(128) / page_size); - size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; - - if (!privileged && - opts->auxtrace_mmap_pages > max_page) { - opts->auxtrace_mmap_pages = max_page; - pr_err("auxtrace too big, truncating to %d\n", - max_page); - } - - if (!is_power_of_2(sz)) { - pr_err("Invalid mmap size for %s: must be a power of 2\n", - CORESIGHT_ETM_PMU_NAME); - return -EINVAL; - } - } - if (opts->auxtrace_snapshot_mode) pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); -- Gitee From 7b8c0a66943e453627425c42a8a6d856602a4341 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:45 +0800 Subject: [PATCH 40/42] perf cs-etm: Update deduction of TRCCONFIGR register for branch broadcast mainline inclusion from mainline-v5.17-rc3 commit aca8af3c2e8c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=aca8af3c2e8cb57662e6035c0ccb3c111a11d97a -------------------------------------------------------------------------- Now that a config flag for branch broadcast has been added, take it into account when trying to deduce what the driver would have programmed the TRCCONFIGR register to. Reviewed-by: Leo Yan Reviewed-by: Mike Leach Reviewed-by: Suzuki Poulouse Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: https://lore.kernel.org/r/20220113091056.1297982-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/include/linux/coresight-pmu.h | 2 ++ tools/perf/arch/arm/util/cs-etm.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 301991bec285..3c52e434bf3a 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -12,6 +12,7 @@ #define CORESIGHT_ETM_CSID_MAX 0x70 /* ETMv3.5/PTM's ETMCR config bit */ +#define ETM_OPT_BRANCH_BROADCAST 8 #define ETM_OPT_CYCACC 12 #define ETM_OPT_CTXTID 14 #define ETM_OPT_CTXTID2 15 @@ -19,6 +20,7 @@ #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ +#define ETM4_CFG_BIT_BB 3 #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 #define ETM4_CFG_BIT_VMID 7 diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 6348f14b5ee2..7af0130b3040 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -504,6 +504,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) if (config_opts & BIT(ETM_OPT_CTXTID2)) config |= BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT); + if (config_opts & BIT(ETM_OPT_BRANCH_BROADCAST)) + config |= BIT(ETM4_CFG_BIT_BB); + return config; } -- Gitee From 8112520e2c46086b9b0af1738db6908dd89160f8 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:46 +0800 Subject: [PATCH 41/42] perf cs-etm: No-op refactor of synth opt usage mainline inclusion from mainline-v5.17-rc4 commit 0b31ea6613ad category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=0b31ea6613ad1e6b9b84d877c1be18e39934e90a -------------------------------------------------------------------------- sample_branches and sample_instructions are already saved in the synth_opts struct. Other usages like synth_opts.last_branch don't save a value, so make this more consistent by always going through synth_opts and not saving duplicate values. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220210200620.1227232-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index d79ce6a5d858..be699e61f1bf 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -50,8 +50,6 @@ struct cs_etm_auxtrace { u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; - u8 sample_branches; - u8 sample_instructions; int num_cpu; u64 latest_kernel_timestamp; @@ -410,8 +408,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, { struct cs_etm_packet *tmp; - if (etm->sample_branches || etm->synth_opts.last_branch || - etm->sample_instructions) { + if (etm->synth_opts.branches || etm->synth_opts.last_branch || + etm->synth_opts.instructions) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. @@ -1365,7 +1363,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; id += 1; @@ -1389,7 +1386,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_instructions = true; etm->instructions_sample_type = attr.sample_type; etm->instructions_id = id; id += 1; @@ -1420,7 +1416,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, tidq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq, tidq); - if (etm->sample_instructions && + if (etm->synth_opts.instructions && tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically @@ -1503,7 +1499,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->sample_branches) { + if (etm->synth_opts.branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ @@ -1582,7 +1578,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } - if (etm->sample_branches && + if (etm->synth_opts.branches && tidq->prev_packet->sample_type == CS_ETM_RANGE) { err = cs_etm__synth_branch_sample(etmq, tidq); if (err) -- Gitee From 59ac15aa5d5a04d5f3fb248e74aed4fc2673b3e1 Mon Sep 17 00:00:00 2001 From: James Clark Date: Fri, 25 Nov 2022 17:50:47 +0800 Subject: [PATCH 42/42] perf cs-etm: Fix corrupt inject files when only last branch option is enabled mainline inclusion from mainline-v5.17-rc4 commit 9de0736973dd category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I636PS CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=9de0736973dd7f0f710d0f5e0a3dfd9fa9ffeb3f -------------------------------------------------------------------------- 'perf inject' with Coresight data generates files that cannot be opened when only the last branch option is specified: perf inject -i perf.data --itrace=l -o inject.data perf script -i inject.data 0x33faa8 [0x8]: failed to process type: 9 [Bad address] This is because cs_etm__synth_instruction_sample() is called even when the sample type for instructions hasn't been setup. Last branch records are attached to instruction samples so it doesn't make sense to generate them when --itrace=i isn't specified anyway. This change disables all calls of cs_etm__synth_instruction_sample() unless --itrace=i is specified, resulting in a file with no samples if only --itrace=l is provided, rather than a bad file. Reviewed-by: Leo Yan Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Mathieu Poirier Cc: Mike Leach Cc: Namhyung Kim Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220210200620.1227232-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Junhao He --- tools/perf/util/cs-etm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index be699e61f1bf..88fee0935036 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -1553,6 +1553,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, goto swap_packet; if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; @@ -1610,6 +1611,7 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, * the trace. */ if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; -- Gitee