diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index 52c790b0317b1ee6506c2bfc5b3bb86745213539..eb6303ff446ed93aadaeaf1ee553515eb8f39a05 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((1 << 16) | (0 << 8) | (0)) +#define OCSD_MIN_VER ((1 << 16) | (1 << 8) | (1)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 1.0.0 is required" +#error "OpenCSD >= 1.1.1 is required" #endif int main(void) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 0c2cee05c4e3046badf95ad3bcdc2ac6b425822f..3c52e434bf3acf6a778932ed94ffa22885fdd910 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -12,16 +12,21 @@ #define CORESIGHT_ETM_CSID_MAX 0x70 /* ETMv3.5/PTM's ETMCR config bit */ +#define ETM_OPT_BRANCH_BROADCAST 8 #define ETM_OPT_CYCACC 12 #define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ +#define ETM4_CFG_BIT_BB 3 #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 static inline int coresight_get_trace_id(int cpu) { diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b95d3c485d27e10f5a1b04354a1bb4ba7cc41146..e0b41a42c524ac7ca477c1b3bfc4b4854dcf7d55 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -143,12 +143,16 @@ enum perf_event_sample_format { PERF_SAMPLE_PHYS_ADDR = 1U << 19, PERF_SAMPLE_AUX = 1U << 20, PERF_SAMPLE_CGROUP = 1U << 21, + PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22, + PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23, + 
PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24, - PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */ }; +#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) /* * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set * @@ -307,6 +311,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -888,7 +893,24 @@ enum perf_event_type { * char data[size]; * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * - * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { union perf_sample_weight + * { + * u64 full; && PERF_SAMPLE_WEIGHT + * #if defined(__LITTLE_ENDIAN_BITFIELD) + * struct { + * u32 var1_dw; + * u16 var2_w; + * u16 var3_w; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #elif defined(__BIG_ENDIAN_BITFIELD) + * struct { + * u16 var3_w; + * u16 var2_w; + * u32 var1_dw; + * } && PERF_SAMPLE_WEIGHT_STRUCT + * #endif + * } + * } * { u64 data_src; } && PERF_SAMPLE_DATA_SRC * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi @@ -1101,10 +1123,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample 
collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) @@ -1123,14 +1150,24 @@ union perf_mem_data_src { mem_lvl_num:4, /* memory hierarchy level number */ mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ +#ifdef __GENKSYMS__ mem_rsvd:24; +#else + mem_blk:3, /* access blocked */ + mem_rsvd:21; +#endif }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { +#ifdef __GENKSYMS__ __u64 mem_rsvd:24, +#else + __u64 mem_rsvd:21, + mem_blk:3, /* access blocked */ +#endif mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ mem_lvl_num:4, /* memory hierarchy level number */ @@ -1213,6 +1250,12 @@ union perf_mem_data_src { #define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ #define PERF_MEM_TLB_SHIFT 26 +/* Access blocked */ +#define PERF_MEM_BLK_NA 0x01 /* not available */ +#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) @@ -1244,4 +1287,23 @@ struct perf_branch_entry { reserved:40; }; +union perf_sample_weight { + __u64 full; +#if defined(__LITTLE_ENDIAN_BITFIELD) + struct { + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; + }; +#elif defined(__BIG_ENDIAN_BITFIELD) + struct { + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; + }; +#else +#error "Unknown endianness" +#endif +}; + #endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 
079cdfabb35202968d44322ef7b49eb09504ea1f..9d5a2a222fa0d5e939c02ae02efc58bce97c2419 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -20,6 +20,7 @@ L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + Z prefer to ignore timestamps (so-called "timeless" decoding) The default is all events i.e. the same as --itrace=ibxwpe, except for perf script where it is --itrace=ce diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index cad7bf78341381649ec4e253a0c976461238c1e0..7af0130b3040a39bb675b7d73b709cb82a50b5d4 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -38,8 +38,6 @@ struct cs_etm_recording { struct auxtrace_record itr; struct perf_pmu *cs_etm_pmu; struct evlist *evlist; - int wrapped_cnt; - bool *wrapped; bool snapshot_mode; size_t snapshot_size; }; @@ -49,15 +47,17 @@ static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = { [CS_ETM_ETMIDR] = "mgmt/etmidr", }; -static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = { +static const char * const metadata_etmv4_ro[] = { [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0", [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1", [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", + [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch" }; static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); +static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu); static int cs_etm_set_context_id(struct auxtrace_record *itr, struct evsel *evsel, int cpu) @@ -67,6 +67,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, char path[PATH_MAX]; int err = -EINVAL; u32 val; + u64 contextid; ptr = container_of(itr, struct cs_etm_recording, itr); cs_etm_pmu = ptr->cs_etm_pmu; @@ -74,7 +75,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, if (!cs_etm_is_etmv4(itr, cpu)) 
goto out; - /* Get a handle on TRCIRD2 */ + /* Get a handle on TRCIDR2 */ snprintf(path, PATH_MAX, "cpu%d/%s", cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val); @@ -86,25 +87,59 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, goto out; } + /* User has configured for PID tracing, respect it. */ + contextid = evsel->core.attr.config & + (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2)); + /* - * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing - * is supported: - * 0b00000 Context ID tracing is not supported. - * 0b00100 Maximum of 32-bit Context ID size. - * All other values are reserved. + * If user doesn't configure the contextid format, parse PMU format and + * enable PID tracing according to the "contextid" format bits: + * + * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1; + * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2. */ - val = BMVAL(val, 5, 9); - if (!val || val != 0x4) { - err = -EINVAL; - goto out; + if (!contextid) + contextid = perf_pmu__format_bits(&cs_etm_pmu->format, + "contextid"); + + if (contextid & BIT(ETM_OPT_CTXTID)) { + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID + * tracing is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. + */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + pr_err("%s: CONTEXTIDR_EL1 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } + } + + if (contextid & BIT(ETM_OPT_CTXTID2)) { + /* + * TRCIDR2.VMIDOPT[30:29] != 0 and + * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid) + * We can't support CONTEXTIDR in VMID if the size of the + * virtual context id is < 32bit. + * Any value of VMIDSIZE >= 4 (i.e., >= 32bit) is fine for us.
+ */ + if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) { + pr_err("%s: CONTEXTIDR_EL2 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } } /* All good, let the kernel know */ - evsel->core.attr.config |= (1 << ETM_OPT_CTXTID); + evsel->core.attr.config |= contextid; err = 0; out: - return err; } @@ -169,17 +204,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_OPT_CTXTID) { + if (option & BIT(ETM_OPT_CTXTID)) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_OPT_TS) { + if (option & BIT(ETM_OPT_TS)) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS))) /* Nothing else is currently supported */ goto out; } @@ -368,25 +403,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, } - /* Validate auxtrace_mmap_pages provided by user */ - if (opts->auxtrace_mmap_pages) { - unsigned int max_page = (KiB(128) / page_size); - size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; - - if (!privileged && - opts->auxtrace_mmap_pages > max_page) { - opts->auxtrace_mmap_pages = max_page; - pr_err("auxtrace too big, truncating to %d\n", - max_page); - } - - if (!is_power_of_2(sz)) { - pr_err("Invalid mmap size for %s: must be a power of 2\n", - CORESIGHT_ETM_PMU_NAME); - return -EINVAL; - } - } - if (opts->auxtrace_snapshot_mode) pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME, opts->auxtrace_snapshot_size); @@ -406,7 +422,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_OPT_CTXTID | ETM_OPT_TS); + BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) goto out; } @@ -485,6 +501,11 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & 
BIT(ETM_OPT_RETSTK)) config |= BIT(ETM4_CFG_BIT_RETSTK); + if (config_opts & BIT(ETM_OPT_CTXTID2)) + config |= BIT(ETM4_CFG_BIT_VMID) | + BIT(ETM4_CFG_BIT_VMID_OPT); + if (config_opts & BIT(ETM_OPT_BRANCH_BROADCAST)) + config |= BIT(ETM4_CFG_BIT_BB); return config; } @@ -494,7 +515,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, struct evlist *evlist __maybe_unused) { int i; - int etmv3 = 0, etmv4 = 0; + int etmv3 = 0, etmv4 = 0, ete = 0; struct perf_cpu_map *event_cpus = evlist->core.cpus; struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL); @@ -505,7 +526,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, !cpu_map__has(online_cpus, i)) continue; - if (cs_etm_is_etmv4(itr, i)) + if (cs_etm_is_ete(itr, i)) + ete++; + else if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -516,7 +539,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, if (!cpu_map__has(online_cpus, i)) continue; - if (cs_etm_is_etmv4(itr, i)) + if (cs_etm_is_ete(itr, i)) + ete++; + else if (cs_etm_is_etmv4(itr, i)) etmv4++; else etmv3++; @@ -526,6 +551,7 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused, perf_cpu_map__put(online_cpus); return (CS_ETM_HEADER_SIZE + + (ete * CS_ETE_PRIV_SIZE) + (etmv4 * CS_ETMV4_PRIV_SIZE) + (etmv3 * CS_ETMV3_PRIV_SIZE)); } @@ -568,45 +594,78 @@ static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path) return val; } +#define TRCDEVARCH_ARCHPART_SHIFT 0 +#define TRCDEVARCH_ARCHPART_MASK GENMASK(11, 0) +#define TRCDEVARCH_ARCHPART(x) (((x) & TRCDEVARCH_ARCHPART_MASK) >> TRCDEVARCH_ARCHPART_SHIFT) + +#define TRCDEVARCH_ARCHVER_SHIFT 12 +#define TRCDEVARCH_ARCHVER_MASK GENMASK(15, 12) +#define TRCDEVARCH_ARCHVER(x) (((x) & TRCDEVARCH_ARCHVER_MASK) >> TRCDEVARCH_ARCHVER_SHIFT) + +static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = 
ptr->cs_etm_pmu; + int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + + /* + * ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13. + * See ETM_DEVARCH_ETE_ARCH in coresight-etm4x.h + */ + return TRCDEVARCH_ARCHVER(trcdevarch) == 5 && TRCDEVARCH_ARCHPART(trcdevarch) == 0xA13; +} + +static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Get trace configuration register */ + data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr); + /* Get traceID from the framework */ + data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* Get read-only information from sysFS */ + data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); + data[CS_ETMV4_TRCIDR1] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); + data[CS_ETMV4_TRCIDR2] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); + data[CS_ETMV4_TRCIDR8] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); + data[CS_ETMV4_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]); +} + static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) { - u32 increment; + u32 increment, nr_trc_params; u64 magic; struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; /* first see what kind of tracer this cpu is affined to */ - if (cs_etm_is_etmv4(itr, cpu)) { - magic = __perf_cs_etmv4_magic; - /* Get trace configuration register */ - info->priv[*offset + CS_ETMV4_TRCCONFIGR] = - cs_etmv4_get_config(itr); - /* Get traceID from the framework */ - info->priv[*offset + CS_ETMV4_TRCTRACEIDR] = - coresight_get_trace_id(cpu); - /* Get read-only 
information from sysFS */ - info->priv[*offset + CS_ETMV4_TRCIDR0] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); - info->priv[*offset + CS_ETMV4_TRCIDR1] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR1]); - info->priv[*offset + CS_ETMV4_TRCIDR2] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR2]); - info->priv[*offset + CS_ETMV4_TRCIDR8] = + if (cs_etm_is_ete(itr, cpu)) { + magic = __perf_cs_ete_magic; + /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ + cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); + info->priv[*offset + CS_ETE_TRCDEVARCH] = cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETMV4_TRCIDR8]); - info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro - [CS_ETMV4_TRCAUTHSTATUS]); + metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + + /* How much space was used */ + increment = CS_ETE_PRIV_MAX; + nr_trc_params = CS_ETE_PRIV_MAX - CS_ETM_COMMON_BLK_MAX_V1; + } else if (cs_etm_is_etmv4(itr, cpu)) { + magic = __perf_cs_etmv4_magic; + cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; + nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR; } else { magic = __perf_cs_etmv3_magic; /* Get configuration register */ @@ -624,11 +683,13 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETM_PRIV_MAX; + nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR; } /* Build generic header portion */ info->priv[*offset + CS_ETM_MAGIC] = magic; info->priv[*offset + CS_ETM_CPU] = cpu; + info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params; /* Where the next CPU entry should start from */ *offset += increment; } @@ -674,7 +735,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, /* First fill out the session header */ info->type = PERF_AUXTRACE_CS_ETM; - info->priv[CS_HEADER_VERSION_0] = 0; + 
info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION; info->priv[CS_PMU_TYPE_CPUS] = type << 32; info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; @@ -690,135 +751,6 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, return 0; } -static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx) -{ - bool *wrapped; - int cnt = ptr->wrapped_cnt; - - /* Make @ptr->wrapped as big as @idx */ - while (cnt <= idx) - cnt++; - - /* - * Free'ed in cs_etm_recording_free(). Using realloc() to avoid - * cross compilation problems where the host's system supports - * reallocarray() but not the target. - */ - wrapped = realloc(ptr->wrapped, cnt * sizeof(bool)); - if (!wrapped) - return -ENOMEM; - - wrapped[cnt - 1] = false; - ptr->wrapped_cnt = cnt; - ptr->wrapped = wrapped; - - return 0; -} - -static bool cs_etm_buffer_has_wrapped(unsigned char *buffer, - size_t buffer_size, u64 head) -{ - u64 i, watermark; - u64 *buf = (u64 *)buffer; - size_t buf_size = buffer_size; - - /* - * We want to look the very last 512 byte (chosen arbitrarily) in - * the ring buffer. - */ - watermark = buf_size - 512; - - /* - * @head is continuously increasing - if its value is equal or greater - * than the size of the ring buffer, it has wrapped around. - */ - if (head >= buffer_size) - return true; - - /* - * The value of @head is somewhere within the size of the ring buffer. - * This can be that there hasn't been enough data to fill the ring - * buffer yet or the trace time was so long that @head has numerically - * wrapped around. To find we need to check if we have data at the very - * end of the ring buffer. We can reliably do this because mmap'ed - * pages are zeroed out and there is a fresh mapping with every new - * session. 
- */ - - /* @head is less than 512 byte from the end of the ring buffer */ - if (head > watermark) - watermark = head; - - /* - * Speed things up by using 64 bit transactions (see "u64 *buf" above) - */ - watermark >>= 3; - buf_size >>= 3; - - /* - * If we find trace data at the end of the ring buffer, @head has - * been there and has numerically wrapped around at least once. - */ - for (i = watermark; i < buf_size; i++) - if (buf[i]) - return true; - - return false; -} - -static int cs_etm_find_snapshot(struct auxtrace_record *itr, - int idx, struct auxtrace_mmap *mm, - unsigned char *data, - u64 *head, u64 *old) -{ - int err; - bool wrapped; - struct cs_etm_recording *ptr = - container_of(itr, struct cs_etm_recording, itr); - - /* - * Allocate memory to keep track of wrapping if this is the first - * time we deal with this *mm. - */ - if (idx >= ptr->wrapped_cnt) { - err = cs_etm_alloc_wrapped_array(ptr, idx); - if (err) - return err; - } - - /* - * Check to see if *head has wrapped around. If it hasn't only the - * amount of data between *head and *old is snapshot'ed to avoid - * bloating the perf.data file with zeros. But as soon as *head has - * wrapped around the entire size of the AUX ring buffer it taken. - */ - wrapped = ptr->wrapped[idx]; - if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) { - wrapped = true; - ptr->wrapped[idx] = true; - } - - pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", - __func__, idx, (size_t)*old, (size_t)*head, mm->len); - - /* No wrap has occurred, we can just use *head and *old. */ - if (!wrapped) - return 0; - - /* - * *head has wrapped around - adjust *head and *old to pickup the - * entire content of the AUX buffer. 
- */ - if (*head >= mm->len) { - *old = *head - mm->len; - } else { - *head += mm->len; - *old = *head - mm->len; - } - - return 0; -} - static int cs_etm_snapshot_start(struct auxtrace_record *itr) { struct cs_etm_recording *ptr = @@ -856,7 +788,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr) struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); - zfree(&ptr->wrapped); free(ptr); } @@ -884,7 +815,6 @@ struct auxtrace_record *cs_etm_record_init(int *err) ptr->itr.recording_options = cs_etm_recording_options; ptr->itr.info_priv_size = cs_etm_info_priv_size; ptr->itr.info_fill = cs_etm_info_fill; - ptr->itr.find_snapshot = cs_etm_find_snapshot; ptr->itr.snapshot_start = cs_etm_snapshot_start; ptr->itr.snapshot_finish = cs_etm_snapshot_finish; ptr->itr.reference = cs_etm_reference; diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 645009c08b3cb63f5dd9018ef40640ac6dcf4322..4a7b8deef3fdd15222d80c3f3cac8732f0c7d12e 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=120 +size=128 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index b0f42c34882e86d03bd016cdb6a905431a1502a5..408164456530697d7a761c10c9e97264adb58600 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=120 +size=128 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy index eba723cc0d380ecc2661a98986cf3c3bb6f8deb6..86a15dd359d93179519603c12cbba37757f64765 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/attr/system-wide-dummy @@ -7,7 +7,7 @@ cpu=* pid=-1 flags=8 type=1 -size=120 +size=128 config=9 sample_period=4000 sample_type=455 diff --git 
a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index bd77825fd5a158bb3e9548a6c711cc35a820d75c..79649b2aed5d5e20783c4420fb23508a49b97574 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -919,6 +919,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel, err = symbol__annotate2(ms, evsel, opts, &browser.arch); if (err) { char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); ui__error("Couldn't annotate %s:\n%s", sym->name, msg); goto out_free_offsets; diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index a7dff77f20184f1fdade65a9535733b498383c94..5a1d30d14c1cf07f80af62e1432dc5f11dfc1f56 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -177,6 +177,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, err = symbol__annotate(ms, evsel, &annotation__default_options, NULL); if (err) { char msg[BUFSIZ]; + ms->map->dso->annotate_warned = true; symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); ui__error("Couldn't annotate %s: %s\n", sym->name, msg); return -1; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e8f188c2de8bf73f39bc554796aa51b504831681..264e5223ac40a88787eb07a07ec2d257eac0455c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2778,9 +2778,17 @@ int symbol__tty_annotate2(struct map_symbol *ms, struct evsel *evsel, struct rb_root source_line = RB_ROOT; struct hists *hists = evsel__hists(evsel); char buf[1024]; + int err; + + err = symbol__annotate2(ms, evsel, opts, NULL); + if (err) { + char msg[BUFSIZ]; - if (symbol__annotate2(ms, evsel, opts, NULL) < 0) + dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; + } if (opts->print_lines) { srcline_full_filename = opts->full_path; @@ 
-2804,9 +2812,17 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct dso *dso = ms->map->dso; struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; + int err; + + err = symbol__annotate(ms, evsel, opts, NULL); + if (err) { + char msg[BUFSIZ]; - if (symbol__annotate(ms, evsel, opts, NULL) < 0) + dso->annotate_warned = true; + symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); + ui__error("Couldn't annotate %s:\n%s", sym->name, msg); return -1; + } symbol__calc_percent(sym, evsel); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ead536d021607e4af74a18f90416aa4da3b0ad29..5df645f1ef13bc12429e36c303f36d9e359ced93 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1561,6 +1561,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'q': synth_opts->quick += 1; break; + case 'Z': + synth_opts->timeless_decoding = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index ac94d3974e16e336512f692b95e61605d38be3e3..3a4e921f9d487653d415659844c33aac104b5c7e 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -90,6 +90,7 @@ enum itrace_period_type { * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events + * @timeless_decoding: prefer "timeless" decoding i.e. 
ignore timestamps * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -129,6 +130,7 @@ struct itrace_synth_opts { bool tlb; bool remote_access; bool mem; + bool timeless_decoding; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3f4bc40504772acc94ac6cfcc38a000f42854bbe..0b369969b26cb9cb70438b9154ca77bc1a1553ac 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,16 +6,17 @@ * Author: Mathieu Poirier */ +#include +#include #include #include #include #include #include -#include -#include #include "cs-etm.h" #include "cs-etm-decoder.h" +#include "debug.h" #include "intlist.h" /* use raw logging */ @@ -32,9 +33,11 @@ struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); + bool suppress_printing; dcd_tree_handle_t dcd_tree; cs_etm_mem_cb_type mem_access; ocsd_datapath_resp_t prev_return; + const char *decoder_name; }; static u32 @@ -71,9 +74,10 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) ocsd_datapath_resp_t dp_ret; decoder->prev_return = OCSD_RESP_CONT; - + decoder->suppress_printing = true; dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 0, 0, NULL, NULL); + decoder->suppress_printing = false; if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) return -1; @@ -121,6 +125,21 @@ static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params, return 0; } +#define TRCIDR1_TRCARCHMIN_SHIFT 4 +#define TRCIDR1_TRCARCHMIN_MASK GENMASK(7, 4) +#define TRCIDR1_TRCARCHMIN(x) (((x) & TRCIDR1_TRCARCHMIN_MASK) >> TRCIDR1_TRCARCHMIN_SHIFT) + +static enum _ocsd_arch_version cs_etm_decoder__get_etmv4_arch_ver(u32 reg_idr1) +{ + /* + * For ETMv4 if the trace minor version is 4 or more then we can assume + * the architecture is ARCH_AA64 rather than 
just V8. + * ARCH_V8 = V8 architecture + * ARCH_AA64 = Min v8r3 plus additional AA64 PE features + */ + return TRCIDR1_TRCARCHMIN(reg_idr1) >= 4 ? ARCH_AA64 : ARCH_V8; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { @@ -135,7 +154,21 @@ static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, config->reg_idr11 = 0; config->reg_idr12 = 0; config->reg_idr13 = 0; - config->arch_ver = ARCH_V8; + config->arch_ver = cs_etm_decoder__get_etmv4_arch_ver(params->etmv4.reg_idr1); + config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__gen_ete_config(struct cs_etm_trace_params *params, + ocsd_ete_cfg *config) +{ + config->reg_configr = params->ete.reg_configr; + config->reg_traceidr = params->ete.reg_traceidr; + config->reg_idr0 = params->ete.reg_idr0; + config->reg_idr1 = params->ete.reg_idr1; + config->reg_idr2 = params->ete.reg_idr2; + config->reg_idr8 = params->ete.reg_idr8; + config->reg_devarch = params->ete.reg_devarch; + config->arch_ver = ARCH_AA64; config->core_prof = profile_CortexA; } @@ -143,8 +176,10 @@ static void cs_etm_decoder__print_str_cb(const void *p_context, const char *msg, const int str_len) { - if (p_context && str_len) - ((struct cs_etm_decoder *)p_context)->packet_printer(msg); + const struct cs_etm_decoder *decoder = p_context; + + if (p_context && str_len && !decoder->suppress_printing) + decoder->packet_printer(msg); } static int @@ -220,68 +255,19 @@ cs_etm_decoder__init_raw_frame_logging( } #endif -static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, - const char *decoder_name, - void *trace_config) -{ - u8 csid; - - if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, - OCSD_CREATE_FLG_PACKET_PROC, - trace_config, &csid)) - return -1; - - if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) - return -1; - - return 0; -} - -static int -cs_etm_decoder__create_etm_packet_printer(struct 
cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - const char *decoder_name; - ocsd_etmv3_cfg config_etmv3; - ocsd_etmv4_cfg trace_config_etmv4; - void *trace_config; - - switch (t_params->protocol) { - case CS_ETM_PROTO_ETMV3: - case CS_ETM_PROTO_PTM: - cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? - OCSD_BUILTIN_DCD_ETMV3 : - OCSD_BUILTIN_DCD_PTM; - trace_config = &config_etmv3; - break; - case CS_ETM_PROTO_ETMV4i: - cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; - trace_config = &trace_config_etmv4; - break; - default: - return -1; - } - - return cs_etm_decoder__create_packet_printer(decoder, - decoder_name, - trace_config); -} - static ocsd_datapath_resp_t cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, struct cs_etm_packet_queue *packet_queue, const uint8_t trace_chan_id) { /* No timestamp packet has been received, nothing to do */ - if (!packet_queue->timestamp) + if (!packet_queue->cs_timestamp) return OCSD_RESP_CONT; - packet_queue->timestamp = packet_queue->next_timestamp; + packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; /* Estimate the timestamp for the next range packet */ - packet_queue->next_timestamp += packet_queue->instr_count; + packet_queue->next_cs_timestamp += packet_queue->instr_count; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -293,7 +279,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, - const uint8_t trace_chan_id) + const uint8_t trace_chan_id, + const ocsd_trc_index_t indx) { struct cs_etm_packet_queue *packet_queue; @@ -307,20 +294,41 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * Function do_soft_timestamp() will report the value to the front end, * 
hence asking the decoder to keep decoding rather than stopping. */ - if (packet_queue->timestamp) { - packet_queue->next_timestamp = elem->timestamp; + if (packet_queue->cs_timestamp) { + packet_queue->next_cs_timestamp = elem->timestamp; return OCSD_RESP_CONT; } - /* - * This is the first timestamp we've seen since the beginning of traces - * or a discontinuity. Since timestamps packets are generated *after* - * range packets have been generated, we need to estimate the time at - * which instructions started by substracting the number of instructions - * executed to the timestamp. - */ - packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; - packet_queue->next_timestamp = elem->timestamp; + if (!elem->timestamp) { + /* + * Zero timestamps can be seen due to misconfiguration or hardware bugs. + * Warn once, and don't try to subtract instr_count as it would result in an + * underflow. + */ + packet_queue->cs_timestamp = 0; + if (!cs_etm__etmq_is_timeless(etmq)) + pr_warning_once("Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR + ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n", + indx); + + } else if (packet_queue->instr_count > elem->timestamp) { + /* + * Sanity check that the elem->timestamp - packet_queue->instr_count would not + * result in an underflow. Warn and clamp at 0 if it would. + */ + packet_queue->cs_timestamp = 0; + pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx); + } else { + /* + * This is the first timestamp we've seen since the beginning of traces + * or a discontinuity. Since timestamps packets are generated *after* + * range packets have been generated, we need to estimate the time at + * which instructions started by subtracting the number of instructions + * executed to the timestamp. 
+ */ + packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + } + packet_queue->next_cs_timestamp = elem->timestamp; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -333,8 +341,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, static void cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) { - packet_queue->timestamp = 0; - packet_queue->next_timestamp = 0; + packet_queue->cs_timestamp = 0; + packet_queue->next_cs_timestamp = 0; packet_queue->instr_count = 0; } @@ -491,13 +499,42 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { - pid_t tid; + pid_t tid = -1; + static u64 pid_fmt; + int ret; + + /* + * As all the ETMs run at the same exception level, the system should + * have the same PID format crossing CPUs. So cache the PID format + * and reuse it for sequential decoding. + */ + if (!pid_fmt) { + ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); + if (ret) + return OCSD_RESP_FATAL_SYS_ERR; + } + + /* + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. 
+ */ + switch (pid_fmt) { + case BIT(ETM_OPT_CTXTID): + if (elem->context.ctxt_id_valid) + tid = elem->context.context_id; + break; + case BIT(ETM_OPT_CTXTID2): + if (elem->context.vmid_valid) + tid = elem->context.vmid; + break; + default: + break; + } - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ - if (!elem->context.ctxt_id_valid) + if (tid == -1) return OCSD_RESP_CONT; - tid = elem->context.context_id; if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) return OCSD_RESP_FATAL_SYS_ERR; @@ -512,7 +549,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, - const ocsd_trc_index_t indx __maybe_unused, + const ocsd_trc_index_t indx, const u8 trace_chan_id __maybe_unused, const ocsd_generic_trace_elem *elem) { @@ -549,7 +586,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( break; case OCSD_GEN_TRC_ELEM_TIMESTAMP: resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, - trace_chan_id); + trace_chan_id, + indx); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: resp = cs_etm_decoder__set_tid(etmq, packet_queue, @@ -572,13 +610,14 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( return resp; } -static int cs_etm_decoder__create_etm_packet_decoder( - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) { - const char *decoder_name; ocsd_etmv3_cfg config_etmv3; ocsd_etmv4_cfg trace_config_etmv4; + ocsd_ete_cfg trace_config_ete; void *trace_config; u8 csid; @@ -586,51 +625,55 @@ static int cs_etm_decoder__create_etm_packet_decoder( case CS_ETM_PROTO_ETMV3: case CS_ETM_PROTO_PTM: cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); - decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? 
+ decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? OCSD_BUILTIN_DCD_ETMV3 : OCSD_BUILTIN_DCD_PTM; trace_config = &config_etmv3; break; case CS_ETM_PROTO_ETMV4i: cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); - decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; + case CS_ETM_PROTO_ETE: + cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); + decoder->decoder_name = OCSD_BUILTIN_DCD_ETE; + trace_config = &trace_config_ete; + break; default: return -1; } - if (ocsd_dt_create_decoder(decoder->dcd_tree, - decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, - trace_config, &csid)) - return -1; + if (d_params->operation == CS_ETM_OPERATION_DECODE) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder->decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; - if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, - cs_etm_decoder__gen_trace_elem_printer, - decoder)) - return -1; + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; - return 0; -} + return 0; + } else if (d_params->operation == CS_ETM_OPERATION_PRINT) { + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; -static int -cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, - struct cs_etm_trace_params *t_params, - struct cs_etm_decoder *decoder) -{ - if (d_params->operation == CS_ETM_OPERATION_PRINT) - return cs_etm_decoder__create_etm_packet_printer(t_params, - decoder); - else if (d_params->operation == CS_ETM_OPERATION_DECODE) - return cs_etm_decoder__create_etm_packet_decoder(t_params, - decoder); + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; + } return -1; } struct cs_etm_decoder * -cs_etm_decoder__new(int num_cpu, struct 
cs_etm_decoder_params *d_params, +cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params t_params[]) { struct cs_etm_decoder *decoder; @@ -675,7 +718,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, /* init raw frame logging if required */ cs_etm_decoder__init_raw_frame_logging(d_params, decoder); - for (i = 0; i < num_cpu; i++) { + for (i = 0; i < decoders; i++) { ret = cs_etm_decoder__create_etm_decoder(d_params, &t_params[i], decoder); @@ -747,3 +790,8 @@ void cs_etm_decoder__free(struct cs_etm_decoder *decoder) decoder->dcd_tree = NULL; free(decoder); } + +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder) +{ + return decoder->decoder_name; +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 11f3391d06f2b1471ba89649d14d11ad22fe90ce..92a855fbe5b8eccab066481d6f2fc43735b08707 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -37,11 +37,22 @@ struct cs_etmv4_trace_params { u32 reg_traceidr; }; +struct cs_ete_trace_params { + u32 reg_idr0; + u32 reg_idr1; + u32 reg_idr2; + u32 reg_idr8; + u32 reg_configr; + u32 reg_traceidr; + u32 reg_devarch; +}; + struct cs_etm_trace_params { int protocol; union { struct cs_etmv3_trace_params etmv3; struct cs_etmv4_trace_params etmv4; + struct cs_ete_trace_params ete; }; }; @@ -65,6 +76,7 @@ enum { CS_ETM_PROTO_ETMV4i, CS_ETM_PROTO_ETMV4d, CS_ETM_PROTO_PTM, + CS_ETM_PROTO_ETE }; enum cs_etm_decoder_operation { @@ -92,5 +104,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue, struct cs_etm_packet *packet); int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); +const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder); #endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 
a2a369e2fbb67eadbf1474d87a1b55e335bb0f8b..88fee0935036dccc776732df557c294581893929 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -37,8 +38,6 @@ #include #include "util/synthetic-events.h" -#define MAX_TIMESTAMP (~0ULL) - struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -51,10 +50,9 @@ struct cs_etm_auxtrace { u8 timeless_decoding; u8 snapshot_mode; u8 data_queued; - u8 sample_branches; - u8 sample_instructions; int num_cpu; + u64 latest_kernel_timestamp; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; @@ -62,7 +60,6 @@ struct cs_etm_auxtrace { u64 instructions_sample_period; u64 instructions_id; u64 **metadata; - u64 kernel_start; unsigned int pmu_type; }; @@ -85,7 +82,7 @@ struct cs_etm_queue { struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; - u8 pending_timestamp; + u8 pending_timestamp_chan_id; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -97,7 +94,6 @@ struct cs_etm_queue { /* RB tree for quick conversion between traceID and metadata pointers */ static struct intlist *traceid_list; -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_queues(struct cs_etm_auxtrace *etm); static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, pid_t tid); @@ -156,6 +152,47 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +/* + * The returned PID format is presented by two bits: + * + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * are enabled at the same time when the session runs on an EL2 kernel. + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be + * recorded in the trace data, the tool will selectively use + * CONTEXTIDR_EL2 as PID. 
+ */ +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +{ + struct int_node *inode; + u64 *metadata, val; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + val = metadata[CS_ETM_ETMCR]; + /* CONTEXTIDR is traced */ + if (val & BIT(ETM_OPT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } else { + val = metadata[CS_ETMV4_TRCCONFIGR]; + /* CONTEXTIDR_EL2 is traced */ + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + *pid_fmt = BIT(ETM_OPT_CTXTID2); + /* CONTEXTIDR_EL1 is traced */ + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { @@ -166,7 +203,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, * be more than one channel per cs_etm_queue, we need to specify * what traceID queue needs servicing. 
*/ - etmq->pending_timestamp = trace_chan_id; + etmq->pending_timestamp_chan_id = trace_chan_id; } static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, @@ -174,22 +211,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, { struct cs_etm_packet_queue *packet_queue; - if (!etmq->pending_timestamp) + if (!etmq->pending_timestamp_chan_id) return 0; if (trace_chan_id) - *trace_chan_id = etmq->pending_timestamp; + *trace_chan_id = etmq->pending_timestamp_chan_id; packet_queue = cs_etm__etmq_get_packet_queue(etmq, - etmq->pending_timestamp); + etmq->pending_timestamp_chan_id); if (!packet_queue) return 0; /* Acknowledge pending status */ - etmq->pending_timestamp = 0; + etmq->pending_timestamp_chan_id = 0; /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ - return packet_queue->timestamp; + return packet_queue->cs_timestamp; } static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) @@ -371,8 +408,8 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, { struct cs_etm_packet *tmp; - if (etm->sample_branches || etm->synth_opts.last_branch || - etm->sample_instructions) { + if (etm->synth_opts.branches || etm->synth_opts.last_branch || + etm->synth_opts.instructions) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. 
@@ -421,14 +458,30 @@ static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; } +static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, + struct cs_etm_auxtrace *etm, int idx) +{ + u64 **metadata = etm->metadata; + + t_params[idx].protocol = CS_ETM_PROTO_ETE; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; +} + static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, - struct cs_etm_auxtrace *etm) + struct cs_etm_auxtrace *etm, + int decoders) { int i; u32 etmidr; u64 architecture; - for (i = 0; i < etm->num_cpu; i++) { + for (i = 0; i < decoders; i++) { architecture = etm->metadata[i][CS_ETM_MAGIC]; switch (architecture) { @@ -439,6 +492,9 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, case __perf_cs_etmv4_magic: cs_etm__set_trace_param_etmv4(t_params, etm, i); break; + case __perf_cs_ete_magic: + cs_etm__set_trace_param_ete(t_params, etm, i); + break; default: return -EINVAL; } @@ -449,7 +505,8 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, struct cs_etm_queue *etmq, - enum cs_etm_decoder_operation mode) + enum cs_etm_decoder_operation mode, + bool formatted) { int ret = -EINVAL; @@ -459,7 +516,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, d_params->packet_printer = cs_etm__packet_dump; d_params->operation = mode; d_params->data = etmq; - 
d_params->formatted = true; + d_params->formatted = formatted; d_params->fsyncs = false; d_params->hsyncs = false; d_params->frame_aligned = true; @@ -469,44 +526,23 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, return ret; } -static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +static void cs_etm__dump_event(struct cs_etm_queue *etmq, struct auxtrace_buffer *buffer) { int ret; const char *color = PERF_COLOR_BLUE; - struct cs_etm_decoder_params d_params; - struct cs_etm_trace_params *t_params; - struct cs_etm_decoder *decoder; size_t buffer_used = 0; fprintf(stdout, "\n"); color_fprintf(stdout, color, - ". ... CoreSight ETM Trace data: size %zu bytes\n", - buffer->size); - - /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); - - if (!t_params) - return; - - if (cs_etm__init_trace_params(t_params, etm)) - goto out_free; + ". ... CoreSight %s Trace data: size %#zx bytes\n", + cs_etm_decoder__get_name(etmq->decoder), buffer->size); - /* Set decoder parameters to simply print the trace packets */ - if (cs_etm__init_decoder_params(&d_params, NULL, - CS_ETM_OPERATION_PRINT)) - goto out_free; - - decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); - - if (!decoder) - goto out_free; do { size_t consumed; ret = cs_etm_decoder__process_data_block( - decoder, buffer->offset, + etmq->decoder, buffer->offset, &((u8 *)buffer->data)[buffer_used], buffer->size - buffer_used, &consumed); if (ret) @@ -515,16 +551,12 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, buffer_used += consumed; } while (buffer_used < buffer->size); - cs_etm_decoder__free(decoder); - -out_free: - zfree(&t_params); + cs_etm_decoder__reset(etmq->decoder); } static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - int ret; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -534,11 +566,6 @@ 
static int cs_etm__flush_events(struct perf_session *session, if (!tool->ordered_events) return -EINVAL; - ret = cs_etm__update_queues(etm); - - if (ret < 0) - return ret; - if (etm->timeless_decoding) return cs_etm__process_timeless_queues(etm, -1); @@ -650,7 +677,7 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) { + if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else @@ -705,17 +732,32 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); - if (len <= 0) + if (len <= 0) { + ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" + " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); + if (!al.map->dso->auxtrace_warned) { + pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n", + address, + al.map->dso->long_name ? al.map->dso->long_name : "Unknown"); + al.map->dso->auxtrace_warned = true; + } return 0; + } return len; } -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + bool formatted) { struct cs_etm_decoder_params d_params; struct cs_etm_trace_params *t_params = NULL; struct cs_etm_queue *etmq; + /* + * Each queue can only contain data from one CPU when unformatted, so only one decoder is + * needed. + */ + int decoders = formatted ? 
etm->num_cpu : 1; etmq = zalloc(sizeof(*etmq)); if (!etmq) @@ -726,20 +768,23 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) goto out_free; /* Use metadata to fill in trace parameters for trace decoder */ - t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + t_params = zalloc(sizeof(*t_params) * decoders); if (!t_params) goto out_free; - if (cs_etm__init_trace_params(t_params, etm)) + if (cs_etm__init_trace_params(t_params, etm, decoders)) goto out_free; /* Set decoder parameters to decode trace packets */ if (cs_etm__init_decoder_params(&d_params, etmq, - CS_ETM_OPERATION_DECODE)) + dump_trace ? CS_ETM_OPERATION_PRINT : + CS_ETM_OPERATION_DECODE, + formatted)) goto out_free; - etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + etmq->decoder = cs_etm_decoder__new(decoders, &d_params, + t_params); if (!etmq->decoder) goto out_free; @@ -767,31 +812,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, struct auxtrace_queue *queue, - unsigned int queue_nr) + unsigned int queue_nr, + bool formatted) { - int ret = 0; - unsigned int cs_queue_nr; - u8 trace_chan_id; - u64 timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) - goto out; + return 0; - etmq = cs_etm__alloc_queue(etm); + etmq = cs_etm__alloc_queue(etm, formatted); - if (!etmq) { - ret = -ENOMEM; - goto out; - } + if (!etmq) + return -ENOMEM; queue->priv = etmq; etmq->etm = etm; etmq->queue_nr = queue_nr; etmq->offset = 0; - if (etm->timeless_decoding) - goto out; + return 0; +} + +static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, + struct cs_etm_queue *etmq, + unsigned int queue_nr) +{ + int ret = 0; + unsigned int cs_queue_nr; + u8 trace_chan_id; + u64 cs_timestamp; /* * We are under a CPU-wide trace scenario. 
As such we need to know @@ -812,7 +861,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, /* * Run decoder on the trace block. The decoder will stop when - * encountering a timestamp, a full packet queue or the end of + * encountering a CS timestamp, a full packet queue or the end of * trace for that block. */ ret = cs_etm__decode_data_block(etmq); @@ -823,10 +872,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all * the timestamp calculation for us. */ - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); /* We found a timestamp, no need to continue. */ - if (timestamp) + if (cs_timestamp) break; /* @@ -850,38 +899,11 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * queue and will be processed in cs_etm__process_queues(). */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); out: return ret; } -static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) -{ - unsigned int i; - int ret; - - if (!etm->kernel_start) - etm->kernel_start = machine__kernel_start(etm->machine); - - for (i = 0; i < etm->queues.nr_queues; i++) { - ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); - if (ret) - return ret; - } - - return 0; -} - -static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) -{ - if (etm->queues.new_data) { - etm->queues.new_data = false; - return cs_etm__setup_queues(etm); - } - - return 0; -} - static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) @@ -1152,6 +1174,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); + if 
(!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = addr; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1208,6 +1232,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = ip; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1337,7 +1363,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_branches = true; etm->branches_sample_type = attr.sample_type; etm->branches_id = id; id += 1; @@ -1361,7 +1386,6 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, err = cs_etm__synth_event(session, &attr, id); if (err) return err; - etm->sample_instructions = true; etm->instructions_sample_type = attr.sample_type; etm->instructions_id = id; id += 1; @@ -1392,7 +1416,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, tidq->prev_packet->last_instr_taken_branch) cs_etm__update_last_branch_rb(etmq, tidq); - if (etm->sample_instructions && + if (etm->synth_opts.instructions && tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically @@ -1475,7 +1499,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->sample_branches) { + if (etm->synth_opts.branches) { bool generate_sample = false; /* Generate sample for tracing on packet */ @@ -1529,6 +1553,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, goto swap_packet; if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; @@ -1554,7 +1579,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } - if (etm->sample_branches && + if (etm->synth_opts.branches && tidq->prev_packet->sample_type == CS_ETM_RANGE) { 
err = cs_etm__synth_branch_sample(etmq, tidq); if (err) @@ -1586,6 +1611,7 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, * the trace. */ if (etmq->etm->synth_opts.last_branch && + etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; @@ -2177,13 +2203,27 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) { int ret = 0; - unsigned int cs_queue_nr, queue_nr; + unsigned int cs_queue_nr, queue_nr, i; u8 trace_chan_id; - u64 timestamp; + u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq; + /* + * Pre-populate the heap with one entry from each queue so that we can + * start processing in time order across all queues. + */ + for (i = 0; i < etm->queues.nr_queues; i++) { + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i); + if (ret) + return ret; + } + while (1) { if (!etm->heap.heap_cnt) goto out; @@ -2241,9 +2281,9 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) if (ret) goto out; - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); - if (!timestamp) { + if (!cs_timestamp) { /* * Function cs_etm__decode_data_block() returns when * there is no more traces to decode in the current @@ -2266,7 +2306,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) * this queue/traceID. 
*/ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } out: @@ -2337,8 +2377,7 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_sample *sample, struct perf_tool *tool) { - int err = 0; - u64 timestamp; + u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -2352,16 +2391,15 @@ static int cs_etm__process_event(struct perf_session *session, } if (sample->time && (sample->time != (u64) -1)) - timestamp = sample->time; + sample_kernel_timestamp = sample->time; else - timestamp = 0; - - if (timestamp || etm->timeless_decoding) { - err = cs_etm__update_queues(etm); - if (err) - return err; - } + sample_kernel_timestamp = 0; + /* + * Don't wait for cs_etm__flush_events() in per-thread/timeless mode to start the decode. We + * need the tid of the PERF_RECORD_EXIT event to assign to the synthesised samples because + * ETM_OPT_CTXTID is not enabled. + */ if (etm->timeless_decoding && event->header.type == PERF_RECORD_EXIT) return cs_etm__process_timeless_queues(etm, @@ -2372,13 +2410,34 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event); - if (!etm->timeless_decoding && - event->header.type == PERF_RECORD_AUX) - return cs_etm__process_queues(etm); + if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) { + /* + * Record the latest kernel timestamp available in the header + * for samples so that synthesised samples occur from this point + * onwards. 
+ */ + etm->latest_kernel_timestamp = sample_kernel_timestamp; + } return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm->queues.queue_array[i].priv, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2392,6 +2451,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, int fd = perf_data__fd(session->data); bool is_pipe = perf_data__is_pipe(session->data); int err; + int idx = event->auxtrace.idx; if (is_pipe) data_offset = 0; @@ -2406,12 +2466,24 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, if (err) return err; + /* + * Knowing if the trace is formatted or not requires a lookup of + * the aux record so only works in non-piped mode where data is + * queued in cs_etm__queue_aux_records(). Always assume + * formatted in piped mode (true). 
+ */ + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, true); + if (err) + return err; + if (dump_trace) if (auxtrace_buffer__get_data(buffer, fd)) { - cs_etm__dump_event(etm, buffer); + cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -2422,6 +2494,10 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) struct evlist *evlist = etm->session->evlist; bool timeless_decoding = true; + /* Override timeless mode with user input from --itrace=Z */ + if (etm->synth_opts.timeless_decoding) + return true; + /* * Circle through the list of event and complain if we find one * with the time bit set. @@ -2435,7 +2511,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) } static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_HEADER_VERSION] = " Header version %llx\n", [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", }; @@ -2443,6 +2519,7 @@ static const char * const cs_etm_global_header_fmts[] = { static const char * const cs_etm_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETM_ETMCR] = " ETMCR %llx\n", [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", [CS_ETM_ETMCCER] = " ETMCCER %llx\n", @@ -2452,6 +2529,7 @@ static const char * const cs_etm_priv_fmts[] = { static const char * const cs_etmv4_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", @@ -2459,28 +2537,350 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", 
[CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" }; -static void cs_etm__print_auxtrace_info(__u64 *val, int num) +static const char * const param_unk_fmt = + " Unknown parameter [%d] %llx\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %llx\n"; + +static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) { - int i, j, cpu = 0; + int i = *offset, j, nr_params = 0, fmt_offset; + __u64 magic; - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} + +static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + __u64 magic; - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { - if (val[i] == __perf_cs_etmv3_magic) - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ 
+ total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; + + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - else if (val[i] == __perf_cs_etmv4_magic) - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + } + } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) { + /* + * ETE and ETMv4 can be printed in the same block because the number of parameters + * is saved and they share the list of parameter names. ETE is also only supported + * in V1 files. + */ + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETE_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - else - /* failure.. return */ + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(__u64 *val, int num) +{ + int i, cpu = 0, version, err; + + /* bail out early on bad header version */ + version = val[0]; + if (version > CS_HEADER_CURRENT_VERSION) { + /* failure.. return */ + fprintf(stdout, " Unknown Header Version = %x, ", version); + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return; + } + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + else if (version == 1) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) return; } } +/* + * Read a single cpu parameter block from the auxtrace_info priv block. + * + * For version 1 there is a per cpu nr_params entry. 
If we are handling + * version 1 file, then there may be less, the same, or more params + * indicated by this value than the compile time number we understand. + * + * For a version 0 info block, there are a fixed number, and we need to + * fill out the nr_param value in the metadata we create. + */ +static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, + int out_blk_size, int nr_params_v0) +{ + u64 *metadata = NULL; + int hdr_version; + int nr_in_params, nr_out_params, nr_cmn_params; + int i, k; + + metadata = zalloc(sizeof(*metadata) * out_blk_size); + if (!metadata) + return NULL; + + /* read block current index & version */ + i = *buff_in_offset; + hdr_version = buff_in[CS_HEADER_VERSION]; + + if (!hdr_version) { + /* read version 0 info block into a version 1 metadata block */ + nr_in_params = nr_params_v0; + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; + /* remaining block params at offset +1 from source */ + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) + metadata[k + 1] = buff_in[i + k]; + /* version 0 has 2 common params */ + nr_cmn_params = 2; + } else { + /* read version 1 info block - input and output nr_params may differ */ + /* version 1 has 3 common params */ + nr_cmn_params = 3; + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; + + /* if input has more params than output - skip excess */ + nr_out_params = nr_in_params + nr_cmn_params; + if (nr_out_params > out_blk_size) + nr_out_params = out_blk_size; + + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) + metadata[k] = buff_in[i + k]; + + /* record the actual nr params we copied */ + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; + } + + /* adjust in offset by number of in params used */ + i += nr_in_params + nr_cmn_params; + *buff_in_offset = i; + return metadata; +} + +/** + * Puts a fragment of an auxtrace buffer into the auxtrace queues based 
+ * on the bounds of aux_event, if it matches with the buffer that's at + * file_offset. + * + * Normally, whole auxtrace buffers would be added to the queue. But we + * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder + * is reset across each buffer, so splitting the buffers up in advance has + * the same effect. + */ +static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz, + struct perf_record_aux *aux_event, struct perf_sample *sample) +{ + int err; + char buf[PERF_SAMPLE_MAX_SIZE]; + union perf_event *auxtrace_event_union; + struct perf_record_auxtrace *auxtrace_event; + union perf_event auxtrace_fragment; + __u64 aux_offset, aux_size; + __u32 idx; + bool formatted; + + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* + * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got + * from looping through the auxtrace index. + */ + err = perf_session__peek_event(session, file_offset, buf, + PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL); + if (err) + return err; + auxtrace_event = &auxtrace_event_union->auxtrace; + if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE) + return -EINVAL; + + if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) || + auxtrace_event->header.size != sz) { + return -EINVAL; + } + + /* + * In per-thread mode, CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). Return 'not found' if neither CPU nor TID match. + */ + if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || + auxtrace_event->cpu != sample->cpu) + return 1; + + if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { + /* + * Clamp size in snapshot mode. The buffer size is clamped in + * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect + * the buffer size. 
+ */ + aux_size = min(aux_event->aux_size, auxtrace_event->size); + + /* + * In this mode, the head also points to the end of the buffer so aux_offset + * needs to have the size subtracted so it points to the beginning as in normal mode + */ + aux_offset = aux_event->aux_offset - aux_size; + } else { + aux_size = aux_event->aux_size; + aux_offset = aux_event->aux_offset; + } + + if (aux_offset >= auxtrace_event->offset && + aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) { + /* + * If this AUX event was inside this buffer somewhere, create a new auxtrace event + * based on the sizes of the aux event, and queue that fragment. + */ + auxtrace_fragment.auxtrace = *auxtrace_event; + auxtrace_fragment.auxtrace.size = aux_size; + auxtrace_fragment.auxtrace.offset = aux_offset; + file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size; + + pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu); + err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment, + file_offset, NULL); + if (err) + return err; + + idx = auxtrace_event->idx; + formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW); + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], + idx, formatted); + } + + /* Wasn't inside this buffer, but there were no parse errors. 
1 == 'not found' */ + return 1; +} + +static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + struct perf_sample sample; + int ret; + struct auxtrace_index_entry *ent; + struct auxtrace_index *auxtrace_index; + struct evsel *evsel; + size_t i; + + /* Don't care about any other events, we're only queuing buffers for AUX events */ + if (event->header.type != PERF_RECORD_AUX) + return 0; + + if (event->header.size < sizeof(struct perf_record_aux)) + return -EINVAL; + + /* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */ + if (!event->aux.aux_size) + return 0; + + /* + * Parse the sample, we need the sample_id_all data that comes after the event so that the + * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID. + */ + evsel = perf_evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + ret = evsel__parse_sample(evsel, event, &sample); + if (ret) + return ret; + + /* + * Loop through the auxtrace index to find the buffer that matches up with this aux event. + */ + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { + for (i = 0; i < auxtrace_index->nr; i++) { + ent = &auxtrace_index->entries[i]; + ret = cs_etm__queue_aux_fragment(session, ent->file_offset, + ent->sz, &event->aux, &sample); + /* + * Stop search on error or successful values. Continue search on + * 1 ('not found') + */ + if (ret != 1) + return ret; + } + } + + /* + * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but + * don't exit with an error because it will still be possible to decode other aux records. 
+ */ + pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64 + " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu); + return 0; +} + +static int cs_etm__queue_aux_records(struct perf_session *session) +{ + struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index, + struct auxtrace_index, list); + if (index && index->nr > 0) + return perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__queue_aux_records_cb, NULL); + + /* + * We would get here if there are no entries in the index (either no auxtrace + * buffers or no index at all). Fail silently as there is the possibility of + * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still + * false. + * + * In that scenario, buffers will not be split by AUX records. + */ + return 0; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2492,11 +2892,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; - int err = 0, idx = -1; - int i, j, k; + int num_cpu, trcidr_idx; + int err = 0; + int i, j; u64 *ptr, *hdr = NULL; u64 **metadata = NULL; + u64 hdr_version; /* * sizeof(auxtrace_info_event::type) + @@ -2512,16 +2913,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* First the global part */ ptr = (u64 *) auxtrace_info->priv; - /* Look for version '0' of the header */ - if (ptr[0] != 0) + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + /* print routine will print an error on bad version */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); return -EINVAL; + } - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); if (!hdr) return -ENOMEM; /* Extract header information - see 
cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) hdr[i] = ptr[i]; num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & @@ -2552,35 +2958,41 @@ int cs_etm__process_auxtrace_info(union perf_event *event, */ for (j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETM_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETM_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETM_PRIV_MAX, + CS_ETM_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETM_ETMTRACEIDR]; - i += CS_ETM_PRIV_MAX; + trcidr_idx = CS_ETM_ETMTRACEIDR; + } else if (ptr[i] == __perf_cs_etmv4_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETMV4_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETMV4_PRIV_MAX, + CS_ETMV4_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; - i += CS_ETMV4_PRIV_MAX; + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else if (ptr[i] == __perf_cs_ete_magic) { + metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); + + /* ETE shares first part of metadata with ETMv4 */ + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } else { + ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". 
File could be from a newer version of perf.\n", + ptr[i]); + err = -EINVAL; + goto err_free_metadata; + } + + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; } /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, idx); + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); /* Something went wrong, no need to continue */ if (!inode) { @@ -2601,7 +3013,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } /* - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and * CS_ETMV4_PRIV_MAX mark how many double words are in the * global metadata, and each cpu's metadata respectively. * The following tests if the correct number of double words was @@ -2623,6 +3035,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_free_etm; + if (session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); + etm->synth_opts.callchain = false; + } + etm->session = session; etm->machine = &session->machines.host; @@ -2664,26 +3084,24 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; - } - - if (session->itrace_synth_opts->set) { - etm->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&etm->synth_opts, - session->itrace_synth_opts->default_no_sample); - etm->synth_opts.callchain = false; } err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; - err = auxtrace_queues__process_index(&etm->queues, session); + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; etm->data_queued = etm->queues.populated; + /* + * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and + * cs_etm__queue_aux_fragment() for details relating to 
limitations. + */ + if (!etm->data_queued) + pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n" + "Continuing with best effort decoding in piped mode.\n\n"); return 0; @@ -2703,6 +3121,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, intlist__delete(traceid_list); err_free_hdr: zfree(&hdr); - + /* + * At this point, as a minimum we have valid header. Dump the rest of + * the info section - the print routines will error out on structural + * issues. + */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 4ad925d6d79966aa913e30cbccf80ccbef70abaa..f54834f6f9e35e95c379b3a1b357d8f609359480 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -17,23 +17,37 @@ struct perf_session; */ enum { /* Starting with 0x0 */ - CS_HEADER_VERSION_0, + CS_HEADER_VERSION, /* PMU->type (32 bit), total # of CPUs (32 bit) */ CS_PMU_TYPE_CPUS, CS_ETM_SNAPSHOT, - CS_HEADER_VERSION_0_MAX, + CS_HEADER_VERSION_MAX, }; +/* + * Update the version for new format. + * + * New version 1 format adds a param count to the per cpu metadata. + * This allows easy adding of new metadata parameters. + * Requires that new params always added after current ones. + * Also allows client reader to handle file versions that are different by + * checking the number of params in the file vs the number expected. 
+ */ +#define CS_HEADER_CURRENT_VERSION 1 + /* Beginning of header common to both ETMv3 and V4 */ enum { CS_ETM_MAGIC, CS_ETM_CPU, + /* Number of trace config params in following ETM specific block */ + CS_ETM_NR_TRC_PARAMS, + CS_ETM_COMMON_BLK_MAX_V1, }; /* ETMv3/PTM metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETM_ETMTRACEIDR, /* RO, taken from sysFS */ CS_ETM_ETMCCER, @@ -41,10 +55,13 @@ enum { CS_ETM_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1) + /* ETMv4 metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETMV4_TRCTRACEIDR, /* RO, taken from sysFS */ CS_ETMV4_TRCIDR0, @@ -55,6 +72,18 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. 
*/ +#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) + +/* + * ETE metadata is ETMv4 plus TRCDEVARCH register and doesn't support header V0 since it was + * added in header V1 + */ +enum { + CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX, + CS_ETE_PRIV_MAX +}; + /* * ETMv3 exception encoding number: * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) @@ -150,8 +179,8 @@ struct cs_etm_packet_queue { u32 head; u32 tail; u32 instr_count; - u64 timestamp; - u64 next_timestamp; + u64 cs_timestamp; + u64 next_cs_timestamp; struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; }; @@ -162,17 +191,20 @@ struct cs_etm_packet_queue { #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) -#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL +#define __perf_cs_ete_magic 0x5050505050505050ULL #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64)) #ifdef HAVE_CSTRACE_SUPPORT int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index f1734abd98dd05bd790745fb57e5e3835047e241..5b04117fe0d3154bccb717dc0500f17b4cb8d1b5 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -21,6 +21,13 @@ extern int debug_data_convert; eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) #define pr_warning(fmt, ...) 
\ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning_once(fmt, ...) ({ \ + static int __warned; \ + if (unlikely(!__warned)) { \ + pr_warning(fmt, ##__VA_ARGS__); \ + __warned = 1; \ + } \ +}) #define pr_info(fmt, ...) \ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) #define pr_debug(fmt, ...) \ @@ -54,6 +61,13 @@ void trace_event(union perf_event *event); int ui__error(const char *format, ...) __printf(1, 2); int ui__warning(const char *format, ...) __printf(1, 2); +#define ui__warning_once(format, ...) ({ \ + static int __warned; \ + if (unlikely(!__warned)) { \ + ui__warning(format, ##__VA_ARGS__); \ + __warned = 1; \ + } \ +}) void pr_stat(const char *fmt, ...); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index d8cb4f5680a4ae85b408f05525e638fc9edee35a..b5cfd28d6fc4367c94e3ac9f57429eb96987feca 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -170,6 +170,7 @@ struct dso { u8 has_srcline:1; u8 hit:1; u8 annotate_warned:1; + u8 auxtrace_warned:1; u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1;