diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x index 651602a61eac82af31d878a9ba1670aa18bab798..234c33fbdb55f85c4e736d372bb948d5554295bd 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x @@ -236,7 +236,7 @@ What: /sys/bus/coresight/devices/.[etm|ptm]/traceid Date: November 2014 KernelVersion: 3.19 Contact: Mathieu Poirier -Description: (RW) Holds the trace ID that will appear in the trace stream +Description: (RO) Holds the trace ID that will appear in the trace stream coming from this trace entity. What: /sys/bus/coresight/devices/.[etm|ptm]/trigger_event diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 648a92e6d013cfc96510ea05303640bd5324a1b9..c33c90bd7c4dbc3f374b64050cdfdff4465a73a5 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -65,6 +65,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all, *x86_pmu.enable_all); DEFINE_STATIC_CALL_NULL(x86_pmu_enable, *x86_pmu.enable); DEFINE_STATIC_CALL_NULL(x86_pmu_disable, *x86_pmu.disable); +DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign); + DEFINE_STATIC_CALL_NULL(x86_pmu_add, *x86_pmu.add); DEFINE_STATIC_CALL_NULL(x86_pmu_del, *x86_pmu.del); DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read); @@ -1177,6 +1179,8 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->last_cpu = smp_processor_id(); hwc->last_tag = ++cpuc->tags[i]; + static_call_cond(x86_pmu_assign)(event, idx); + switch (hwc->idx) { case INTEL_PMC_IDX_FIXED_BTS: case INTEL_PMC_IDX_FIXED_VLBR: @@ -1936,6 +1940,8 @@ static void x86_pmu_static_call_update(void) static_call_update(x86_pmu_enable, x86_pmu.enable); static_call_update(x86_pmu_disable, x86_pmu.disable); + static_call_update(x86_pmu_assign, x86_pmu.assign); + static_call_update(x86_pmu_add, x86_pmu.add); static_call_update(x86_pmu_del, x86_pmu.del); 
static_call_update(x86_pmu_read, x86_pmu.read); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 8de383e4273018a6ca32e39b0ad74f79598b70cd..82591493fb0424f08ed4b09f48b5f7f0d86f5295 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2393,6 +2393,12 @@ static void intel_pmu_disable_event(struct perf_event *event) intel_pmu_pebs_disable(event); } +static void intel_pmu_assign_event(struct perf_event *event, int idx) +{ + if (is_pebs_pt(event)) + perf_report_aux_output_id(event, idx); +} + static void intel_pmu_del_event(struct perf_event *event) { if (needs_branch_stack(event)) @@ -4455,8 +4461,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value) return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0; } +static void intel_aux_output_init(void) +{ + /* Refer also intel_pmu_aux_output_match() */ + if (x86_pmu.intel_cap.pebs_output_pt_available) + x86_pmu.assign = intel_pmu_assign_event; +} + static int intel_pmu_aux_output_match(struct perf_event *event) { + /* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */ if (!x86_pmu.intel_cap.pebs_output_pt_available) return 0; @@ -5958,6 +5972,8 @@ __init int intel_pmu_init(void) if (x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + intel_aux_output_init(); + return 0; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 11d9ab7bc792b4adb15a0d44a7dba80911086e87..830317dc57207d75aaf8d134b82696fea75c4ad7 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -650,6 +650,7 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + void (*assign)(struct perf_event *event, int idx); void (*add)(struct perf_event *); void (*del)(struct perf_event *); void (*read)(struct perf_event *event); diff --git a/drivers/hwtracing/coresight/Makefile 
b/drivers/hwtracing/coresight/Makefile index c2d6456953ad7410781629926d3086a55dd85769..a0984ed8899647096590fdbf46fbb7e58208d7e2 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_CORESIGHT) += coresight.o coresight-y := coresight-core.o coresight-etm-perf.o coresight-platform.o \ - coresight-sysfs.o + coresight-sysfs.o coresight-trace-id.o obj-$(CONFIG_CORESIGHT_LINK_AND_SINK_TMC) += coresight-tmc.o coresight-tmc-y := coresight-tmc-core.o coresight-tmc-etf.o \ coresight-tmc-etr.o diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index ee7b5cfcf786ba5d4a68d8c93ed2b87c4f1356a1..f7e2a684793abae1bea73c4686b711308f14a8a9 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -111,45 +111,6 @@ struct coresight_device *coresight_get_percpu_sink(int cpu) } EXPORT_SYMBOL_GPL(coresight_get_percpu_sink); -static int coresight_id_match(struct device *dev, void *data) -{ - int trace_id, i_trace_id; - struct coresight_device *csdev, *i_csdev; - - csdev = data; - i_csdev = to_coresight_device(dev); - - /* - * No need to care about oneself and components that are not - * sources or not enabled - */ - if (i_csdev == csdev || !i_csdev->enable || - i_csdev->type != CORESIGHT_DEV_TYPE_SOURCE) - return 0; - - /* Get the source ID for both compoment */ - trace_id = source_ops(csdev)->trace_id(csdev); - i_trace_id = source_ops(i_csdev)->trace_id(i_csdev); - - /* All you need is one */ - if (trace_id == i_trace_id) - return 1; - - return 0; -} - -static int coresight_source_is_unique(struct coresight_device *csdev) -{ - int trace_id = source_ops(csdev)->trace_id(csdev); - - /* this shouldn't happen */ - if (trace_id < 0) - return 0; - - return !bus_for_each_dev(&coresight_bustype, NULL, - csdev, coresight_id_match); -} - static int coresight_find_link_inport(struct coresight_device *csdev, struct 
coresight_device *parent) { @@ -458,12 +419,6 @@ static int coresight_enable_source(struct coresight_device *csdev, u32 mode) { int ret; - if (!coresight_source_is_unique(csdev)) { - dev_warn(&csdev->dev, "traceID %d not unique\n", - source_ops(csdev)->trace_id(csdev)); - return -EINVAL; - } - if (!csdev->enable) { if (source_ops(csdev)->enable) { ret = coresight_control_assoc_ectdev(csdev, true); diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 34a920fbb08d45b6ca9b9d9686d63488c65839f9..04829701c67559f9c91d5747a38ffaacf08ec9ba 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -4,6 +4,7 @@ * Author: Mathieu Poirier */ +#include #include #include #include @@ -20,6 +21,7 @@ #include "coresight-etm-perf.h" #include "coresight-priv.h" +#include "coresight-trace-id.h" static struct pmu etm_pmu; static bool etm_perf_up; @@ -175,8 +177,12 @@ static void free_event_data(struct work_struct *work) if (!(IS_ERR_OR_NULL(*ppath))) coresight_release_path(*ppath); *ppath = NULL; + coresight_trace_id_put_cpu_id(cpu); } + /* mark perf event as done for trace id allocator */ + coresight_trace_id_perf_stop(); + free_percpu(event_data->path); kfree(event_data); } @@ -228,6 +234,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, { u32 id; int cpu = event->cpu; + int trace_id; cpumask_t *mask; struct coresight_device *sink = NULL; struct etm_event_data *event_data = NULL; @@ -243,6 +250,9 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, sink = coresight_get_sink_by_id(id); } + /* tell the trace ID allocator that a perf event is starting up */ + coresight_trace_id_perf_start(); + mask = &event_data->mask; /* @@ -288,6 +298,14 @@ static void *etm_setup_aux(struct perf_event *event, void **pages, continue; } + /* ensure we can allocate a trace ID for this CPU */ + trace_id = coresight_trace_id_get_cpu_id(cpu); + 
if (!IS_VALID_CS_TRACE_ID(trace_id)) { + cpumask_clear_cpu(cpu, mask); + coresight_release_path(path); + continue; + } + *etm_event_cpu_path_ptr(event_data, cpu) = path; } @@ -327,6 +345,7 @@ static void etm_event_start(struct perf_event *event, int flags) struct perf_output_handle *handle = &ctxt->handle; struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct list_head *path; + u64 hw_id; if (!csdev) goto fail; @@ -372,6 +391,19 @@ static void etm_event_start(struct perf_event *event, int flags) if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) goto fail_disable_path; + /* + * output cpu / trace ID in perf record, once for the lifetime + * of the event. + */ + if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) { + cpumask_set_cpu(cpu, &event_data->aux_hwid_done); + hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK, + CS_AUX_HW_ID_CURR_VERSION); + hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, + coresight_trace_id_read_cpu_id(cpu)); + perf_report_aux_output_id(event, hw_id); + } + out: /* Tell the perf core the event is alive */ event->hw.state = 0; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index 3e4f2ad5e193dc4ff0e23ad14b4d598eee9672eb..edeb915515501883602a5274723e383f0be4d41e 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -47,12 +47,14 @@ struct etm_filters { * struct etm_event_data - Coresight specifics associated to an event * @work: Handle to free allocated memory outside IRQ context. * @mask: Hold the CPU(s) this event was set for. + * @aux_hwid_done: Whether a CPU has emitted the TraceID packet or not. * @snk_config: The sink configuration. * @path: An array of path, each slot for one CPU. 
*/ struct etm_event_data { struct work_struct work; cpumask_t mask; + cpumask_t aux_hwid_done; void *snk_config; struct list_head * __percpu *path; }; diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h index f3ab96eaf44e5446e389d64d7ba836be070198cd..9a0d08b092ae7f5a26a515d89428be4c00f1cac1 100644 --- a/drivers/hwtracing/coresight/coresight-etm.h +++ b/drivers/hwtracing/coresight/coresight-etm.h @@ -283,8 +283,9 @@ static inline unsigned int etm_readl(struct etm_drvdata *drvdata, u32 off) } extern const struct attribute_group *coresight_etm_groups[]; -int etm_get_trace_id(struct etm_drvdata *drvdata); void etm_set_default(struct etm_config *config); void etm_config_trace_mode(struct etm_config *config); struct etm_config *get_etm_config(struct etm_drvdata *drvdata); +int etm_read_alloc_trace_id(struct etm_drvdata *drvdata); +void etm_release_trace_id(struct etm_drvdata *drvdata); #endif diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c index cf64ce73a7412136a358ade02ab0cc7bacaddbf9..d970d578b5be8e912b0f244b958458467d7fab04 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c @@ -32,6 +32,7 @@ #include "coresight-etm.h" #include "coresight-etm-perf.h" +#include "coresight-trace-id.h" /* * Not really modular but using module_param is the easiest way to @@ -450,52 +451,59 @@ static int etm_cpu_id(struct coresight_device *csdev) return drvdata->cpu; } -int etm_get_trace_id(struct etm_drvdata *drvdata) +int etm_read_alloc_trace_id(struct etm_drvdata *drvdata) { - unsigned long flags; - int trace_id = -1; - struct device *etm_dev; + int trace_id; - if (!drvdata) - goto out; - - etm_dev = drvdata->csdev->dev.parent; - if (!local_read(&drvdata->mode)) - return drvdata->traceid; - - pm_runtime_get_sync(etm_dev); - - spin_lock_irqsave(&drvdata->spinlock, flags); - - CS_UNLOCK(drvdata->base); 
- trace_id = (etm_readl(drvdata, ETMTRACEIDR) & ETM_TRACEID_MASK); - CS_LOCK(drvdata->base); - - spin_unlock_irqrestore(&drvdata->spinlock, flags); - pm_runtime_put(etm_dev); - -out: + /* + * This will allocate a trace ID to the cpu, + * or return the one currently allocated. + * + * trace id function has its own lock + */ + trace_id = coresight_trace_id_get_cpu_id(drvdata->cpu); + if (IS_VALID_CS_TRACE_ID(trace_id)) + drvdata->traceid = (u8)trace_id; + else + dev_err(&drvdata->csdev->dev, + "Failed to allocate trace ID for %s on CPU%d\n", + dev_name(&drvdata->csdev->dev), drvdata->cpu); return trace_id; - } -static int etm_trace_id(struct coresight_device *csdev) +void etm_release_trace_id(struct etm_drvdata *drvdata) { - struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - return etm_get_trace_id(drvdata); + coresight_trace_id_put_cpu_id(drvdata->cpu); } static int etm_enable_perf(struct coresight_device *csdev, struct perf_event *event) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int trace_id; if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) return -EINVAL; /* Configure the tracer based on the session's specifics */ etm_parse_event_config(drvdata, event); + + /* + * perf allocates cpu ids as part of _setup_aux() - device needs to use + * the allocated ID. This reads the current version without allocation. 
+ * + * This does not use the trace id lock to prevent lock_dep issues + * with perf locks - we know the ID cannot change until perf shuts down + * the session + */ + trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu); + if (!IS_VALID_CS_TRACE_ID(trace_id)) { + dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n", + dev_name(&drvdata->csdev->dev), drvdata->cpu); + return -EINVAL; + } + drvdata->traceid = (u8)trace_id; + /* And enable it */ return etm_enable_hw(drvdata); } @@ -508,6 +516,11 @@ static int etm_enable_sysfs(struct coresight_device *csdev) spin_lock(&drvdata->spinlock); + /* sysfs needs to allocate and set a trace ID */ + ret = etm_read_alloc_trace_id(drvdata); + if (ret < 0) + goto unlock_enable_sysfs; + /* * Configure the ETM only if the CPU is online. If it isn't online * hw configuration will take place on the local CPU during bring up. @@ -524,6 +537,10 @@ static int etm_enable_sysfs(struct coresight_device *csdev) ret = -ENODEV; } + if (ret) + etm_release_trace_id(drvdata); + +unlock_enable_sysfs: spin_unlock(&drvdata->spinlock); if (!ret) @@ -607,6 +624,12 @@ static void etm_disable_perf(struct coresight_device *csdev) coresight_disclaim_device_unlocked(csdev); CS_LOCK(drvdata->base); + + /* + * perf will release trace ids when _free_aux() + * is called at the end of the session + */ + } static void etm_disable_sysfs(struct coresight_device *csdev) @@ -631,6 +654,13 @@ static void etm_disable_sysfs(struct coresight_device *csdev) spin_unlock(&drvdata->spinlock); cpus_read_unlock(); + /* + * we only release trace IDs when resetting sysfs. + * This permits sysfs users to read the trace ID after the trace + * session has completed. 
This maintains operational behaviour with + * prior trace id allocation method + */ + dev_dbg(&csdev->dev, "ETM tracing disabled\n"); } @@ -667,7 +697,6 @@ static void etm_disable(struct coresight_device *csdev, static const struct coresight_ops_source etm_source_ops = { .cpu_id = etm_cpu_id, - .trace_id = etm_trace_id, .enable = etm_enable, .disable = etm_disable, }; @@ -777,11 +806,6 @@ static void etm_init_arch_data(void *info) CS_LOCK(drvdata->base); } -static void etm_init_trace_id(struct etm_drvdata *drvdata) -{ - drvdata->traceid = coresight_get_trace_id(drvdata->cpu); -} - static int __init etm_hp_setup(void) { int ret; @@ -867,7 +891,6 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id) if (etm_arch_supported(drvdata->arch) == false) return -EINVAL; - etm_init_trace_id(drvdata); etm_set_default(&drvdata->config); pdata = coresight_get_platform_data(dev); diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c index 8125dafe39288b9fcb36b3b6261e553512aba825..9257934a733fd047208e73b4dadb7d6c0f00e4e3 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c @@ -85,6 +85,7 @@ static ssize_t reset_store(struct device *dev, } etm_set_default(config); + etm_release_trace_id(drvdata); spin_unlock(&drvdata->spinlock); } @@ -1189,30 +1190,16 @@ static DEVICE_ATTR_RO(cpu); static ssize_t traceid_show(struct device *dev, struct device_attribute *attr, char *buf) { - unsigned long val; - struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); - - val = etm_get_trace_id(drvdata); - - return sprintf(buf, "%#lx\n", val); -} - -static ssize_t traceid_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - int ret; - unsigned long val; + int trace_id; struct etm_drvdata *drvdata = dev_get_drvdata(dev->parent); - ret = kstrtoul(buf, 16, &val); - if (ret) - return ret; + trace_id = 
etm_read_alloc_trace_id(drvdata); + if (trace_id < 0) + return trace_id; - drvdata->traceid = val & ETM_TRACEID_MASK; - return size; + return sysfs_emit(buf, "%#x\n", trace_id); } -static DEVICE_ATTR_RW(traceid); +static DEVICE_ATTR_RO(traceid); static struct attribute *coresight_etm_attrs[] = { &dev_attr_nr_addr_cmp.attr, diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 46522839e68434d4465b6cd98f0175a0593329e4..870c628047b4183d669445640208724570ed527b 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -40,6 +40,7 @@ #include "coresight-etm4x.h" #include "coresight-etm-perf.h" #include "coresight-self-hosted-trace.h" +#include "coresight-trace-id.h" static int boot_enable; module_param(boot_enable, int, 0444); @@ -208,11 +209,28 @@ static int etm4_cpu_id(struct coresight_device *csdev) return drvdata->cpu; } -static int etm4_trace_id(struct coresight_device *csdev) +int etm4_read_alloc_trace_id(struct etmv4_drvdata *drvdata) { - struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int trace_id; + + /* + * This will allocate a trace ID to the cpu, + * or return the one currently allocated. 
+ * The trace id function has its own lock + */ + trace_id = coresight_trace_id_get_cpu_id(drvdata->cpu); + if (IS_VALID_CS_TRACE_ID(trace_id)) + drvdata->trcid = (u8)trace_id; + else + dev_err(&drvdata->csdev->dev, + "Failed to allocate trace ID for %s on CPU%d\n", + dev_name(&drvdata->csdev->dev), drvdata->cpu); + return trace_id; +} - return drvdata->trcid; +void etm4_release_trace_id(struct etmv4_drvdata *drvdata) +{ + coresight_trace_id_put_cpu_id(drvdata->cpu); } struct etm4_enable_arg { @@ -692,7 +710,7 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, static int etm4_enable_perf(struct coresight_device *csdev, struct perf_event *event) { - int ret = 0; + int ret = 0, trace_id; struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) { @@ -704,6 +722,24 @@ static int etm4_enable_perf(struct coresight_device *csdev, ret = etm4_parse_event_config(drvdata, event); if (ret) goto out; + + /* + * perf allocates cpu ids as part of _setup_aux() - device needs to use + * the allocated ID. This reads the current version without allocation. 
+ * + * This does not use the trace id lock to prevent lock_dep issues + * with perf locks - we know the ID cannot change until perf shuts down + * the session + */ + trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu); + if (!IS_VALID_CS_TRACE_ID(trace_id)) { + dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n", + dev_name(&drvdata->csdev->dev), drvdata->cpu); + ret = -EINVAL; + goto out; + } + drvdata->trcid = (u8)trace_id; + /* And enable it */ ret = etm4_enable_hw(drvdata); @@ -719,6 +755,11 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) spin_lock(&drvdata->spinlock); + /* sysfs needs to read and allocate a trace ID */ + ret = etm4_read_alloc_trace_id(drvdata); + if (ret < 0) + goto unlock_sysfs_enable; + /* * Executing etm4_enable_hw on the cpu whose ETM is being enabled * ensures that register writes occur when cpu is powered. @@ -730,6 +771,11 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) ret = arg.rc; if (!ret) drvdata->sticky_enable = true; + + if (ret) + etm4_release_trace_id(drvdata); + +unlock_sysfs_enable: spin_unlock(&drvdata->spinlock); if (!ret) @@ -854,6 +900,11 @@ static int etm4_disable_perf(struct coresight_device *csdev, /* TRCVICTLR::SSSTATUS, bit[9] */ filters->ssstatus = (control & BIT(9)); + /* + * perf will release trace ids when _free_aux() is + * called at the end of the session. + */ + return 0; } @@ -879,6 +930,13 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) spin_unlock(&drvdata->spinlock); cpus_read_unlock(); + /* + * we only release trace IDs when resetting sysfs. + * This permits sysfs users to read the trace ID after the trace + * session has completed. 
This maintains operational behaviour with + * prior trace id allocation method + */ + dev_dbg(&csdev->dev, "ETM tracing disabled\n"); } @@ -912,7 +970,6 @@ static void etm4_disable(struct coresight_device *csdev, static const struct coresight_ops_source etm4_source_ops = { .cpu_id = etm4_cpu_id, - .trace_id = etm4_trace_id, .enable = etm4_enable, .disable = etm4_disable, }; @@ -1567,11 +1624,6 @@ static int etm4_dying_cpu(unsigned int cpu) return 0; } -static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) -{ - drvdata->trcid = coresight_get_trace_id(drvdata->cpu); -} - static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) { int i, ret = 0; @@ -1977,7 +2029,6 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid) if (!desc.name) return -ENOMEM; - etm4_init_trace_id(drvdata); etm4_set_default(&drvdata->config); pdata = coresight_get_platform_data(dev); diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index 4ce3927024aef282db4b9b9cdd14384b9c2a51d7..29479488004af377f6335c85903e665a6ef26928 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -265,10 +265,11 @@ static ssize_t reset_store(struct device *dev, config->vmid_mask0 = 0x0; config->vmid_mask1 = 0x0; - drvdata->trcid = drvdata->cpu + 1; - spin_unlock(&drvdata->spinlock); + /* for sysfs - only release trace id when resetting */ + etm4_release_trace_id(drvdata); + return size; } static DEVICE_ATTR_WO(reset); @@ -2322,6 +2323,25 @@ static struct attribute *coresight_etmv4_attrs[] = { NULL, }; +/* + * Trace ID allocated dynamically on enable - but also allocate on read + * in case sysfs or perf read before enable to ensure consistent metadata + * information for trace decode + */ +static ssize_t trctraceid_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int trace_id; + struct etmv4_drvdata *drvdata = 
dev_get_drvdata(dev->parent); + + trace_id = etm4_read_alloc_trace_id(drvdata); + if (trace_id < 0) + return trace_id; + + return sysfs_emit(buf, "0x%x\n", trace_id); +} + struct etmv4_reg { struct coresight_device *csdev; u32 offset; @@ -2437,13 +2457,23 @@ coresight_etm4x_attr_reg_implemented(struct kobject *kobj, return 0; } -#define coresight_etm4x_reg(name, offset) \ - &((struct dev_ext_attribute[]) { \ - { \ - __ATTR(name, 0444, coresight_etm4x_reg_show, NULL), \ - (void *)(unsigned long)offset \ - } \ - })[0].attr.attr +/* + * Macro to set an RO ext attribute with offset and show function. + * Offset is used in mgmt group to ensure only correct registers for + * the ETM / ETE variant are visible. + */ +#define coresight_etm4x_reg_showfn(name, offset, showfn) ( \ + &((struct dev_ext_attribute[]) { \ + { \ + __ATTR(name, 0444, showfn, NULL), \ + (void *)(unsigned long)offset \ + } \ + })[0].attr.attr \ + ) + +/* macro using the default coresight_etm4x_reg_show function */ +#define coresight_etm4x_reg(name, offset) \ + coresight_etm4x_reg_showfn(name, offset, coresight_etm4x_reg_show) static struct attribute *coresight_etmv4_mgmt_attrs[] = { coresight_etm4x_reg(trcpdcr, TRCPDCR), @@ -2458,7 +2488,7 @@ static struct attribute *coresight_etmv4_mgmt_attrs[] = { coresight_etm4x_reg(trcpidr3, TRCPIDR3), coresight_etm4x_reg(trcoslsr, TRCOSLSR), coresight_etm4x_reg(trcconfig, TRCCONFIGR), - coresight_etm4x_reg(trctraceid, TRCTRACEIDR), + coresight_etm4x_reg_showfn(trctraceid, TRCTRACEIDR, trctraceid_show), coresight_etm4x_reg(trcdevarch, TRCDEVARCH), NULL, }; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 894041f69842430d833ded3baf6f72cf26cbb1d7..97669ea32653e33e751fb0f74a35b3eea6f41078 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -987,4 +987,7 @@ static inline bool etm4x_is_ete(struct etmv4_drvdata *drvdata) { return drvdata->arch >= 
ETM_ARCH_ETE; } + +int etm4_read_alloc_trace_id(struct etmv4_drvdata *drvdata); +void etm4_release_trace_id(struct etmv4_drvdata *drvdata); #endif diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index be473655ac86ab4c9855a0dfcd909073898ba6cc..3d95da870e639b7d2a170dc7f234931411f89cd8 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -31,6 +31,7 @@ #include #include "coresight-priv.h" +#include "coresight-trace-id.h" #define STMDMASTARTR 0xc04 #define STMDMASTOPR 0xc08 @@ -280,15 +281,7 @@ static void stm_disable(struct coresight_device *csdev, } } -static int stm_trace_id(struct coresight_device *csdev) -{ - struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - - return drvdata->traceid; -} - static const struct coresight_ops_source stm_source_ops = { - .trace_id = stm_trace_id, .enable = stm_enable, .disable = stm_disable, }; @@ -615,24 +608,7 @@ static ssize_t traceid_show(struct device *dev, val = drvdata->traceid; return sprintf(buf, "%#lx\n", val); } - -static ssize_t traceid_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t size) -{ - int ret; - unsigned long val; - struct stm_drvdata *drvdata = dev_get_drvdata(dev->parent); - - ret = kstrtoul(buf, 16, &val); - if (ret) - return ret; - - /* traceid field is 7bit wide on STM32 */ - drvdata->traceid = val & 0x7f; - return size; -} -static DEVICE_ATTR_RW(traceid); +static DEVICE_ATTR_RO(traceid); static struct attribute *coresight_stm_attrs[] = { &dev_attr_hwevent_enable.attr, @@ -803,14 +779,6 @@ static void stm_init_default_data(struct stm_drvdata *drvdata) */ drvdata->stmsper = ~0x0; - /* - * The trace ID value for *ETM* tracers start at CPU_ID * 2 + 0x10 and - * anything equal to or higher than 0x70 is reserved. Since 0x00 is - * also reserved the STM trace ID needs to be higher than 0x00 and - * lowner than 0x10. 
- */ - drvdata->traceid = 0x1; - /* Set invariant transaction timing on all channels */ bitmap_clear(drvdata->chs.guaranteed, 0, drvdata->numsp); } @@ -838,7 +806,7 @@ static void stm_init_generic_data(struct stm_drvdata *drvdata, static int stm_probe(struct amba_device *adev, const struct amba_id *id) { - int ret; + int ret, trace_id; void __iomem *base; unsigned long *guaranteed; struct device *dev = &adev->dev; @@ -926,12 +894,22 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id) goto stm_unregister; } + trace_id = coresight_trace_id_get_system_id(); + if (trace_id < 0) { + ret = trace_id; + goto cs_unregister; + } + drvdata->traceid = (u8)trace_id; + pm_runtime_put(&adev->dev); dev_info(&drvdata->csdev->dev, "%s initialized\n", (char *)coresight_get_uci_data(id)); return 0; +cs_unregister: + coresight_unregister(drvdata->csdev); + stm_unregister: stm_unregister_device(&drvdata->stm); return ret; @@ -941,6 +919,7 @@ static void stm_remove(struct amba_device *adev) { struct stm_drvdata *drvdata = dev_get_drvdata(&adev->dev); + coresight_trace_id_put_system_id(drvdata->traceid); coresight_unregister(drvdata->csdev); stm_unregister_device(&drvdata->stm); diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c new file mode 100644 index 0000000000000000000000000000000000000000..af5b4ef59ceab83a1a3e18239f370307e53660e3 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-trace-id.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022, Linaro Limited, All rights reserved. + * Author: Mike Leach + */ +#include +#include +#include +#include +#include + +#include "coresight-trace-id.h" + +/* Default trace ID map. 
Used on systems that don't require per sink mappings */ +static struct coresight_trace_id_map id_map_default; + +/* maintain a record of the mapping of IDs and pending releases per cpu */ +static DEFINE_PER_CPU(atomic_t, cpu_id) = ATOMIC_INIT(0); +static cpumask_t cpu_id_release_pending; + +/* perf session active counter */ +static atomic_t perf_cs_etm_session_active = ATOMIC_INIT(0); + +/* lock to protect id_map and cpu data */ +static DEFINE_SPINLOCK(id_map_lock); + +/* #define TRACE_ID_DEBUG 1 */ +#if defined(TRACE_ID_DEBUG) || defined(CONFIG_COMPILE_TEST) + +static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map, + const char *func_name) +{ + pr_debug("%s id_map::\n", func_name); + pr_debug("Used = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->used_ids); + pr_debug("Pend = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->pend_rel_ids); +} +#define DUMP_ID_MAP(map) coresight_trace_id_dump_table(map, __func__) +#define DUMP_ID_CPU(cpu, id) pr_debug("%s called; cpu=%d, id=%d\n", __func__, cpu, id) +#define DUMP_ID(id) pr_debug("%s called; id=%d\n", __func__, id) +#define PERF_SESSION(n) pr_debug("%s perf count %d\n", __func__, n) +#else +#define DUMP_ID_MAP(map) +#define DUMP_ID(id) +#define DUMP_ID_CPU(cpu, id) +#define PERF_SESSION(n) +#endif + +/* unlocked read of current trace ID value for given CPU */ +static int _coresight_trace_id_read_cpu_id(int cpu) +{ + return atomic_read(&per_cpu(cpu_id, cpu)); +} + +/* look for next available odd ID, return 0 if none found */ +static int coresight_trace_id_find_odd_id(struct coresight_trace_id_map *id_map) +{ + int found_id = 0, bit = 1, next_id; + + while ((bit < CORESIGHT_TRACE_ID_RES_TOP) && !found_id) { + /* + * bitmap length of CORESIGHT_TRACE_ID_RES_TOP, + * search from offset `bit`. 
+ */ + next_id = find_next_zero_bit(id_map->used_ids, + CORESIGHT_TRACE_ID_RES_TOP, bit); + if ((next_id < CORESIGHT_TRACE_ID_RES_TOP) && (next_id & 0x1)) + found_id = next_id; + else + bit = next_id + 1; + } + return found_id; +} + +/* + * Allocate new ID and set in use + * + * if @preferred_id is a valid id then try to use that value if available. + * if @preferred_id is not valid and @prefer_odd_id is true, try for odd id. + * + * Otherwise allocate next available ID. + */ +static int coresight_trace_id_alloc_new_id(struct coresight_trace_id_map *id_map, + int preferred_id, bool prefer_odd_id) +{ + int id = 0; + + /* for backwards compatibility, cpu IDs may use preferred value */ + if (IS_VALID_CS_TRACE_ID(preferred_id) && + !test_bit(preferred_id, id_map->used_ids)) { + id = preferred_id; + goto trace_id_allocated; + } else if (prefer_odd_id) { + /* may use odd ids to avoid preferred legacy cpu IDs */ + id = coresight_trace_id_find_odd_id(id_map); + if (id) + goto trace_id_allocated; + } + + /* + * skip reserved bit 0, look at bitmap length of + * CORESIGHT_TRACE_ID_RES_TOP from offset of bit 1. 
+ */ + id = find_next_zero_bit(id_map->used_ids, CORESIGHT_TRACE_ID_RES_TOP, 1); + if (id >= CORESIGHT_TRACE_ID_RES_TOP) + return -EINVAL; + + /* mark as used */ +trace_id_allocated: + set_bit(id, id_map->used_ids); + return id; +} + +static void coresight_trace_id_free(int id, struct coresight_trace_id_map *id_map) +{ + if (WARN(!IS_VALID_CS_TRACE_ID(id), "Invalid Trace ID %d\n", id)) + return; + if (WARN(!test_bit(id, id_map->used_ids), "Freeing unused ID %d\n", id)) + return; + clear_bit(id, id_map->used_ids); +} + +static void coresight_trace_id_set_pend_rel(int id, struct coresight_trace_id_map *id_map) +{ + if (WARN(!IS_VALID_CS_TRACE_ID(id), "Invalid Trace ID %d\n", id)) + return; + set_bit(id, id_map->pend_rel_ids); +} + +/* + * release all pending IDs for all current maps & clear CPU associations + * + * This currently operates on the default id map, but may be extended to + * operate on all registered id maps if per sink id maps are used. + */ +static void coresight_trace_id_release_all_pending(void) +{ + struct coresight_trace_id_map *id_map = &id_map_default; + unsigned long flags; + int cpu, bit; + + spin_lock_irqsave(&id_map_lock, flags); + for_each_set_bit(bit, id_map->pend_rel_ids, CORESIGHT_TRACE_ID_RES_TOP) { + clear_bit(bit, id_map->used_ids); + clear_bit(bit, id_map->pend_rel_ids); + } + for_each_cpu(cpu, &cpu_id_release_pending) { + atomic_set(&per_cpu(cpu_id, cpu), 0); + cpumask_clear_cpu(cpu, &cpu_id_release_pending); + } + spin_unlock_irqrestore(&id_map_lock, flags); + DUMP_ID_MAP(id_map); +} + +static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_map *id_map) +{ + unsigned long flags; + int id; + + spin_lock_irqsave(&id_map_lock, flags); + + /* check for existing allocation for this CPU */ + id = _coresight_trace_id_read_cpu_id(cpu); + if (id) + goto get_cpu_id_clr_pend; + + /* + * Find a new ID. 
+ * + * Use legacy values where possible in the dynamic trace ID allocator to + * allow older tools to continue working if they are not upgraded at the + * same time as the kernel drivers. + * + * If the generated legacy ID is invalid, or not available then the next + * available dynamic ID will be used. + */ + id = coresight_trace_id_alloc_new_id(id_map, + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu), + false); + if (!IS_VALID_CS_TRACE_ID(id)) + goto get_cpu_id_out_unlock; + + /* allocate the new id to the cpu */ + atomic_set(&per_cpu(cpu_id, cpu), id); + +get_cpu_id_clr_pend: + /* we are (re)using this ID - so ensure it is not marked for release */ + cpumask_clear_cpu(cpu, &cpu_id_release_pending); + clear_bit(id, id_map->pend_rel_ids); + +get_cpu_id_out_unlock: + spin_unlock_irqrestore(&id_map_lock, flags); + + DUMP_ID_CPU(cpu, id); + DUMP_ID_MAP(id_map); + return id; +} + +static void coresight_trace_id_map_put_cpu_id(int cpu, struct coresight_trace_id_map *id_map) +{ + unsigned long flags; + int id; + + /* check for existing allocation for this CPU */ + id = _coresight_trace_id_read_cpu_id(cpu); + if (!id) + return; + + spin_lock_irqsave(&id_map_lock, flags); + + if (atomic_read(&perf_cs_etm_session_active)) { + /* set release at pending if perf still active */ + coresight_trace_id_set_pend_rel(id, id_map); + cpumask_set_cpu(cpu, &cpu_id_release_pending); + } else { + /* otherwise clear id */ + coresight_trace_id_free(id, id_map); + atomic_set(&per_cpu(cpu_id, cpu), 0); + } + + spin_unlock_irqrestore(&id_map_lock, flags); + DUMP_ID_CPU(cpu, id); + DUMP_ID_MAP(id_map); +} + +static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *id_map) +{ + unsigned long flags; + int id; + + spin_lock_irqsave(&id_map_lock, flags); + /* prefer odd IDs for system components to avoid legacy CPU IDS */ + id = coresight_trace_id_alloc_new_id(id_map, 0, true); + spin_unlock_irqrestore(&id_map_lock, flags); + + DUMP_ID(id); + DUMP_ID_MAP(id_map); + return id; +} + 
+static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map *id_map, int id) +{ + unsigned long flags; + + spin_lock_irqsave(&id_map_lock, flags); + coresight_trace_id_free(id, id_map); + spin_unlock_irqrestore(&id_map_lock, flags); + + DUMP_ID(id); + DUMP_ID_MAP(id_map); +} + +/* API functions */ + +int coresight_trace_id_get_cpu_id(int cpu) +{ + return coresight_trace_id_map_get_cpu_id(cpu, &id_map_default); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_get_cpu_id); + +void coresight_trace_id_put_cpu_id(int cpu) +{ + coresight_trace_id_map_put_cpu_id(cpu, &id_map_default); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_put_cpu_id); + +int coresight_trace_id_read_cpu_id(int cpu) +{ + return _coresight_trace_id_read_cpu_id(cpu); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id); + +int coresight_trace_id_get_system_id(void) +{ + return coresight_trace_id_map_get_system_id(&id_map_default); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_get_system_id); + +void coresight_trace_id_put_system_id(int id) +{ + coresight_trace_id_map_put_system_id(&id_map_default, id); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_put_system_id); + +void coresight_trace_id_perf_start(void) +{ + atomic_inc(&perf_cs_etm_session_active); + PERF_SESSION(atomic_read(&perf_cs_etm_session_active)); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_perf_start); + +void coresight_trace_id_perf_stop(void) +{ + if (!atomic_dec_return(&perf_cs_etm_session_active)) + coresight_trace_id_release_all_pending(); + PERF_SESSION(atomic_read(&perf_cs_etm_session_active)); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_perf_stop); diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h new file mode 100644 index 0000000000000000000000000000000000000000..3797777d367e6fb9ad18a43b9ce07d411fb115ee --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-trace-id.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright(C) 2022 Linaro Limited. 
All rights reserved. + * Author: Mike Leach + */ + +#ifndef _CORESIGHT_TRACE_ID_H +#define _CORESIGHT_TRACE_ID_H + +/* + * Coresight trace ID allocation API + * + * With multi cpu systems, and more additional trace sources a scalable + * trace ID reservation system is required. + * + * The system will allocate Ids on a demand basis, and allow them to be + * released when done. + * + * In order to ensure that a consistent cpu / ID matching is maintained + * throughout a perf cs_etm event session - a session in progress flag will + * be maintained, and released IDs not cleared until the perf session is + * complete. This allows the same CPU to be re-allocated its prior ID. + * + * + * Trace ID maps will be created and initialised to prevent architecturally + * reserved IDs from being allocated. + * + * API permits multiple maps to be maintained - for large systems where + * different sets of cpus trace into different independent sinks. + */ + +#include +#include + + +/* architecturally we have 128 IDs some of which are reserved */ +#define CORESIGHT_TRACE_IDS_MAX 128 + +/* ID 0 is reserved */ +#define CORESIGHT_TRACE_ID_RES_0 0 + +/* ID 0x70 onwards are reserved */ +#define CORESIGHT_TRACE_ID_RES_TOP 0x70 + +/* check an ID is in the valid range */ +#define IS_VALID_CS_TRACE_ID(id) \ + ((id > CORESIGHT_TRACE_ID_RES_0) && (id < CORESIGHT_TRACE_ID_RES_TOP)) + +/** + * Trace ID map. + * + * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs. + * Initialised so that the reserved IDs are permanently marked as + * in use. + * @pend_rel_ids: CPU IDs that have been released by the trace source but not + * yet marked as available, to allow re-allocation to the same + * CPU during a perf session. 
+ */ +struct coresight_trace_id_map { + DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX); + DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX); +}; + +/* Allocate and release IDs for a single default trace ID map */ + +/** + * Read and optionally allocate a CoreSight trace ID and associate with a CPU. + * + * Function will read the current trace ID for the associated CPU, + * allocating a new ID if one is not currently allocated. + * + * Numeric ID values allocated use legacy allocation algorithm if possible, + * otherwise any available ID is used. + * + * @cpu: The CPU index to allocate for. + * + * return: CoreSight trace ID or -EINVAL if allocation impossible. + */ +int coresight_trace_id_get_cpu_id(int cpu); + +/** + * Release an allocated trace ID associated with the CPU. + * + * This will release the CoreSight trace ID associated with the CPU, + * unless a perf session is in operation. + * + * If a perf session is in operation then the ID will be marked as pending + * release. + * + * @cpu: The CPU index to release the associated trace ID. + */ +void coresight_trace_id_put_cpu_id(int cpu); + +/** + * Read the current allocated CoreSight Trace ID value for the CPU. + * + * Fast read of the current value that does not allocate if no ID allocated + * for the CPU. + * + * Used in perf context where it is known that the value for the CPU will not + * be changing, when perf starts an event on a core and outputs the Trace ID + * for the CPU as a packet in the data file. IDs cannot change during a perf + * session. + * + * This function does not take the lock protecting the ID lists, avoiding + * locking dependency issues with perf locks. + * + * @cpu: The CPU index to read. + * + * return: current value, will be 0 if unallocated. + */ +int coresight_trace_id_read_cpu_id(int cpu); + +/** + * Allocate a CoreSight trace ID for a system component. + * + * Unconditionally allocates a Trace ID, without associating the ID with a CPU.
+ * + * Used to allocate IDs for system trace sources such as STM. + * + * return: Trace ID or -EINVAL if allocation is impossible. + */ +int coresight_trace_id_get_system_id(void); + +/** + * Release an allocated system trace ID. + * + * Unconditionally release a trace ID allocated to a system component. + * + * @id: value of trace ID allocated. + */ +void coresight_trace_id_put_system_id(int id); + +/* notifiers for perf session start and stop */ + +/** + * Notify the Trace ID allocator that a perf session is starting. + * + * Increase the perf session reference count - called by perf when setting up + * a trace event. + * + * This reference count is used by the ID allocator to ensure that trace IDs + * associated with a CPU cannot change or be released during a perf session. + */ +void coresight_trace_id_perf_start(void); + +/** + * Notify the ID allocator that a perf session is stopping. + * + * Decrease the perf session reference count. + * if this causes the count to go to zero, then all Trace IDs marked as pending + * release, will be released. + */ +void coresight_trace_id_perf_stop(void); + +#endif /* _CORESIGHT_TRACE_ID_H */ diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index 0c2cee05c4e3046badf95ad3bcdc2ac6b425822f..286dafc4a8d6fde1276c7fcbc0e47c65ef8e98e6 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -7,9 +7,19 @@ #ifndef _LINUX_CORESIGHT_PMU_H #define _LINUX_CORESIGHT_PMU_H +#include + #define CORESIGHT_ETM_PMU_NAME "cs_etm" -#define CORESIGHT_ETM_PMU_SEED 0x1 -#define CORESIGHT_ETM_CSID_MAX 0x70 + +/* + * The legacy Trace ID system based on fixed calculation from the cpu + * number. 
This has been replaced by drivers using a dynamic allocation + * system - but need to retain the legacy algorithm for backward compatibility + * in certain situations:- + * a) new perf running on older systems that generate the legacy mapping + * b) older tools that may not update at the same time as the kernel. + */ +#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 @@ -23,15 +33,16 @@ #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 -static inline int coresight_get_trace_id(int cpu) -{ - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. Since - * the common convention is to have data trace IDs be I(N) + 1, - * set instruction trace IDs as a function of the CPU number. - */ - return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)) % CORESIGHT_ETM_CSID_MAX; -} +/* + * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. + * Used to associate a CPU with the CoreSight Trace ID. + * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. + * [59:08] - Unused (SBZ) + * [63:60] - Version + */ +#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) +#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_CURR_VERSION 0 #endif diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 36e6913e50fd17e597cbfc565feb7f5788fdcaec..42ec64fc8d85ad97109241df03ebfaf773e5853f 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -312,14 +312,11 @@ struct coresight_ops_link { * Operations available for sources. * @cpu_id: returns the value of the CPU number this component * is associated to. - * @trace_id: returns the value of the component's trace ID as known - * to the HW. * @enable: enables tracing for a source. * @disable: disables tracing for a source.
*/ struct coresight_ops_source { int (*cpu_id)(struct coresight_device *csdev); - int (*trace_id)(struct coresight_device *csdev); int (*enable)(struct coresight_device *csdev, struct perf_event *event, u32 mode); void (*disable)(struct coresight_device *csdev, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 39b391d6469ac1e5fc70773c48e3f900d404455e..86fbd9a733630f3e9166b108e56dc413fe7df1b1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1433,6 +1433,7 @@ perf_event_addr_filters(struct perf_event *event) } extern void perf_event_addr_filters_sync(struct perf_event *event); +extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id); extern int perf_output_begin(struct perf_output_handle *handle, struct perf_sample_data *data, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 4b0893023877c1b9683b533fd33e66e50e0ba11c..b0350710db6e41196de842702c47b340fda7b99e 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1081,6 +1081,21 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, + /* + * Data written to the AUX area by hardware due to aux_output, may need + * to be matched to the event by an architecture-specific hardware ID. + * This records the hardware ID, but requires sample_id to provide the + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT + * records from multiple events. 
+ * + * struct { + * struct perf_event_header header; + * u64 hw_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 28eb8152564ae250cb52286cd17b17d4bed7a94d..061c1aaae901b5ce9c495b3ea9280c97d99f52ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8867,6 +8867,37 @@ static void perf_log_itrace_start(struct perf_event *event) perf_output_end(&handle); } +void perf_report_aux_output_id(struct perf_event *event, u64 hw_id) +{ + struct perf_output_handle handle; + struct perf_sample_data sample; + struct perf_aux_event { + struct perf_event_header header; + u64 hw_id; + } rec; + int ret; + + if (event->parent) + event = event->parent; + + rec.header.type = PERF_RECORD_AUX_OUTPUT_HW_ID; + rec.header.misc = 0; + rec.header.size = sizeof(rec); + rec.hw_id = hw_id; + + perf_event_header__init_id(&rec.header, &sample, event); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); + + if (ret) + return; + + perf_output_put(&handle, rec); + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} +EXPORT_SYMBOL_GPL(perf_report_aux_output_id); + static int __perf_event_account_interrupt(struct perf_event *event, int throttle) { diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 3c52e434bf3acf6a778932ed94ffa22885fdd910..09d0de435470b327be8ba64575328bb895135dea 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -7,9 +7,32 @@ #ifndef _LINUX_CORESIGHT_PMU_H #define _LINUX_CORESIGHT_PMU_H +#include + #define CORESIGHT_ETM_PMU_NAME "cs_etm" -#define CORESIGHT_ETM_PMU_SEED 0x1 -#define CORESIGHT_ETM_CSID_MAX 0x70 + +/* + * The legacy Trace ID system based on fixed calculation from the cpu + * number. 
This has been replaced by drivers using a dynamic allocation + * system - but need to retain the legacy algorithm for backward compatibility + * in certain situations:- + * a) new perf running on older systems that generate the legacy mapping + * b) older tools that may not update at the same time as the kernel. + */ +#define CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) (0x10 + (cpu * 2)) + +/* CoreSight trace ID is currently the bottom 7 bits of the value */ +#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) + +/* + * perf record will set the legacy meta data values as unused initially. + * This allows perf report to manage the decoders created when dynamic + * allocation is in operation. + */ +#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) + +/* Value to set for unused trace ID values */ +#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_BRANCH_BROADCAST 8 @@ -28,15 +51,16 @@ #define ETM4_CFG_BIT_RETSTK 12 #define ETM4_CFG_BIT_VMID_OPT 15 -static inline int coresight_get_trace_id(int cpu) -{ - /* - * A trace ID of value 0 is invalid, so let's start at some - * random value that fits in 7 bits and go from there. Since - * the common convention is to have data trace IDs be I(N) + 1, - * set instruction trace IDs as a function of the CPU number. - */ - return (CORESIGHT_ETM_PMU_SEED + (cpu * 2)) % CORESIGHT_ETM_CSID_MAX; -} +/* + * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. + * Used to associate a CPU with the CoreSight Trace ID. + * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
+ * [59:08] - Unused (SBZ) + * [63:60] - Version + */ +#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) +#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_CURR_VERSION 0 #endif diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 670e925299e1eaf051dee5d2e368af464e89fdc7..63a94ce7173ced621c0150acb4156dc80b1caaf0 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -53,7 +53,15 @@ static const char * const metadata_etmv4_ro[] = { [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2", [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8", [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus", - [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch" +}; + +static const char * const metadata_ete_ro[] = { + [CS_ETE_TRCIDR0] = "trcidr/trcidr0", + [CS_ETE_TRCIDR1] = "trcidr/trcidr1", + [CS_ETE_TRCIDR2] = "trcidr/trcidr2", + [CS_ETE_TRCIDR8] = "trcidr/trcidr8", + [CS_ETE_TRCAUTHSTATUS] = "mgmt/trcauthstatus", + [CS_ETE_TRCDEVARCH] = "mgmt/trcdevarch", }; static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); @@ -415,13 +423,16 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, perf_evlist__to_front(evlist, cs_etm_evsel); /* - * In the case of per-cpu mmaps, we need the CPU on the - * AUX event. We also need the contextID in order to be notified + * get the CPU on the sample - need it to associate trace ID in the + * AUX_OUTPUT_HW_ID event, and the AUX event for per-cpu mmaps. + */ + evsel__set_sample_bit(cs_etm_evsel, CPU); + + /* + * Also the case of per-cpu mmaps, need the contextID in order to be notified * when a context switch happened. 
*/ if (!perf_cpu_map__empty(cpus)) { - evsel__set_sample_bit(cs_etm_evsel, CPU); - err = cs_etm_set_option(itr, cs_etm_evsel, BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) @@ -608,7 +619,7 @@ static bool cs_etm_is_ete(struct auxtrace_record *itr, int cpu) { struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; - int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + int trcdevarch = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCDEVARCH]); /* * ETE if ARCHVER is 5 (ARCHVER is 4 for ETM) and ARCHPART is 0xA13. @@ -624,8 +635,10 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, /* Get trace configuration register */ data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr); - /* Get traceID from the framework */ - data[CS_ETMV4_TRCTRACEIDR] = coresight_get_trace_id(cpu); + /* traceID set to legacy version, in case new perf running on older system */ + data[CS_ETMV4_TRCTRACEIDR] = + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; + /* Get read-only information from sysFS */ data[CS_ETMV4_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]); @@ -639,6 +652,33 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, metadata_etmv4_ro[CS_ETMV4_TRCAUTHSTATUS]); } +static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, int cpu) +{ + struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); + struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; + + /* Get trace configuration register */ + data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr); + /* traceID set to legacy version, in case new perf running on older system */ + data[CS_ETE_TRCTRACEIDR] = + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; + + /* Get read-only information from sysFS */ + data[CS_ETE_TRCIDR0] = cs_etm_get_ro(cs_etm_pmu, cpu, + 
metadata_ete_ro[CS_ETE_TRCIDR0]); + data[CS_ETE_TRCIDR1] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR1]); + data[CS_ETE_TRCIDR2] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR2]); + data[CS_ETE_TRCIDR8] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCIDR8]); + data[CS_ETE_TRCAUTHSTATUS] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCAUTHSTATUS]); + /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ + data[CS_ETE_TRCDEVARCH] = cs_etm_get_ro(cs_etm_pmu, cpu, + metadata_ete_ro[CS_ETE_TRCDEVARCH]); +} + static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) @@ -652,11 +692,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* first see what kind of tracer this cpu is affined to */ if (cs_etm_is_ete(itr, cpu)) { magic = __perf_cs_ete_magic; - /* ETE uses the same registers as ETMv4 plus TRCDEVARCH */ - cs_etm_save_etmv4_header(&info->priv[*offset], itr, cpu); - info->priv[*offset + CS_ETE_TRCDEVARCH] = - cs_etm_get_ro(cs_etm_pmu, cpu, - metadata_etmv4_ro[CS_ETE_TRCDEVARCH]); + cs_etm_save_ete_header(&info->priv[*offset], itr, cpu); /* How much space was used */ increment = CS_ETE_PRIV_MAX; @@ -672,9 +708,9 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, magic = __perf_cs_etmv3_magic; /* Get configuration register */ info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); - /* Get traceID from the framework */ + /* traceID set to legacy value in case new perf running on old system */ info->priv[*offset + CS_ETM_ETMTRACEIDR] = - coresight_get_trace_id(cpu); + CORESIGHT_LEGACY_CPU_TRACE_ID(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; /* Get read-only information from sysFS */ info->priv[*offset + CS_ETM_ETMCCER] = cs_etm_get_ro(cs_etm_pmu, cpu, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 6bf0547bf134c2c3af298c6bab6e75831be3a335..b4f7189ec22263cee20ab587e3ee9a007f3df71d 100644 --- 
a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -117,6 +117,7 @@ ifdef CONFIG_LIBOPENCSD perf-$(CONFIG_AUXTRACE) += cs-etm.o perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ endif +perf-$(CONFIG_AUXTRACE) += cs-etm-base.o perf-y += parse-branch-options.o perf-y += dump-insn.o diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c new file mode 100644 index 0000000000000000000000000000000000000000..43e7ec008283191f3e9be61541e9ebd3f33b7add --- /dev/null +++ b/tools/perf/util/cs-etm-base.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * File for any parts of the Coresight decoding that don't require + * OpenCSD. + */ + +#include +#include + +#include "cs-etm.h" + +static const char * const cs_etm_global_header_fmts[] = { + [CS_HEADER_VERSION] = " Header version %llx\n", + [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", + [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", +}; + +static const char * const cs_etm_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", + [CS_ETM_ETMCR] = " ETMCR %llx\n", + [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", + [CS_ETM_ETMCCER] = " ETMCCER %llx\n", + [CS_ETM_ETMIDR] = " ETMIDR %llx\n", +}; + +static const char * const cs_etmv4_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", + [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", + [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", +}; + +static const char * const cs_ete_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", + [CS_ETE_TRCCONFIGR] = " TRCCONFIGR %llx\n", + 
[CS_ETE_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETE_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETE_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETE_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETE_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETE_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", + [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n", +}; + +static const char * const param_unk_fmt = + " Unknown parameter [%d] %"PRIx64"\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %"PRIx64"\n"; + +static int cs_etm__print_cpu_metadata_v0(u64 *val, int *offset) +{ + int i = *offset, j, nr_params = 0, fmt_offset; + u64 magic; + + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} + +static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + u64 magic; + + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; + + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; 
j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } + } else if (magic == __perf_cs_etmv4_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETMV4_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + } else if (magic == __perf_cs_ete_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETE_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else + fprintf(stdout, cs_ete_priv_fmts[j], val[i]); + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(u64 *val, int num) +{ + int i, cpu = 0, version, err; + + version = val[0]; + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + /* printing same for both, but value bit flags added on v2 */ + else if ((version == 1) || (version == 2)) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) + return; + } +} + +/* + * Do some basic checks and print the auxtrace info header before calling + * into cs_etm__process_auxtrace_info_full() which requires OpenCSD to be + * linked in. This allows some basic debugging if OpenCSD is missing. 
+ */ +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; + int event_header_size = sizeof(struct perf_event_header); + int num_cpu; + u64 *ptr = NULL; + u64 hdr_version; + + if (auxtrace_info->header.size < (event_header_size + INFO_HEADER_SIZE)) + return -EINVAL; + + /* First the global part */ + ptr = (u64 *) auxtrace_info->priv; + + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + pr_err("\nCS ETM Trace: Unknown Header Version = %#" PRIx64, hdr_version); + pr_err(", version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return -EINVAL; + } + + if (dump_trace) { + num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; + cs_etm__print_auxtrace_info(ptr, num_cpu); + } + + return cs_etm__process_auxtrace_info_full(event, session); +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 0b369969b26cb9cb70438b9154ca77bc1a1553ac..78ef69785d2d503399f9a6f862d6cc017213657d 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -624,6 +624,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, switch (t_params->protocol) { case CS_ETM_PROTO_ETMV3: case CS_ETM_PROTO_PTM: + csid = (t_params->etmv3.reg_idr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3); decoder->decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ? 
OCSD_BUILTIN_DCD_ETMV3 : @@ -631,11 +632,13 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, trace_config = &config_etmv3; break; case CS_ETM_PROTO_ETMV4i: + csid = (t_params->etmv4.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); decoder->decoder_name = OCSD_BUILTIN_DCD_ETMV4I; trace_config = &trace_config_etmv4; break; case CS_ETM_PROTO_ETE: + csid = (t_params->ete.reg_traceidr & CORESIGHT_TRACE_ID_VAL_MASK); cs_etm_decoder__gen_ete_config(t_params, &trace_config_ete); decoder->decoder_name = OCSD_BUILTIN_DCD_ETE; trace_config = &trace_config_ete; @@ -644,6 +647,10 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, return -1; } + /* if the CPU has no trace ID associated, no decoder needed */ + if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL) + return 0; + if (d_params->operation == CS_ETM_OPERATION_DECODE) { if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 88fee0935036dccc776732df557c294581893929..ca18c44cfec090a993d0f3d0bb0ff85a7542be3b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -193,6 +193,167 @@ int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) return 0; } +static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata) +{ + struct int_node *inode; + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, trace_chan_id); + + /* Something went wrong, no need to continue */ + if (!inode) + return -ENOMEM; + + /* + * The node for that CPU should not be taken. + * Back out if that's the case. 
+ */ + if (inode->priv) + return -EINVAL; + + /* All good, associate the traceID with the metadata pointer */ + inode->priv = cpu_metadata; + + return 0; +} + +static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) +{ + u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; + + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + *trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] & + CORESIGHT_TRACE_ID_VAL_MASK); + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + *trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] & + CORESIGHT_TRACE_ID_VAL_MASK); + break; + default: + return -EINVAL; + } + return 0; +} + +/* + * update metadata trace ID from the value found in the AUX_HW_INFO packet. + * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. + */ +static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) +{ + u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; + + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id; + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id; + break; + + default: + return -EINVAL; + } + return 0; +} + +/* + * FIELD_GET (linux/bitfield.h) not available outside kernel code, + * and the header contains too many dependencies to just copy over, + * so roll our own based on the original + */ +#define __bf_shf(x) (__builtin_ffsll(x) - 1) +#define FIELD_GET(_mask, _reg) \ + ({ \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +/* + * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. + * + * The payload associates the Trace ID and the CPU. + * The routine is tolerant of seeing multiple packets with the same association, + * but a CPU / Trace ID association changing during a session is an error. 
+ */ +static int cs_etm__process_aux_output_hw_id(struct perf_session *session, + union perf_event *event) +{ + struct cs_etm_auxtrace *etm; + struct perf_sample sample; + struct int_node *inode; + struct evsel *evsel; + u64 *cpu_data; + u64 hw_id; + int cpu, version, err; + u8 trace_chan_id, curr_chan_id; + + /* extract and parse the HW ID */ + hw_id = event->aux_output_hw_id.hw_id; + version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); + trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); + + /* check that we can handle this version */ + if (version > CS_AUX_HW_ID_CURR_VERSION) + return -EINVAL; + + /* get access to the etm metadata */ + etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); + if (!etm || !etm->metadata) + return -EINVAL; + + /* parse the sample to get the CPU */ + evsel = evlist__event2evsel(session->evlist, event); + if (!evsel) + return -EINVAL; + err = evsel__parse_sample(evsel, event, &sample); + if (err) + return err; + cpu = sample.cpu; + if (cpu == -1) { + /* no CPU in the sample - possibly recorded with an old version of perf */ + pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. 
Use compatible perf to record."); + return -EINVAL; + } + + /* See if the ID is mapped to a CPU, and it matches the current CPU */ + inode = intlist__find(traceid_list, trace_chan_id); + if (inode) { + cpu_data = inode->priv; + if ((int)cpu_data[CS_ETM_CPU] != cpu) { + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); + return -EINVAL; + } + + /* check that the mapped ID matches */ + err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data); + if (err) + return err; + if (curr_chan_id != trace_chan_id) { + pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n"); + return -EINVAL; + } + + /* mapped and matched - return OK */ + return 0; + } + + /* not one we've seen before - lets map it */ + cpu_data = etm->metadata[cpu]; + err = cs_etm__map_trace_id(trace_chan_id, cpu_data); + if (err) + return err; + + /* + * if we are picking up the association from the packet, need to plug + * the correct trace ID into the metadata for setting up decoders later. 
+ */ + err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); + return err; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { @@ -464,12 +625,12 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, u64 **metadata = etm->metadata; t_params[idx].protocol = CS_ETM_PROTO_ETE; - t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; - t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; - t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; - t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; - t_params[idx].ete.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; - t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; + t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0]; + t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1]; + t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2]; + t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8]; + t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR]; + t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR]; t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH]; } @@ -2510,141 +2671,6 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) return timeless_decoding; } -static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION] = " Header version %llx\n", - [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", - [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", -}; - -static const char * const cs_etm_priv_fmts[] = { - [CS_ETM_MAGIC] = " Magic number %llx\n", - [CS_ETM_CPU] = " CPU %lld\n", - [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", - [CS_ETM_ETMCR] = " ETMCR %llx\n", - [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", - [CS_ETM_ETMCCER] = " ETMCCER %llx\n", - [CS_ETM_ETMIDR] = " ETMIDR %llx\n", -}; - -static const char * const cs_etmv4_priv_fmts[] = { - [CS_ETM_MAGIC] = " Magic number %llx\n", - [CS_ETM_CPU] = " 
CPU %lld\n", - [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", - [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", - [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", - [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", - [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", - [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", - [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", - [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", - [CS_ETE_TRCDEVARCH] = " TRCDEVARCH %llx\n" -}; - -static const char * const param_unk_fmt = - " Unknown parameter [%d] %llx\n"; -static const char * const magic_unk_fmt = - " Magic number Unknown %llx\n"; - -static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) -{ - int i = *offset, j, nr_params = 0, fmt_offset; - __u64 magic; - - /* check magic value */ - magic = val[i + CS_ETM_MAGIC]; - if ((magic != __perf_cs_etmv3_magic) && - (magic != __perf_cs_etmv4_magic)) { - /* failure - note bad magic value */ - fprintf(stdout, magic_unk_fmt, magic); - return -EINVAL; - } - - /* print common header block */ - fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); - fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); - - if (magic == __perf_cs_etmv3_magic) { - nr_params = CS_ETM_NR_TRC_PARAMS_V0; - fmt_offset = CS_ETM_ETMCR; - /* after common block, offset format index past NR_PARAMS */ - for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) - fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - } else if (magic == __perf_cs_etmv4_magic) { - nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; - fmt_offset = CS_ETMV4_TRCCONFIGR; - /* after common block, offset format index past NR_PARAMS */ - for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) - fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - } - *offset = i; - return 0; -} - -static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) -{ - int i = *offset, j, total_params = 0; - __u64 magic; - - magic = val[i + CS_ETM_MAGIC]; - /* total params to print is NR_PARAMS + common block size for v1 */ - total_params = val[i + 
CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; - - if (magic == __perf_cs_etmv3_magic) { - for (j = 0; j < total_params; j++, i++) { - /* if newer record - could be excess params */ - if (j >= CS_ETM_PRIV_MAX) - fprintf(stdout, param_unk_fmt, j, val[i]); - else - fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - } - } else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) { - /* - * ETE and ETMv4 can be printed in the same block because the number of parameters - * is saved and they share the list of parameter names. ETE is also only supported - * in V1 files. - */ - for (j = 0; j < total_params; j++, i++) { - /* if newer record - could be excess params */ - if (j >= CS_ETE_PRIV_MAX) - fprintf(stdout, param_unk_fmt, j, val[i]); - else - fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - } - } else { - /* failure - note bad magic value and error out */ - fprintf(stdout, magic_unk_fmt, magic); - return -EINVAL; - } - *offset = i; - return 0; -} - -static void cs_etm__print_auxtrace_info(__u64 *val, int num) -{ - int i, cpu = 0, version, err; - - /* bail out early on bad header version */ - version = val[0]; - if (version > CS_HEADER_CURRENT_VERSION) { - /* failure.. return */ - fprintf(stdout, " Unknown Header Version = %x, ", version); - fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); - return; - } - - for (i = 0; i < CS_HEADER_VERSION_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); - - for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { - if (version == 0) - err = cs_etm__print_cpu_metadata_v0(val, &i); - else if (version == 1) - err = cs_etm__print_cpu_metadata_v1(val, &i); - if (err) - return; - } -} - /* * Read a single cpu parameter block from the auxtrace_info priv block. * @@ -2750,11 +2776,16 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o } /* - * In per-thread mode, CPU is set to -1, but TID will be set instead. See - * auxtrace_mmap_params__set_idx(). 
Return 'not found' if neither CPU nor TID match. + * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See + * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a + * CPU as we set this always for the AUX_OUTPUT_HW_ID event. + * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1. + * Return 'not found' if mismatch. */ - if ((auxtrace_event->cpu == (__u32) -1 && auxtrace_event->tid != sample->tid) || - auxtrace_event->cpu != sample->cpu) + if (auxtrace_event->cpu == (__u32) -1) { + if (auxtrace_event->tid != sample->tid) + return 1; + } else if (auxtrace_event->cpu != sample->cpu) return 1; if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) { @@ -2803,6 +2834,17 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o return 1; } +static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event, + u64 offset __maybe_unused, void *data __maybe_unused) +{ + /* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */ + if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) { + (*(int *)data)++; /* increment found count */ + return cs_etm__process_aux_output_hw_id(session, event); + } + return 0; +} + static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event, u64 offset __maybe_unused, void *data __maybe_unused) { @@ -2881,57 +2923,79 @@ static int cs_etm__queue_aux_records(struct perf_session *session) return 0; } -int cs_etm__process_auxtrace_info(union perf_event *event, - struct perf_session *session) +/* map trace ids to correct metadata block, from information in metadata */ +static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata) +{ + u64 cs_etm_magic; + u8 trace_chan_id; + int i, err; + + for (i = 0; i < num_cpu; i++) { + cs_etm_magic = metadata[i][CS_ETM_MAGIC]; + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + 
metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; + trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]); + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK; + trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]); + break; + default: + /* unknown magic number */ + return -EINVAL; + } + err = cs_etm__map_trace_id(trace_chan_id, metadata[i]); + if (err) + return err; + } + return 0; +} + +/* + * If we found AUX_HW_ID packets, then set any metadata marked as unused to the + * unused value to reduce the number of unneeded decoders created. + */ +static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) +{ + u64 cs_etm_magic; + int i; + + for (i = 0; i < num_cpu; i++) { + cs_etm_magic = metadata[i][CS_ETM_MAGIC]; + switch (cs_etm_magic) { + case __perf_cs_etmv3_magic: + if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) + metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; + break; + case __perf_cs_etmv4_magic: + case __perf_cs_ete_magic: + if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) + metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; + break; + default: + /* unknown magic number */ + return -EINVAL; + } + } + return 0; +} + +int cs_etm__process_auxtrace_info_full(union perf_event *event, + struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; struct cs_etm_auxtrace *etm = NULL; - struct int_node *inode; - unsigned int pmu_type; int event_header_size = sizeof(struct perf_event_header); - int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu, trcidr_idx; + int num_cpu; int err = 0; + int aux_hw_id_found; int i, j; - u64 *ptr, *hdr = NULL; + u64 *ptr = NULL; u64 **metadata = NULL; - u64 hdr_version; - - /* - * sizeof(auxtrace_info_event::type) + - * 
sizeof(auxtrace_info_event::reserved) == 8 - */ - info_header_size = 8; - - if (total_size < (event_header_size + info_header_size)) - return -EINVAL; - - priv_size = total_size - event_header_size - info_header_size; - - /* First the global part */ - ptr = (u64 *) auxtrace_info->priv; - - /* Look for version of the header */ - hdr_version = ptr[0]; - if (hdr_version > CS_HEADER_CURRENT_VERSION) { - /* print routine will print an error on bad version */ - if (dump_trace) - cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); - return -EINVAL; - } - - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); - if (!hdr) - return -ENOMEM; - - /* Extract header information - see cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_MAX; i++) - hdr[i] = ptr[i]; - num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; - pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & - 0xffffffff); /* * Create an RB tree for traceID-metadata tuple. Since the conversion @@ -2939,17 +3003,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, * in anything other than a sequential array is worth doing. 
*/ traceid_list = intlist__new(NULL); - if (!traceid_list) { - err = -ENOMEM; - goto err_free_hdr; - } + if (!traceid_list) + return -ENOMEM; + /* First the global part */ + ptr = (u64 *) auxtrace_info->priv; + num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff; metadata = zalloc(sizeof(*metadata) * num_cpu); if (!metadata) { err = -ENOMEM; goto err_free_traceid_list; } + /* Start parsing after the common part of the header */ + i = CS_HEADER_VERSION_MAX; + /* * The metadata is stored in the auxtrace_info section and encodes * the configuration of the ARM embedded trace macrocell which is @@ -2962,23 +3030,13 @@ int cs_etm__process_auxtrace_info(union perf_event *event, cs_etm__create_meta_blk(ptr, &i, CS_ETM_PRIV_MAX, CS_ETM_NR_TRC_PARAMS_V0); - - /* The traceID is our handle */ - trcidr_idx = CS_ETM_ETMTRACEIDR; - } else if (ptr[i] == __perf_cs_etmv4_magic) { metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETMV4_PRIV_MAX, CS_ETMV4_NR_TRC_PARAMS_V0); - - /* The traceID is our handle */ - trcidr_idx = CS_ETMV4_TRCTRACEIDR; } else if (ptr[i] == __perf_cs_ete_magic) { metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1); - - /* ETE shares first part of metadata with ETMv4 */ - trcidr_idx = CS_ETMV4_TRCTRACEIDR; } else { ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n", ptr[i]); @@ -2990,26 +3048,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, err = -ENOMEM; goto err_free_metadata; } - - /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); - - /* Something went wrong, no need to continue */ - if (!inode) { - err = -ENOMEM; - goto err_free_metadata; - } - - /* - * The node for that CPU should not be taken. - * Back out if that's the case. 
- */ - if (inode->priv) { - err = -EINVAL; - goto err_free_metadata; - } - /* All good, associate the traceID with the metadata pointer */ - inode->priv = metadata[j]; } /* @@ -3019,6 +3057,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, * The following tests if the correct number of double words was * present in the auxtrace info section. */ + priv_size = total_size - event_header_size - INFO_HEADER_SIZE; if (i * 8 != priv_size) { err = -EINVAL; goto err_free_metadata; @@ -3047,8 +3086,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->machine = &session->machines.host; etm->num_cpu = num_cpu; - etm->pmu_type = pmu_type; - etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); + etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff); + etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0); etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); @@ -3082,27 +3121,55 @@ int cs_etm__process_auxtrace_info(union perf_event *event, goto err_delete_thread; } - if (dump_trace) { - cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - } - err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; + /* + * Map Trace ID values to CPU metadata. + * + * Trace metadata will always contain Trace ID values from the legacy algorithm. If the + * file has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata + * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set. + * + * The updated kernel drivers that use AUX_HW_ID to send Trace IDs will attempt to use + * the same IDs as the old algorithm as far as is possible, unless there are clashes + * in which case a different value will be used. This means an older perf may still + * be able to record and read files generated on a newer system. + * + * For a perf able to interpret AUX_HW_ID packets we first check for the presence of + * those packets.
If they are there then the values will be mapped and plugged into + * the metadata. We then set any remaining metadata values with the unused flag to a + * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required. + * + * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel - + * then we map Trace ID values to CPU directly from the metadata - clearing any unused + * flags if present. + */ + + /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ + aux_hw_id_found = 0; + err = perf_session__peek_events(session, session->header.data_offset, + session->header.data_size, + cs_etm__process_aux_hw_id_cb, &aux_hw_id_found); + if (err) + goto err_delete_thread; + + /* if HW ID found then clear any unused metadata ID values */ + if (aux_hw_id_found) + err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata); + /* otherwise, this is a file with metadata values only, map from metadata */ + else + err = cs_etm__map_trace_ids_metadata(num_cpu, metadata); + + if (err) + goto err_delete_thread; + err = cs_etm__queue_aux_records(session); if (err) goto err_delete_thread; etm->data_queued = etm->queues.populated; - /* - * Print warning in pipe mode, see cs_etm__process_auxtrace_event() and - * cs_etm__queue_aux_fragment() for details relating to limitations. - */ - if (!etm->data_queued) - pr_warning("CS ETM warning: Coresight decode and TRBE support requires random file access.\n" - "Continuing with best effort decoding in piped mode.\n\n"); - return 0; err_delete_thread: @@ -3119,14 +3186,5 @@ int cs_etm__process_auxtrace_info(union perf_event *event, zfree(&metadata); err_free_traceid_list: intlist__delete(traceid_list); -err_free_hdr: - zfree(&hdr); - /* - * At this point, as a minimum we have valid header. Dump the rest of - * the info section - the print routines will error out on structural - * issues.
- */ - if (dump_trace) - cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index f54834f6f9e35e95c379b3a1b357d8f609359480..16d53e1db50cc74576a80539a0a9a878367724cd 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -27,13 +27,17 @@ enum { /* * Update the version for new format. * - * New version 1 format adds a param count to the per cpu metadata. + * Version 1: format adds a param count to the per cpu metadata. * This allows easy adding of new metadata parameters. * Requires that new params always added after current ones. * Also allows client reader to handle file versions that are different by * checking the number of params in the file vs the number expected. + * + * Version 2: Drivers will use PERF_RECORD_AUX_OUTPUT_HW_ID to output + * CoreSight Trace ID. ...TRACEIDR metadata will be set to legacy values + * but with additional flags. */ -#define CS_HEADER_CURRENT_VERSION 1 +#define CS_HEADER_CURRENT_VERSION 2 /* Beginning of header common to both ETMv3 and V4 */ enum { @@ -80,10 +84,25 @@ enum { * added in header V1 */ enum { - CS_ETE_TRCDEVARCH = CS_ETMV4_PRIV_MAX, + /* Dynamic, configurable parameters */ + CS_ETE_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, + CS_ETE_TRCTRACEIDR, + /* RO, taken from sysFS */ + CS_ETE_TRCIDR0, + CS_ETE_TRCIDR1, + CS_ETE_TRCIDR2, + CS_ETE_TRCIDR8, + CS_ETE_TRCAUTHSTATUS, + CS_ETE_TRCDEVARCH, CS_ETE_PRIV_MAX }; +/* + * Check for valid CoreSight trace ID. If an invalid value is present in the metadata, + * then IDs are present in the hardware ID packet in the data file.
+ */ +#define CS_IS_VALID_TRACE_ID(id) ((id > 0) && (id < 0x70)) + /* * ETMv3 exception encoding number: * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) @@ -200,9 +219,13 @@ struct cs_etm_packet_queue { #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) #define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64)) -#ifdef HAVE_CSTRACE_SUPPORT +#define INFO_HEADER_SIZE (sizeof(((struct perf_record_auxtrace_info *)0)->type) + \ + sizeof(((struct perf_record_auxtrace_info *)0)->reserved__)) + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); + +#ifdef HAVE_CSTRACE_SUPPORT int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, @@ -212,10 +235,12 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id); struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id); +int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused); #else static inline int -cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) { return -1; }