From 78b4ae46d7cf6c64e72a8e3965217722656b8659 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:57 -0800 Subject: [PATCH 01/45] perf script: Support data page size ANBZ: #9586 commit 6b9bae63de4fe24365ad0c2d23e77ae06f8c58e4 upstream. Display the data page size if it is available and asked by the user: Can be configured by the user, for example: perf script --fields comm,event,phys_addr,data_page_size dtlb mem-loads:uP: 3fec82ea8 4K dtlb mem-loads:uP: 3fec82e90 4K dtlb mem-loads:uP: 3e23700a4 4K dtlb mem-loads:uP: 3fec82f20 4K dtlb mem-loads:uP: 3e23700a4 4K dtlb mem-loads:uP: 3b4211bec 4K dtlb mem-loads:uP: 382205dc0 2M dtlb mem-loads:uP: 36fa082c0 2M dtlb mem-loads:uP: 377607340 2M dtlb mem-loads:uP: 330010180 2M dtlb mem-loads:uP: 33200fd80 2M dtlb mem-loads:uP: 31b012b80 2M Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-script.txt | 5 +++-- tools/perf/builtin-script.c | 17 +++++++++++++++-- tools/perf/util/event.h | 3 +++ tools/perf/util/session.c | 13 +++++++++++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4f712fb8f175..44d37210fc8f 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,9 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc. 
+ srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, + brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, + metric, misc, srccode, ipc, data_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8d55de9f1d3c..94fa50ad1131 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -30,6 +30,7 @@ #include "util/thread-stack.h" #include "util/time-utils.h" #include "util/path.h" +#include "util/event.h" #include "ui/ui.h" #include "print_binary.h" #include "archinsn.h" @@ -115,6 +116,7 @@ enum perf_output_field { PERF_OUTPUT_SRCCODE = 1ULL << 30, PERF_OUTPUT_IPC = 1ULL << 31, PERF_OUTPUT_TOD = 1ULL << 32, + PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, }; struct perf_script { @@ -179,6 +181,7 @@ struct output_option { {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, {.str = "ipc", .field = PERF_OUTPUT_IPC}, {.str = "tod", .field = PERF_OUTPUT_TOD}, + {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, }; enum { @@ -251,7 +254,8 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | - PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR, + PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | + PERF_OUTPUT_DATA_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -499,6 +503,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) return -EINVAL; + if (PRINT_FIELD(DATA_PAGE_SIZE) && + evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE)) + return -EINVAL; + return 0; } @@ -1921,6 +1929,7 @@ static void process_event(struct perf_script *script, 
unsigned int type = output_type(attr->type); struct evsel_script *es = evsel->priv; FILE *fp = es->fp; + char str[PAGE_SIZE_NAME_LEN]; if (output[type].fields == 0) return; @@ -2009,6 +2018,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); + if (PRINT_FIELD(DATA_PAGE_SIZE)) + fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str)); + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); @@ -3513,7 +3525,8 @@ int cmd_script(int argc, const char **argv) "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod", + "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," + "data_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b16d9b5a7883..c03c95916129 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -417,4 +417,7 @@ extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; extern unsigned int proc_map_timeout; +#define PAGE_SIZE_NAME_LEN 32 +char *get_page_size_name(u64 size, char *str); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 86afc1088355..f81124fa8b42 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -32,6 +32,7 @@ #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" +#include "units.h" #include #ifdef HAVE_ZSTD_SUPPORT @@ -1287,10 +1288,19 @@ static void dump_event(struct evlist *evlist, union perf_event *event, event->header.size, perf_event__name(event->header.type)); } +char *get_page_size_name(u64 size, char *str) +{ + if (!size || !unit_number__scnprintf(str, PAGE_SIZE_NAME_LEN, size)) + 
snprintf(str, PAGE_SIZE_NAME_LEN, "%s", "N/A"); + + return str; +} + static void dump_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *sample) { u64 sample_type; + char str[PAGE_SIZE_NAME_LEN]; if (!dump_trace) return; @@ -1329,6 +1339,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_PHYS_ADDR) printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr); + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str)); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); -- Gitee From fba6d39dfd1c3cf5cd5c7c38f0493f7ae88bfae4 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:58 -0800 Subject: [PATCH 02/45] perf sort: Add sort option for data page size ANBZ: #9586 commit a50d03e3b8b68df13e47dcbde6c5d39b4237c479 upstream. Add a new sort option "data_page_size" for --mem-mode sort. With this option applied, perf can sort and report by sample's data page size. Here is an example: perf report --stdio --mem-mode --sort=comm,symbol,phys_daddr,data_page_size # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 9K of event 'mem-loads:uP' # Total weight : 9028 # Sort order : comm,symbol,phys_daddr,data_page_size # # Overhead Command Symbol Data Physical # Address # Data Page Size # ........ ....... ............................ # ...................... ...................... # 11.19% dtlb [.] touch_buffer [.] 0x00000003fec82ea8 4K 8.61% dtlb [.] GetTickCount [.] 0x00000003c4f2c8a8 4K 4.52% dtlb [.] GetTickCount [.] 0x00000003fec82f58 4K 4.33% dtlb [.] __gettimeofday [.] 0x00000003fec82f48 4K 4.32% dtlb [.] GetTickCount [.] 0x00000003fec82f78 4K 4.28% dtlb [.] GetTickCount [.] 0x00000003fec82f50 4K 4.23% dtlb [.] GetTickCount [.] 0x00000003fec82f70 4K 4.11% dtlb [.] GetTickCount [.] 
0x00000003fec82f68 4K 4.00% dtlb [.] Calibrate [.] 0x00000003fec82f98 4K 3.91% dtlb [.] Calibrate [.] 0x00000003fec82f90 4K 3.43% dtlb [.] touch_buffer [.] 0x00000003fec82e98 4K 3.42% dtlb [.] touch_buffer [.] 0x00000003fec82e90 4K 0.09% dtlb [.] DoDependentLoads [.] 0x000000036ea084c0 2M 0.08% dtlb [.] DoDependentLoads [.] 0x000000032b010b80 2M [kun: conflicts solved. ] Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 3 +++ tools/perf/util/hist.h | 1 + tools/perf/util/machine.c | 7 ++++-- tools/perf/util/map_symbol.h | 1 + tools/perf/util/sort.c | 30 ++++++++++++++++++++++++ tools/perf/util/sort.h | 1 + 7 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index cff34d7c4ca0..df2486929128 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -154,6 +154,7 @@ OPTIONS - dcacheline: the cacheline the data address is on at the time of the sample - phys_daddr: physical address of data being executed on at the time of sample - blocked: reason of blocked load access for the data at the time of the sample + - data_page_size: the data page size of data being executed on at the time of sample And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3d11fa622126..839eb84c5b9b 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -190,6 +190,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, 
HISTC_MEM_PHYS_DADDR, unresolved_col_width + 4 + 2); + hists__new_col_len(hists, HISTC_MEM_DATA_PAGE_SIZE, + unresolved_col_width + 4 + 2); + } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 65016bcd6c95..94f88d0ae130 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -56,6 +56,7 @@ enum hist_column { HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, HISTC_MEM_PHYS_DADDR, + HISTC_MEM_DATA_PAGE_SIZE, HISTC_MEM_LOCKED, HISTC_MEM_TLB, HISTC_MEM_LVL, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 61b27ee56a9f..be1c9bdd4209 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2030,11 +2030,12 @@ static void ip__resolve_ams(struct thread *thread, ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = 0; + ams->data_page_size = 0; } static void ip__resolve_data(struct thread *thread, u8 m, struct addr_map_symbol *ams, - u64 addr, u64 phys_addr) + u64 addr, u64 phys_addr, u64 daddr_page_size) { struct addr_location al; @@ -2048,6 +2049,7 @@ static void ip__resolve_data(struct thread *thread, ams->ms.sym = al.sym; ams->ms.map = al.map; ams->phys_addr = phys_addr; + ams->data_page_size = daddr_page_size; } struct mem_info *sample__resolve_mem(struct perf_sample *sample, @@ -2060,7 +2062,8 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); ip__resolve_data(al->thread, al->cpumode, &mi->daddr, - sample->addr, sample->phys_addr); + sample->addr, sample->phys_addr, + sample->data_page_size); mi->data_src.val = sample->data_src; return mi; diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 5b8ca93798e9..7d22ade082c8 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -19,5 +19,6 @@ struct addr_map_symbol { u64 addr; u64 al_addr; u64 phys_addr; + u64 data_page_size; }; 
#endif // __PERF_MAP_SYMBOL diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6eca56ff5684..08cc4a6dddf4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1662,6 +1662,35 @@ struct sort_entry sort_mem_phys_daddr = { .se_width_idx = HISTC_MEM_PHYS_DADDR, }; +static int64_t +sort__data_page_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->daddr.data_page_size; + if (right->mem_info) + r = right->mem_info->daddr.data_page_size; + + return (int64_t)(r - l); +} + +static int hist_entry__data_page_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char str[PAGE_SIZE_NAME_LEN]; + + return repsep_snprintf(bf, size, "%-*s", width, + get_page_size_name(he->mem_info->daddr.data_page_size, str)); +} + +struct sort_entry sort_mem_data_page_size = { + .se_header = "Data Page Size", + .se_cmp = sort__data_page_size_cmp, + .se_snprintf = hist_entry__data_page_size_snprintf, + .se_width_idx = HISTC_MEM_DATA_PAGE_SIZE, +}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1945,6 +1974,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr), DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked), + DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 4a9cc00a998b..02bd7fd395e0 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -261,6 +261,7 @@ enum sort_type { SORT_MEM_IADDR_SYMBOL, SORT_MEM_PHYS_DADDR, SORT_MEM_BLOCKED, + SORT_MEM_DATA_PAGE_SIZE, }; /* -- Gitee From e3ee6c1f3faca50bf29963ae77bbf40b49186c08 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Dec 2020 10:57:59 -0800 Subject: [PATCH 03/45] perf mem: Factor out a function to generate 
sort order ANBZ: #9586 commit 2e7f545096f954a9726c9415763dd0bfbcac47e0 upstream. Now, "--phys-data" is the only option which impacts the sort order. A simple "if else" is enough to handle the option. But there will be more options added, e.g. "--data-page-size", which also impact the sort order. The code will become too complex to be maintained. Divide the sort order string into several small pieces. The first piece is always the default sort string for LOAD/STORE. Append the specific sort string if the related option is applied. No functional change. [kun: add "blocked" argument to mem. ] Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: http://lore.kernel.org/lkml/20201216185805.9981-4-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-mem.c | 41 ++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 274091e0cd29..f1c53a25fa32 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -298,11 +298,35 @@ static int report_raw_events(struct perf_mem *mem) perf_session__delete(session); return ret; } +static char *get_sort_order(struct perf_mem *mem) +{ + bool has_extra_options = mem->phys_addr ? 
true : false; + char sort[128]; + + /* + * there is no weight (cost) associated with stores, so don't print + * the column + */ + if (!(mem->operation & MEM_OPERATION_LOAD)) { + strcpy(sort, "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"); + } else if (has_extra_options) { + strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr," + "dso_daddr,snoop,tlb,locked,blocked"); + } else + return NULL; + + if (mem->phys_addr) + strcat(sort, ",phys_daddr"); + + return strdup(sort); +} static int report_events(int argc, const char **argv, struct perf_mem *mem) { const char **rep_argv; int ret, i = 0, j, rep_argc; + char *new_sort_order; if (mem->dump_raw) return report_raw_events(mem); @@ -316,20 +340,9 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) rep_argv[i++] = "--mem-mode"; rep_argv[i++] = "-n"; /* display number of samples */ - /* - * there is no weight (cost) associated with stores, so don't print - * the column - */ - if (!(mem->operation & MEM_OPERATION_LOAD)) { - if (mem->phys_addr) - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked,phys_daddr"; - else - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"; - } else if (mem->phys_addr) - rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr," - "dso_daddr,snoop,tlb,locked,blocked,phys_daddr"; + new_sort_order = get_sort_order(mem); + if (new_sort_order) + rep_argv[i++] = new_sort_order; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; -- Gitee From ef639ac3f70d8d9418c118817a8e91aa2fe0b17d Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jan 2021 11:57:47 -0800 Subject: [PATCH 04/45] perf mem: Clean up output format ANBZ: #9586 commit 407ee5c920dfead7b3fcff0644843c2f84d24245 upstream. Now, "--phys-data" is the only option which impacts the output format. A simple "if else" is enough to handle the option. But there will be more options added, e.g. "--data-page-size", which also impact the output format. 
The code will become too complex to be maintained. Divide the big printf into several small pieces. Output the specific piece only if the related option is applied. No functional change. Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-2-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-mem.c | 93 ++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 55 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index f1c53a25fa32..61b3de26ed5b 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -172,7 +172,7 @@ dump_raw_samples(struct perf_tool *tool, { struct perf_mem *mem = container_of(tool, struct perf_mem, tool); struct addr_location al; - const char *fmt; + const char *fmt, *field_sep; if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -186,60 +186,41 @@ dump_raw_samples(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - if (mem->phys_addr) { - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64 - "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; - } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64 - "%s%s:%s\n"; - symbol_conf.field_sep = " "; - } - - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, - sample->phys_addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? 
al.sym->name : "???"); + field_sep = symbol_conf.field_sep; + if (field_sep) { + fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s"; } else { - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 - "%s0x%"PRIx64"%s%s:%s\n"; - } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; - symbol_conf.field_sep = " "; - } + fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64"%s"; + symbol_conf.field_sep = " "; + } + printf(fmt, + sample->pid, + symbol_conf.field_sep, + sample->tid, + symbol_conf.field_sep, + sample->ip, + symbol_conf.field_sep, + sample->addr, + symbol_conf.field_sep); - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? al.sym->name : "???"); + if (mem->phys_addr) { + printf("0x%016"PRIx64"%s", + sample->phys_addr, + symbol_conf.field_sep); } + + if (field_sep) + fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; + else + fmt = "%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; + + printf(fmt, + sample->weight, + symbol_conf.field_sep, + sample->data_src, + symbol_conf.field_sep, + al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", + al.sym ? 
al.sym->name : "???"); out_put: addr_location__put(&al); return 0; @@ -287,10 +268,12 @@ static int report_raw_events(struct perf_mem *mem) if (ret < 0) goto out_delete; + printf("# PID, TID, IP, ADDR, "); + if (mem->phys_addr) - printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - else - printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); + printf("PHYS ADDR, "); + + printf("LOCAL WEIGHT, DSRC, SYMBOL\n"); ret = perf_session__process_events(session); -- Gitee From 29cbd2ceb2b268e7f6a7234a0b427af66e06f8b8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 5 Jan 2021 11:57:48 -0800 Subject: [PATCH 05/45] perf mem: Support data page size ANBZ: #9586 commit 06280e3b15fdaa28f02d995c0a74ec46f75db90a upstream. Add option --data-page-size in "perf mem" to record/report data page size. Here are some examples: # perf mem --phys-data --data-page-size report -D # PID, TID, IP, ADDR, PHYS ADDR, DATA PAGE SIZE, LOCAL WEIGHT, DSRC, SYMBOL 20134 20134 0xffffffffb5bd2fd0 0x016ffff9a274e96a308 0x000000044e96a308 4K 1168 0x5080144 /lib/modules/4.18.0-rc7+/build/vmlinux:perf_ctx_unlock 20134 20134 0xffffffffb63f645c 0xffffffffb752b814 0xcfb52b814 2M 225 0x26a100142 /lib/modules/4.18.0-rc7+/build/vmlinux:_raw_spin_lock 20134 20134 0xffffffffb660300c 0xfffffe00016b8bb0 0x0 4K 0 0x5080144 /lib/modules/4.18.0-rc7+/build/vmlinux:__x86_indirect_thunk_rax # # perf mem --phys-data --data-page-size report --stdio # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 5K of event 'cpu/mem-loads,ldlat=30/P' # Total weight : 281234 # Sort order : # mem,sym,dso,symbol_daddr,dso_daddr,tlb,locked,phys_daddr,data_page_size # # Overhead Samples Memory access Symbol Shared Object Data Symbol Data Object TLB access Locked Data Physical Address Data Page Size # ........ ....... ............. ............................ ................ ...................... ........... ............ ...... 
...................... .............. 28.54% 1826 L1 or L1 hit [k] __x86_indirect_thunk_rax [kernel.vmlinux] [k] 0xffffb0df31b0ff28 [unknown] L1 or L2 hit No [k] 0x0000000000000000 4K 6.02% 256 L1 or L1 hit [.] touch_buffer dtlb [.] 0x00007ffd50109da8 [stack] L1 or L2 hit No [.] 0x000000042454ada8 4K 3.23% 5 L1 or L1 hit [k] clear_huge_page [kernel.vmlinux] [k] 0xffff9a2753b8ce60 [unknown] L1 or L2 hit No [k] 0x0000000453b8ce60 2M 2.98% 4 L1 or L1 hit [k] clear_page_erms [kernel.vmlinux] [k] 0xffffb0df31b0fd00 [unknown] L1 or L2 hit No [k] 0x0000000000000000 4K Signed-off-by: Kan Liang Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Stephane Eranian Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-3-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-mem.txt | 3 +++ tools/perf/builtin-mem.c | 20 +++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 199ea0f0a6c0..66177511c5c4 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -63,6 +63,9 @@ OPTIONS --phys-data:: Record/Report sample physical addresses +--data-page-size:: + Record/Report sample data address page size + RECORD OPTIONS -------------- -e:: diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 61b3de26ed5b..adfb7d0d4f4d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -30,6 +30,7 @@ struct perf_mem { bool dump_raw; bool force; bool phys_addr; + bool data_page_size; int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -124,6 +125,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->phys_addr) rec_argv[i++] = "--phys-data"; + if (mem->data_page_size) + rec_argv[i++] = "--data-page-size"; + 
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { e = perf_mem_events__ptr(j); if (!e->record) @@ -173,6 +177,7 @@ dump_raw_samples(struct perf_tool *tool, struct perf_mem *mem = container_of(tool, struct perf_mem, tool); struct addr_location al; const char *fmt, *field_sep; + char str[PAGE_SIZE_NAME_LEN]; if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -209,6 +214,12 @@ dump_raw_samples(struct perf_tool *tool, symbol_conf.field_sep); } + if (mem->data_page_size) { + printf("%s%s", + get_page_size_name(sample->data_page_size, str), + symbol_conf.field_sep); + } + if (field_sep) fmt = "%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; else @@ -273,6 +284,9 @@ static int report_raw_events(struct perf_mem *mem) if (mem->phys_addr) printf("PHYS ADDR, "); + if (mem->data_page_size) + printf("DATA PAGE SIZE, "); + printf("LOCAL WEIGHT, DSRC, SYMBOL\n"); ret = perf_session__process_events(session); @@ -283,7 +297,7 @@ static int report_raw_events(struct perf_mem *mem) } static char *get_sort_order(struct perf_mem *mem) { - bool has_extra_options = mem->phys_addr ? true : false; + bool has_extra_options = (mem->phys_addr | mem->data_page_size) ? true : false; char sort[128]; /* @@ -302,6 +316,9 @@ static char *get_sort_order(struct perf_mem *mem) if (mem->phys_addr) strcat(sort, ",phys_daddr"); + if (mem->data_page_size) + strcat(sort, ",data_page_size"); + return strdup(sort); } @@ -447,6 +464,7 @@ int cmd_mem(int argc, const char **argv) " between columns '.' 
is reserved."), OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"), + OPT_BOOLEAN(0, "data-page-size", &mem.data_page_size, "Record/Report sample data address page size"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; -- Gitee From f493c61eb4a5c0fd09778327b46d1f04a043170d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 5 Jan 2021 11:57:50 -0800 Subject: [PATCH 06/45] perf script: Add support for PERF_SAMPLE_CODE_PAGE_SIZE ANBZ: #9586 commit c513de8a703183fc228280b31a4091363037950f upstream. Display sampled code page sizes when PERF_SAMPLE_CODE_PAGE_SIZE was set. For example: # perf script --fields comm,event,ip,code_page_size dtlb mem-loads:uP: 445777 4K dtlb mem-loads:uP: 40f724 4K dtlb mem-loads:uP: 474926 4K dtlb mem-loads:uP: 401075 4K dtlb mem-loads:uP: 401095 4K dtlb mem-loads:uP: 401095 4K dtlb mem-loads:uP: 4010cc 4K dtlb mem-loads:uP: 440b6f 4K # Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-5-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-script.txt | 2 +- tools/perf/builtin-script.c | 13 +++++++++++-- tools/perf/util/session.c | 3 +++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 44d37210fc8f..60dae302db27 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -118,7 +118,7 @@ OPTIONS comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, 
- metric, misc, srccode, ipc, data_page_size. + metric, misc, srccode, ipc, data_page_size, code_page_size. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 94fa50ad1131..6d761e4467a6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -117,6 +117,7 @@ enum perf_output_field { PERF_OUTPUT_IPC = 1ULL << 31, PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, + PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, }; struct perf_script { @@ -182,6 +183,7 @@ struct output_option { {.str = "ipc", .field = PERF_OUTPUT_IPC}, {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, + {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, }; enum { @@ -255,7 +257,7 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -507,6 +509,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_DATA_PAGE_SIZE, "DATA_PAGE_SIZE", PERF_OUTPUT_DATA_PAGE_SIZE)) return -EINVAL; + if (PRINT_FIELD(CODE_PAGE_SIZE) && + evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE)) + return -EINVAL; + return 0; } @@ -2021,6 +2027,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(DATA_PAGE_SIZE)) fprintf(fp, " %s", get_page_size_name(sample->data_page_size, str)); + if (PRINT_FIELD(CODE_PAGE_SIZE)) + fprintf(fp, " %s", get_page_size_name(sample->code_page_size, str)); + perf_sample__fprintf_ipc(sample, attr, fp); fprintf(fp, "\n"); 
@@ -3526,7 +3535,7 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size", + "data_page_size,code_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f81124fa8b42..254aef88d6ea 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1342,6 +1342,9 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) printf(" .. data page size: %s\n", get_page_size_name(sample->data_page_size, str)); + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + printf(" .. code page size: %s\n", get_page_size_name(sample->code_page_size, str)); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); -- Gitee From e62a619bc6ab69af7e88487a11e86bebeb6e672f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 5 Jan 2021 11:57:51 -0800 Subject: [PATCH 07/45] perf report: Add support for PERF_SAMPLE_CODE_PAGE_SIZE ANBZ: #9586 commit 9fd74f209c69c9157584af9cdc500af9bbc06b82 upstream. Add a new sort dimension "code_page_size" for common sort. With this option applied, perf can sort and report by sample's code page size. For example: # perf report --stdio --sort=comm,symbol,code_page_size # To display the perf.data header info, please use # --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 3K of event 'mem-loads:uP' # Event count (approx.): 1470769 # # Overhead Command Symbol Code Page Size IPC [IPC Coverage] # ........ ....... ............................ .............. .................... # 69.56% dtlb [.] GetTickCount 4K - - 17.93% dtlb [.] Calibrate 4K - - 11.40% dtlb [.] __gettimeofday 4K - - # [kun: conflicts solved. 
] Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-6-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-report.txt | 1 + tools/perf/util/hist.c | 2 ++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 26 ++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 5 files changed, 32 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index df2486929128..69ac993427ae 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -111,6 +111,7 @@ OPTIONS - ins_lat: Instruction latency in core cycles. This is the global instruction latency - local_ins_lat: Local instruction latency version + - code_page_size: the code page size of sampled code address (ip) By default, comm, dso and symbol keys are used. (i.e. 
--sort comm,dso,symbol) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 839eb84c5b9b..c39c1ff38b34 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -217,6 +217,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_TIME, 16); else hists__new_col_len(hists, HISTC_TIME, 12); + hists__new_col_len(hists, HISTC_CODE_PAGE_SIZE, 6); if (h->srcline) { len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); @@ -726,6 +727,7 @@ __hists__add_entry(struct hists *hists, .cpumode = al->cpumode, .ip = al->addr, .level = al->level, + .code_page_size = sample->code_page_size, .stat = { .nr_events = 1, .period = sample->period, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 94f88d0ae130..05295cd87832 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -53,6 +53,7 @@ enum hist_column { HISTC_DSO_TO, HISTC_LOCAL_WEIGHT, HISTC_GLOBAL_WEIGHT, + HISTC_CODE_PAGE_SIZE, HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, HISTC_MEM_PHYS_DADDR, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 08cc4a6dddf4..5c8229cfcf6d 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1691,6 +1691,31 @@ struct sort_entry sort_mem_data_page_size = { .se_width_idx = HISTC_MEM_DATA_PAGE_SIZE, }; +static int64_t +sort__code_page_size_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = left->code_page_size; + uint64_t r = right->code_page_size; + + return (int64_t)(r - l); +} + +static int hist_entry__code_page_size_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + char str[PAGE_SIZE_NAME_LEN]; + + return repsep_snprintf(bf, size, "%-*s", width, + get_page_size_name(he->code_page_size, str)); +} + +struct sort_entry sort_code_page_size = { + .se_header = "Code Page Size", + .se_cmp = sort__code_page_size_cmp, + .se_snprintf = hist_entry__code_page_size_snprintf, + .se_width_idx = HISTC_CODE_PAGE_SIZE, 
+}; + static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1937,6 +1962,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_TIME, "time", sort_time), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), + DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 02bd7fd395e0..bedc44d6b3cb 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -107,6 +107,7 @@ struct hist_entry { u64 transaction; s32 socket; s32 cpu; + u64 code_page_size; u8 cpumode; u8 depth; @@ -232,6 +233,7 @@ enum sort_type { SORT_TIME, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, + SORT_CODE_PAGE_SIZE, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- Gitee From ea9558bbc00c2228647e7842d03cefab8dea3a40 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 16 Dec 2020 12:45:10 -0300 Subject: [PATCH 08/45] perf test: Make sample-parsing test aware of PERF_SAMPLE_{CODE,DATA}_PAGE_SIZE ANBZ: #9586 commit dc67d1920417140052976f3377fd216b87a50aad upstream. To fix this: $ perf test -v 27 27: Sample parsing : --- start --- test child forked, pid 586013 sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating test child finished with -1 ---- end ---- Sample parsing: FAILED! $ This patchset is still not completely merged, so when adding the PERF_SAMPLE_CODE_PAGE_SIZE to 'struct perf_sample' we need to add the bits added in this patch for 'perf_sample.data_page_size'. [kun: conflicts solved, and keep PERF_SAMPLE_WEIGHT_STRUCT in test__sample_parsing().] 
Fixes: 251cc77b8176de37 ("tools headers UAPI: Update tools's copy of linux/perf_event.h") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/tests/sample-parsing.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 7ad2256ef1cd..a80c57e041de 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -157,6 +157,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_CGROUP) COMP(cgroup); + if (type & PERF_SAMPLE_DATA_PAGE_SIZE) + COMP(data_page_size); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -237,6 +240,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) }, .phys_addr = 113, .cgroup = 114, + .data_page_size = 115, .ins_lat = 117, .aux_sample = { .size = sizeof(aux_data), -- Gitee From ba149a701379d6b3d52354bbfb92d72abfee8964 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 5 Jan 2021 11:57:52 -0800 Subject: [PATCH 09/45] perf test: Add test case for PERF_SAMPLE_CODE_PAGE_SIZE ANBZ: #9586 commit d8eda898057e6fab8b2a9137485c574c91b2554f upstream. Extend sample-parsing test cases to support new sample type PERF_SAMPLE_CODE_PAGE_SIZE. [kun: conflicts solved.] 
Signed-off-by: Stephane Eranian Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Andi Kleen Cc: Ingo Molnar Cc: Mark Rutland Cc: Michael Ellerman Cc: Will Deacon Link: https://lore.kernel.org/r/20210105195752.43489-7-kan.liang@linux.intel.com Signed-off-by: Kan Liang Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/tests/sample-parsing.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index a80c57e041de..0dbe3aa99853 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -160,6 +160,9 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_DATA_PAGE_SIZE) COMP(data_page_size); + if (type & PERF_SAMPLE_CODE_PAGE_SIZE) + COMP(code_page_size); + if (type & PERF_SAMPLE_AUX) { COMP(aux_sample.size); if (memcmp(s1->aux_sample.data, s2->aux_sample.data, @@ -241,6 +244,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .phys_addr = 113, .cgroup = 114, .data_page_size = 115, + .code_page_size = 116, .ins_lat = 117, .aux_sample = { .size = sizeof(aux_data), -- Gitee From 79a75764d018df4065a0b35fb7748df9a4806c39 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:03 +0800 Subject: [PATCH 10/45] perf tools: Check mem-loads auxiliary event ANBZ: #9586 commit ddc11da5eb37e27a4b66cddcaf11233ef51b3a79 upstream. For some platforms, an auxiliary event has to be enabled simultaneously with the load latency event. For Alderlake, the auxiliary event is created in "cpu_core" pmu. So first we need to check the existence of "cpu_core" pmu and then check if this pmu has the auxiliary event. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/arch/x86/util/mem-events.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index 588110fd8904..e79232e3f2a0 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -11,8 +11,13 @@ static bool mem_loads_name__init; bool is_mem_loads_aux_event(struct evsel *leader) { - if (!pmu_have_event("cpu", "mem-loads-aux")) - return false; + if (perf_pmu__find("cpu")) { + if (!pmu_have_event("cpu", "mem-loads-aux")) + return false; + } else if (perf_pmu__find("cpu_core")) { + if (!pmu_have_event("cpu_core", "mem-loads-aux")) + return false; + } return leader->core.attr.config == MEM_LOADS_AUX; } -- Gitee From 65d79c34933448500798d5fda05ea2dedbecc34c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:04 +0800 Subject: [PATCH 11/45] perf tools: Support pmu prefix for mem-load event ANBZ: #9586 commit d2f327acc638312a96d0c0a20c56c7db945d30d7 upstream. The perf_mem_events__name() can generate the mem-load event name. It uses a variable 'mem_loads_name__init' to avoid generating the event name every time (because perf_pmu__scan takes some time). The perf_mem_events__name() assumes the pmu is "cpu" but it's not correct for hybrid platform. For Alderlake, the pmu is "cpu_core" or "cpu_atom" Introduce a new parameter 'pmu_name' in perf_mem_events__name to let the caller specify a pmu name. Considering such event name is x86 specific, so move perf_mem_events[] to arch/x86/util/mem-events.c. We still keep the variable 'mem_loads_name__init' but it's only used when pmu_name is NULL (compatible for original behavior). 
When pmu_name is not NULL (e.g. "cpu_core"), this patch doesn't have optimization. That can be implemented in follow up patch. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/arch/arm64/util/mem-events.c | 2 +- tools/perf/arch/powerpc/util/mem-events.c | 2 +- tools/perf/arch/x86/util/mem-events.c | 35 ++++++++++++++++++----- tools/perf/builtin-c2c.c | 4 +-- tools/perf/builtin-mem.c | 4 +-- tools/perf/util/mem-events.c | 4 +-- tools/perf/util/mem-events.h | 2 +- 7 files changed, 37 insertions(+), 16 deletions(-) diff --git a/tools/perf/arch/arm64/util/mem-events.c b/tools/perf/arch/arm64/util/mem-events.c index b45259bcbcd1..df817d1f9f3e 100644 --- a/tools/perf/arch/arm64/util/mem-events.c +++ b/tools/perf/arch/arm64/util/mem-events.c @@ -20,7 +20,7 @@ struct perf_mem_event *perf_mem_events__ptr(int i) return &perf_mem_events[i]; } -char *perf_mem_events__name(int i) +char *perf_mem_events__name(int i, char *pmu_name __maybe_unused) { struct perf_mem_event *e = perf_mem_events__ptr(i); diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c index 07fb5e049488..4120fafe0be4 100644 --- a/tools/perf/arch/powerpc/util/mem-events.c +++ b/tools/perf/arch/powerpc/util/mem-events.c @@ -3,7 +3,7 @@ #include "mem-events.h" /* PowerPC does not support 'ldlat' parameter. 
*/ -char *perf_mem_events__name(int i) +char *perf_mem_events__name(int i, char *pmu_name __maybe_unused) { if (i == PERF_MEM_EVENTS__LOAD) return (char *) "cpu/mem-loads/"; diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index e79232e3f2a0..f9e444a4fe70 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -7,7 +7,23 @@ static char mem_loads_name[100]; static bool mem_loads_name__init; #define MEM_LOADS_AUX 0x8203 -#define MEM_LOADS_AUX_NAME "{cpu/mem-loads-aux/,cpu/mem-loads,ldlat=%u/pp}:S" +#define MEM_LOADS_AUX_NAME "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "cpu/events/mem-stores"), + E(NULL, NULL, NULL), +}; + +struct perf_mem_event *perf_mem_events__ptr(int i) +{ + if (i >= PERF_MEM_EVENTS__MAX) + return NULL; + + return &perf_mem_events[i]; +} bool is_mem_loads_aux_event(struct evsel *leader) { @@ -22,7 +38,7 @@ bool is_mem_loads_aux_event(struct evsel *leader) return leader->core.attr.config == MEM_LOADS_AUX; } -char *perf_mem_events__name(int i) +char *perf_mem_events__name(int i, char *pmu_name) { struct perf_mem_event *e = perf_mem_events__ptr(i); @@ -30,17 +46,22 @@ char *perf_mem_events__name(int i) return NULL; if (i == PERF_MEM_EVENTS__LOAD) { - if (mem_loads_name__init) + if (mem_loads_name__init && !pmu_name) return mem_loads_name; - mem_loads_name__init = true; + if (!pmu_name) { + mem_loads_name__init = true; + pmu_name = (char *)"cpu"; + } - if (pmu_have_event("cpu", "mem-loads-aux")) { + if (pmu_have_event(pmu_name, "mem-loads-aux")) { scnprintf(mem_loads_name, sizeof(mem_loads_name), - MEM_LOADS_AUX_NAME, perf_mem_events__loads_ldlat); + MEM_LOADS_AUX_NAME, pmu_name, pmu_name, + perf_mem_events__loads_ldlat); } 
else { scnprintf(mem_loads_name, sizeof(mem_loads_name), - e->name, perf_mem_events__loads_ldlat); + e->name, pmu_name, + perf_mem_events__loads_ldlat); } return mem_loads_name; } diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f15a6746bb57..552b6114df4e 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -3463,13 +3463,13 @@ static int perf_c2c__record(int argc, const char **argv) if (!e->supported) { pr_err("failed: event '%s' not supported\n", - perf_mem_events__name(j)); + perf_mem_events__name(j, NULL)); free(rec_argv); return -1; } rec_argv[i++] = "-e"; - rec_argv[i++] = perf_mem_events__name(j); + rec_argv[i++] = perf_mem_events__name(j, NULL); } if (all_user) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index adfb7d0d4f4d..56704a323bc4 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -135,13 +135,13 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (!e->supported) { pr_err("failed: event '%s' not supported\n", - perf_mem_events__name(j)); + perf_mem_events__name(j, NULL)); free(rec_argv); return -1; } rec_argv[i++] = "-e"; - rec_argv[i++] = perf_mem_events__name(j); + rec_argv[i++] = perf_mem_events__name(j, NULL); } if (all_user) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index bc3a96f06d31..cc2c6abb5b22 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -37,7 +37,7 @@ struct perf_mem_event * __weak perf_mem_events__ptr(int i) return &perf_mem_events[i]; } -char * __weak perf_mem_events__name(int i) +char * __weak perf_mem_events__name(int i, char *pmu_name __maybe_unused) { struct perf_mem_event *e = perf_mem_events__ptr(i); @@ -141,7 +141,7 @@ void perf_mem_events__list(void) fprintf(stderr, "%-13s%-*s%s\n", e->tag ?: "", verbose > 0 ? 25 : 0, - verbose > 0 ? perf_mem_events__name(j) : "", + verbose > 0 ? perf_mem_events__name(j, NULL) : "", e->supported ? 
": available" : ""); } } diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index ffe58845b979..18d3cc29b84b 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -38,7 +38,7 @@ extern unsigned int perf_mem_events__loads_ldlat; int perf_mem_events__parse(const char *str); int perf_mem_events__init(void); -char *perf_mem_events__name(int i); +char *perf_mem_events__name(int i, char *pmu_name); struct perf_mem_event *perf_mem_events__ptr(int i); bool is_mem_loads_aux_event(struct evsel *leader); -- Gitee From 77e1cebdee1824dcab9acb970034750de3580e8f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:05 +0800 Subject: [PATCH 12/45] perf tools: Support pmu prefix for mem-store event ANBZ: #9586 commit a91ffcf30e0002e6f52d4c2cd9639443e514e88a upstream. For enabling mem-store event, it doesn't need an auxiliary event. So just build an event name string with the pmu prefix. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/arch/x86/util/mem-events.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index f9e444a4fe70..5214370ca4e4 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -5,6 +5,7 @@ static char mem_loads_name[100]; static bool mem_loads_name__init; +static char mem_stores_name[100]; #define MEM_LOADS_AUX 0x8203 #define MEM_LOADS_AUX_NAME "{%s/mem-loads-aux/,%s/mem-loads,ldlat=%u/}:P" @@ -13,7 +14,7 @@ static bool mem_loads_name__init; static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"), - E("ldlat-stores", "cpu/mem-stores/P", 
"cpu/events/mem-stores"), + E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"), E(NULL, NULL, NULL), }; @@ -66,5 +67,14 @@ char *perf_mem_events__name(int i, char *pmu_name) return mem_loads_name; } + if (i == PERF_MEM_EVENTS__STORE) { + if (!pmu_name) + pmu_name = (char *)"cpu"; + + scnprintf(mem_stores_name, sizeof(mem_stores_name), + e->name, pmu_name); + return mem_stores_name; + } + return (char *)e->name; } -- Gitee From 673cf38970b0340e138f04842d25a39d0bdbedbe Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:06 +0800 Subject: [PATCH 13/45] perf tools: Check if mem_events is supported for hybrid platform ANBZ: #9586 commit e7ce8d11bfb06a06c1e00830c223514086191649 upstream. Check if the mem_events ('mem-loads' and 'mem-stores') exist in the sysfs path. For Alderlake, the hybrid cpu pmu are "cpu_core" and "cpu_atom". Check the existence of the following paths: /sys/devices/cpu_atom/events/mem-loads /sys/devices/cpu_atom/events/mem-stores /sys/devices/cpu_core/events/mem-loads /sys/devices/cpu_core/events/mem-stores If the path exists, the mem_event is supported. 
Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/util/mem-events.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index cc2c6abb5b22..497fe092a81a 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -12,6 +12,8 @@ #include "mem-events.h" #include "debug.h" #include "symbol.h" +#include "pmu.h" +#include "pmu-hybrid.h" unsigned int perf_mem_events__loads_ldlat = 30; @@ -100,6 +102,15 @@ int perf_mem_events__parse(const char *str) return -1; } +static bool perf_mem_event__supported(const char *mnt, char *sysfs_name) +{ + char path[PATH_MAX]; + struct stat st; + + scnprintf(path, PATH_MAX, "%s/devices/%s", mnt, sysfs_name); + return !stat(path, &st); +} + int perf_mem_events__init(void) { const char *mnt = sysfs__mount(); @@ -110,9 +121,9 @@ int perf_mem_events__init(void) return -ENOENT; for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - char path[PATH_MAX]; struct perf_mem_event *e = perf_mem_events__ptr(j); - struct stat st; + struct perf_pmu *pmu; + char sysfs_name[100]; /* * If the event entry isn't valid, skip initialization @@ -121,11 +132,20 @@ int perf_mem_events__init(void) if (!e->tag) continue; - scnprintf(path, PATH_MAX, "%s/devices/%s", - mnt, e->sysfs_name); + if (!perf_pmu__has_hybrid()) { + scnprintf(sysfs_name, sizeof(sysfs_name), + e->sysfs_name, "cpu"); + e->supported = perf_mem_event__supported(mnt, sysfs_name); + } else { + perf_pmu__for_each_hybrid_pmu(pmu) { + scnprintf(sysfs_name, sizeof(sysfs_name), + e->sysfs_name, pmu->name); + e->supported |= perf_mem_event__supported(mnt, sysfs_name); + } + } - if (!stat(path, &st)) - e->supported = found = true; + if (e->supported) + 
found = true; } return found ? 0 : -ENOENT; -- Gitee From ba3e589cb78e8b910c92b41cd221ff3eb8aa49e1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:07 +0800 Subject: [PATCH 14/45] perf mem: Support record for hybrid platform ANBZ: #9586 commit 4a9086adc329c9460aefc563969b24eed534adba upstream. Support 'perf mem record' for hybrid platform. On hybrid platform, such as Alderlake, when executing 'perf mem record', it actually calls: record -e {cpu_core/mem-loads-aux/,cpu_core/mem-loads,ldlat=30/}:P -e cpu_atom/mem-loads,ldlat=30/P -e cpu_core/mem-stores/P -e cpu_atom/mem-stores/P Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-mem.c | 43 ++++++++++++++---------- tools/perf/util/mem-events.c | 65 ++++++++++++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 2 ++ 3 files changed, 93 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 56704a323bc4..f6a52e164ed0 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -18,6 +18,8 @@ #include "util/dso.h" #include "util/map.h" #include "util/symbol.h" +#include "util/pmu.h" +#include "util/pmu-hybrid.h" #include #define MEM_OPERATION_LOAD 0x1 @@ -62,8 +64,9 @@ static const char * const *record_mem_usage = __usage; static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { - int rec_argc, i = 0, j; + int rec_argc, i = 0, j, tmp_nr = 0; const char **rec_argv; + char **rec_tmp; int ret; bool all_user = false, all_kernel = false; struct perf_mem_event *e; @@ -87,11 +90,24 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); - rec_argc = argc + 9; /* max number of 
arguments */ + if (!perf_pmu__has_hybrid()) + rec_argc = argc + 9; /* max number of arguments */ + else + rec_argc = argc + 9 * perf_pmu__hybrid_pmu_num(); + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; + /* + * Save the allocated event name strings. + */ + rec_tmp = calloc(rec_argc + 1, sizeof(char *)); + if (!rec_tmp) { + free(rec_argv); + return -1; + } + rec_argv[i++] = "record"; e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD_STORE); @@ -128,21 +144,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->data_page_size) rec_argv[i++] = "--data-page-size"; - for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - e = perf_mem_events__ptr(j); - if (!e->record) - continue; - - if (!e->supported) { - pr_err("failed: event '%s' not supported\n", - perf_mem_events__name(j, NULL)); - free(rec_argv); - return -1; - } - - rec_argv[i++] = "-e"; - rec_argv[i++] = perf_mem_events__name(j, NULL); - } + ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &tmp_nr); + if (ret) + goto out; if (all_user) rec_argv[i++] = "--all-user"; @@ -164,6 +168,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) } ret = cmd_record(i, rec_argv); +out: + for (i = 0; i < tmp_nr; i++) + free(rec_tmp[i]); + + free(rec_tmp); free(rec_argv); return ret; } diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 497fe092a81a..ace841f94fec 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -166,6 +166,71 @@ void perf_mem_events__list(void) } } +static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e, + int idx) +{ + const char *mnt = sysfs__mount(); + char sysfs_name[100]; + struct perf_pmu *pmu; + + perf_pmu__for_each_hybrid_pmu(pmu) { + scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name, + pmu->name); + if (!perf_mem_event__supported(mnt, sysfs_name)) { + pr_err("failed: event '%s' not supported\n", + perf_mem_events__name(idx, 
pmu->name)); + } + } +} + +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, + char **rec_tmp, int *tmp_nr) +{ + int i = *argv_nr, k = 0; + struct perf_mem_event *e; + struct perf_pmu *pmu; + char *s; + + for (int j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + e = perf_mem_events__ptr(j); + if (!e->record) + continue; + + if (!perf_pmu__has_hybrid()) { + if (!e->supported) { + pr_err("failed: event '%s' not supported\n", + perf_mem_events__name(j, NULL)); + return -1; + } + + rec_argv[i++] = "-e"; + rec_argv[i++] = perf_mem_events__name(j, NULL); + } else { + if (!e->supported) { + perf_mem_events__print_unsupport_hybrid(e, j); + return -1; + } + + perf_pmu__for_each_hybrid_pmu(pmu) { + rec_argv[i++] = "-e"; + s = perf_mem_events__name(j, pmu->name); + if (s) { + s = strdup(s); + if (!s) + return -1; + + rec_argv[i++] = s; + rec_tmp[k++] = s; + } + } + } + } + + *argv_nr = i; + *tmp_nr = k; + return 0; +} + static const char * const tlb_access[] = { "N/A", "HIT", diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 18d3cc29b84b..ebcd44c385e4 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -43,6 +43,8 @@ struct perf_mem_event *perf_mem_events__ptr(int i); bool is_mem_loads_aux_event(struct evsel *leader); void perf_mem_events__list(void); +int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, + char **rec_tmp, int *tmp_nr); struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -- Gitee From be75427a5fd356bb72ab385a7be8629e223bb754 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:08 +0800 Subject: [PATCH 15/45] perf mem: Fix wrong verbose output for recording events ANBZ: #9586 commit a6d9de8427584553b71492071f6ffd7c92ec8b99 upstream. 
Current code: for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; if (verbose > 0) { pr_debug("calling: record "); while (rec_argv[j]) { pr_debug("%s ", rec_argv[j]); j++; } pr_debug("\n"); } The entries of argv[] are copied to the end of rec_argv[], not copied to the beginning of rec_argv[]. So the index j at rec_argv[] doesn't point to the first event. Now we record the start index and end index for events in rec_argv[], and print them if verbose is enabled. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-7-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-mem.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index f6a52e164ed0..e09aa7c75095 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -65,6 +65,7 @@ static const char * const *record_mem_usage = __usage; static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j, tmp_nr = 0; + int start, end; const char **rec_argv; char **rec_tmp; int ret; @@ -144,9 +145,11 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->data_page_size) rec_argv[i++] = "--data-page-size"; + start = i; ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &tmp_nr); if (ret) goto out; + end = i; if (all_user) rec_argv[i++] = "--all-user"; @@ -160,10 +163,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (verbose > 0) { pr_debug("calling: record "); - while (rec_argv[j]) { + for (j = start; j < end; j++) pr_debug("%s ", rec_argv[j]); - j++; - } + pr_debug("\n"); } -- Gitee From 26221929591ccca72b6258e031c4b901fbfe9685 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:09 +0800 Subject: [PATCH 16/45] perf mem: Disable 
'mem-loads-aux' group before reporting ANBZ: #9586 commit d5a8bd0fcd069819aa48f5e38548e07d5eb3e651 upstream. For some platforms, such as Alderlake, the 'mem-loads' event is required to use together with 'mem-loads-aux' within a group and 'mem-loads-aux' must be the group leader. Now we disable this group before reporting because 'mem-loads-aux' is just an auxiliary event. It doesn't carry any valid memory load result. If we show the 'mem-loads-aux' + 'mem-loads' as a group in report, it needs many of changes but they are totally unnecessary. [kun: conflicts solved. ] Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-8-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-report.c | 2 ++ tools/perf/util/evlist.c | 25 +++++++++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ 3 files changed, 29 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8b512dd1e3d4..a6cb226f07c9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -930,6 +930,8 @@ static int __cmd_report(struct report *rep) return ret; } + evlist__check_mem_load_aux(session->evlist); + if (rep->stats_mode) return stats_print(rep); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index aa915812ea38..79ec21e89179 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1982,3 +1982,28 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) } return NULL; } + +void evlist__check_mem_load_aux(struct evlist *evlist) +{ + struct evsel *leader, *evsel, *pos; + + /* + * For some platforms, the 'mem-loads' event is required to use + * together with 'mem-loads-aux' within a group and 'mem-loads-aux' + * must be the group leader. 
Now we disable this group before reporting + * because 'mem-loads-aux' is just an auxiliary event. It doesn't carry + * any valid memory load information. + */ + evlist__for_each_entry(evlist, evsel) { + leader = evsel->leader; + if (leader == evsel) + continue; + + if (leader->name && strstr(leader->name, "mem-loads-aux")) { + for_each_group_evsel(pos, leader) { + pos->leader = pos; + pos->core.nr_members = 0; + } + } + } +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bd7c6d2b4e8e..2a324fa75823 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -373,4 +373,6 @@ int evlist__ctlfd_ack(struct evlist *evlist); #define EVLIST_DISABLED_MSG "Events disabled\n" struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); + +void evlist__check_mem_load_aux(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ -- Gitee From 27b9994b54bdb2313a842967c05720f6909a351e Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 27 May 2021 08:16:10 +0800 Subject: [PATCH 17/45] perf c2c: Support record for hybrid platform ANBZ: #9586 commit 79e157b00853af0e96d76997a93feec476a23bfa upstream. Support 'perf c2c record' for hybrid platform. 
On hybrid platform, such as Alderlake, when executing 'perf c2c record', it actually calls: record -W -d --phys-data --sample-cpu -e {cpu_core/mem-loads-aux/,cpu_core/mem-loads,ldlat=30/}:P -e cpu_atom/mem-loads,ldlat=30/P -e cpu_core/mem-stores/P -e cpu_atom/mem-stores/P Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210527001610.10553-9-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-c2c.c | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 552b6114df4e..041a23935443 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -42,6 +42,8 @@ #include "ui/ui.h" #include "ui/progress.h" #include "../perf.h" +#include "pmu.h" +#include "pmu-hybrid.h" struct c2c_hists { struct hists hists; @@ -3399,8 +3401,9 @@ static const char * const *record_mem_usage = __usage_record; static int perf_c2c__record(int argc, const char **argv) { - int rec_argc, i = 0, j; + int rec_argc, i = 0, j, rec_tmp_nr = 0; const char **rec_argv; + char **rec_tmp; int ret; bool all_user = false, all_kernel = false; bool event_set = false; @@ -3424,11 +3427,21 @@ static int perf_c2c__record(int argc, const char **argv) argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_KEEP_UNKNOWN); - rec_argc = argc + 11; /* max number of arguments */ + if (!perf_pmu__has_hybrid()) + rec_argc = argc + 11; /* max number of arguments */ + else + rec_argc = argc + 11 * perf_pmu__hybrid_pmu_num(); + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; + rec_tmp = calloc(rec_argc + 1, sizeof(char *)); + if (!rec_tmp) { + free(rec_argv); + return -1; + } + rec_argv[i++] = "record"; if (!event_set) { @@ -3456,21 +3469,9 @@ static int perf_c2c__record(int argc, const char 
**argv) rec_argv[i++] = "--phys-data"; rec_argv[i++] = "--sample-cpu"; - for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { - e = perf_mem_events__ptr(j); - if (!e->record) - continue; - - if (!e->supported) { - pr_err("failed: event '%s' not supported\n", - perf_mem_events__name(j, NULL)); - free(rec_argv); - return -1; - } - - rec_argv[i++] = "-e"; - rec_argv[i++] = perf_mem_events__name(j, NULL); - } + ret = perf_mem_events__record_args(rec_argv, &i, rec_tmp, &rec_tmp_nr); + if (ret) + goto out; if (all_user) rec_argv[i++] = "--all-user"; @@ -3494,6 +3495,11 @@ static int perf_c2c__record(int argc, const char **argv) } ret = cmd_record(i, rec_argv); +out: + for (i = 0; i < rec_tmp_nr; i++) + free(rec_tmp[i]); + + free(rec_tmp); free(rec_argv); return ret; } -- Gitee From 23f53a1f458c0bdfd42b52819af29e4144bb5a4e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 6 Oct 2021 19:36:51 +0530 Subject: [PATCH 18/45] perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line ANBZ: #9586 commit f4c6217f7f5936f7173d028559ff5d25cce10816 upstream. Add a comment about PERF_MEM_LVL_* namespace being depricated to some extent in favour of added PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. Remove an extra line present in perf_mem__lvl_scnprintf function. 
Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211006140654.298352-2-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- include/uapi/linux/perf_event.h | 8 +++++++- tools/include/uapi/linux/perf_event.h | 8 +++++++- tools/perf/util/mem-events.c | 1 - 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 73c1589fa140..7870fbe74115 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1234,7 +1234,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. + * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index e6afdc54571f..69f403f0d977 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1222,7 +1222,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. 
+ * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ace841f94fec..e049f2333019 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -322,7 +322,6 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) /* already taken care of */ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - if (mem_info && mem_info->data_src.mem_remote) { strcat(out, "Remote "); l += 7; -- Gitee From c2868991e0655817b8ce7a5ddd6c2151f07822ab Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 6 Oct 2021 19:36:52 +0530 Subject: [PATCH 19/45] perf: Add mem_hops field in perf_mem_data_src structure ANBZ: #9586 commit fec9cc6175d0ec1e13efe12be491d9bd4de62f80 upstream. Going forward, future generation systems can have more hierarchy within the node/package level but currently we don't have any data source encoding field in perf, which can be used to represent this level of data. Add a new field called 'mem_hops' in the perf_mem_data_src structure which can be used to represent intra-node/package or inter-node/off-package details. This field is of size 3 bits where PERF_MEM_HOPS_{NA, 0..6} value can be used to present different hop levels data. Also add corresponding macros to define mem_hop field values and shift value. Currently we define macro for HOPS_0 which corresponds to data coming from another core but same node. 
For ex: Encodings for mem_hops fields with L2 cache: L2 - local L2 L2 | REMOTE | HOPS_0 - remote core, same node L2 Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211006140654.298352-3-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- include/uapi/linux/perf_event.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7870fbe74115..7de6e162814b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1203,14 +1203,16 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_rsvd:21; + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:21, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1306,6 +1308,11 @@ union perf_mem_data_src { #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +/* 2-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) -- Gitee From 6df6b1029ae5131997eb9ffb7e56e121daf4c30f Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 6 Oct 2021 19:36:53 +0530 Subject: [PATCH 20/45] tools/perf: Add mem_hops field in perf_mem_data_src structure ANBZ: #9586 commit cae1d759065ee989de246d4a72bc2bfe9ad9d262 upstream. Going forward, future generation systems can have more hierarchy within the node/package level but currently we don't have any data source encoding field in perf, which can be used to represent this level of data. 
Add a new field called 'mem_hops' in the perf_mem_data_src structure which can be used to represent intra-node/package or inter-node/off-package details. This field is of size 3 bits where PERF_MEM_HOPS_{NA, 0..6} value can be used to present different hop levels data. Also add corresponding macros to define mem_hop field values and shift value. Currently we define macro for HOPS_0 which corresponds to data coming from another core but same node. Add functionality to represent mem_hop field data in perf_mem__lvl_scnprintf function with the help of added string array called mem_hops. For ex: Encodings for mem_hops fields with L2 cache: L2 - local L2 L2 | REMOTE | HOPS_0 - remote core, same node L2 Since with the addition of HOPS field, now remote can be used to denote cache access from the same node but different core, a check is added in the c2c_decode_stats function to set mrem only when HOPS is zero along with set remote field. Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211006140654.298352-4-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- tools/include/uapi/linux/perf_event.h | 11 +++++++++-- tools/perf/util/mem-events.c | 19 ++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 69f403f0d977..10d358698c91 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1191,14 +1191,16 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_rsvd:21; + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:21, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1294,6 +1296,11 @@ union perf_mem_data_src 
{ #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +/* 2-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index e049f2333019..b6535c554448 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -303,6 +303,16 @@ static const char * const mem_lvlnum[] = { [PERF_MEM_LVLNUM_NA] = "N/A", }; +static const char * const mem_hops[] = { + "N/A", + /* + * While printing, 'Remote' will be added to represent + * 'Remote core, same node' accesses as remote field need + * to be set with mem_hops field. + */ + "core, same node", +}; + int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; @@ -327,6 +337,9 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) l += 7; } + if (mem_info && mem_info->data_src.mem_hops) + l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); + printed = 0; for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { if (!(m & 0x1)) @@ -487,8 +500,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) /* * Skylake might report unknown remote level via this * bit, consider it when evaluating remote HITMs. + * + * Incase of power, remote field can also be used to denote cache + * accesses from the another core of same node. Hence, setting + * mrem only when HOPS is zero along with set remote field. 
*/ - bool mrem = data_src->mem_remote; + bool mrem = (data_src->mem_remote && !data_src->mem_hops); int err = 0; #define HITM_INC(__f) \ -- Gitee From 043d41266392a89988962a6053082ac40cdc5f5e Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Wed, 6 Oct 2021 19:36:54 +0530 Subject: [PATCH 21/45] powerpc/perf: Fix data source encodings for L2.1 and L3.1 accesses ANBZ: #9586 commit 26da4abfb38201c3cbe127daeded76d4c2bc9077 upstream. Fix the data source encodings to represent L2.1/L3.1(another core's L2/L3 on the same node) accesses properly for power10 and older platforms. Add new macros(LEVEL/REM) which can be used to add mem_lvl_num and remote field data inside perf_mem_data_src structure. Result in power9 system with patch changes: localhost:~/linux/tools/perf # ./perf mem report | grep Remote 0.01% 1 252 Remote core, same node L3 or L3 hit [.] 0x0000000000002dd0 producer_consumer [.] 0x00007fff7f25eb90 anon HitM N/A No N/A 0 0 0.01% 1 220 Remote core, same node L3 or L3 hit [.] 0x0000000000002dd0 producer_consumer [.] 0x00007fff77776d90 anon HitM N/A No N/A 0 0 0.01% 1 220 Remote core, same node L3 or L3 hit [.] 0x0000000000002dd0 producer_consumer [.]
0x00007fff817d9410 anon HitM N/A No N/A 0 0 Fixes: 79e96f8f930d ("powerpc/perf: Export memory hierarchy info to user space") Signed-off-by: Kajol Jain Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20211006140654.298352-5-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- arch/powerpc/perf/isa207-common.c | 26 +++++++++++++++++++++----- arch/powerpc/perf/isa207-common.h | 2 ++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 52990becbdfc..d71015577bff 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -193,11 +193,27 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) ret |= P(SNOOP, HIT); break; case 5: - ret = PH(LVL, REM_CCE1); - if ((sub_idx == 0) || (sub_idx == 2) || (sub_idx == 4)) - ret |= P(SNOOP, HIT); - else if ((sub_idx == 1) || (sub_idx == 3) || (sub_idx == 5)) - ret |= P(SNOOP, HITM); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + ret = REM | P(HOPS, 0); + + if (sub_idx == 0 || sub_idx == 4) + ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); + else if (sub_idx == 1 || sub_idx == 5) + ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM); + else if (sub_idx == 2 || sub_idx == 6) + ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + else if (sub_idx == 3 || sub_idx == 7) + ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + } else { + if (sub_idx == 0) + ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0); + else if (sub_idx == 1) + ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0); + else if (sub_idx == 2 || sub_idx == 4) + ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0); + else if (sub_idx == 3 || sub_idx == 5) + ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0); + } break; case 6: ret = PH(LVL, REM_CCE2); diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 454b32c31440..32c13f6030e2 100644 --- 
a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -261,6 +261,8 @@ #define P(a, b) PERF_MEM_S(a, b) #define PH(a, b) (P(LVL, HIT) | P(a, b)) #define PM(a, b) (P(LVL, MISS) | P(a, b)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp); int isa207_compute_mmcr(u64 event[], int n_ev, -- Gitee From 55bbe9f1c4d8e981e7a0361f0b2df0cc3fe96d5a Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:46 +0530 Subject: [PATCH 22/45] perf: Add new macros for mem_hops field ANBZ: #9586 commit cb1c4aba055f928ffae0c868e8dfe08eeab302e7 upstream. Add new macros for mem_hops field which can be used to represent remote-node, socket and board level details. Currently the code had macro for HOPS_0, which corresponds to data coming from another core but same node. Add new macros for HOPS_1 to HOPS_3 to represent remote-node, socket and board level data. For ex: Encodings for mem_hops fields with L2 cache: L2 - local L2 L2 | REMOTE | HOPS_0 - remote core, same node L2 L2 | REMOTE | HOPS_1 - remote node, same socket L2 L2 | REMOTE | HOPS_2 - remote socket, same board L2 L2 | REMOTE | HOPS_3 - remote board L2 Signed-off-by: Kajol Jain Acked-by: Peter Zijlstra (Intel) Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211206091749.87585-2-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- include/uapi/linux/perf_event.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 7de6e162814b..9d207caf8ef8 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1310,7 +1310,10 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -/* 2-7 available */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define 
PERF_MEM_HOPS_3 0x04 /* remote board */ +/* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ -- Gitee From 59e4db6356731b0290a5aba64c36e50d95fcbc8f Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:48 +0530 Subject: [PATCH 23/45] powerpc/perf: Add encodings to represent data based on newer composite PERF_MEM_LVLNUM* fields ANBZ: #9586 commit 4a20ee106154ac1765dea97932faad29f0ba57fc upstream. The code represent data coming from L1/L2/L3 cache hits based on PERF_MEM_LVL_* namespace, which is in the process of deprecation in the favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent L1/L2/L3 cache hits based on newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields for power10 and older platforms Result in power9 system without patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ............ ........................ ................................. ................ # 29.51% 1 L2 hit [k] perf_event_exec [kernel.vmlinux] 27.05% 1 L1 hit [k] perf_ctx_unlock [kernel.vmlinux] 13.93% 1 L1 hit [k] vtime_delta [kernel.vmlinux] 13.11% 1 L1 hit [k] prepend_path.isra.11 [kernel.vmlinux] 8.20% 1 L1 hit [.] 00000038.plt_call.__GI_strlen libc-2.28.so 8.20% 1 L1 hit [k] perf_event_interrupt [kernel.vmlinux] Result in power9 system with patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ............ ........................ .......................... ................ # 36.63% 1 L2 or L2 hit [k] perf_event_exec [kernel.vmlinux] 25.50% 1 L1 or L1 hit [k] vtime_delta [kernel.vmlinux] 13.12% 1 L1 or L1 hit [k] unmap_region [kernel.vmlinux] 12.62% 1 L1 or L1 hit [k] perf_sample_event_took [kernel.vmlinux] 6.93% 1 L1 or L1 hit [k] perf_ctx_unlock [kernel.vmlinux] 5.20% 1 L1 or L1 hit [.] 
__memcpy_power7 libc-2.28.so Signed-off-by: Kajol Jain Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211206091749.87585-4-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- arch/powerpc/perf/isa207-common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index d71015577bff..658dcae7ec50 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -175,13 +175,13 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) /* Nothing to do */ break; case 1: - ret = PH(LVL, L1); + ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT); break; case 2: - ret = PH(LVL, L2); + ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT); break; case 3: - ret = PH(LVL, L3); + ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: if (sub_idx <= 1) -- Gitee From 6dfb85fc80ea12300fed32caa3bc62bbe1ba87be Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 6 Dec 2021 14:47:49 +0530 Subject: [PATCH 24/45] powerpc/perf: Add data source encodings for power10 platform ANBZ: #9586 commit 6ed05a8efda56e5be11081954929421de19cce88 upstream. The code represents memory/cache level data based on PERF_MEM_LVL_* namespace, which is in the process of deprecation in favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent cache/memory data based on newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_,HOPS_} fields. Add data source encodings to represent data coming from local memory/Remote memory/distant memory and remote/distant cache hits. In order to represent data coming from OpenCAPI cache/memory, we use LVLNUM "PMEM" field which is used to present persistent memory accesses. Result in power10 system with patch changes: localhost:# ./perf mem report --sort="mem,sym,dso" --stdio # Overhead Samples Memory access Symbol Shared Object # ........ ............
........................ .......................... ................ # 29.46% 2331 L1 or L1 hit [.] __random libc-2.28.so 23.11% 2121 L1 or L1 hit [.] producer_populate_cache producer_consumer 18.56% 1758 L1 or L1 hit [.] __random_r libc-2.28.so 15.64% 1559 L2 or L2 hit [.] __random libc-2.28.so ..... 0.09% 5 Remote socket, same board Any cache hit [.] __random libc-2.28.so 0.07% 4 Remote socket, same board Any cache hit [.] __random libc-2.28.so ..... Signed-off-by: Kajol Jain Reviewed-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211206091749.87585-5-kjain@linux.ibm.com Signed-off-by: Kun(llfl) --- arch/powerpc/perf/isa207-common.c | 54 ++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 658dcae7ec50..f0e2c5fd9bec 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -184,13 +184,28 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); break; case 4: - if (sub_idx <= 1) - ret = PH(LVL, LOC_RAM); - else if (sub_idx > 1 && sub_idx <= 2) - ret = PH(LVL, REM_RAM1); - else - ret = PH(LVL, REM_RAM2); - ret |= P(SNOOP, HIT); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + ret = P(SNOOP, HIT); + + if (sub_idx == 1) + ret |= PH(LVL, LOC_RAM) | LEVEL(RAM); + else if (sub_idx == 2 || sub_idx == 3) + ret |= P(LVL, HIT) | LEVEL(PMEM); + else if (sub_idx == 4) + ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2); + else if (sub_idx == 5 || sub_idx == 7) + ret |= P(LVL, HIT) | LEVEL(PMEM) | REM; + else if (sub_idx == 6) + ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3); + } else { + if (sub_idx <= 1) + ret = PH(LVL, LOC_RAM); + else if (sub_idx > 1 && sub_idx <= 2) + ret = PH(LVL, REM_RAM1); + else + ret = PH(LVL, REM_RAM2); + ret |= P(SNOOP, HIT); + } break; case 5: if (cpu_has_feature(CPU_FTR_ARCH_31)) { @@ -216,11 
+231,26 @@ static inline u64 isa207_find_source(u64 idx, u32 sub_idx) } break; case 6: - ret = PH(LVL, REM_CCE2); - if ((sub_idx == 0) || (sub_idx == 2)) - ret |= P(SNOOP, HIT); - else if ((sub_idx == 1) || (sub_idx == 3)) - ret |= P(SNOOP, HITM); + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + if (sub_idx == 0) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 2); + else if (sub_idx == 1) + ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 2); + else if (sub_idx == 2) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HIT) | P(HOPS, 3); + else if (sub_idx == 3) + ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM | + P(SNOOP, HITM) | P(HOPS, 3); + } else { + ret = PH(LVL, REM_CCE2); + if (sub_idx == 0 || sub_idx == 2) + ret |= P(SNOOP, HIT); + else if (sub_idx == 1 || sub_idx == 3) + ret |= P(SNOOP, HITM); + } break; case 7: ret = PM(LVL, L1); -- Gitee From 2fe84aadce9aec9ad3d682e44806b62eb6903e7d Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:54 -0700 Subject: [PATCH 25/45] perf: Add sample_flags to indicate the PMU-filled sample data ANBZ: #9586 commit 3aac580d5cc3001ca1627725b3b61edb529f341d upstream. On some platforms, some data e.g., timestamps, can be retrieved from the PMU driver. Usually, the data from the PMU driver is more accurate. The current perf kernel should output the PMU-filled sample data if it's available. To check the availability of the PMU-filled sample data, the current perf kernel initializes the related fields in the perf_sample_data_init(). When outputting a sample, the perf checks whether the field is updated by the PMU driver. If yes, the updated value will be output. If not, the perf uses an SW way to calculate the value or just outputs the initialized value if an SW way is unavailable either. With more and more data being provided by the PMU driver, more fields has to be initialized in the perf_sample_data_init(). 
That will increase the number of cache lines touched in perf_sample_data_init() and be harmful to the performance. Add new "sample_flags" to indicate the PMU-filled sample data. The PMU driver should set the corresponding PERF_SAMPLE_ flag when the field is updated. The initialization of the corresponding field is not required anymore. The following patches will make use of it and remove the corresponding fields from the perf_sample_data_init(), which will further minimize the number of cache lines touched. Only clear the sample flags that have already been done by the PMU driver in the perf_prepare_sample() for the PERF_RECORD_SAMPLE. For the other PERF_RECORD_ event type, the sample data is not available. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-2-kan.liang@linux.intel.com Signed-off-by: Kun(llfl) --- include/linux/perf_event.h | 2 ++ kernel/events/core.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1381f54bbee7..545d97e28694 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1023,6 +1023,7 @@ struct perf_sample_data { * Fields set by perf_sample_data_init(), group so as to * minimize the cachelines touched. 
*/ + u64 sample_flags; u64 addr; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; @@ -1073,6 +1074,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { /* remaining struct members initialized in perf_prepare_sample() */ + data->sample_flags = 0; data->addr = addr; data->raw = NULL; data->br_stack = NULL; diff --git a/kernel/events/core.c b/kernel/events/core.c index 405856d12ed2..59ac39d6a772 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6681,11 +6681,10 @@ static void perf_aux_sample_output(struct perf_event *event, static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, - struct perf_event *event) + struct perf_event *event, + u64 sample_type) { - u64 sample_type = event->attr.sample_type; - - data->type = sample_type; + data->type = event->attr.sample_type; header->size += event->id_header_size; if (sample_type & PERF_SAMPLE_TID) { @@ -6714,7 +6713,7 @@ void perf_event_header__init_id(struct perf_event_header *header, struct perf_event *event) { if (event->attr.sample_id_all) - __perf_event_header__init_id(header, data, event); + __perf_event_header__init_id(header, data, event, event->attr.sample_type); } static void __perf_event__output_id_sample(struct perf_output_handle *handle, @@ -7193,6 +7192,7 @@ void perf_prepare_sample(struct perf_event_header *header, struct pt_regs *regs) { u64 sample_type = event->attr.sample_type; + u64 filtered_sample_type; header->type = PERF_RECORD_SAMPLE; header->size = sizeof(*header) + event->header_size; @@ -7200,7 +7200,12 @@ void perf_prepare_sample(struct perf_event_header *header, header->misc = 0; header->misc |= perf_misc_flags(regs); - __perf_event_header__init_id(header, data, event); + /* + * Clear the sample flags that have already been done by the + * PMU driver. 
+ */ + filtered_sample_type = sample_type & ~data->sample_flags; + __perf_event_header__init_id(header, data, event, filtered_sample_type); if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE)) data->ip = perf_instruction_pointer(regs); -- Gitee From 1294ff3fa08b6ed98b94a46f81fe4d52c86bc677 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:55 -0700 Subject: [PATCH 26/45] perf/x86/intel/pebs: Fix PEBS timestamps overwritten ANBZ: #9586 commit 47a3aeb39e8dc099ae431cd8b46bdf218f5511b2 upstream. The PEBS TSC-based timestamps do not appear correctly in the final perf.data output file from perf record. The data->time field setup by PEBS in the setup_pebs_fixed_sample_data() is later overwritten by perf_events generic code in perf_prepare_sample(). There is an ordering problem. Set the sample flags when the data->time is updated by PEBS. The data->time field will not be overwritten anymore. Reported-by: Andreas Kogler Reported-by: Stephane Eranian Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-3-kan.liang@linux.intel.com Signed-off-by: Kun(llfl) --- arch/x86/events/intel/ds.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index aabcaf8185f4..700a2e648f9d 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1693,8 +1693,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * We can only do this for the default trace clock. 
*/ if (x86_pmu.intel_cap.pebs_format >= 3 && - event->attr.use_clockid == 0) + event->attr.use_clockid == 0) { data->time = native_sched_clock_from_tsc(pebs->tsc); + data->sample_flags |= PERF_SAMPLE_TIME; + } if (has_branch_stack(event)) data->br_stack = &cpuc->lbr_stack; @@ -1756,8 +1758,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); data->period = event->hw.last_period; - if (event->attr.use_clockid == 0) + if (event->attr.use_clockid == 0) { data->time = native_sched_clock_from_tsc(basic->tsc); + data->sample_flags |= PERF_SAMPLE_TIME; + } /* * We must however always use iregs for the unwinder to stay sane; the -- Gitee From a58f7671b9ddb48c78588981e2a61123f9414068 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:56 -0700 Subject: [PATCH 27/45] perf: Use sample_flags for branch stack ANBZ: #9586 commit a9a931e2666878343782c82d7d55cc173ddeb3e9 upstream. Use the new sample_flags to indicate whether the branch stack is filled by the PMU driver. Remove the br_stack from the perf_sample_data_init() to minimize the number of cache lines touched. [ kun: conflicts solved. 
] Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-4-kan.liang@linux.intel.com Signed-off-by: Kun(llfl) --- arch/powerpc/perf/core-book3s.c | 1 + arch/x86/events/amd/core.c | 4 +++- arch/x86/events/core.c | 4 +++- arch/x86/events/intel/core.c | 4 +++- arch/x86/events/intel/ds.c | 5 ++++- include/linux/perf_event.h | 6 +++--- kernel/events/core.c | 4 ++-- 7 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 003cfa607024..73ab919946f5 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2207,6 +2207,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, cpuhw = this_cpu_ptr(&cpu_hw_events); power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; } if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 6fdc72941bd4..04bf31452648 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -967,8 +967,10 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index c928bfa7c232..b07bb8a1270c 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1699,8 +1699,10 @@ int x86_pmu_handle_irq(struct pt_regs *regs) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data.br_stack = &cpuc->lbr_stack; + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } if (perf_event_overflow(event, &data, regs)) 
x86_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3ff0fa0fb9b7..730ecb618564 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2961,8 +2961,10 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) + if (has_branch_stack(event)) { intel_pmu_lbr_save_brstack(&data, cpuc, event); + data.sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 700a2e648f9d..a0c504ac7d35 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1698,8 +1698,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, data->sample_flags |= PERF_SAMPLE_TIME; } - if (has_branch_stack(event)) + if (has_branch_stack(event)) { data->br_stack = &cpuc->lbr_stack; + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; + } } static void adaptive_pebs_save_regs(struct pt_regs *regs, @@ -1853,6 +1855,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, if (has_branch_stack(event)) { intel_pmu_store_pebs_lbrs(lbr); intel_pmu_lbr_save_brstack(data, cpuc, event); + data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; } } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 545d97e28694..e6ed8d0183d5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1026,8 +1026,6 @@ struct perf_sample_data { u64 sample_flags; u64 addr; struct perf_raw_record *raw; - struct perf_branch_stack *br_stack; - u64 *br_stack_cntr; u64 period; union perf_sample_weight weight; u64 txn; @@ -1037,6 +1035,9 @@ struct perf_sample_data { * The other fields, optionally {set,used} by * perf_{prepare,output}_sample(). 
*/ + struct perf_branch_stack *br_stack; + u64 *br_stack_cntr; + u64 type; u64 ip; struct { @@ -1077,7 +1078,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->sample_flags = 0; data->addr = addr; data->raw = NULL; - data->br_stack = NULL; data->br_stack_cntr = NULL; data->period = period; data->weight.full = 0; diff --git a/kernel/events/core.c b/kernel/events/core.c index 59ac39d6a772..2f13b5f79b51 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6933,7 +6933,7 @@ void perf_output_sample(struct perf_output_handle *handle, } if (sample_type & PERF_SAMPLE_BRANCH_STACK) { - if (data->br_stack) { + if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) { size_t size; size = data->br_stack->nr @@ -7248,7 +7248,7 @@ void perf_prepare_sample(struct perf_event_header *header, if (sample_type & PERF_SAMPLE_BRANCH_STACK) { int size = sizeof(u64); /* nr */ - if (data->br_stack) { + if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) { if (perf_sample_save_hw_index(event)) size += sizeof(u64); -- Gitee From c327e1689336014a366056a31388ade948876f60 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:57 -0700 Subject: [PATCH 28/45] perf: Use sample_flags for weight ANBZ: #9586 commit 2abe681da0a192ab850a5271d838a7817b469fca upstream. Use the new sample_flags to indicate whether the weight field is filled by the PMU driver. Remove the weight field from the perf_sample_data_init() to minimize the number of cache lines touched. [ kun: Conflicts solved. 
] Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-5-kan.liang@linux.intel.com Signed-off-by: Kun(llfl) --- arch/powerpc/perf/core-book3s.c | 4 +++- arch/x86/events/intel/ds.c | 10 +++++++--- include/linux/perf_event.h | 3 +-- kernel/events/core.c | 3 +++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 73ab919946f5..4115c1b71956 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2215,8 +2215,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && - ppmu->get_mem_weight) + ppmu->get_mem_weight) { ppmu->get_mem_weight(&data.weight.full); + data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } if (perf_event_overflow(event, &data, regs)) power_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index a0c504ac7d35..d0583db59218 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1585,8 +1585,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, /* * Use latency for weight (only avail with PEBS-LL) */ - if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) + if (fll && (sample_type & PERF_SAMPLE_WEIGHT_TYPE)) { data->weight.full = pebs->lat; + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } /* * data.data_src encodes the data source @@ -1678,9 +1680,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 2) { /* Only set the TSX weight when no memory weight. 
*/ - if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) + if ((sample_type & PERF_SAMPLE_WEIGHT_TYPE) && !fll) { data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); - + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } if (sample_type & PERF_SAMPLE_TRANSACTION) data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, pebs->ax); @@ -1826,6 +1829,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, data->weight.var1_dw = (u32)(weight & PEBS_LATENCY_MASK) ?: intel_get_tsx_weight(meminfo->tsx_tuning); } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } if (sample_type & PERF_SAMPLE_DATA_SRC) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e6ed8d0183d5..7537bf07a81d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1027,7 +1027,6 @@ struct perf_sample_data { u64 addr; struct perf_raw_record *raw; u64 period; - union perf_sample_weight weight; u64 txn; union perf_mem_data_src data_src; @@ -1037,6 +1036,7 @@ struct perf_sample_data { */ struct perf_branch_stack *br_stack; u64 *br_stack_cntr; + union perf_sample_weight weight; u64 type; u64 ip; @@ -1080,7 +1080,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->raw = NULL; data->br_stack_cntr = NULL; data->period = period; - data->weight.full = 0; data->data_src.val = PERF_MEM_NA; data->txn = 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 2f13b5f79b51..a1ccd12f30b0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7305,6 +7305,9 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += size; } + if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE) + data->weight.full = 0; + if (sample_type & PERF_SAMPLE_REGS_INTR) { /* regs dump ABI info */ int size = sizeof(u64); -- Gitee From 866df450cb18fc7733f3e8d6ae9a8a6f5f23ea66 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:58 -0700 Subject: [PATCH 29/45] perf: Use sample_flags for data_src 
ANBZ: #9586 commit e16fd7f2cb1a65555cfe76f983eaefb1eab7471f upstream. Use the new sample_flags to indicate whether the data_src field is filled by the PMU driver. Remove the data_src field from the perf_sample_data_init() to minimize the number of cache lines touched. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-6-kan.liang@linux.intel.com Signed-off-by: Kun(llfl) --- arch/powerpc/perf/core-book3s.c | 4 +++- arch/x86/events/intel/ds.c | 8 ++++++-- include/linux/perf_event.h | 3 +-- kernel/events/core.c | 3 +++ 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 4115c1b71956..42c9d97f7a34 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -2211,8 +2211,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, } if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC && - ppmu->get_mem_data_src) + ppmu->get_mem_data_src) { ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs); + data.sample_flags |= PERF_SAMPLE_DATA_SRC; + } if (event->attr.sample_type & PERF_SAMPLE_WEIGHT && ppmu->get_mem_weight) { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index d0583db59218..2681c35067d3 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1593,8 +1593,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, /* * data.data_src encodes the data source */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = get_data_src(event, pebs->dse); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } /* * We must however always use iregs for the unwinder to stay sane; the @@ -1832,8 +1834,10 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } - if (sample_type & PERF_SAMPLE_DATA_SRC) + if 
(sample_type & PERF_SAMPLE_DATA_SRC) { data->data_src.val = get_data_src(event, meminfo->aux); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) data->addr = meminfo->address; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7537bf07a81d..fe636cd18656 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1028,7 +1028,6 @@ struct perf_sample_data { struct perf_raw_record *raw; u64 period; u64 txn; - union perf_mem_data_src data_src; /* * The other fields, optionally {set,used} by @@ -1037,6 +1036,7 @@ struct perf_sample_data { struct perf_branch_stack *br_stack; u64 *br_stack_cntr; union perf_sample_weight weight; + union perf_mem_data_src data_src; u64 type; u64 ip; @@ -1080,7 +1080,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->raw = NULL; data->br_stack_cntr = NULL; data->period = period; - data->data_src.val = PERF_MEM_NA; data->txn = 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index a1ccd12f30b0..11857dee7c26 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7308,6 +7308,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE) data->weight.full = 0; + if (filtered_sample_type & PERF_SAMPLE_DATA_SRC) + data->data_src.val = PERF_MEM_NA; + if (sample_type & PERF_SAMPLE_REGS_INTR) { /* regs dump ABI info */ int size = sizeof(u64); -- Gitee From 81216cff7a10efbba6ee006bde3de3fffe138cd3 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 1 Sep 2022 06:09:59 -0700 Subject: [PATCH 30/45] perf: Use sample_flags for txn ANBZ: #9586 commit ee9db0e14b0575aa827579dc2471a29ec5fc6877 upstream. Use the new sample_flags to indicate whether the txn field is filled by the PMU driver. Remove the txn field from the perf_sample_data_init() to minimize the number of cache lines touched. 
Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20220901130959.1285717-7-kan.liang@linux.intel.com Signed-off-by: Kun(llfl) --- arch/x86/events/intel/ds.c | 8 ++++++-- include/linux/perf_event.h | 3 +-- kernel/events/core.c | 3 +++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2681c35067d3..dbd483c3acfc 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1686,9 +1686,11 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, data->weight.full = intel_get_tsx_weight(pebs->tsx_tuning); data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = intel_get_tsx_transaction(pebs->tsx_tuning, pebs->ax); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } } /* @@ -1842,9 +1844,11 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) data->addr = meminfo->address; - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (sample_type & PERF_SAMPLE_TRANSACTION) { data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning, gprs ? 
gprs->ax : 0); + data->sample_flags |= PERF_SAMPLE_TRANSACTION; + } } if (format_size & PEBS_DATACFG_XMMS) { diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fe636cd18656..7420fd42fe74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1027,7 +1027,6 @@ struct perf_sample_data { u64 addr; struct perf_raw_record *raw; u64 period; - u64 txn; /* * The other fields, optionally {set,used} by @@ -1037,6 +1036,7 @@ struct perf_sample_data { u64 *br_stack_cntr; union perf_sample_weight weight; union perf_mem_data_src data_src; + u64 txn; u64 type; u64 ip; @@ -1080,7 +1080,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->raw = NULL; data->br_stack_cntr = NULL; data->period = period; - data->txn = 0; } /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 11857dee7c26..9ef1249561a7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7311,6 +7311,9 @@ void perf_prepare_sample(struct perf_event_header *header, if (filtered_sample_type & PERF_SAMPLE_DATA_SRC) data->data_src.val = PERF_MEM_NA; + if (filtered_sample_type & PERF_SAMPLE_TRANSACTION) + data->txn = 0; + if (sample_type & PERF_SAMPLE_REGS_INTR) { /* regs dump ABI info */ int size = sizeof(u64); -- Gitee From 173a7ae710744a00954d0baa64b84cc530b02609 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:51 +0530 Subject: [PATCH 31/45] perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO} ANBZ: #9586 commit ee3e88dfec23153d0675b5d00522297b9adf657c upstream. PERF_MEM_LVLNUM_EXTN_MEM which can be used to indicate accesses to extension memory like CXL etc. PERF_MEM_LVL_IO can be used for IO accesses but it can not distinguish between local and remote IO. Introduce new field PERF_MEM_LVLNUM_IO which can be clubbed with PERF_MEM_REMOTE_REMOTE to indicate Remote IO accesses. 
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-2-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- include/uapi/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 9d207caf8ef8..0da89eba5c6d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1266,7 +1266,9 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +/* 5-0x8 available */ +#define PERF_MEM_LVLNUM_EXTN_MEM 0x09 /* Extension memory */ +#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -- Gitee From 2e8efc845b5d747508e088b5f2c1f6c96e547e8d Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:52 +0530 Subject: [PATCH 32/45] perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions ANBZ: #9586 commit 610c238041fbc682936d34132362a54a802600fe upstream. IBS_OP_DATA2 DataSrc provides detail about the location from which the data is accessed by load ops. Define macros for legacy and extended DataSrc values.
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-3-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- arch/x86/include/asm/amd-ibs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h index b40b2d4ea2ac..9369f411f3be 100644 --- a/arch/x86/include/asm/amd-ibs.h +++ b/arch/x86/include/asm/amd-ibs.h @@ -6,6 +6,22 @@ #include +/* IBS_OP_DATA2 DataSrc */ +#define IBS_DATA_SRC_LOC_CACHE 2 +#define IBS_DATA_SRC_DRAM 3 +#define IBS_DATA_SRC_REM_CACHE 4 +#define IBS_DATA_SRC_IO 7 + +/* IBS_OP_DATA2 DataSrc Extension */ +#define IBS_DATA_SRC_EXT_LOC_CACHE 1 +#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2 +#define IBS_DATA_SRC_EXT_DRAM 3 +#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5 +#define IBS_DATA_SRC_EXT_PMEM 6 +#define IBS_DATA_SRC_EXT_IO 7 +#define IBS_DATA_SRC_EXT_EXT_MEM 8 +#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12 + /* * IBS Hardware MSRs */ -- Gitee From ecbacf37f9ec00be47d1601e06a03cb1c83fd633 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:53 +0530 Subject: [PATCH 33/45] perf/x86/amd: Support PERF_SAMPLE_DATA_SRC ANBZ: #9586 commit 7c10dd0a88b1cc6ae4637fffb494c5e080027eb6 upstream. struct perf_mem_data_src is used to pass arch-specific memory access details in a generic form. These details get consumed by tools like perf mem and c2c. An IBS tagged load/store sample provides most of the information needed for these tools. Add logic to convert IBS-specific raw data into perf_mem_data_src.
Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-4-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- arch/x86/events/amd/ibs.c | 318 +++++++++++++++++++++++++++++++++++++- 1 file changed, 312 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index c251bc44c088..c9d472ae0bb6 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -688,6 +688,312 @@ static struct perf_ibs perf_ibs_op = { .get_count = get_ibs_op_count, }; +static void perf_ibs_get_mem_op(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_op = PERF_MEM_OP_NA; + + if (op_data3->ld_op) + data_src->mem_op = PERF_MEM_OP_LOAD; + else if (op_data3->st_op) + data_src->mem_op = PERF_MEM_OP_STORE; +} + +/* + * Processors having CPUID_Fn8000001B_EAX[11] aka IBS_CAPS_ZEN4 has + * more fine granular DataSrc encodings. Others have coarse. + */ +static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) +{ + if (ibs_caps & IBS_CAPS_ZEN4) + return (op_data2->data_src_hi << 3) | op_data2->data_src_lo; + + return op_data2->data_src_lo; +} + +static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + u8 ibs_data_src = perf_ibs_data_src(op_data2); + + data_src->mem_lvl = 0; + + /* + * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached + * memory accesses. So, check DcUcMemAcc bit early. 
+ */ + if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) { + data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT; + return; + } + + /* L1 Hit */ + if (op_data3->dc_miss == 0) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + return; + } + + /* L2 Hit */ + if (op_data3->l2_miss == 0) { + /* Erratum #1293 */ + if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || + !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + return; + } + } + + /* + * OP_DATA2 is valid only for load ops. Skip all checks which + * uses OP_DATA2[DataSrc]. + */ + if (data_src->mem_op != PERF_MEM_OP_LOAD) + goto check_mab; + + /* L3 Hit */ + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + return; + } + } else { + if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 | + PERF_MEM_LVL_HIT; + return; + } + } + + /* A peer cache in a near CCX */ + if (ibs_caps & IBS_CAPS_ZEN4 && + ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + return; + } + + /* A peer cache in a far CCX */ + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; + return; + } + } else { + if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) { + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; + return; + } + } + + /* DRAM */ + if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) { + if (op_data2->rmt_node == 0) + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + else + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + return; + } + + /* PMEM */ + if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) { + data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM; + if (op_data2->rmt_node) { 
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + + /* Extension Memory */ + if (ibs_caps & IBS_CAPS_ZEN4 && + ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) { + data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + + /* IO */ + if (ibs_data_src == IBS_DATA_SRC_EXT_IO) { + data_src->mem_lvl = PERF_MEM_LVL_IO; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + if (op_data2->rmt_node) { + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + /* IBS doesn't provide Remote socket detail */ + data_src->mem_hops = PERF_MEM_HOPS_1; + } + return; + } + +check_mab: + /* + * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding + * DC misses. However, such data may come from any level in mem + * hierarchy. IBS provides detail about both MAB as well as actual + * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set + * MAB only when IBS fails to provide DataSrc. 
+ */ + if (op_data3->dc_miss_no_mab_alloc) { + data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; + return; + } + + data_src->mem_lvl = PERF_MEM_LVL_NA; +} + +static bool perf_ibs_cache_hit_st_valid(void) +{ + /* 0: Uninitialized, 1: Valid, -1: Invalid */ + static int cache_hit_st_valid; + + if (unlikely(!cache_hit_st_valid)) { + if (boot_cpu_data.x86 == 0x19 && + (boot_cpu_data.x86_model <= 0xF || + (boot_cpu_data.x86_model >= 0x20 && + boot_cpu_data.x86_model <= 0x5F))) { + cache_hit_st_valid = -1; + } else { + cache_hit_st_valid = 1; + } + } + + return cache_hit_st_valid == 1; +} + +static void perf_ibs_get_mem_snoop(union ibs_op_data2 *op_data2, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + u8 ibs_data_src; + + data_src->mem_snoop = PERF_MEM_SNOOP_NA; + + if (!perf_ibs_cache_hit_st_valid() || + data_src->mem_op != PERF_MEM_OP_LOAD || + data_src->mem_lvl & PERF_MEM_LVL_L1 || + data_src->mem_lvl & PERF_MEM_LVL_L2 || + op_data2->cache_hit_st) + return; + + ibs_data_src = perf_ibs_data_src(op_data2); + + if (ibs_caps & IBS_CAPS_ZEN4) { + if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE || + ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE || + ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + } else if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + } +} + +static void perf_ibs_get_tlb_lvl(union ibs_op_data3 *op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_dtlb = PERF_MEM_TLB_NA; + + if (!op_data3->dc_lin_addr_valid) + return; + + if (!op_data3->dc_l1tlb_miss) { + data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; + return; + } + + if (!op_data3->dc_l2tlb_miss) { + data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; + return; + } + + data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS; +} + +static void perf_ibs_get_mem_lock(union ibs_op_data3 
*op_data3, + struct perf_sample_data *data) +{ + union perf_mem_data_src *data_src = &data->data_src; + + data_src->mem_lock = PERF_MEM_LOCK_NA; + + if (op_data3->dc_locked_op) + data_src->mem_lock = PERF_MEM_LOCK_LOCKED; +} + +#define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) + +static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, + struct perf_sample_data *data, + union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3) +{ + perf_ibs_get_mem_lvl(op_data2, op_data3, data); + perf_ibs_get_mem_snoop(op_data2, data); + perf_ibs_get_tlb_lvl(op_data3, data); + perf_ibs_get_mem_lock(op_data3, data); +} + +static __u64 perf_ibs_get_op_data2(struct perf_ibs_data *ibs_data, + union ibs_op_data3 *op_data3) +{ + __u64 val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)]; + + /* Erratum #1293 */ + if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF && + (op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { + /* + * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode. + * DataSrc=0 is 'No valid status' and RmtNode is invalid when + * DataSrc=0. 
+ */ + val = 0; + } + return val; +} + +static void perf_ibs_parse_ld_st_data(__u64 sample_type, + struct perf_ibs_data *ibs_data, + struct perf_sample_data *data) +{ + union ibs_op_data3 op_data3; + union ibs_op_data2 op_data2; + + data->data_src.val = PERF_MEM_NA; + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + + perf_ibs_get_mem_op(&op_data3, data); + if (data->data_src.mem_op != PERF_MEM_OP_LOAD && + data->data_src.mem_op != PERF_MEM_OP_STORE) + return; + + op_data2.val = perf_ibs_get_op_data2(ibs_data, &op_data3); + + if (sample_type & PERF_SAMPLE_DATA_SRC) { + perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3); + data->sample_flags |= PERF_SAMPLE_DATA_SRC; + } +} + +static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, + int check_rip) +{ + if (sample_type & PERF_SAMPLE_RAW || + (perf_ibs == &perf_ibs_op && + sample_type & PERF_SAMPLE_DATA_SRC)) + return perf_ibs->offset_max; + else if (check_rip) + return 3; + return 1; +} + static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); @@ -735,12 +1041,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) size = 1; offset = 1; check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK)); - if (event->attr.sample_type & PERF_SAMPLE_RAW) - offset_max = perf_ibs->offset_max; - else if (check_rip) - offset_max = 3; - else - offset_max = 1; + + offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip); + do { rdmsrl(msr + offset, *buf++); size++; @@ -793,6 +1096,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) data.raw = &raw; } + if (perf_ibs == &perf_ibs_op) + perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); + /* * rip recorded by IbsOpRip will not be consistent with rsp and rbp * recorded as part of interrupt regs. 
Thus we need to use rip from -- Gitee From f6ad77fabbf7b382f67ce809e410c7f41eea1f4e Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:54 +0530 Subject: [PATCH 34/45] perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT} ANBZ: #9586 commit 6b2ae4952ef8ac23b467bc10776404092b581143 upstream. IbsDcMissLat indicates the number of clock cycles from when a miss is detected in the data cache to when the data was delivered to the core. Similarly, IbsTagToRetCtr provides number of cycles from when the op was tagged to when the op was retired. Consider these fields for sample->weight. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-5-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- arch/x86/events/amd/ibs.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index c9d472ae0bb6..2aedb1183ea2 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -965,6 +965,7 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, { union ibs_op_data3 op_data3; union ibs_op_data2 op_data2; + union ibs_op_data op_data; data->data_src.val = PERF_MEM_NA; op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; @@ -980,6 +981,19 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, perf_ibs_get_data_src(ibs_data, data, &op_data2, &op_data3); data->sample_flags |= PERF_SAMPLE_DATA_SRC; } + + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE && op_data3.dc_miss && + data->data_src.mem_op == PERF_MEM_OP_LOAD) { + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { + data->weight.var1_dw = op_data3.dc_miss_lat; + data->weight.var2_w = op_data.tag_to_ret_ctr; + } else if (sample_type & PERF_SAMPLE_WEIGHT) { + data->weight.full = op_data3.dc_miss_lat; + } + data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; + } } static int 
perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, @@ -987,7 +1001,8 @@ static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, { if (sample_type & PERF_SAMPLE_RAW || (perf_ibs == &perf_ibs_op && - sample_type & PERF_SAMPLE_DATA_SRC)) + (sample_type & PERF_SAMPLE_DATA_SRC || + sample_type & PERF_SAMPLE_WEIGHT_TYPE))) return perf_ibs->offset_max; else if (check_rip) return 3; -- Gitee From 4bf0ab1fb3047ccf4b73af7f660b90e63b9604f2 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:55 +0530 Subject: [PATCH 35/45] perf/x86/amd: Support PERF_SAMPLE_ADDR ANBZ: #9586 commit cb2bb85f7ed8740ab5fc06bbec386faa39ba44ef upstream. IBS_DC_LINADDR provides the linear data address for the tagged load/ store operation. Populate perf sample address using it. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-6-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- arch/x86/events/amd/ibs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 2aedb1183ea2..9f5f95a50345 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -994,6 +994,11 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, } data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE; } + + if (sample_type & PERF_SAMPLE_ADDR && op_data3.dc_lin_addr_valid) { + data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + data->sample_flags |= PERF_SAMPLE_ADDR; + } } static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, @@ -1002,7 +1007,8 @@ static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, if (sample_type & PERF_SAMPLE_RAW || (perf_ibs == &perf_ibs_op && (sample_type & PERF_SAMPLE_DATA_SRC || - sample_type & PERF_SAMPLE_WEIGHT_TYPE))) + sample_type & PERF_SAMPLE_WEIGHT_TYPE || + sample_type & PERF_SAMPLE_ADDR))) return perf_ibs->offset_max; else 
if (check_rip) return 3; -- Gitee From fc4815938ce35d11df675dcfd478bf77f46f8187 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:56 +0530 Subject: [PATCH 36/45] perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR ANBZ: #9586 commit 5b26af6d2b7854639ddf893366bbca7e74fa7c54 upstream. IBS_DC_PHYSADDR provides the physical data address for the tagged load/ store operation. Populate perf sample physical address using it. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-7-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- arch/x86/events/amd/ibs.c | 8 +++++++- kernel/events/core.c | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 9f5f95a50345..0cd57e2c6a7e 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -999,6 +999,11 @@ static void perf_ibs_parse_ld_st_data(__u64 sample_type, data->addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; data->sample_flags |= PERF_SAMPLE_ADDR; } + + if (sample_type & PERF_SAMPLE_PHYS_ADDR && op_data3.dc_phy_addr_valid) { + data->phys_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)]; + data->sample_flags |= PERF_SAMPLE_PHYS_ADDR; + } } static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, @@ -1008,7 +1013,8 @@ static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, (perf_ibs == &perf_ibs_op && (sample_type & PERF_SAMPLE_DATA_SRC || sample_type & PERF_SAMPLE_WEIGHT_TYPE || - sample_type & PERF_SAMPLE_ADDR))) + sample_type & PERF_SAMPLE_ADDR || + sample_type & PERF_SAMPLE_PHYS_ADDR))) return perf_ibs->offset_max; else if (check_rip) return 3; diff --git a/kernel/events/core.c b/kernel/events/core.c index 9ef1249561a7..848df7999c75 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7329,7 +7329,8 @@ void perf_prepare_sample(struct perf_event_header *header, header->size += 
size; } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) + if (sample_type & PERF_SAMPLE_PHYS_ADDR && + filtered_sample_type & PERF_SAMPLE_PHYS_ADDR) data->phys_addr = perf_virt_to_phys(data->addr); #ifdef CONFIG_CGROUP_PERF -- Gitee From 864b49722f48fa0e2d2aa749d2ca223fddea986f Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 28 Sep 2022 15:27:57 +0530 Subject: [PATCH 37/45] perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file ANBZ: #9586 commit cfef80bad4cf79cdc964a53c98254dfa462be83f upstream. PERF_MEM_SNOOPX_PEER is defined only in tools uapi header. Although it's used only by perf tool, not defining it in kernel header can create problems in the future. Signed-off-by: Ravi Bangoria Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220928095805.596-8-ravi.bangoria@amd.com Signed-off-by: Kun(llfl) --- include/uapi/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 0da89eba5c6d..f6441c28ecba 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1286,7 +1286,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOP_SHIFT 19 #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -/* 1 free */ +#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ #define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ -- Gitee From cae951f1b1f8d6d3dc2d29453a1031b1f6ccd5af Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:39 +0530 Subject: [PATCH 38/45] tools headers UAPI: Sync include/uapi/linux/perf_event.h header with the kernel ANBZ: #9586 commit b7ddd38ccc723f0dca68151baed1e6c07c2a6005 upstream. Two new fields for mem_lvl_num have been introduced: PERF_MEM_LVLNUM_IO and PERF_MEM_LVLNUM_CXL which are required to support perf mem/c2c on AMD platform. 
Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/include/uapi/linux/perf_event.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 10d358698c91..5fe6d4ff2f68 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1254,7 +1254,9 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0xa available */ +/* 5-0x8 available */ +#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ #define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -- Gitee From 240bff1e3a2eef3801aa8aabfea7a7aac6a2e9e1 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:40 +0530 Subject: [PATCH 39/45] perf amd ibs: Sync arch/x86/include/asm/amd-ibs.h header with the kernel ANBZ: #9586 commit 160ae99365abeac216aeaa3407dce6cf038037e1 upstream. Although the new details added to this header are currently used by the kernel only, the tools copy needs to be in sync with the kernel file to avoid tools/perf/check-headers.sh warnings. Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-3-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/arch/x86/include/asm/amd-ibs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h index 21e01cf6162e..2d8b74699d9a 100644 --- a/tools/arch/x86/include/asm/amd-ibs.h +++ b/tools/arch/x86/include/asm/amd-ibs.h @@ -6,6 +6,22 @@ #include "msr-index.h" +/* IBS_OP_DATA2 DataSrc */ +#define IBS_DATA_SRC_LOC_CACHE 2 +#define IBS_DATA_SRC_DRAM 3 +#define IBS_DATA_SRC_REM_CACHE 4 +#define IBS_DATA_SRC_IO 7 + +/* IBS_OP_DATA2 DataSrc Extension */ +#define IBS_DATA_SRC_EXT_LOC_CACHE 1 +#define IBS_DATA_SRC_EXT_NEAR_CCX_CACHE 2 +#define IBS_DATA_SRC_EXT_DRAM 3 +#define IBS_DATA_SRC_EXT_FAR_CCX_CACHE 5 +#define IBS_DATA_SRC_EXT_PMEM 6 +#define IBS_DATA_SRC_EXT_IO 7 +#define IBS_DATA_SRC_EXT_EXT_MEM 8 +#define IBS_DATA_SRC_EXT_PEER_AGENT_MEM 12 + /* * IBS Hardware MSRs */ -- Gitee From dd30646310a071c69a57bb98520a28fecd12b961 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:41 +0530 Subject: [PATCH 40/45] perf mem: Add support for printing PERF_MEM_LVLNUM_{CXL|IO} ANBZ: #9586 commit 923396f6827d00ef18c1bf589551e5a604191261 upstream. Add support for printing these new fields in perf mem report. Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-4-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/util/mem-events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index b6535c554448..6ccd1e7cbbdd 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -296,6 +296,8 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_CXL] = "CXL", + [PERF_MEM_LVLNUM_IO] = "I/O", [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache", [PERF_MEM_LVLNUM_LFB] = "LFB", [PERF_MEM_LVLNUM_RAM] = "RAM", -- Gitee From bfe399c31f164619cad825854a198a40705a829d Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:42 +0530 Subject: [PATCH 41/45] perf mem/c2c: Set PERF_SAMPLE_WEIGHT for LOAD_STORE events ANBZ: #9586 commit 4173cc055dc92f199a43775775e54dc7fafd37b6 upstream. Currently perf sets PERF_SAMPLE_WEIGHT flag only for mem load events. Set it for combined load-store event as well which will enable recording of load latency by default on arch that does not support independent mem load event. Also document missing -W in perf-record man page. Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-5-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-record.txt | 1 + tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-mem.c | 1 + 3 files changed, 3 insertions(+) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 6992a5149d13..a2bc552cf23f 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -393,6 +393,7 @@ is enabled for all the sampling events. The sampled branch type is the same for The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k Note that this feature may not be available on all processors. +-W:: --weight:: Enable weightened sampling. An additional weight is recorded per sample and can be displayed with the weight and local_weight sort keys. 
This currently works for TSX diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 041a23935443..d60a4d7fc373 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -3452,6 +3452,7 @@ static int perf_c2c__record(int argc, const char **argv) */ if (e->tag) { e->record = true; + rec_argv[i++] = "-W"; } else { e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); e->record = true; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index e09aa7c75095..e8ed8e858a99 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -121,6 +121,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) (mem->operation & MEM_OPERATION_LOAD) && (mem->operation & MEM_OPERATION_STORE)) { e->record = true; + rec_argv[i++] = "-W"; } else { if (mem->operation & MEM_OPERATION_LOAD) { e = perf_mem_events__ptr(PERF_MEM_EVENTS__LOAD); -- Gitee From cb12996f3ce48196fb89d51e82144a5067477253 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:43 +0530 Subject: [PATCH 42/45] perf mem/c2c: Add load store event mappings for AMD ANBZ: #9586 commit f7b58cbdb3ff36eba8622e67eee66c10dd1c9995 upstream. The 'perf mem' and 'perf c2c' tools are wrappers around 'perf record' with mem load/ store events. IBS tagged load/store sample provides most of the information needed for these tools. Wire in the "ibs_op//" event as mem-ldst event for AMD. There are some limitations though: Only load/store micro-ops provide mem/c2c information. Whereas, IBS does not have a way to choose a particular type of micro-op to tag. This results in many non-LS micro-ops being tagged which appear as N/A in the perf report. IBS, being an uncore pmu from kernel point of view[1], does not support per process monitoring. Thus, perf mem/c2c on AMD are currently supported in per-cpu mode only. 
Example: $ sudo perf mem record -- -c 10000 ^C[ perf record: Woken up 227 times to write data ] [ perf record: Captured and wrote 58.760 MB perf.data (836978 samples) ] $ sudo perf mem report -F mem,sample,snoop Samples: 836K of event 'ibs_op//', Event count (approx.): 8418762 Memory access Samples Snoop N/A 700620 N/A L1 hit 126675 N/A L2 hit 424 N/A L3 hit 664 HitM L3 hit 10 N/A Local RAM hit 2 N/A Remote RAM (1 hop) hit 8558 N/A Remote Cache (1 hop) hit 3 N/A Remote Cache (1 hop) hit 2 HitM Remote Cache (2 hops) hit 10 HitM Remote Cache (2 hops) hit 6 N/A Uncached hit 4 N/A $ [1]: https://lore.kernel.org/lkml/20220829113347.295-1-ravi.bangoria@amd.com Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-6-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/Documentation/perf-c2c.txt | 14 ++++++++---- tools/perf/Documentation/perf-mem.txt | 3 ++- tools/perf/arch/x86/util/mem-events.c | 31 +++++++++++++++++++++++++-- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 9023a16c18dd..5f7f55624b6f 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -19,9 +19,10 @@ C2C stands for Cache To Cache. The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows you to track down the cacheline contentions. 
-On x86, the tool is based on load latency and precise store facility events +On Intel, the tool is based on load latency and precise store facility events provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling -with thresholding feature. +with thresholding feature. On AMD, the tool uses IBS op pmu (due to hardware +limitations, perf c2c is not supported on Zen3 cpus). These events provide: - memory address of the access @@ -49,7 +50,8 @@ RECORD OPTIONS -l:: --ldlat:: - Configure mem-loads latency. (x86 only) + Configure mem-loads latency. Supported on Intel and Arm64 processors + only. Ignored on other archs. -k:: --all-kernel:: @@ -135,11 +137,15 @@ Following perf record options are configured by default: -W,-d,--phys-data,--sample-cpu Unless specified otherwise with '-e' option, following events are monitored by -default on x86: +default on Intel: cpu/mem-loads,ldlat=30/P cpu/mem-stores/P +following on AMD: + + ibs_op// + and following on PowerPC: cpu/mem-loads/ diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 66177511c5c4..005c95580b1e 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -85,7 +85,8 @@ RECORD OPTIONS Be more verbose (show counter open errors, etc) --ldlat :: - Specify desired latency for loads event. (x86 only) + Specify desired latency for loads event. Supported on Intel and Arm64 + processors only. Ignored on other archs. In addition, for report all perf report options are valid, and for record all perf record options. 
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index 5214370ca4e4..f683ac702247 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include "util/pmu.h" +#include "util/env.h" #include "map_symbol.h" #include "mem-events.h" +#include "linux/string.h" static char mem_loads_name[100]; static bool mem_loads_name__init; @@ -12,18 +14,43 @@ static char mem_stores_name[100]; #define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } -static struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { +static struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX] = { E("ldlat-loads", "%s/mem-loads,ldlat=%u/P", "%s/events/mem-loads"), E("ldlat-stores", "%s/mem-stores/P", "%s/events/mem-stores"), E(NULL, NULL, NULL), }; +static struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = { + E(NULL, NULL, NULL), + E(NULL, NULL, NULL), + E("mem-ldst", "ibs_op//", "ibs_op"), +}; + +static int perf_mem_is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + + perf_env__cpuid(&env); + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + return 1; + return -1; +} + struct perf_mem_event *perf_mem_events__ptr(int i) { + /* 0: Uninitialized, 1: Yes, -1: No */ + static int is_amd; + if (i >= PERF_MEM_EVENTS__MAX) return NULL; - return &perf_mem_events[i]; + if (!is_amd) + is_amd = perf_mem_is_amd_cpu(); + + if (is_amd == 1) + return &perf_mem_events_amd[i]; + + return &perf_mem_events_intel[i]; } bool is_mem_loads_aux_event(struct evsel *leader) -- Gitee From 3981ea89b1c628d08edb07c70b4627cbadb69f46 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:44 +0530 Subject: [PATCH 43/45] perf mem/c2c: Avoid printing empty lines for unsupported events ANBZ: #9586 commit 2c5f652c442600cfd86fc2a7a7cfd8152f254971 upstream. 
The 'perf mem' and 'perf c2c' tools can be used with 3 different events: load, store and combined load-store. Some architectures might support only a partial set of events, in which case perf prints an empty line for each unsupported event. Avoid that. E.g., AMD Zen CPUs support only the combined load-store event and do not support the individual load and store events. Before patch (an empty line is printed for each of the unsupported load and store events): $ perf mem record -e list mem-ldst : available $ After patch (no empty lines): $ perf mem record -e list mem-ldst : available $ Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-7-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/util/mem-events.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 6ccd1e7cbbdd..0ccc6e5ccdf9 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -158,11 +158,12 @@ void perf_mem_events__list(void) for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { struct perf_mem_event *e = perf_mem_events__ptr(j); - fprintf(stderr, "%-13s%-*s%s\n", - e->tag ?: "", - verbose > 0 ? 25 : 0, - verbose > 0 ? perf_mem_events__name(j, NULL) : "", - e->supported ? ": available" : ""); + fprintf(stderr, "%-*s%-*s%s", + e->tag ? 13 : 0, + e->tag ? : "", + e->tag && verbose > 0 ? 25 : 0, + e->tag && verbose > 0 ? perf_mem_events__name(j, NULL) : "", + e->supported ? 
": available\n" : ""); } } -- Gitee From 68a49aa7d595241835d773a45b9d3889734dc91b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:45 +0530 Subject: [PATCH 44/45] perf mem: Print "LFB/MAB" for PERF_MEM_LVLNUM_LFB ANBZ: #9586 commit c72de11605c5e291981cd30225542169fb3da4df upstream. A hw component to track outstanding L1 Data Cache misses is called LFB (Line Fill Buffer) on Intel and Arm. However similar component exists on other arch with different names, for ex, it's called MAB (Miss Address Buffer) on AMD. Use 'LFB/MAB' instead of just 'LFB'. Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-8-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/util/mem-events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 0ccc6e5ccdf9..25c52d591478 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -284,7 +284,7 @@ static const char * const mem_lvl[] = { "HIT", "MISS", "L1", - "LFB", + "LFB/MAB", "L2", "L3", "Local RAM", @@ -300,7 +300,7 @@ static const char * const mem_lvlnum[] = { [PERF_MEM_LVLNUM_CXL] = "CXL", [PERF_MEM_LVLNUM_IO] = "I/O", [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache", - [PERF_MEM_LVLNUM_LFB] = "LFB", + [PERF_MEM_LVLNUM_LFB] = "LFB/MAB", [PERF_MEM_LVLNUM_RAM] = "RAM", [PERF_MEM_LVLNUM_PMEM] = "PMEM", [PERF_MEM_LVLNUM_NA] = "N/A", -- Gitee From 2eb119752c7e754bebc23471ed0463d9a2a63fb6 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 6 Oct 2022 21:09:46 +0530 Subject: 
[PATCH 45/45] perf script: Add missing fields in usage hint ANBZ: #9586 commit d79310700590b8b40d8c867012d6c899ea6fd505 upstream. A few fields are missing in the usage message printed when an unknown field option is passed. Add them to the list. [kun: conflicts solved.] Signed-off-by: Ravi Bangoria Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ali Saidi Cc: Ananth Narayan Cc: Andi Kleen Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Ian Rogers Cc: Ingo Molnar Cc: Joe Mario Cc: Kan Liang Cc: Kim Phillips Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Stephane Eranian Cc: Thomas Gleixner Cc: x86@kernel.org Link: https://lore.kernel.org/r/20221006153946.7816-9-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Kun(llfl) --- tools/perf/builtin-script.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6d761e4467a6..2574a7bcddaf 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3533,9 +3533,9 @@ int cmd_script(int argc, const char **argv) "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,srcline,period,iregs,uregs,brstack," - "brstacksym,flags,bpf-output,brstackinsn,brstackoff," - "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - "data_page_size,code_page_size", + "brstacksym,flags,data_src,weight,bpf-output,brstackinsn," + "brstackoff,callindent,insn,insnlen,synth,phys_addr,metric," + "misc,srccode,ipc,tod,data_page_size,code_page_size", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), -- Gitee