From 1764047ce25fff9e64c4fe98fb723146566a81a1 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 24 Jul 2022 14:00:12 +0800 Subject: [PATCH 01/58] perf symbol: Correct address for bss symbols ANBZ: #9620 commit 2d86612aacb7805f72873691a2644d7279ed0630 upstream. When using 'perf mem' and 'perf c2c', an issue is observed that tool reports the wrong offset for global data symbols. This is a common issue on both x86 and Arm64 platforms. Let's see an example, for a test program, below is the disassembly for its .bss section which is dumped with objdump: ... Disassembly of section .bss: 0000000000004040 : ... 0000000000004080 : ... 00000000000040c0 : ... 0000000000004100 : ... First we used 'perf mem record' to run the test program and then used 'perf --debug verbose=4 mem report' to observe what's the symbol info for 'buf1' and 'buf2' structures. # ./perf mem record -e ldlat-loads,ldlat-stores -- false_sharing.exe 8 # ./perf --debug verbose=4 mem report ... dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 sh_addr: 0x4040 sh_offset: 0x3028 symbol__new: buf2 0x30a8-0x30e8 ... dso__load_sym_internal: adjusting symbol: st_value: 0x4080 sh_addr: 0x4040 sh_offset: 0x3028 symbol__new: buf1 0x3068-0x30a8 ... The perf tool relies on libelf to parse symbols, in executable and shared object files, 'st_value' holds a virtual address; 'sh_addr' is the address at which section's first byte should reside in memory, and 'sh_offset' is the byte offset from the beginning of the file to the first byte in the section. The perf tool uses below formula to convert a symbol's memory address to a file address: file_address = st_value - sh_addr + sh_offset ^ ` Memory address We can see the final adjusted address ranges for buf1 and buf2 are [0x30a8-0x30e8) and [0x3068-0x30a8) respectively, apparently this is incorrect, in the code, the structure for 'buf1' and 'buf2' specifies compiler attribute with 64-byte alignment. The problem happens for 'sh_offset', libelf returns it as 0x3028 which is not 64-byte aligned, combining with disassembly, it's likely libelf doesn't respect the alignment for .bss section, therefore, it doesn't return the aligned value for 'sh_offset'. Suggested by Fangrui Song, ELF file contains program header which contains PT_LOAD segments, the fields p_vaddr and p_offset in PT_LOAD segments contain the execution info. A better choice for converting memory address to file address is using the formula: file_address = st_value - p_vaddr + p_offset This patch introduces elf_read_program_header() which returns the program header based on the passed 'st_value', then it uses the formula above to calculate the symbol file address; and the debugging log is updated respectively. After applying the change: # ./perf --debug verbose=4 mem report ... dso__load_sym_internal: adjusting symbol: st_value: 0x40c0 p_vaddr: 0x3d28 p_offset: 0x2d28 symbol__new: buf2 0x30c0-0x3100 ... dso__load_sym_internal: adjusting symbol: st_value: 0x4080 p_vaddr: 0x3d28 p_offset: 0x2d28 symbol__new: buf1 0x3080-0x30c0 ... Fixes: f17e04afaff84b5c ("perf report: Fix ELF symbol parsing") Reported-by: Chang Rui Suggested-by: Fangrui Song Signed-off-by: Leo Yan Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220724060013.171050-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/symbol-elf.c | 45 ++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 94809aed8b44..1cab29d45bfb 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -232,6 +232,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, return NULL; } +static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) +{ + size_t i, phdrnum; + u64 sz; + + if (elf_getphdrnum(elf, &phdrnum)) + return -1; + + for (i = 0; i < phdrnum; i++) { + if (gelf_getphdr(elf, i, phdr) == NULL) + return -1; + + if (phdr->p_type != PT_LOAD) + continue; + + sz = max(phdr->p_memsz, phdr->p_filesz); + if (!sz) + continue; + + if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz)) + return 0; + } + + /* Not found any valid program header */ + return -1; +} + static bool want_demangle(bool is_kernel_sym) { return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; @@ -1181,6 +1208,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, sym.st_value); used_opd = true; } + /* * When loading symbols in a data mapping, ABS symbols (which * has a value of SHN_ABS in its st_shndx) failed at @@ -1217,11 +1245,20 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, goto out_elf_end; } else if ((used_opd && runtime_ss->adjust_symbols) || (!used_opd && syms_ss->adjust_symbols)) { + GElf_Phdr phdr; + + if (elf_read_program_header(syms_ss->elf, + (u64)sym.st_value, &phdr)) { + pr_warning("%s: failed to find program header for " + "symbol: %s st_value: %#" PRIx64 "\n", + __func__, elf_name, (u64)sym.st_value); + continue; + } pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); -- Gitee From 404595418b25c955aae270f561b94c627348d0ea Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 11 Jul 2022 12:31:44 +0300 Subject: [PATCH 02/58] perf tools: Fix dso_id inode generation comparison ANBZ: #9620 commit 68566a7cf56bf3148797c218ed45a9de078ef47c upstream. Synthesized MMAP events have zero ino_generation, so do not compare them to DSOs with a real ino_generation otherwise we end up with a DSO without a build id. Fixes: 0e3149f86b99ddab ("perf dso: Move dso_id from 'struct map' to 'struct dso'") Signed-off-by: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: kvm@vger.kernel.org Cc: Namhyung Kim Link: https://lore.kernel.org/r/20220711093218.10967-2-adrian.hunter@intel.com [ Added clarification to the comment from Ian + more detailed explanation from Adrian ] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/dsos.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 183a81d5b2f9..2db91121bdaf 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -20,8 +20,19 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) if (a->ino > b->ino) return -1; if (a->ino < b->ino) return 1; - if (a->ino_generation > b->ino_generation) return -1; - if (a->ino_generation < b->ino_generation) return 1; + /* + * Synthesized MMAP events have zero ino_generation, avoid comparing + * them with MMAP events with actual ino_generation. + * + * I found it harmful because the mismatch resulted in a new + * dso that did not have a build ID whereas the original dso did have a + * build ID. The build ID was essential because the object was not found + * otherwise. - Adrian + */ + if (a->ino_generation && b->ino_generation) { + if (a->ino_generation > b->ino_generation) return -1; + if (a->ino_generation < b->ino_generation) return 1; + } return 0; } -- Gitee From ea7903f9fef8a441d08556c10af84880d7716fd0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 31 Jul 2022 09:49:23 -0700 Subject: [PATCH 03/58] perf symbol: Fail to read phdr workaround ANBZ: #9620 commit 6d518ac7be6223811ab947897273b1bbef846180 upstream. The perf jvmti agent doesn't create program headers, in this case fallback on section headers as happened previously. Committer notes: To test this, from a public post by Ian: 1) download a Java workload dacapo-9.12-MR1-bach.jar from https://sourceforge.net/projects/dacapobench/ 2) build perf such as "make -C tools/perf O=/tmp/perf NO_LIBBFD=1" it should detect Java and create /tmp/perf/libperf-jvmti.so 3) run perf with the jvmti agent: perf record -k 1 java -agentpath:/tmp/perf/libperf-jvmti.so -jar dacapo-9.12-MR1-bach.jar -n 10 fop 4) run perf inject: perf inject -i perf.data -o perf-injected.data -j 5) run perf report perf report -i perf-injected.data | grep org.apache.fop With this patch reverted I see lots of symbols like: 0.00% java jitted-388040-4656.so [.] org.apache.fop.fo.FObj.bind(org.apache.fop.fo.PropertyList) With the patch (2d86612aacb7805f ("perf symbol: Correct address for bss symbols")) I see lots of: dso__load_sym_internal: failed to find program header for symbol: Lorg/apache/fop/fo/FObj;bind(Lorg/apache/fop/fo/PropertyList;)V st_value: 0x40 Fixes: 2d86612aacb7805f ("perf symbol: Correct address for bss symbols") Reviewed-by: Leo Yan Signed-off-by: Ian Rogers Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20220731164923.691193-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/symbol-elf.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 1cab29d45bfb..d8d79a9ec775 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1249,16 +1249,29 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, if (elf_read_program_header(syms_ss->elf, (u64)sym.st_value, &phdr)) { - pr_warning("%s: failed to find program header for " + pr_debug4("%s: failed to find program header for " "symbol: %s st_value: %#" PRIx64 "\n", __func__, elf_name, (u64)sym.st_value); - continue; + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)shdr.sh_addr, + (u64)shdr.sh_offset); + /* + * Fail to find program header, let's rollback + * to use shdr.sh_addr and shdr.sh_offset to + * calibrate symbol's file address, though this + * is not necessary for normal C ELF file, we + * still need to handle java JIT symbols in this + * case. + */ + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } else { + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } - pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", - __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, - (u64)phdr.p_offset); - sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); -- Gitee From 221dd2556e58b0df056f35910fa4f0b2edf958ec Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 6 Aug 2022 16:51:26 +0200 Subject: [PATCH 04/58] perf probe: Fix an error handling path in 'parse_perf_probe_command()' ANBZ: #9620 commit 4bf6dcaa93bcd083a13c278a91418fe10e6d23a0 upstream. If a memory allocation fail, we should branch to the error handling path in order to free some resources allocated a few lines above. Fixes: 15354d54698648e2 ("perf probe: Generate event name with line number") Signed-off-by: Christophe JAILLET Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: kernel-janitors@vger.kernel.org Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/b71bcb01fa0c7b9778647235c3ab490f699ba278.1659797452.git.christophe.jaillet@wanadoo.fr Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/probe-event.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d103084fcd56..97e2a72bd6f5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1760,8 +1760,10 @@ int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev) if (!pev->event && pev->point.function && pev->point.line && !pev->point.lazy_line && !pev->point.offset) { if (asprintf(&pev->event, "%s_L%d", pev->point.function, - pev->point.line) < 0) - return -ENOMEM; + pev->point.line) < 0) { + ret = -ENOMEM; + goto out; + } } /* Copy arguments and ensure return probe has no C argument */ -- Gitee From 2b13b357bec7e2a75f687f661efc96aeb2a81c54 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Jul 2022 10:39:46 +0100 Subject: [PATCH 05/58] perf python: Fix build when PYTHON_CONFIG is user supplied ANBZ: #9620 commit bc9e7fe313d5e56d4d5f34bcc04d1165f94f86fb upstream. The previous change to Python autodetection had a small mistake where the auto value was used to determine the Python binary, rather than the user supplied value. The Python binary is only used for one part of the build process, rather than the final linking, so it was producing correct builds in most scenarios, especially when the auto detected value matched what the user wanted, or the system only had a valid set of Pythons. Change it so that the Python binary path is derived from either the PYTHON_CONFIG value or PYTHON value, depending on what is specified by the user. This was the original intention. This error was spotted in a build failure an odd cross compilation environment after commit 4c41cb46a732fe82 ("perf python: Prefer python3") was merged. Fixes: 630af16eee495f58 ("perf tools: Use Python devtools for version autodetection rather than runtime") Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20220728093946.1337642-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 18d064026c10..ee273d65fd8d 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -261,7 +261,7 @@ endif # defined. get-executable-or-default fails with an error if the first argument is supplied but # doesn't exist. override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO)) -override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO))) +override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG))) grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) -- Gitee From 00c93c70c714c4f262d7d4946513ffbfe49a0cdf Mon Sep 17 00:00:00 2001 From: Lieven Hey Date: Thu, 15 Sep 2022 11:29:10 +0200 Subject: [PATCH 06/58] perf jit: Include program header in ELF files ANBZ: #9620 commit babd04386b1df8c364cdaa39ac0e54349502e1e5 upstream. The missing header makes it hard for programs like elfutils to open these files. Fixes: 2d86612aacb7805f ("perf symbol: Correct address for bss symbols") Reviewed-by: Leo Yan Signed-off-by: Lieven Hey Tested-by: Leo Yan Cc: Leo Yan Link: https://lore.kernel.org/r/20220915092910.711036-1-lieven.hey@kdab.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/genelf.c | 14 ++++++++++++++ tools/perf/util/genelf.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index aed49806a09b..c76b0e4ac254 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -247,6 +247,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, Elf_Data *d; Elf_Scn *scn; Elf_Ehdr *ehdr; + Elf_Phdr *phdr; Elf_Shdr *shdr; uint64_t eh_frame_base_offset; char *strsym = NULL; @@ -281,6 +282,19 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, ehdr->e_version = EV_CURRENT; ehdr->e_shstrndx= unwinding ? 4 : 2; /* shdr index for section name */ + /* + * setup program header + */ + phdr = elf_newphdr(e, 1); + phdr[0].p_type = PT_LOAD; + phdr[0].p_offset = 0; + phdr[0].p_vaddr = 0; + phdr[0].p_paddr = 0; + phdr[0].p_filesz = csize; + phdr[0].p_memsz = csize; + phdr[0].p_flags = PF_X | PF_R; + phdr[0].p_align = 8; + /* * setup text section */ diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index fdeef73cf85f..43dac6998d2b 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -53,8 +53,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #if GEN_ELF_CLASS == ELFCLASS64 #define elf_newehdr elf64_newehdr +#define elf_newphdr elf64_newphdr #define elf_getshdr elf64_getshdr #define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr #define Elf_Shdr Elf64_Shdr #define Elf_Sym Elf64_Sym #define ELF_ST_TYPE(a) ELF64_ST_TYPE(a) @@ -62,8 +64,10 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a) #else #define elf_newehdr elf32_newehdr +#define elf_newphdr elf32_newphdr #define elf_getshdr elf32_getshdr #define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr #define Elf_Shdr Elf32_Shdr #define Elf_Sym Elf32_Sym #define ELF_ST_TYPE(a) ELF32_ST_TYPE(a) -- Gitee From 604f712a3a7d38b60f947fd00b3443147d57b29e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 14 Sep 2022 15:24:29 +0300 Subject: [PATCH 07/58] perf kcore_copy: Do not check /proc/modules is unchanged ANBZ: #9620 commit 5b427df27b94aec1312cace48a746782a0925c53 upstream. /proc/kallsyms and /proc/modules are compared before and after the copy in order to ensure no changes during the copy. However /proc/modules also might change due to reference counts changing even though that does not make any difference. Any modules loaded or unloaded should be visible in changes to kallsyms, so it is not necessary to check /proc/modules also anyway. Remove the comparison checking that /proc/modules is unchanged. Fixes: fc1b691d7651d949 ("perf buildid-cache: Add ability to add kcore to the cache") Reported-by: Daniel Dao Signed-off-by: Adrian Hunter Tested-by: Daniel Dao Acked-by: Namhyung Kim Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/r/20220914122429.8770-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/symbol-elf.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index d8d79a9ec775..3e423a920015 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -2002,8 +2002,8 @@ static int kcore_copy__compare_file(const char *from_dir, const char *to_dir, * unusual. One significant peculiarity is that the mapping (start -> pgoff) * is not the same for the kernel map and the modules map. That happens because * the data is copied adjacently whereas the original kcore has gaps. Finally, - * kallsyms and modules files are compared with their copies to check that - * modules have not been loaded or unloaded while the copies were taking place. + * kallsyms file is compared with its copy to check that modules have not been + * loaded or unloaded while the copies were taking place. * * Return: %0 on success, %-1 on failure. */ @@ -2066,9 +2066,6 @@ int kcore_copy(const char *from_dir, const char *to_dir) goto out_extract_close; } - if (kcore_copy__compare_file(from_dir, to_dir, "modules")) - goto out_extract_close; - if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms")) goto out_extract_close; -- Gitee From 6ce580224a7ca0fedcc438f6357588ae311789b6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 12 Oct 2022 11:22:58 +0300 Subject: [PATCH 08/58] perf intel-pt: Fix segfault in intel_pt_print_info() with uClibc ANBZ: #9620 commit 5a3d47071f0ced0431ef82a5fb6bd077ed9493db upstream. uClibc segfaulted because NULL was passed as the format to fprintf(). That happened because one of the format strings was missing and intel_pt_print_info() didn't check that before calling fprintf(). Add the missing format string, and check format is not NULL before calling fprintf(). Fixes: 11fa7cb86b56d361 ("perf tools: Pass Intel PT information for decoding MTC and CYC") Signed-off-by: Adrian Hunter Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221012082259.22394-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/intel-pt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 9454beeffae1..306d17a3f4c7 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3295,6 +3295,7 @@ static const char * const intel_pt_info_fmts[] = { [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", + [INTEL_PT_MTC_FREQ_BITS] = " MTC freq bits %#"PRIx64"\n", [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", @@ -3309,8 +3310,12 @@ static void intel_pt_print_info(__u64 *arr, int start, int finish) if (!dump_trace) return; - for (i = start; i <= finish; i++) - fprintf(stdout, intel_pt_info_fmts[i], arr[i]); + for (i = start; i <= finish; i++) { + const char *fmt = intel_pt_info_fmts[i]; + + if (fmt) + fprintf(stdout, fmt, arr[i]); + } } static void intel_pt_print_info_str(const char *name, const char *str) -- Gitee From 5f4e5c45d76946336079d4dbc0899afc9dc258fe Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 4 Oct 2022 14:12:35 -0500 Subject: [PATCH 09/58] perf: Skip and warn on unknown format 'configN' attrs ANBZ: #9620 commit e552b7be12ed62357df84392efa525ecb01910fb upstream. If the kernel exposes a new perf_event_attr field in a format attr, perf will return an error stating the specified PMU can't be found. For example, a format attr with 'config3:0-63' causes an error as config3 is unknown to perf. This causes a compatibility issue between a newer kernel with older perf tool. Before this change with a kernel adding 'config3' I get: $ perf record -e arm_spe// -- true event syntax error: 'arm_spe//' \___ Cannot find PMU `arm_spe'. Missing kernel support? Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events After this change, I get: $ perf record -e arm_spe// -- true WARNING: 'arm_spe_0' format 'inv_event_filter' requires 'perf_event_attr::config3' which is not supported by this version of perf! [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.091 MB perf.data ] To support unknown configN formats, rework the YACC implementation to pass any config[0-9]+ format to perf_pmu__new_format() to handle with a warning. Reviewed-by: Namhyung Kim Signed-off-by: Rob Herring Tested-by: Leo Yan Cc: Alexander Shishkin Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220914-arm-perf-tool-spe1-2-v2-v4-1-83c098e6212e@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Conflicts: tools/perf/util/pmu.h Signed-off-by: Jing Zhang --- tools/perf/util/parse-events.c | 3 +++ tools/perf/util/pmu.c | 17 +++++++++++++++++ tools/perf/util/pmu.h | 2 ++ tools/perf/util/pmu.l | 2 -- tools/perf/util/pmu.y | 15 ++++----------- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 93a02864ba38..91a99957feb6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -361,6 +361,9 @@ __add_event(struct list_head *list, int *idx, struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : cpu_list ? perf_cpu_map__new(cpu_list) : NULL; + if (pmu) + perf_pmu__warn_invalid_formats(pmu); + if (pmu && attr->type == PERF_TYPE_RAW) perf_pmu__warn_invalid_config(pmu, attr->config, name); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f827299f7249..8b7243b4636e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -983,6 +983,23 @@ static struct perf_pmu *pmu_lookup(const char *name) return pmu; } +void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu) +{ + struct perf_pmu_format *format; + + /* fake pmu doesn't have format list */ + if (pmu == &perf_pmu__fake) + return; + + list_for_each_entry(format, &pmu->format, list) + if (format->value >= PERF_PMU_FORMAT_VALUE_CONFIG_END) { + pr_warning("WARNING: '%s' format '%s' requires 'perf_event_attr::config%d'" + "which is not supported by this version of perf!\n", + pmu->name, format->name, format->value); + return; + } +} + static struct perf_pmu *pmu_find(const char *name) { struct perf_pmu *pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 3fba6048182b..fc7bcb340461 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -16,6 +16,7 @@ enum { PERF_PMU_FORMAT_VALUE_CONFIG, PERF_PMU_FORMAT_VALUE_CONFIG1, PERF_PMU_FORMAT_VALUE_CONFIG2, + PERF_PMU_FORMAT_VALUE_CONFIG_END, }; #define PERF_PMU_FORMAT_BITS 64 @@ -136,5 +137,6 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, const char *name); double perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__match(char *pattern, char *name, char *tok); +void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l index a15d9fbd7c0e..58b4926cfaca 100644 --- a/tools/perf/util/pmu.l +++ b/tools/perf/util/pmu.l @@ -27,8 +27,6 @@ num_dec [0-9]+ {num_dec} { return value(10); } config { return PP_CONFIG; } -config1 { return PP_CONFIG1; } -config2 { return PP_CONFIG2; } - { return '-'; } : { return ':'; } , { return ','; } diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y index bfd7e8509869..283efe059819 100644 --- a/tools/perf/util/pmu.y +++ b/tools/perf/util/pmu.y @@ -20,7 +20,7 @@ do { \ %} -%token PP_CONFIG PP_CONFIG1 PP_CONFIG2 +%token PP_CONFIG %token PP_VALUE PP_ERROR %type PP_VALUE %type bit_term @@ -47,18 +47,11 @@ PP_CONFIG ':' bits $3)); } | -PP_CONFIG1 ':' bits +PP_CONFIG PP_VALUE ':' bits { ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, - $3)); -} -| -PP_CONFIG2 ':' bits -{ - ABORT_ON(perf_pmu__new_format(format, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, - $3)); + $2, + $4)); } bits: -- Gitee From a1d90ad406bedea1960827ac067221647b35a6f4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 26 Oct 2022 10:27:36 +0300 Subject: [PATCH 10/58] perf auxtrace: Fix address filter symbol name match for modules ANBZ: #9620 commit cba04f3136b658583adb191556f99d087589c1cc upstream. For modules, names from kallsyms__parse() contain the module name which meant that module symbols did not match exactly by name. Fix by matching the name string up to the separating tab character. Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Signed-off-by: Adrian Hunter Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026072736.2982-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/auxtrace.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 6e43f665577d..4c8f9ec0d885 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2289,11 +2289,19 @@ struct sym_args { bool near; }; +static bool kern_sym_name_match(const char *kname, const char *name) +{ + size_t n = strlen(name); + + return !strcmp(kname, name) || + (!strncmp(kname, name, n) && kname[n] == '\t'); +} + static bool kern_sym_match(struct sym_args *args, const char *name, char type) { /* A function with the same name, and global or the n'th found or any */ return kallsyms__is_function(type) && - !strcmp(name, args->name) && + kern_sym_name_match(name, args->name) && ((args->global && isupper(type)) || (args->selected && ++(args->cnt) == args->idx) || (!args->global && !args->selected)); -- Gitee From 20e51d3528453128dfd7b2c679d516dc13048aad Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Mon, 14 Nov 2022 17:57:33 +0800 Subject: [PATCH 11/58] bpf, perf: Use subprog name when reporting subprog ksymbol ANBZ: #9620 commit 47df8a2f78bc34ff170d147d05b121f84e252b85 upstream. Since commit bfea9a8574f3 ("bpf: Add name to struct bpf_ksym"), when reporting subprog ksymbol to perf, prog name instead of subprog name is used. The backtrace of bpf program with subprogs will be incorrect as shown below: ffffffffc02deace bpf_prog_e44a3057dcb151f8_overwrite+0x66 ffffffffc02de9f7 bpf_prog_e44a3057dcb151f8_overwrite+0x9f ffffffffa71d8d4e trace_call_bpf+0xce ffffffffa71c2938 perf_call_bpf_enter.isra.0+0x48 overwrite is the entry program and it invokes the overwrite_htab subprog through bpf_loop, but in above backtrace, overwrite program just jumps inside itself. Fixing it by using subprog name when reporting subprog ksymbol. After the fix, the output of perf script will be correct as shown below: ffffffffc031aad2 bpf_prog_37c0bec7d7c764a4_overwrite_htab+0x66 ffffffffc031a9e7 bpf_prog_c7eb827ef4f23e71_overwrite+0x9f ffffffffa3dd8d4e trace_call_bpf+0xce ffffffffa3dc2938 perf_call_bpf_enter.isra.0+0x48 Fixes: bfea9a8574f3 ("bpf: Add name to struct bpf_ksym") Signed-off-by: Hou Tao Signed-off-by: Daniel Borkmann Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/20221114095733.158588-1-houtao@huaweicloud.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 405856d12ed2..01b3c9b90e41 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8815,7 +8815,7 @@ static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog, PERF_RECORD_KSYMBOL_TYPE_BPF, (u64)(unsigned long)subprog->bpf_func, subprog->jited_len, unregister, - prog->aux->ksym.name); + subprog->aux->ksym.name); } } } -- Gitee From 7b1e27ca8b28e768e4a88aa9294edf1225a8e33d Mon Sep 17 00:00:00 2001 From: Chen Zhongjin Date: Fri, 11 Nov 2022 18:36:53 +0800 Subject: [PATCH 12/58] perf: Fix possible memleak in pmu_dev_alloc() ANBZ: #9620 commit e8d7a90c08ce963c592fb49845f2ccc606a2ac21 upstream. In pmu_dev_alloc(), when dev_set_name() failed, it will goto free_dev and call put_device(pmu->dev) to release it. However pmu->dev->release is assigned after this, which makes warning and memleak. Call dev_set_name() after pmu->dev->release = pmu_dev_release to fix it. Device '(null)' does not have a release() function... WARNING: CPU: 2 PID: 441 at drivers/base/core.c:2332 device_release+0x1b9/0x240 ... Call Trace: kobject_put+0x17f/0x460 put_device+0x20/0x30 pmu_dev_alloc+0x152/0x400 perf_pmu_register+0x96b/0xee0 ... kmemleak: 1 new suspected memory leaks (see /sys/kernel/debug/kmemleak) unreferenced object 0xffff888014759000 (size 2048): comm "modprobe", pid 441, jiffies 4294931444 (age 38.332s) backtrace: [<0000000005aed3b4>] kmalloc_trace+0x27/0x110 [<000000006b38f9b8>] pmu_dev_alloc+0x50/0x400 [<00000000735f17be>] perf_pmu_register+0x96b/0xee0 [<00000000e38477f1>] 0xffffffffc0ad8603 [<000000004e162216>] do_one_initcall+0xd0/0x4e0 ... Fixes: abe43400579d ("perf: Sysfs enumeration") Signed-off-by: Chen Zhongjin Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20221111103653.91058-1-chenzhongjin@huawei.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 01b3c9b90e41..c35b9df41e05 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10916,13 +10916,15 @@ static int pmu_dev_alloc(struct pmu *pmu) pmu->dev->groups = pmu->attr_groups; device_initialize(pmu->dev); - ret = dev_set_name(pmu->dev, "%s", pmu->name); - if (ret) - goto free_dev; dev_set_drvdata(pmu->dev, pmu); pmu->dev->bus = &pmu_bus; pmu->dev->release = pmu_dev_release; + + ret = dev_set_name(pmu->dev, "%s", pmu->name); + if (ret) + goto free_dev; + ret = device_add(pmu->dev); if (ret) goto free_dev; -- Gitee From c9bd2b3e80a6217456cd9889021dcbee3c39f69e Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:34 +0000 Subject: [PATCH 13/58] perf trace: Return error if a system call doesn't exist ANBZ: #9620 commit d4223e1776c30b2ce8d0e6eaadcbf696e60fca3c upstream. When a system call is not detected, the reason is either because the system call ID is out of scope or failure to find the corresponding path in the sysfs, trace__read_syscall_info() returns zero. Finally, without returning an error value it introduces confusion for the caller. This patch lets the function trace__read_syscall_info() to return -EEXIST when a system call doesn't exist. Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages") Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221121075237.127706-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a463ce8fb8e0..6392fd36acb2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1774,11 +1774,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) #endif sc = trace->syscalls.table + id; if (sc->nonexistent) - return 0; + return -EEXIST; if (name == NULL) { sc->nonexistent = true; - return 0; + return -EEXIST; } sc->name = name; -- Gitee From 09ab1c3a4e293d60bf7a70ea48c0e220395d5618 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:33 +0000 Subject: [PATCH 14/58] perf trace: Use macro RAW_SYSCALL_ARGS_NUM to replace number ANBZ: #9620 commit eadcab4c7a66e1df03d32da0db55d89fd9343fcc upstream. This patch defines a macro RAW_SYSCALL_ARGS_NUM to replace the open coded number '6'. Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: bpf@vger.kernel.org Link: https://lore.kernel.org/r/20221121075237.127706-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 03e9a5d8eb55 ("perf trace: Handle failure when trace point folder is missed") Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-trace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6392fd36acb2..74c687bca870 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -87,6 +87,8 @@ # define F_LINUX_SPECIFIC_BASE 1024 #endif +#define RAW_SYSCALL_ARGS_NUM 6 + /* * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100 */ @@ -107,7 +109,7 @@ struct syscall_fmt { const char *sys_enter, *sys_exit; } bpf_prog_name; - struct syscall_arg_fmt arg[6]; + struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM]; u8 nr_args; bool errpid; bool timeout; @@ -1216,7 +1218,7 @@ struct syscall { */ struct bpf_map_syscall_entry { bool enabled; - u16 string_args_len[6]; + u16 string_args_len[RAW_SYSCALL_ARGS_NUM]; }; /* @@ -1641,7 +1643,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) { int idx; - if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0) + if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0) nr_args = sc->fmt->nr_args; sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); @@ -1792,7 +1794,8 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields)) + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? + RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) return -ENOMEM; if (IS_ERR(sc->tp_format)) -- Gitee From c03f76b0ed3b6273a2cfb60c807dbe33158ce65b Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Mon, 21 Nov 2022 07:52:35 +0000 Subject: [PATCH 15/58] perf trace: Handle failure when trace point folder is missed ANBZ: #9620 commit 03e9a5d8eb552a1bf692a9c8a5ecd50f4e428006 upstream. On Arm64 a case is perf tools fails to find the corresponding trace point folder for system calls listed in the table 'syscalltbl_arm64', e.g. the generated system call table contains "lookup_dcookie" but we cannot find out the matched trace point folder for it. We need to figure out if there have any issue for the generated system call table, on the other hand, we need to handle the case when trace point folder is missed under sysfs, this patch sets the flag syscall::nonexistent as true and returns the error from trace__read_syscall_info(). Another problem is for trace__syscall_info(), it returns two different values if a system call doesn't exist: at the first time calling trace__syscall_info() it returns NULL when the system call doesn't exist, later if call trace__syscall_info() again for the same missed system call, it returns pointer of syscall. trace__syscall_info() checks the condition 'syscalls.table[id].name == NULL', but the name will be assigned in the first invoking even the system call is not found. So checking system call's name in trace__syscall_info() is not the right thing to do, this patch simply checks flag syscall::nonexistent to make decision if a system call exists or not, finally trace__syscall_info() returns the consistent result (NULL) if a system call doesn't existed. Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages") Signed-off-by: Leo Yan Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: bpf@vger.kernel.org Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221121075237.127706-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-trace.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 74c687bca870..d037bd4fd298 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1794,13 +1794,19 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->tp_format = trace_event__tp_format("syscalls", tp_name); } + /* + * Fails to read trace point format via sysfs node, so the trace point + * doesn't exist. Set the 'nonexistent' flag as true. + */ + if (IS_ERR(sc->tp_format)) { + sc->nonexistent = true; + return PTR_ERR(sc->tp_format); + } + if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) return -ENOMEM; - if (IS_ERR(sc->tp_format)) - return PTR_ERR(sc->tp_format); - sc->args = sc->tp_format->format.fields; /* * We need to check and discard the first variable '__syscall_nr' @@ -2117,11 +2123,8 @@ static struct syscall *trace__syscall_info(struct trace *trace, (err = trace__read_syscall_info(trace, id)) != 0) goto out_cant_read; - if (trace->syscalls.table[id].name == NULL) { - if (trace->syscalls.table[id].nonexistent) - return NULL; + if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) goto out_cant_read; - } return &trace->syscalls.table[id]; -- Gitee From a82963d2c82f1385c3eaa0452af96b9908a279e4 Mon Sep 17 00:00:00 2001 From: Ajay Kaher Date: Wed, 23 Nov 2022 15:48:16 +0530 Subject: [PATCH 16/58] perf symbol: correction while adjusting symbol ANBZ: #9620 commit 6f520ce17920b3cdfbd2479b3ccf27f9706219d0 upstream. perf doesn't provide proper symbol information for specially crafted .debug files. Sometimes .debug file may not have similar program header as runtime ELF file. For example if we generate .debug file using objcopy --only-keep-debug resulting file will not contain .text, .data and other runtime sections. That means corresponding program headers will have zero FileSiz and modified Offset. Example: program header of text section of libxxx.so: Type Offset VirtAddr PhysAddr FileSiz MemSiz Flags Align LOAD 0x00000000003d3000 0x00000000003d3000 0x00000000003d3000 0x000000000055ae80 0x000000000055ae80 R E 0x1000 Same program header after executing: objcopy --only-keep-debug libxxx.so libxxx.so.debug LOAD 0x0000000000001000 0x00000000003d3000 0x00000000003d3000 0x0000000000000000 0x000000000055ae80 R E 0x1000 Offset and FileSiz have been changed. Following formula will not provide correct value, if program header taken from .debug file (syms_ss): sym.st_value -= phdr.p_vaddr - phdr.p_offset; Correct program header information is located inside runtime ELF file (runtime_ss). Fixes: 2d86612aacb7805f ("perf symbol: Correct address for bss symbols") Signed-off-by: Ajay Kaher Cc: Alexander Shishkin Cc: Alexey Makhalov Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Srivatsa S. Bhat Cc: Steven Rostedt (VMware) Cc: Vasavi Sirnapalli Link: http://lore.kernel.org/lkml/1669198696-50547-1-git-send-email-akaher@vmware.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 3e423a920015..5221f272f85c 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1247,7 +1247,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, (!used_opd && syms_ss->adjust_symbols)) { GElf_Phdr phdr; - if (elf_read_program_header(syms_ss->elf, + if (elf_read_program_header(runtime_ss->elf, (u64)sym.st_value, &phdr)) { pr_debug4("%s: failed to find program header for " "symbol: %s st_value: %#" PRIx64 "\n", -- Gitee From 6a9a2e5f726f2a7c39e8a3ce41591d458abb807c Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 20 Dec 2022 11:57:00 +0800 Subject: [PATCH 17/58] perf debug: Set debug_peo_args and redirect_to_stderr variable to correct values in perf_quiet_option() ANBZ: #9620 commit 188ac720d364035008a54d249cf47b4cc100f819 upstream. When perf uses quiet mode, perf_quiet_option() sets the 'debug_peo_args' variable to -1, and display_attr() incorrectly determines the value of 'debug_peo_args'. As a result, unexpected information is displayed. Before: # perf record --quiet -- ls > /dev/null ------------------------------------------------------------ perf_event_attr: size 128 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|PERIOD read_format ID|LOST disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 ------------------------------------------------------------ ... After: # perf record --quiet -- ls > /dev/null # redirect_to_stderr is a similar problem. Fixes: f78eaef0e0493f60 ("perf tools: Allow to force redirect pr_debug to stderr.") Fixes: ccd26741f5e6bdf2 ("perf tool: Provide an option to print perf_event_open args and return value") Suggested-by: Adrian Hunter Reviewed-by: Adrian Hunter Signed-off-by: Yang Jihong Cc: Alexander Shishkin Cc: Andi Kleen Cc: Carsten Haitzler Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: martin.lau@kernel.org Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20221220035702.188413-2-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 0af163abaa62..854dd3d2d8de 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -207,6 +207,10 @@ int perf_quiet_option(void) opt++; } + /* For debug variables that are used as bool types, set to 0. */ + redirect_to_stderr = 0; + debug_peo_args = 0; + return 0; } -- Gitee From 9edc7b66020e93c99a2fa1dbb4e7154a253b3c85 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 20 Dec 2022 14:31:40 -0800 Subject: [PATCH 18/58] perf/core: Call LSM hook after copying perf_event_attr ANBZ: #9620 commit 0a041ebca4956292cadfb14a63ace3a9c1dcb0a3 upstream. It passes the attr struct to the security_perf_event_open() but it's not initialized yet. Fixes: da97e18458fb ("perf_event: Add support for LSM and SELinux checks") Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Joel Fernandes (Google) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20221220223140.4020470-1-namhyung@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c35b9df41e05..f94a70f17d9f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11903,12 +11903,12 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; - /* Do we allow access to perf_event_open(2) ? */ - err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + err = perf_copy_attr(attr_uptr, &attr); if (err) return err; - err = perf_copy_attr(attr_uptr, &attr); + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); if (err) return err; -- Gitee From 4fd1aa34dba2552616bdf044c1e9dedd5480f703 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:39 +0900 Subject: [PATCH 19/58] perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor ANBZ: #9620 commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 upstream. Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute acccessor functions, so that it can find the specified attribute from abstact origin DIE etc. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data") Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/dwarf-aux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 4343356f3cf9..dc02685a1eec 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formudata(&attr, result) != 0) return -ENOENT; @@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formsdata(&attr, result) != 0) return -ENOENT; -- Gitee From f500617738c8283a0ae0fc1eb9a6a5ed78f0fc3d Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 5 Nov 2022 12:01:14 +0900 Subject: [PATCH 20/58] perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data ANBZ: #9620 commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 upstream. DWARF version 5 standard Sec 2.14 says that Any debugging information entry representing the declaration of an object, module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and DW_AT_decl_column attributes, each of whose value is an unsigned integer constant. So it should be an unsigned integer data. Also, even though the standard doesn't clearly say the DW_AT_call_file is signed or unsigned, the elfutils (eu-readelf) interprets it as unsigned integer data and it is natural to handle it as unsigned integer data as same as DW_AT_decl_file. This changes the DW_AT_call_file as unsigned integer data too. Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances") Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/dwarf-aux.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index dc02685a1eec..f8a10d5148f6 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, return 0; } -/* Get attribute and translate it as a sdata */ -static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, - Dwarf_Sword *result) -{ - Dwarf_Attribute attr; - - if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || - dwarf_formsdata(&attr, result) != 0) - return -ENOENT; - - return 0; -} - /** * die_is_signed_type - Check whether a type DIE is signed or not * @tp_die: a DIE of a type @@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) /* Get the call file index number in CU DIE */ static int die_get_call_fileno(Dwarf_Die *in_die) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) + if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) return (int)idx; else return -ENOENT; @@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) /* Get the declared file index number in CU DIE */ static int die_get_decl_fileno(Dwarf_Die *pdie) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) + if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) return (int)idx; else return -ENOENT; -- Gitee From dc5ea90f879e1e9f02dd7abcd8a8f89690438091 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 13:09:00 +0400 Subject: [PATCH 21/58] perf tools: Fix resources leak in perf_data__open_dir() ANBZ: #9620 commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f upstream. In perf_data__open_dir(), opendir() opens the directory stream. Add missing closedir() to release it after use. Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function") Reviewed-by: Adrian Hunter Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index d2c930630857..b11219b68359 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -127,6 +127,7 @@ int perf_data__open_dir(struct perf_data *data) file->size = st.st_size; } + closedir(dir); if (!files) return -EINVAL; @@ -135,6 +136,7 @@ int perf_data__open_dir(struct perf_data *data) return 0; out_err: + closedir(dir); close_dir(files, nr); return ret; } -- Gitee From d8ef8f4c4c651b42069b1414be772c94d0a280d1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 10 Jan 2023 20:56:59 +0200 Subject: [PATCH 22/58] perf auxtrace: Fix address filter duplicate symbol selection ANBZ: #9620 commit cf129830ee820f7fc90b98df193cd49d49344d09 upstream. When a match has been made to the nth duplicate symbol, return success not error. Example: Before: $ cat file.c cat: file.c: No such file or directory $ cat file1.c #include static void func(void) { printf("First func\n"); } void other(void); int main() { func(); other(); return 0; } $ cat file2.c #include static void func(void) { printf("Second func\n"); } void other(void) { func(); } $ gcc -Wall -Wextra -o test file1.c file2.c $ perf record -e intel_pt//u --filter 'filter func @ ./test' -- ./test Multiple symbols with name 'func' #1 0x1149 l func which is near main #2 0x1179 l func which is near other Disambiguate symbol name by inserting #n after the name e.g. func #2 Or select a global symbol by inserting #0 or #g or #G Failed to parse address filter: 'filter func @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test Failed to parse address filter: 'filter func #2 @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. After: $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test First func Second func [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data ] $ perf script --itrace=b -Ftime,flags,ip,sym,addr --ns 1231062.526977619: tr strt 0 [unknown] => 558495708179 func 1231062.526977619: tr end call 558495708188 func => 558495708050 _init 1231062.526979286: tr strt 0 [unknown] => 55849570818d func 1231062.526979286: tr end return 55849570818f func => 55849570819d other Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Reported-by: Dmitrii Dolgov <9erthalion6@gmail.com> Signed-off-by: Adrian Hunter Tested-by: Dmitry Dolgov <9erthalion6@gmail.com> Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230110185659.15979-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 4c8f9ec0d885..5d68eed5f05f 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2574,7 +2574,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; -- Gitee From 7d5cd6af7a6ef68b060256efe8e33a5c167b3971 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 14 Mar 2023 04:47:35 +0000 Subject: [PATCH 23/58] perf/core: Fix perf_output_begin parameter is incorrectly invoked in perf_event_bpf_output ANBZ: #9620 commit eb81a2ed4f52be831c9fb879752d89645a312c13 upstream. syzkaller reportes a KASAN issue with stack-out-of-bounds. The call trace is as follows: dump_stack+0x9c/0xd3 print_address_description.constprop.0+0x19/0x170 __kasan_report.cold+0x6c/0x84 kasan_report+0x3a/0x50 __perf_event_header__init_id+0x34/0x290 perf_event_header__init_id+0x48/0x60 perf_output_begin+0x4a4/0x560 perf_event_bpf_output+0x161/0x1e0 perf_iterate_sb_cpu+0x29e/0x340 perf_iterate_sb+0x4c/0xc0 perf_event_bpf_event+0x194/0x2c0 __bpf_prog_put.constprop.0+0x55/0xf0 __cls_bpf_delete_prog+0xea/0x120 [cls_bpf] cls_bpf_delete_prog_work+0x1c/0x30 [cls_bpf] process_one_work+0x3c2/0x730 worker_thread+0x93/0x650 kthread+0x1b8/0x210 ret_from_fork+0x1f/0x30 commit 267fb27352b6 ("perf: Reduce stack usage of perf_output_begin()") use on-stack struct perf_sample_data of the caller function. However, perf_event_bpf_output uses incorrect parameter to convert small-sized data (struct perf_bpf_event) into large-sized data (struct perf_sample_data), which causes memory overwriting occurs in __perf_event_header__init_id. Fixes: 267fb27352b6 ("perf: Reduce stack usage of perf_output_begin()") Signed-off-by: Yang Jihong Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230314044735.56551-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index f94a70f17d9f..950571080819 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8785,7 +8785,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data) perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, data, event, + ret = perf_output_begin(&handle, &sample, event, bpf_event->event_id.header.size); if (ret) return; -- Gitee From 673f48d09bfe24efb3273a3881ea1ecded2a6b8a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 13 Mar 2023 10:16:08 -0700 Subject: [PATCH 24/58] perf: fix perf_event_context->time ANBZ: #9620 commit baf1b12a67f5b24f395baca03e442ce27cab0c18 upstream. Time readers rely on perf_event_context->[time|timestamp|timeoffset] to get accurate time_enabled and time_running for an event. The difference between ctx->timestamp and ctx->time is the among of time when the context is not enabled. __update_context_time(ctx, false) is used to increase timestamp, but not time. Therefore, it should only be called in ctx_sched_in() when EVENT_TIME was not enabled. Fixes: 09f5e7dc7ad7 ("perf: Fix perf_event_read_local() time") Signed-off-by: Song Liu Signed-off-by: Peter Zijlstra (Intel) Acked-by: Namhyung Kim Link: https://lkml.kernel.org/r/20230313171608.298734-1-song@kernel.org Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 950571080819..bf6cfec1356b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3759,7 +3759,7 @@ ctx_sched_in(struct perf_event_context *ctx, if (likely(!ctx->nr_events)) return; - if (is_active ^ EVENT_TIME) { + if (!(is_active & EVENT_TIME)) { /* start ctx time */ __update_context_time(ctx, false); perf_cgroup_set_timestamp(cpuctx); -- Gitee From d00b34de1b665f5863363405793404923a8f16ae Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 22 Mar 2023 13:24:49 -0700 Subject: [PATCH 25/58] perf/core: Fix the same task check in perf_event_set_output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #9620 commit 24d3ae2f37d8bc3c14b31d353c5d27baf582b6a6 upstream. The same task check in perf_event_set_output has some potential issues for some usages. For the current perf code, there is a problem if using of perf_event_open() to have multiple samples getting into the same mmap’d memory when they are both attached to the same process. https://lore.kernel.org/all/92645262-D319-4068-9C44-2409EF44888E@gmail.com/ Because the event->ctx is not ready when the perf_event_set_output() is invoked in the perf_event_open(). Besides the above issue, before the commit bd2756811766 ("perf: Rewrite core context handling"), perf record can errors out when sampling with a hardware event and a software event as below. $ perf record -e cycles,dummy --per-thread ls failed to mmap with 22 (Invalid argument) That's because that prior to the commit a hardware event and a software event are from different task context. The problem should be a long time issue since commit c3f00c70276d ("perk: Separate find_get_context() from event initialization"). The task struct is stored in the event->hw.target for each per-thread event. It is a more reliable way to determine whether two events are attached to the same task. The event->hw.target was also introduced several years ago by the commit 50f16a8bf9d7 ("perf: Remove type specific target pointers"). It can not only be used to fix the issue with the current code, but also back port to fix the issues with an older kernel. Note: The event->hw.target was introduced later than commit c3f00c70276d. The patch may cannot be applied between the commit c3f00c70276d and commit 50f16a8bf9d7. Anybody that wants to back-port this at that period may have to find other solutions. Fixes: c3f00c70276d ("perf: Separate find_get_context() from event initialization") Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Zhengjun Xing Link: https://lkml.kernel.org/r/20230322202449.512091-1-kan.liang@linux.intel.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index bf6cfec1356b..aafa3569b34b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11744,7 +11744,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event) /* * If its not a per-cpu rb, it must be the same task. */ - if (output_event->cpu == -1 && output_event->ctx != event->ctx) + if (output_event->cpu == -1 && output_event->hw.target != event->hw.target) goto out; /* -- Gitee From e96e6eff2b06682a208cfb1faec21b5e4db7a3e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Jul 2021 13:06:38 -0300 Subject: [PATCH 26/58] perf sched: Cast PTHREAD_STACK_MIN to int as it may turn into sysconf(__SC_THREAD_STACK_MIN_VALUE) ANBZ: #9620 commit d08c84e01afa7a7eee6badab25d5420fa847f783 upstream. In fedora rawhide the PTHREAD_STACK_MIN define may end up expanded to a sysconf() call, and that will return 'long int', breaking the build: 45 fedora:rawhide : FAIL gcc version 11.1.1 20210623 (Red Hat 11.1.1-6) (GCC) builtin-sched.c: In function 'create_tasks': /git/perf-5.14.0-rc1/tools/include/linux/kernel.h:43:24: error: comparison of distinct pointer types lacks a cast [-Werror] 43 | (void) (&_max1 == &_max2); \ | ^~ builtin-sched.c:673:34: note: in expansion of macro 'max' 673 | (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); | ^~~ cc1: all warnings being treated as errors $ grep __sysconf /usr/include/*/*.h /usr/include/bits/pthread_stack_min-dynamic.h:extern long int __sysconf (int __name) __THROW; /usr/include/bits/pthread_stack_min-dynamic.h:# define PTHREAD_STACK_MIN __sysconf (__SC_THREAD_STACK_MIN_VALUE) /usr/include/bits/time.h:extern long int __sysconf (int); /usr/include/bits/time.h:# define CLK_TCK ((__clock_t) __sysconf (2)) /* 2 is _SC_CLK_TCK */ $ So cast it to int to cope with that. Signed-off-by: Arnaldo Carvalho de Melo Cc: Guenter Roeck Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/builtin-sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 83b6ca10cc8f..0a0aa3cf1dad 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -670,7 +670,7 @@ static void create_tasks(struct perf_sched *sched) err = pthread_attr_init(&attr); BUG_ON(err); err = pthread_attr_setstacksize(&attr, - (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); + (size_t) max(16 * 1024, (int)PTHREAD_STACK_MIN)); BUG_ON(err); err = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(err); -- Gitee From fe7a201b5d892d59781389d3bbf195b716b86ca4 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Mon, 27 Feb 2023 10:35:08 +0800 Subject: [PATCH 27/58] perf/core: Fix hardlockup failure caused by perf throttle ANBZ: #9620 commit 15def34e2635ab7e0e96f1bc32e1b69609f14942 upstream. commit e050e3f0a71bf ("perf: Fix broken interrupt rate throttling") introduces a change in throttling threshold judgment. Before this, compare hwc->interrupts and max_samples_per_tick, then increase hwc->interrupts by 1, but this commit reverses order of these two behaviors, causing the semantics of max_samples_per_tick to change. In literal sense of "max_samples_per_tick", if hwc->interrupts == max_samples_per_tick, it should not be throttled, therefore, the judgment condition should be changed to "hwc->interrupts > max_samples_per_tick". In fact, this may cause the hardlockup to fail, The minimum value of max_samples_per_tick may be 1, in this case, the return value of __perf_event_account_interrupt function is 1. As a result, nmi_watchdog gets throttled, which would stop PMU (Use x86 architecture as an example, see x86_pmu_handle_irq). Fixes: e050e3f0a71b ("perf: Fix broken interrupt rate throttling") Signed-off-by: Yang Jihong Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230227023508.102230-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index aafa3569b34b..376b9e6a6ee3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9031,8 +9031,8 @@ __perf_event_account_interrupt(struct perf_event *event, int throttle) hwc->interrupts = 1; } else { hwc->interrupts++; - if (unlikely(throttle - && hwc->interrupts >= max_samples_per_tick)) { + if (unlikely(throttle && + hwc->interrupts > max_samples_per_tick)) { __this_cpu_inc(perf_throttled_count); tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS); hwc->interrupts = MAX_INTERRUPTS; -- Gitee From 029c94f58cbb60ba61e05f045a1af2e707f19baa Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 3 Apr 2023 18:48:30 +0300 Subject: [PATCH 28/58] perf auxtrace: Fix address filter entire kernel size ANBZ: #9620 commit 1f9f33ccf0320be21703d9195dd2b36a1c9a07cb upstream. kallsyms is not completely in address order. In find_entire_kern_cb(), calculate the kernel end from the maximum address not the last symbol. Example: Before: $ sudo cat /proc/kallsyms | grep ' [twTw] ' | tail -1 ffffffffc00b8bd0 t bpf_prog_6deef7357e7b4530 [bpf] $ sudo cat /proc/kallsyms | grep ' [twTw] ' | sort | tail -1 ffffffffc15e0cc0 t iwl_mvm_exit [iwlmvm] $ perf.d093603a05aa record -v --kcore -e intel_pt// --filter 'filter *' -- uname |& grep filter Address filter: filter 0xffffffff93200000/0x2ceba000 After: $ perf.8fb0f7a01f8e record -v --kcore -e intel_pt// --filter 'filter *' -- uname |& grep filter Address filter: filter 0xffffffff93200000/0x2e3e2000 Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Signed-off-by: Adrian Hunter Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230403154831.8651-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/auxtrace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5d68eed5f05f..fb136eeea447 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2404,6 +2404,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused, char type, u64 start) { struct sym_args *args = arg; + u64 size; if (!kallsyms__is_function(type)) return 0; @@ -2413,7 +2414,9 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused, args->start = start; } /* Don't know exactly where the kernel ends, so we add a page */ - args->size = round_up(start, page_size) + page_size - args->start; + size = round_up(start, page_size) + page_size - args->start; + if (size > args->size) + args->size = size; return 0; } -- Gitee From 006065655249fc9dfeab304ad8141b6837db5d74 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 3 Apr 2023 18:48:31 +0300 Subject: [PATCH 29/58] perf intel-pt: Fix CYC timestamps after standalone CBR ANBZ: #9620 commit 430635a0ef1ce958b7b4311f172694ece2c692b8 upstream. After a standalone CBR (not associated with TSC), update the cycles reference timestamp and reset the cycle count, so that CYC timestamps are calculated relative to that point with the new frequency. Fixes: cc33618619cefc6d ("perf tools: Add Intel PT support for decoding CYC packets") Signed-off-by: Adrian Hunter Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230403154831.8651-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index e4c485f92c02..48fda1a19ab5 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1639,6 +1639,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) decoder->cbr = cbr; decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; + decoder->cyc_ref_timestamp = decoder->timestamp; + decoder->cycle_cnt = 0; intel_pt_mtc_cyc_cnt_cbr(decoder); } -- Gitee From d36d1c7dd6a8ad6d9d983d176b74fda8d88573cc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Apr 2023 10:23:35 -0300 Subject: [PATCH 30/58] perf pmu: zfree() expects a pointer to a pointer to zero it after freeing its contents ANBZ: #9620 commit 57f14b5ae1a97537f2abd2828ee7212cada7036e upstream. An audit showed just this one problem with zfree(), fix it. Fixes: 9fbc61f832ebf432 ("perf pmu: Add support for PMU capabilities") Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8b7243b4636e..530055f15554 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1791,7 +1791,7 @@ static int perf_pmu__new_caps(struct list_head *list, char *name, char *value) return 0; free_name: - zfree(caps->name); + zfree(&caps->name); free_caps: free(caps); -- Gitee From f84f9de1aa6363f2a5f23e80553d7a779f83033b Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 13 Apr 2023 14:46:39 +0200 Subject: [PATCH 31/58] perf map: Delete two variable initialisations before null pointer checks in sort__sym_from_cmp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #9620 commit c160118a90d4acf335993d8d59b02ae2147a524e upstream. Addresses of two data structure members were determined before corresponding null pointer checks in the implementation of the function “sort__sym_from_cmp”. Thus avoid the risk for undefined behaviour by removing extra initialisations for the local variables “from_l” and “from_r” (also because they were already reassigned with the same value behind this pointer check). This issue was detected by using the Coccinelle software. Fixes: 1b9e97a2a95e4941 ("perf tools: Fix report -F symbol_from for data without branch info") Signed-off-by: Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: German Gomez Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/cocci/54a21fea-64e3-de67-82ef-d61b90ffad05@web.de/ Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/sort.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6eca56ff5684..d323e4b8b77e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -873,8 +873,7 @@ static int hist_entry__dso_to_filter(struct hist_entry *he, int type, static int64_t sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) { - struct addr_map_symbol *from_l = &left->branch_info->from; - struct addr_map_symbol *from_r = &right->branch_info->from; + struct addr_map_symbol *from_l, *from_r; if (!left->branch_info || !right->branch_info) return cmp_null(left->branch_info, right->branch_info); -- Gitee From 200fc3589a01177e76e7bbf858c47fb9ac928a7a Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Thu, 27 Apr 2023 01:28:41 +0000 Subject: [PATCH 32/58] perf symbols: Fix return incorrect build_id size in elf_read_build_id() ANBZ: #9620 commit 1511e4696acb715a4fe48be89e1e691daec91c0e upstream. In elf_read_build_id(), if gnu build_id is found, should return the size of the actually copied data. If descsz is greater thanBuild_ID_SIZE, write_buildid data access may occur. Fixes: be96ea8ffa788dcc ("perf symbols: Fix issue with binaries using 16-bytes buildids (v2)") Reported-by: Will Ochowicz Signed-off-by: Yang Jihong Tested-by: Will Ochowicz Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/lkml/CWLP265MB49702F7BA3D6D8F13E4B1A719C649@CWLP265MB4970.GBRP265.PROD.OUTLOOK.COM/T/ Link: https://lore.kernel.org/r/20230427012841.231729-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/symbol-elf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5221f272f85c..b171d134ce87 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -548,7 +548,7 @@ static int elf_read_build_id(Elf *elf, void *bf, size_t size) size_t sz = min(size, descsz); memcpy(bf, ptr, sz); memset(bf + sz, 0, size - sz); - err = descsz; + err = sz; break; } } -- Gitee From e31b3ad5a52f6f144be5cb136feb6c2b9584e7d8 Mon Sep 17 00:00:00 2001 From: Sohaib Mohamed Date: Fri, 19 Nov 2021 08:14:08 +0200 Subject: [PATCH 33/58] perf bench: Use unbuffered output when pipe/tee'ing to a file ANBZ: #9620 commit f0a29c9647ff8bbb424641f79bc1894e83dec218 upstream. The output of 'perf bench' gets buffered when I pipe it to a file or to tee, in such a way that I can see it only at the end. E.g. $ perf bench internals synthesize -t < output comes out fine after each test run > $ perf bench internals synthesize -t | tee file.txt < output comes out only at the end of all tests > This patch resolves this issue for 'bench' and 'test' subcommands. See, also: $ perf bench mem all | tee file.txt $ perf bench sched all | tee file.txt $ perf bench internals all -t | tee file.txt $ perf bench internals all | tee file.txt Committer testing: It really gets staggered, i.e. outputs in bursts, when the buffer fills up and has to be drained to make up space for more output. Suggested-by: Riccardo Mancini Signed-off-by: Sohaib Mohamed Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Ian Rogers Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211119061409.78004-1-sohaib.amhmd@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 16203e9cd018 ("perf bench: Add missing setlocale() call to allow usage of %'d style formatting") Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-bench.c | 5 +++-- tools/perf/tests/builtin-test.c | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 62a7b7420a44..609a941ae296 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -225,7 +225,6 @@ static void run_collection(struct collection *coll) if (!bench->fn) break; printf("# Running %s/%s benchmark...\n", coll->name, bench->name); - fflush(stdout); argv[1] = bench->name; run_bench(coll->name, bench->name, bench->fn, 1, argv); @@ -246,6 +245,9 @@ int cmd_bench(int argc, const char **argv) struct collection *coll; int ret = 0; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + if (argc < 2) { /* No collection specified. */ print_usage(); @@ -299,7 +301,6 @@ int cmd_bench(int argc, const char **argv) if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name); - fflush(stdout); ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1); goto end; } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 132bdb3e6c31..73c911dd0c2c 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -793,6 +793,9 @@ int cmd_test(int argc, const char **argv) if (ret < 0) return ret; + /* Unbuffered output */ + setvbuf(stdout, NULL, _IONBF, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc - 1, argv + 1); -- Gitee From 5336b88c380c1b436f41c8a4de6c84c2ba94ab0f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Jun 2023 15:38:25 -0300 Subject: [PATCH 34/58] perf bench: Add missing setlocale() call to allow usage of %'d style formatting ANBZ: #9620 commit 16203e9cd01896b4244100a8e3fb9f6e612ab2b1 upstream. Without this we were not getting the thousands separator for big numbers. Noticed while developing 'perf bench uprobe', but the use of %' predates that, for instance 'perf bench syscall' uses it. Before: # perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1000 usleep(1000) calls Total time: 1054082243ns 1054082.243000 nsecs/op # After: # perf bench uprobe all # Running uprobe/baseline benchmark... # Executed 1,000 usleep(1000) calls Total time: 1,053,715,144ns 1,053,715.144000 nsecs/op # Fixes: c2a08203052f8975 ("perf bench: Add basic syscall benchmark") Cc: Adrian Hunter Cc: Andre Fredette Cc: Clark Williams Cc: Dave Tucker Cc: Davidlohr Bueso Cc: Derek Barbosa Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tiezhu Yang Link: https://lore.kernel.org/lkml/ZH3lcepZ4tBYr1jv@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-bench.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 609a941ae296..fb3029495c23 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -21,6 +21,7 @@ #include "builtin.h" #include "bench/bench.h" +#include #include #include #include @@ -247,6 +248,7 @@ int cmd_bench(int argc, const char **argv) /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); + setlocale(LC_ALL, ""); if (argc < 2) { /* No collection specified. */ -- Gitee From 51baf83283997769c54f316bb378290676db4632 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Mar 2021 08:59:21 -0300 Subject: [PATCH 35/58] perf script: Fixup 'struct evsel_script' method prefix ANBZ: #9620 commit 297e69bfa4c7aa27259dd456af1377e868337043 upstream. They all operate on 'struct evsel_script' instances, so should be prefixed with evsel_script__, not with perf_evsel_script__. Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: 36d3e4138e1b ("perf script: Fix allocation of evsel->priv related to per-event dump files") Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-script.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8d55de9f1d3c..ead235cdf947 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -295,8 +295,7 @@ static inline struct evsel_script *evsel_script(struct evsel *evsel) return (struct evsel_script *)evsel->priv; } -static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, - struct perf_data *data) +static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) { struct evsel_script *es = zalloc(sizeof(*es)); @@ -316,7 +315,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, return NULL; } -static void perf_evsel_script__delete(struct evsel_script *es) +static void evsel_script__delete(struct evsel_script *es) { zfree(&es->filename); fclose(es->fp); @@ -324,7 +323,7 @@ static void perf_evsel_script__delete(struct evsel_script *es) free(es); } -static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp) +static int evsel_script__fprintf(struct evsel_script *es, FILE *fp) { struct stat st; @@ -2167,8 +2166,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (!evsel->priv) { if (scr->per_event_dump) { - evsel->priv = perf_evsel_script__new(evsel, - scr->session->data); + evsel->priv = evsel_script__new(evsel, scr->session->data); } else { es = zalloc(sizeof(*es)); if (!es) @@ -2423,7 +2421,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script) evlist__for_each_entry(evlist, evsel) { if (!evsel->priv) break; - perf_evsel_script__delete(evsel->priv); + evsel_script__delete(evsel->priv); evsel->priv = NULL; } } @@ -2443,7 +2441,7 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) if (evsel->priv != NULL) continue; - evsel->priv = perf_evsel_script__new(evsel, script->session->data); + evsel->priv = evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; } @@ -2478,8 +2476,8 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) evlist__for_each_entry(script->session->evlist, evsel) { struct evsel_script *es = evsel->priv; - perf_evsel_script__fprintf(es, stdout); - perf_evsel_script__delete(es); + evsel_script__fprintf(es, stdout); + evsel_script__delete(es); evsel->priv = NULL; } } -- Gitee From 186d67a51792a667794fc0c2c3b9862c6a6d7b45 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 6 Jun 2023 16:11:10 -0300 Subject: [PATCH 36/58] perf script: Fix allocation of evsel->priv related to per-event dump files ANBZ: #9620 commit 36d3e4138e1b6cc9ab179f3f397b5548f8b1eaae upstream. When printing output we may want to generate per event files, where the --per-event-dump option should be used, creating perf.data.EVENT.dump files instead of printing to stdout. The callback thar processes event thus expects that evsel->priv->fp should point to either the per-event FILE descriptor or to stdout. The a3af66f51bd0bca7 ("perf script: Fix crash because of missing evsel->priv") changeset fixed a case where evsel->priv wasn't setup, thus set to NULL, causing a segfault when trying to access evsel->priv->fp. But it did it for the non --per-event-dump case by allocating a 'struct perf_evsel_script' just to set its ->fp to stdout. Since evsel->priv is only freed when --per-event-dump is used, we ended up with a memory leak, detected using ASAN. Fix it by using the same method as perf_script__setup_per_event_dump(), and reuse that static 'struct perf_evsel_script'. Also check if evsel_script__new() failed. Fixes: a3af66f51bd0bca7 ("perf script: Fix crash because of missing evsel->priv") Reported-by: Ian Rogers Tested-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Ravi Bangoria Link: https://lore.kernel.org/lkml/ZH+F0wGAWV14zvMP@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-script.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ead235cdf947..ec22f7c50712 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2147,6 +2147,9 @@ static int process_sample_event(struct perf_tool *tool, return 0; } +// Used when scr->per_event_dump is not set +static struct evsel_script es_stdout; + static int process_attr(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist) { @@ -2155,7 +2158,6 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, struct evsel *evsel, *pos; u64 sample_type; int err; - static struct evsel_script *es; err = perf_event__process_attr(tool, event, pevlist); if (err) @@ -2167,12 +2169,11 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (!evsel->priv) { if (scr->per_event_dump) { evsel->priv = evsel_script__new(evsel, scr->session->data); - } else { - es = zalloc(sizeof(*es)); - if (!es) + if (!evsel->priv) return -ENOMEM; - es->fp = stdout; - evsel->priv = es; + } else { // Replicate what is done in perf_script__setup_per_event_dump() + es_stdout.fp = stdout; + evsel->priv = &es_stdout; } } @@ -2456,7 +2457,6 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) static int perf_script__setup_per_event_dump(struct perf_script *script) { struct evsel *evsel; - static struct evsel_script es_stdout; if (script->per_event_dump) return perf_script__fopen_per_event_dump(script); -- Gitee From 230d103d2fea687ac37a69a15033569380822af4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Jun 2023 16:41:01 -0700 Subject: [PATCH 37/58] perf dwarf-aux: Fix off-by-one in die_get_varname() ANBZ: #9620 commit 3abfcfd847717d232e36963f31a361747c388fe7 upstream. The die_get_varname() returns "(unknown_type)" string if it failed to find a type for the variable. But it had a space before the opening parenthesis and it made the closing parenthesis cut off due to the off-by-one in the string length (14). Signed-off-by: Namhyung Kim Fixes: 88fd633cdfa19060 ("perf probe: No need to use formatting strbuf method") Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230612234102.3909116-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/dwarf-aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index f8a10d5148f6..443374a77c8d 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1081,7 +1081,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - ret = strbuf_add(buf, " (unknown_type)", 14); + ret = strbuf_add(buf, "(unknown_type)", 14); } return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); -- Gitee From 605d6268dbddfe9780a1646a30455e81b610ac35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Georg=20M=C3=BCller?= Date: Wed, 28 Jun 2023 10:45:50 +0200 Subject: [PATCH 38/58] perf probe: Add test for regression introduced by switch to die_get_decl_file() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #9620 commit 56cbeacf143530576905623ac72ae0964f3293a6 upstream. This patch adds a test to validate that 'perf probe' works for binaries where DWARF info is split into multiple CUs Signed-off-by: Georg Müller Acked-by: Masami Hiramatsu (Google) Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: regressions@lists.linux.dev Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230628084551.1860532-5-georgmueller@gmx.net Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- .../shell/test_uprobe_from_different_cu.sh | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tools/perf/tests/shell/test_uprobe_from_different_cu.sh diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh new file mode 100644 index 000000000000..00d2e0e2e0c2 --- /dev/null +++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# test perf probe of function from different CU +# SPDX-License-Identifier: GPL-2.0 + +set -e + +temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX) + +cleanup() +{ + trap - EXIT TERM INT + if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then + echo "--- Cleaning up ---" + perf probe -x ${temp_dir}/testfile -d foo + rm -f "${temp_dir}/"* + rmdir "${temp_dir}" + fi +} + +trap_cleanup() +{ + cleanup + exit 1 +} + +trap trap_cleanup EXIT TERM INT + +cat > ${temp_dir}/testfile-foo.h << EOF +struct t +{ + int *p; + int c; +}; + +extern int foo (int i, struct t *t); +EOF + +cat > ${temp_dir}/testfile-foo.c << EOF +#include "testfile-foo.h" + +int +foo (int i, struct t *t) +{ + int j, res = 0; + for (j = 0; j < i && j < t->c; j++) + res += t->p[j]; + + return res; +} +EOF + +cat > ${temp_dir}/testfile-main.c << EOF +#include "testfile-foo.h" + +static struct t g; + +int +main (int argc, char **argv) +{ + int i; + int j[argc]; + g.c = argc; + g.p = j; + for (i = 0; i < argc; i++) + j[i] = (int) argv[i][0]; + return foo (3, &g); +} +EOF + +gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o +gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o +gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o + +perf probe -x ${temp_dir}/testfile --funcs foo +perf probe -x ${temp_dir}/testfile foo + +cleanup -- Gitee From d381f7bc33412fb913d528096a7f4ae4a3d55801 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Nov 2022 22:40:17 +0100 Subject: [PATCH 39/58] perf: Fix function pointer case ANBZ: #9620 commit 1af6239d1d3e61d33fd2f0ba53d3d1a67cc50574 upstream. With the advent of CFI it is no longer acceptible to cast function pointers. The robot complains thusly: kernel-events-core.c:warning:cast-from-int-(-)(struct-perf_cpu_pmu_context-)-to-remote_function_f-(aka-int-(-)(void-)-)-converts-to-incompatible-function-type Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Cixi Geng Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- kernel/events/core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 376b9e6a6ee3..233256e628a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1149,6 +1149,11 @@ static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) return 0; } +static int perf_mux_hrtimer_restart_ipi(void *arg) +{ + return perf_mux_hrtimer_restart(arg); +} + void perf_pmu_disable(struct pmu *pmu) { int *count = this_cpu_ptr(pmu->pmu_disable_count); @@ -10878,8 +10883,7 @@ perf_event_mux_interval_ms_store(struct device *dev, cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer); - cpu_function_call(cpu, - (remote_function_f)perf_mux_hrtimer_restart, cpuctx); + cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpuctx); } cpus_read_unlock(); mutex_unlock(&mux_interval_mutex); -- Gitee From 1143cdd3bff83b335bf7e003ebcfc8a71ddb8688 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 2 Aug 2023 18:22:14 -0300 Subject: [PATCH 40/58] perf annotate bpf: Don't enclose non-debug code with an assert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #9620 commit 979e9c9fc9c2a761303585e07fe2699bdd88182f upstream. In 616b14b47a86d880 ("perf build: Conditionally define NDEBUG") we started using NDEBUG=1 when DEBUG=1 isn't present, so code that is enclosed with assert() is not called. In dd317df072071903 ("perf build: Make binutil libraries opt in") we stopped linking against binutils-devel, for licensing reasons. Recently people asked me why annotation of BPF programs wasn't working, i.e. this: $ perf annotate bpf_prog_5280546344e3f45c_kfree_skb was returning: case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); This was on a fedora rpm, so its new enough that I had to try to test by rebuilding using BUILD_NONDISTRO=1, only to get it segfaulting on me. This combination made this libopcode function not to be called: assert(bfd_check_format(bfdf, bfd_object)); Changing it to: if (!bfd_check_format(bfdf, bfd_object)) abort(); Made it work, looking at this "check" function made me realize it changes the 'bfdf' internal state, i.e. we better call it. So stop using assert() on it, just call it and abort if it fails. Probably it is better to propagate the error, etc, but it seems it is unlikely to fail from the usage done so far and we really need to stop using libopcodes, so do the quick fix above and move on. With it we have BPF annotation back working when built with BUILD_NONDISTRO=1: ⬢[acme@toolbox perf-tools-next]$ perf annotate --stdio2 bpf_prog_5280546344e3f45c_kfree_skb | head No kallsyms or vmlinux with build-id 939bc71a1a51cdc434e60af93c7e734f7d5c0e7e was found Samples: 12 of event 'cpu-clock:ppp', 4000 Hz, Event count (approx.): 3000000, [percent: local period] bpf_prog_5280546344e3f45c_kfree_skb() bpf_prog_5280546344e3f45c_kfree_skb Percent int kfree_skb(struct trace_event_raw_kfree_skb *args) { nop 33.33 xchg %ax,%ax push %rbp mov %rsp,%rbp sub $0x180,%rsp push %rbx push %r13 ⬢[acme@toolbox perf-tools-next]$ Fixes: 6987561c9e86eace ("perf annotate: Enable annotation of BPF programs") Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Mohamed Mahmoud Cc: Namhyung Kim Cc: Dave Tucker Cc: Derek Barbosa Cc: Song Liu Link: https://lore.kernel.org/lkml/ZMrMzoQBe0yqMek1@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/annotate.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b9edf33e4c69..1fe618a68691 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1730,8 +1730,11 @@ static int symbol__disassemble_bpf(struct symbol *sym, perf_exe(tpath, sizeof(tpath)); bfdf = bfd_openr(tpath, NULL); - assert(bfdf); - assert(bfd_check_format(bfdf, bfd_object)); + if (bfdf == NULL) + abort(); + + if (!bfd_check_format(bfdf, bfd_object)) + abort(); s = open_memstream(&buf, &buf_size); if (!s) { @@ -1779,7 +1782,8 @@ static int symbol__disassemble_bpf(struct symbol *sym, #else disassemble = disassembler(bfdf); #endif - assert(disassemble); + if (disassemble == NULL) + abort(); fflush(s); do { -- Gitee From 5ead8fe8bd2f14b56be62ea2aec229bdb524ad99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 17 Aug 2023 09:11:21 -0300 Subject: [PATCH 41/58] perf top: Don't pass an ERR_PTR() directly to perf_session__delete() ANBZ: #9620 commit ef23cb593304bde0cc046fd4cc83ae7ea2e24f16 upstream. While debugging a segfault on 'perf lock contention' without an available perf.data file I noticed that it was basically calling: perf_session__delete(ERR_PTR(-1)) Resulting in: (gdb) run lock contention Starting program: /root/bin/perf lock contention [Thread debugging using libthread_db enabled] Using host libthread_db library "/lib64/libthread_db.so.1". failed to open perf.data: No such file or directory (try 'perf record' first) Initializing perf session failed Program received signal SIGSEGV, Segmentation fault. 0x00000000005e7515 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2858 2858 if (!session->auxtrace) (gdb) p session $1 = (struct perf_session *) 0xffffffffffffffff (gdb) bt #0 0x00000000005e7515 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2858 #1 0x000000000057bb4d in perf_session__delete (session=0xffffffffffffffff) at util/session.c:300 #2 0x000000000047c421 in __cmd_contention (argc=0, argv=0x7fffffffe200) at builtin-lock.c:2161 #3 0x000000000047dc95 in cmd_lock (argc=0, argv=0x7fffffffe200) at builtin-lock.c:2604 #4 0x0000000000501466 in run_builtin (p=0xe597a8 , argc=2, argv=0x7fffffffe200) at perf.c:322 #5 0x00000000005016d5 in handle_internal_command (argc=2, argv=0x7fffffffe200) at perf.c:375 #6 0x0000000000501824 in run_argv (argcp=0x7fffffffe02c, argv=0x7fffffffe020) at perf.c:419 #7 0x0000000000501b11 in main (argc=2, argv=0x7fffffffe200) at perf.c:535 (gdb) So just set it to NULL after using PTR_ERR(session) to decode the error as perf_session__delete(NULL) is supported. The same problem was found in 'perf top' after an audit of all perf_session__new() failure handling. Fixes: 6ef81c55a2b6584c ("perf session: Return error code for perf_session__new() function on failure") Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Greg Kroah-Hartman Cc: Jeremie Galarneau Cc: Jiri Olsa Cc: Kate Stewart Cc: Mamatha Inamdar Cc: Mukesh Ojha Cc: Nageswara R Sastry Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Shawn Landden Cc: Song Liu Cc: Thomas Gleixner Cc: Tzvetomir Stoyanov Link: https://lore.kernel.org/lkml/ZN4Q2rxxsL08A8rd@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-top.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a19887186a00..c653de8f92e4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1739,6 +1739,7 @@ int cmd_top(int argc, const char **argv) top.session = perf_session__new(NULL, false, NULL); if (IS_ERR(top.session)) { status = PTR_ERR(top.session); + top.session = NULL; goto out_delete_evlist; } -- Gitee From 1870addc2d521ccb8a413a942fbfe199e18dfb04 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 31 Jul 2023 02:49:32 -0700 Subject: [PATCH 42/58] perf hists browser: Fix hierarchy mode header ANBZ: #9620 commit e2cabf2a44791f01c21f8d5189b946926e34142e upstream. The commit ef9ff6017e3c4593 ("perf ui browser: Move the extra title lines from the hists browser") introduced ui_browser__gotorc_title() to help moving non-title lines easily. But it missed to update the title for the hierarchy mode so it won't print the header line on TUI at all. $ perf report --hierarchy Fixes: ef9ff6017e3c4593 ("perf ui browser: Move the extra title lines from the hists browser") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230731094934.1616495-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/ui/browsers/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 933ce4f00694..cbe3e3d32571 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1779,7 +1779,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser) hists_browser__scnprintf_hierarchy_headers(browser, headers, sizeof(headers)); - ui_browser__gotorc(&browser->b, 0, 0); + ui_browser__gotorc_title(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); } -- Gitee From 0130e12374ab559fef457d20a48615d1bac91611 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 25 Aug 2023 08:25:49 -0700 Subject: [PATCH 43/58] perf tools: Handle old data in PERF_RECORD_ATTR ANBZ: #9620 commit 9bf63282ea77a531ea58acb42fb3f40d2d1e4497 upstream. The PERF_RECORD_ATTR is used for a pipe mode to describe an event with attribute and IDs. The ID table comes after the attr and it calculate size of the table using the total record size and the attr size. n_ids = (total_record_size - end_of_the_attr_field) / sizeof(u64) This is fine for most use cases, but sometimes it saves the pipe output in a file and then process it later. And it becomes a problem if there is a change in attr size between the record and report. $ perf record -o- > perf-pipe.data # old version $ perf report -i- < perf-pipe.data # new version For example, if the attr size is 128 and it has 4 IDs, then it would save them in 168 byte like below: 8 byte: perf event header { .type = PERF_RECORD_ATTR, .size = 168 }, 128 byte: perf event attr { .size = 128, ... }, 32 byte: event IDs [] = { 1234, 1235, 1236, 1237 }, But when report later, it thinks the attr size is 136 then it only read the last 3 entries as ID. 8 byte: perf event header { .type = PERF_RECORD_ATTR, .size = 168 }, 136 byte: perf event attr { .size = 136, ... }, 24 byte: event IDs [] = { 1235, 1236, 1237 }, // 1234 is missing So it should use the recorded version of the attr. The attr has the size field already then it should honor the size when reading data. Fixes: 2c46dbb517a10b18 ("perf: Convert perf header attrs into attr events") Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Tom Zanussi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230825152552.112913-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/util/header.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 8ef6fefea339..79e25b04a9b2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -4262,7 +4262,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct evlist **pevlist) { - u32 i, ids, n_ids; + u32 i, n_ids; + u64 *ids; struct evsel *evsel; struct evlist *evlist = *pevlist; @@ -4278,9 +4279,8 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, evlist__add(evlist, evsel); - ids = event->header.size; - ids -= (void *)&event->attr.id - (void *)event; - n_ids = ids / sizeof(u64); + n_ids = event->header.size - sizeof(event->header) - event->attr.attr.size; + n_ids = n_ids / sizeof(u64); /* * We don't have the cpu and thread maps on the header, so * for allocating the perf_sample_id table we fake 1 cpu and @@ -4289,8 +4289,9 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, if (perf_evsel__alloc_id(&evsel->core, 1, n_ids)) return -ENOMEM; + ids = (void *)&event->attr.attr + event->attr.attr.size; for (i = 0; i < n_ids; i++) { - perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, event->attr.id[i]); + perf_evlist__id_add(&evlist->core, &evsel->core, 0, i, ids[i]); } return 0; -- Gitee From 5165d236c80e30a65155fe9a76ed397c409bea9e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 31 Jul 2023 02:49:33 -0700 Subject: [PATCH 44/58] perf hists browser: Fix the number of entries for 'e' key ANBZ: #9620 commit f6b8436bede3e80226e8b2100279c4450c73806a upstream. The 'e' key is to toggle expand/collapse the selected entry only. But the current code has a bug that it only increases the number of entries by 1 in the hierarchy mode so users cannot move under the current entry after the key stroke. This is due to a wrong assumption in the hist_entry__set_folding(). The commit b33f922651011eff ("perf hists browser: Put hist_entry folding logic into single function") factored out the code, but actually it should be handled separately. The hist_browser__set_folding() is to update fold state for each entry so it needs to traverse all (child) entries regardless of the current fold state. So it increases the number of entries by 1. But the hist_entry__set_folding() only cares the currently selected entry and its all children. So it should count all unfolded child entries. This code is implemented in hist_browser__toggle_fold() already so we can just call it. Fixes: b33f922651011eff ("perf hists browser: Put hist_entry folding logic into single function") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230731094934.1616495-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- tools/perf/ui/browsers/hists.c | 58 ++++++++++++++-------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index cbe3e3d32571..63d4ce691d9f 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -407,11 +407,6 @@ static bool hist_browser__selection_has_children(struct hist_browser *browser) return container_of(ms, struct callchain_list, ms)->has_children; } -static bool hist_browser__he_selection_unfolded(struct hist_browser *browser) -{ - return browser->he_selection ? browser->he_selection->unfolded : false; -} - static bool hist_browser__selection_unfolded(struct hist_browser *browser) { struct hist_entry *he = browser->he_selection; @@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, return n; } -static void __hist_entry__set_folding(struct hist_entry *he, - struct hist_browser *hb, bool unfold) +static void hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *hb, bool unfold) { hist_entry__init_have_children(he); he->unfolded = unfold ? he->has_children : false; @@ -603,34 +598,12 @@ static void __hist_entry__set_folding(struct hist_entry *he, he->nr_rows = 0; } -static void hist_entry__set_folding(struct hist_entry *he, - struct hist_browser *browser, bool unfold) -{ - double percent; - - percent = hist_entry__get_percent_limit(he); - if (he->filtered || percent < browser->min_pcnt) - return; - - __hist_entry__set_folding(he, browser, unfold); - - if (!he->depth || unfold) - browser->nr_hierarchy_entries++; - if (he->leaf) - browser->nr_callchain_rows += he->nr_rows; - else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { - browser->nr_hierarchy_entries++; - he->has_no_entry = true; - he->nr_rows = 1; - } else - he->has_no_entry = false; -} - static void __hist_browser__set_folding(struct hist_browser *browser, bool unfold) { struct rb_node *nd; struct hist_entry *he; + double percent; nd = rb_first_cached(&browser->hists->entries); while (nd) { @@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); hist_entry__set_folding(he, browser, unfold); + + percent = hist_entry__get_percent_limit(he); + if (he->filtered || percent < browser->min_pcnt) + continue; + + if (!he->depth || unfold) + browser->nr_hierarchy_entries++; + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; } } @@ -659,8 +647,10 @@ static void hist_browser__set_folding_selected(struct hist_browser *browser, boo if (!browser->he_selection) return; - hist_entry__set_folding(browser->he_selection, browser, unfold); - browser->b.nr_entries = hist_browser__nr_entries(browser); + if (unfold == browser->he_selection->unfolded) + return; + + hist_browser__toggle_fold(browser); } static void ui_browser__warn_lost_events(struct ui_browser *browser) @@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l hist_browser__set_folding(browser, true); break; case 'e': - /* Expand the selected entry. */ - hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser)); + /* Toggle expand/collapse the selected entry. */ + hist_browser__toggle_fold(browser); break; case 'H': browser->show_headers = !browser->show_headers; -- Gitee From e8d7da8640d2024ff448eb79b067124f133f6b9e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Oct 2023 15:23:05 -0700 Subject: [PATCH 45/58] perf machine: Avoid out of bounds LBR memory read ANBZ: #9620 commit ab8ce150781d326c6bfbe1e09f175ffde1186f80 upstream. Running perf top with address sanitizer and "--call-graph=lbr" fails due to reading sample 0 when no samples exist. Add a guard to prevent this. Fixes: e2b23483eb1d ("perf machine: Factor out lbr_callchain_add_lbr_ip()") Signed-off-by: Ian Rogers Cc: K Prateek Nayak Cc: Ravi Bangoria Cc: Sandipan Das Cc: Anshuman Khandual Cc: German Gomez Cc: James Clark Cc: Nick Terrell Cc: Sean Christopherson Cc: Changbin Du Cc: liuwenyu Cc: Yang Jihong Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Song Liu Cc: Leo Yan Cc: Kajol Jain Cc: Andi Kleen Cc: Kan Liang Cc: Athira Rajeev Cc: Yanteng Si Cc: Liam Howlett Cc: Paolo Bonzini Link: https://lore.kernel.org/r/20231024222353.3024098-3-irogers@google.com Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/machine.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 61b27ee56a9f..84e2ccc2fe1f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2398,16 +2398,18 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread, save_lbr_cursor_node(thread, cursor, i); } - /* Add LBR ip from first entries.to */ - ip = entries[0].to; - flags = &entries[0].flags; - *branch_from = entries[0].from; - err = add_callchain_ip(thread, cursor, parent, - root_al, &cpumode, ip, - true, flags, NULL, - *branch_from); - if (err) - return err; + if (lbr_nr > 0) { + /* Add LBR ip from first entries.to */ + ip = entries[0].to; + flags = &entries[0].flags; + *branch_from = entries[0].from; + err = add_callchain_ip(thread, cursor, parent, + root_al, &cpumode, ip, + true, flags, NULL, + *branch_from); + if (err) + return err; + } return 0; } -- Gitee From 86d7ba0699ee17b78eb3cd5d778b03b0c42e4a1f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 24 Oct 2023 15:23:08 -0700 Subject: [PATCH 46/58] perf hist: Add missing puts to hist__account_cycles ANBZ: #9620 commit c1149037f65bcf0334886180ebe3d5efcf214912 upstream. Caught using reference count checking on perf top with "--call-graph=lbr". After this no memory leaks were detected. Fixes: 57849998e2cd ("perf report: Add processing for cycle histograms") Signed-off-by: Ian Rogers Cc: K Prateek Nayak Cc: Ravi Bangoria Cc: Sandipan Das Cc: Anshuman Khandual Cc: German Gomez Cc: James Clark Cc: Nick Terrell Cc: Sean Christopherson Cc: Changbin Du Cc: liuwenyu Cc: Yang Jihong Cc: Masami Hiramatsu Cc: Miguel Ojeda Cc: Song Liu Cc: Leo Yan Cc: Kajol Jain Cc: Andi Kleen Cc: Kan Liang Cc: Athira Rajeev Cc: Yanteng Si Cc: Liam Howlett Cc: Paolo Bonzini Link: https://lore.kernel.org/r/20231024222353.3024098-6-irogers@google.com Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/hist.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 3d11fa622126..9f1267734b15 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2638,8 +2638,6 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, /* If we have branch cycles always annotate them. */ if (bs && bs->nr && entries[0].flags.cycles) { - int i; - bi = sample__resolve_bstack(sample, al); if (bi) { struct addr_map_symbol *prev = NULL; @@ -2654,7 +2652,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, * Note that perf stores branches reversed from * program order! */ - for (i = bs->nr - 1; i >= 0; i--) { + for (int i = bs->nr - 1; i >= 0; i--) { addr_map_symbol__account_cycles(&bi[i].from, nonany_branch_mode ? NULL : prev, bi[i].flags.cycles); @@ -2663,6 +2661,12 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, if (total_cycles) *total_cycles += bi[i].flags.cycles; } + for (unsigned int i = 0; i < bs->nr; i++) { + map__put(bi[i].to.ms.map); + maps__put(bi[i].to.ms.maps); + map__put(bi[i].from.ms.map); + maps__put(bi[i].from.ms.maps); + } free(bi); } } -- Gitee From 46081d71c6c3e7a0f8199924e950bd915262dbc3 Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Thu, 7 Sep 2023 08:43:07 +0800 Subject: [PATCH 47/58] perf/core: Bail out early if the request AUX area is out of bound ANBZ: #9620 commit 54aee5f15b83437f23b2b2469bcf21bdd9823916 upstream. When perf-record with a large AUX area, e.g 4GB, it fails with: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) and it reveals a WARNING with __alloc_pages(): ------------[ cut here ]------------ WARNING: CPU: 44 PID: 17573 at mm/page_alloc.c:5568 __alloc_pages+0x1ec/0x248 Call trace: __alloc_pages+0x1ec/0x248 __kmalloc_large_node+0xc0/0x1f8 __kmalloc_node+0x134/0x1e8 rb_alloc_aux+0xe0/0x298 perf_mmap+0x440/0x660 mmap_region+0x308/0x8a8 do_mmap+0x3c0/0x528 vm_mmap_pgoff+0xf4/0x1b8 ksys_mmap_pgoff+0x18c/0x218 __arm64_sys_mmap+0x38/0x58 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x58/0x188 do_el0_svc+0x34/0x50 el0_svc+0x34/0x108 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a4/0x1a8 'rb->aux_pages' allocated by kcalloc() is a pointer array which is used to maintains AUX trace pages. The allocated page for this array is physically contiguous (and virtually contiguous) with an order of 0..MAX_ORDER. If the size of pointer array crosses the limitation set by MAX_ORDER, it reveals a WARNING. So bail out early with -ENOMEM if the request AUX area is out of bound, e.g.: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) Signed-off-by: Shuai Xue Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 4032cd475000..01351e7e2543 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -691,6 +691,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, max_order--; } + /* + * kcalloc_node() is unable to allocate buffer if the size is larger + * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case. + */ + if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER) + return -ENOMEM; rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, node); if (!rb->aux_pages) -- Gitee From 8e60d7821771b62a0c73c40ac5d0e69708a4b4c6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 16 Jun 2022 11:06:23 -0700 Subject: [PATCH 48/58] perf/core: Add a new read format to get a number of lost samples ANBZ: #9620 commit 119a784c81270eb88e573174ed2209225d646656 upstream. Sometimes we want to know an accurate number of samples even if it's lost. Currenlty PERF_RECORD_LOST is generated for a ring-buffer which might be shared with other events. So it's hard to know per-event lost count. Add event->lost_samples field and PERF_FORMAT_LOST to retrieve it from userspace. Original-patch-by: Jiri Olsa Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220616180623.1358843-1-namhyung@kernel.org Stable-dep-of: 382c27f4ed28 ("perf: Fix perf_event_validate_size()") Signed-off-by: Sasha Levin Conflicts: kernel/events/core.c Signed-off-by: Jing Zhang --- include/linux/perf_event.h | 2 ++ include/uapi/linux/perf_event.h | 5 ++++- kernel/events/core.c | 21 ++++++++++++++++++--- kernel/events/ring_buffer.c | 5 ++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1381f54bbee7..f87bffb86878 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -751,6 +751,8 @@ struct perf_event { struct pid_namespace *ns; u64 id; + atomic64_t lost_samples; + u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 73c1589fa140..748574ce8274 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -313,6 +313,7 @@ enum { * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } && !PERF_FORMAT_GROUP * * { u64 nr; @@ -320,6 +321,7 @@ enum { * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING * { u64 value; * { u64 id; } && PERF_FORMAT_ID + * { u64 lost; } && PERF_FORMAT_LOST * } cntr[nr]; * } && PERF_FORMAT_GROUP * }; @@ -329,8 +331,9 @@ enum perf_event_read_format { PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, PERF_FORMAT_GROUP = 1U << 3, + PERF_FORMAT_LOST = 1U << 4, - PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 233256e628a9..412ada28a6d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1839,6 +1839,9 @@ static int __perf_event_read_size(u64 read_format, int nr_siblings) if (read_format & PERF_FORMAT_ID) entry += sizeof(u64); + if (read_format & PERF_FORMAT_LOST) + entry += sizeof(u64); + if (read_format & PERF_FORMAT_GROUP) { nr += nr_siblings; size += sizeof(u64); @@ -5212,11 +5215,15 @@ static int __perf_read_group_add(struct perf_event *leader, values[n++] += perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); for_each_sibling_event(sub, leader) { values[n++] += perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); } unlock: @@ -5270,7 +5277,7 @@ static int perf_read_one(struct perf_event *event, u64 read_format, char __user *buf) { u64 enabled, running; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = __perf_event_read_value(event, &enabled, &running); @@ -5280,6 +5287,8 @@ static int perf_read_one(struct perf_event *event, values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); if (copy_to_user(buf, values, n * sizeof(u64))) return -EFAULT; @@ -6759,7 +6768,7 @@ static void perf_output_read_one(struct perf_output_handle *handle, u64 enabled, u64 running) { u64 read_format = event->attr.read_format; - u64 values[4]; + u64 values[5]; int n = 0; values[n++] = perf_event_count(event); @@ -6773,6 +6782,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&event->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } @@ -6783,7 +6794,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; - u64 values[5]; + u64 values[6]; int n = 0; values[n++] = 1 + leader->nr_siblings; @@ -6801,6 +6812,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(leader); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&leader->lost_samples); __output_copy(handle, values, n * sizeof(u64)); @@ -6814,6 +6827,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, values[n++] = perf_event_count(sub); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); + if (read_format & PERF_FORMAT_LOST) + values[n++] = atomic64_read(&sub->lost_samples); __output_copy(handle, values, n * sizeof(u64)); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 01351e7e2543..ca27946fdaaf 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -172,8 +172,10 @@ __perf_output_begin(struct perf_output_handle *handle, goto out; if (unlikely(rb->paused)) { - if (rb->nr_pages) + if (rb->nr_pages) { local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); + } goto out; } @@ -254,6 +256,7 @@ __perf_output_begin(struct perf_output_handle *handle, fail: local_inc(&rb->lost); + atomic64_inc(&event->lost_samples); perf_output_put_handle(handle); out: rcu_read_unlock(); -- Gitee From 9d3597a132e46c14b0673353057afc55688464f2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Dec 2023 23:05:44 -0800 Subject: [PATCH 49/58] perf genelf: Set ELF program header addresses properly ANBZ: #9620 commit 1af478903fc48c1409a8dd6b698383b62387adf1 upstream. The text section starts after the ELF headers so PHDR.p_vaddr and others should have the correct addresses. Fixes: babd04386b1df8c3 ("perf jit: Include program header in ELF files") Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Fangrui Song Cc: Ingo Molnar Cc: Jiri Olsa Cc: Lieven Hey Cc: Milian Wolff Cc: Pablo Galindo Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20231212070547.612536-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/genelf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c76b0e4ac254..3a43997b03a7 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -287,9 +287,9 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, */ phdr = elf_newphdr(e, 1); phdr[0].p_type = PT_LOAD; - phdr[0].p_offset = 0; - phdr[0].p_vaddr = 0; - phdr[0].p_paddr = 0; + phdr[0].p_offset = GEN_ELF_TEXT_OFFSET; + phdr[0].p_vaddr = GEN_ELF_TEXT_OFFSET; + phdr[0].p_paddr = GEN_ELF_TEXT_OFFSET; phdr[0].p_filesz = csize; phdr[0].p_memsz = csize; phdr[0].p_flags = PF_X | PF_R; -- Gitee From 4d7b9fd77be0e8d04010116795d342aa0a383e10 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 6 Dec 2023 17:46:55 -0800 Subject: [PATCH 50/58] perf env: Avoid recursively taking env->bpf_progs.lock ANBZ: #9620 commit 9c51f8788b5d4e9f46afbcf563255cfd355690b3 upstream. Add variants of perf_env__insert_bpf_prog_info(), perf_env__insert_btf() and perf_env__find_btf prefixed with __ to indicate the env->bpf_progs.lock is assumed held. Call these variants when the lock is held to avoid recursively taking it and potentially having a thread deadlock with itself. Fixes: f8dfeae009effc0b ("perf bpf: Show more BPF program info in print_bpf_prog_info()") Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Acked-by: Song Liu Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Huacai Chen Cc: Ingo Molnar Cc: K Prateek Nayak Cc: Kan Liang Cc: Mark Rutland Cc: Ming Wang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20231207014655.1252484-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/bpf-event.c | 8 +++--- tools/perf/util/bpf-event.h | 12 ++++----- tools/perf/util/env.c | 50 ++++++++++++++++++++++++------------- tools/perf/util/env.h | 4 +++ tools/perf/util/header.c | 8 +++--- 5 files changed, 50 insertions(+), 32 deletions(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index c785191ec9b8..fccc4bf20c74 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -533,9 +533,9 @@ int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env) return evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp) +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) { __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); @@ -551,7 +551,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, if (info->btf_id) { struct btf_node *node; - node = perf_env__find_btf(env, info->btf_id); + node = __perf_env__find_btf(env, info->btf_id); if (node) btf = btf__new((__u8 *)(node->data), node->data_size); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 68f315c3df5b..50f7412464df 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -34,9 +34,9 @@ struct btf_node { int machine__process_bpf(struct machine *machine, union perf_event *event, struct perf_sample *sample); int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env); -void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, - struct perf_env *env, - FILE *fp); +void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp); #else static inline int machine__process_bpf(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -51,9 +51,9 @@ static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused, return 0; } -static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, - struct perf_env *env __maybe_unused, - FILE *fp __maybe_unused) +static inline void __bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, + struct perf_env *env __maybe_unused, + FILE *fp __maybe_unused) { } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index b66e24fff0e4..d9047177c06d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -20,13 +20,19 @@ struct perf_env perf_env; void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) +{ + down_write(&env->bpf_progs.lock); + __perf_env__insert_bpf_prog_info(env, info_node); + up_write(&env->bpf_progs.lock); +} + +void __perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node) { __u32 prog_id = info_node->info_linear->info.id; struct bpf_prog_info_node *node; struct rb_node *parent = NULL; struct rb_node **p; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.infos.rb_node; while (*p != NULL) { @@ -38,15 +44,13 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, p = &(*p)->rb_right; } else { pr_debug("duplicated bpf prog info %u\n", prog_id); - goto out; + return; } } rb_link_node(&info_node->rb_node, parent, p); rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); env->bpf_progs.infos_cnt++; -out: - up_write(&env->bpf_progs.lock); } struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, @@ -75,14 +79,22 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, } bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ + bool ret; + + down_write(&env->bpf_progs.lock); + ret = __perf_env__insert_btf(env, btf_node); + up_write(&env->bpf_progs.lock); + return ret; +} + +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; - bool ret = true; - down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; while (*p != NULL) { @@ -94,25 +106,31 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); - ret = false; - goto out; + return false; } } rb_link_node(&btf_node->rb_node, parent, p); rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); env->bpf_progs.btfs_cnt++; -out: - up_write(&env->bpf_progs.lock); - return ret; + return true; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ + struct btf_node *res; + + down_read(&env->bpf_progs.lock); + res = __perf_env__find_btf(env, btf_id); + up_read(&env->bpf_progs.lock); + return res; +} + +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id) { struct btf_node *node = NULL; struct rb_node *n; - down_read(&env->bpf_progs.lock); n = env->bpf_progs.btfs.rb_node; while (n) { @@ -122,13 +140,9 @@ struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) else if (btf_id > node->id) n = n->rb_right; else - goto out; + return node; } - node = NULL; - -out: - up_read(&env->bpf_progs.lock); - return node; + return NULL; } /* purge data in bpf_progs.infos tree */ diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1d847fea6a41..8209c87743e7 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -169,12 +169,16 @@ const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); void perf_env__init(struct perf_env *env); +void __perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool __perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); +struct btf_node *__perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, int cpu); char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 79e25b04a9b2..7e10ccd940ef 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1757,8 +1757,8 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - bpf_event__print_bpf_prog_info(&node->info_linear->info, - env, fp); + __bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); } up_read(&env->bpf_progs.lock); @@ -3082,7 +3082,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) /* after reading from file, translate offset to address */ bpf_program__bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; - perf_env__insert_bpf_prog_info(env, info_node); + __perf_env__insert_bpf_prog_info(env, info_node); } up_write(&env->bpf_progs.lock); @@ -3129,7 +3129,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) if (__do_read(ff, node->data, data_size)) goto out; - perf_env__insert_btf(env, node); + __perf_env__insert_btf(env, node); node = NULL; } -- Gitee From 12119beb94acae665d82f3fdbaa68e5a47b237f2 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Mon, 12 Jun 2023 15:09:09 +0200 Subject: [PATCH 51/58] perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file ANBZ: #9620 commit 652ffc2104ec1f69dd4a46313888c33527145ccf upstream. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/2023061204-decal-flyable-6090@gregkh Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 412ada28a6d9..465078413d19 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10910,9 +10910,32 @@ static DEVICE_ATTR_RW(perf_event_mux_interval_ms); static struct attribute *pmu_dev_attrs[] = { &dev_attr_type.attr, &dev_attr_perf_event_mux_interval_ms.attr, + &dev_attr_nr_addr_filters.attr, + NULL, +}; + +static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + + if (!pmu->nr_addr_filters) + return 0; + + return a->mode; + + return 0; +} + +static struct attribute_group pmu_dev_attr_group = { + .is_visible = pmu_dev_is_visible, + .attrs = pmu_dev_attrs, +}; + +static const struct attribute_group *pmu_dev_groups[] = { + &pmu_dev_attr_group, NULL, }; -ATTRIBUTE_GROUPS(pmu_dev); static int pmu_bus_running; static struct bus_type pmu_bus = { @@ -10948,18 +10971,11 @@ static int pmu_dev_alloc(struct pmu *pmu) if (ret) goto free_dev; - /* For PMUs with address filters, throw in an extra attribute: */ - if (pmu->nr_addr_filters) - ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters); - - if (ret) - goto del_dev; - - if (pmu->attr_update) + if (pmu->attr_update) { ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); - - if (ret) - goto del_dev; + if (ret) + goto del_dev; + } out: return ret; -- Gitee From 500ef5b55ef08a1648267339769e04d13e45ea68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Nov 2023 11:07:56 +0100 Subject: [PATCH 52/58] perf: Fix the nr_addr_filters fix ANBZ: #9620 commit 388a1fb7da6aaa1970c7e2a7d7fcd983a87a8484 upstream. Thomas reported that commit 652ffc2104ec ("perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file") made the entire attribute group vanish, instead of only the nr_addr_filters attribute. Additionally a stray return. Insufficient coffee was involved with both writing and merging the patch. Fixes: 652ffc2104ec ("perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file") Reported-by: Thomas Richter Signed-off-by: Peter Zijlstra (Intel) Tested-by: Thomas Richter Link: https://lkml.kernel.org/r/20231122100756.GP8262@noisy.programming.kicks-ass.net Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- kernel/events/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 465078413d19..f2cb0bc1a373 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10919,12 +10919,10 @@ static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int struct device *dev = kobj_to_dev(kobj); struct pmu *pmu = dev_get_drvdata(dev); - if (!pmu->nr_addr_filters) + if (n == 2 && !pmu->nr_addr_filters) return 0; return a->mode; - - return 0; } static struct attribute_group pmu_dev_attr_group = { -- Gitee From 9fc83c83cf67d7ffb920f571f591adc10ceaa895 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Fri, 19 Jan 2024 04:03:02 +0000 Subject: [PATCH 53/58] perf record: Fix possible incorrect free in record__switch_output() ANBZ: #9620 commit aff10a165201f6f60cff225083ce301ad3f5d8f1 upstream. perf_data__switch() may not assign a legal value to 'new_filename'. In this case, 'new_filename' uses the on-stack value, which may cause a incorrect free and unexpected result. Fixes: 03724b2e9c45 ("perf record: Allow to limit number of reported perf.data files") Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20240119040304.3708522-2-yangjihong1@huawei.com Signed-off-by: Namhyung Kim Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 08f1c2aeeedb..96c6a52d7f09 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1264,8 +1264,8 @@ static int record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; + char *new_filename = NULL; int fd, err; - char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; -- Gitee From 6187a62dd8cd84119e391e4b81d0efa6e2d5470e Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sat, 27 Jan 2024 02:57:56 +0000 Subject: [PATCH 54/58] perf evsel: Fix duplicate initialization of data->id in evsel__parse_sample() ANBZ: #9620 commit 4962aec0d684c8edb14574ccd0da53e4926ff834 upstream. data->id has been initialized at line 2362, remove duplicate initialization. Fixes: 3ad31d8a0df2 ("perf evsel: Centralize perf_sample initialization") Signed-off-by: Yang Jihong Reviewed-by: Arnaldo Carvalho de Melo Reviewed-by: Ian Rogers Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240127025756.4041808-1-yangjihong1@huawei.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/evsel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0025e121969e..cf5fda44b49f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2174,7 +2174,6 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->period = evsel->core.attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; data->misc = event->header.misc; - data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; if (event->header.type != PERF_RECORD_SAMPLE) { -- Gitee From 360d528b71c4ceb39e6a5f4764d4dc6259af0901 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Tue, 6 Feb 2024 08:32:28 +0000 Subject: [PATCH 55/58] perf thread_map: Free strlist on normal path in thread_map__new_by_tid_str() ANBZ: #9620 commit 1eb3d924e3c0b8c27388b0583a989d757866efb6 upstream. slist needs to be freed in both error path and normal path in thread_map__new_by_tid_str(). Fixes: b52956c961be3a04 ("perf tools: Allow multiple threads or processes in record, stat, top") Reviewed-by: Arnaldo Carvalho de Melo Signed-off-by: Yang Jihong Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240206083228.172607-6-yangjihong1@huawei.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/thread_map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943..cee7fc3b5bb0 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -279,13 +279,13 @@ struct perf_thread_map *thread_map__new_by_tid_str(const char *tid_str) threads->nr = ntasks; } out: + strlist__delete(slist); if (threads) refcount_set(&threads->refcnt, 1); return threads; out_free_threads: zfree(&threads); - strlist__delete(slist); goto out; } -- Gitee From 0de08f91b711d4d4895533acf956dcb1a58723bb Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 9 Feb 2024 12:49:46 -0800 Subject: [PATCH 56/58] perf stat: Avoid metric-only segv ANBZ: #9620 commit 2543947c77e0e224bda86b4e7220c2f6714da463 upstream. Cycles is recognized as part of a hard coded metric in stat-shadow.c, it may call print_metric_only with a NULL fmt string leading to a segfault. Handle the NULL fmt explicitly. Fixes: 088519f318be ("perf stat: Move the display functions to stat-display.c") Signed-off-by: Ian Rogers Reviewed-by: Kan Liang Cc: K Prateek Nayak Cc: James Clark Cc: Kaige Ye Cc: John Garry Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240209204947.3873294-4-irogers@google.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 96fe9c1af336..4a866e6a0815 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -271,7 +271,7 @@ static void print_metric_only(struct perf_stat_config *config, if (color) mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; - color_snprintf(str, sizeof(str), color ?: "", fmt, val); + color_snprintf(str, sizeof(str), color ?: "", fmt ?: "", val); fprintf(out, "%*s ", mlen, str); } -- Gitee From 16c1d2bdf62892b98d2992239eedf902700ab1f6 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Fri, 2 Sep 2022 16:29:18 +0800 Subject: [PATCH 57/58] perf/core: Fix reentry problem in perf_output_read_group() ANBZ: #9620 commit 6b959ba22d34ca793ffdb15b5715457c78e38b1a upstream. perf_output_read_group may respond to IPI request of other cores and invoke __perf_install_in_context function. As a result, hwc configuration is modified. causing inconsistency and unexpected consequences. Interrupts are not disabled when perf_output_read_group reads PMU counter. In this case, IPI request may be received from other cores. As a result, PMU configuration is modified and an error occurs when reading PMU counter: CPU0 CPU1 __se_sys_perf_event_open perf_install_in_context perf_output_read_group smp_call_function_single for_each_sibling_event(sub, leader) { generic_exec_single if ((sub != event) && remote_function (sub->state == PERF_EVENT_STATE_ACTIVE)) | <----RAISE IPI-----+ __perf_install_in_context ctx_resched event_sched_out armpmu_del ... hwc->idx = -1; // event->hwc.idx is set to -1 ... sub->pmu->read(sub); armpmu_read armv8pmu_read_counter armv8pmu_read_hw_counter int idx = event->hw.idx; // idx = -1 u64 val = armv8pmu_read_evcntr(idx); u32 counter = ARMV8_IDX_TO_COUNTER(idx); // invalid counter = 30 read_pmevcntrn(counter) // undefined instruction Signed-off-by: Yang Jihong Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220902082918.179248-1-yangjihong1@huawei.com Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jing Zhang --- kernel/events/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index f2cb0bc1a373..a40c6840b3a3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6794,9 +6794,16 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; + unsigned long flags; u64 values[6]; int n = 0; + /* + * Disabling interrupts avoids all counter scheduling + * (context switches, timer based rotation and IPIs). + */ + local_irq_save(flags); + values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -6832,6 +6839,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } + + local_irq_restore(flags); } #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ -- Gitee From b1a0150f1ed799abe5a51b9b2fe3dbbbd7e71af8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 28 Feb 2024 23:07:57 -0800 Subject: [PATCH 58/58] libperf evlist: Avoid out-of-bounds access ANBZ: #9620 commit 1947b92464c3268381604bbe2ac977a3fd78192f upstream. Parallel testing appears to show a race between allocating and setting evsel ids. As there is a bounds check on the xyarray it yields a segv like: ``` AddressSanitizer:DEADLYSIGNAL ================================================================= ==484408==ERROR: AddressSanitizer: SEGV on unknown address 0x000000000010 ==484408==The signal is caused by a WRITE memory access. ==484408==Hint: address points to the zero page. #0 0x55cef5d4eff4 in perf_evlist__id_hash tools/lib/perf/evlist.c:256 #1 0x55cef5d4f132 in perf_evlist__id_add tools/lib/perf/evlist.c:274 #2 0x55cef5d4f545 in perf_evlist__id_add_fd tools/lib/perf/evlist.c:315 #3 0x55cef5a1923f in store_evsel_ids util/evsel.c:3130 #4 0x55cef5a19400 in evsel__store_ids util/evsel.c:3147 #5 0x55cef5888204 in __run_perf_stat tools/perf/builtin-stat.c:832 #6 0x55cef5888c06 in run_perf_stat tools/perf/builtin-stat.c:960 #7 0x55cef58932db in cmd_stat tools/perf/builtin-stat.c:2878 ... ``` Avoid this crash by early exiting the perf_evlist__id_add_fd and perf_evlist__id_add is the access is out-of-bounds. Signed-off-by: Ian Rogers Cc: Yang Jihong Signed-off-by: Namhyung Kim Link: https://lore.kernel.org/r/20240229070757.796244-1-irogers@google.com Signed-off-by: Sasha Levin Signed-off-by: Jing Zhang --- tools/lib/perf/evlist.c | 18 ++++++++++++------ tools/lib/perf/include/internal/evlist.h | 4 ++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index b25e150a257a..446d86332dfa 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -227,10 +227,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist) static void perf_evlist__id_hash(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); + struct perf_sample_id *sid = SID(evsel, cpu_map_idx, thread); sid->id = id; sid->evsel = evsel; @@ -240,21 +240,27 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist, void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id) + int cpu_map_idx, int thread, u64 id) { - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); + if (!SID(evsel, cpu_map_idx, thread)) + return; + + perf_evlist__id_hash(evlist, evsel, cpu_map_idx, thread, id); evsel->id[evsel->ids++] = id; } int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd) + int cpu_map_idx, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ u64 id; int ret; + if (!SID(evsel, cpu_map_idx, thread)) + return -1; + ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); if (!ret) goto add; @@ -283,7 +289,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, id = read_data[id_idx]; add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); + perf_evlist__id_add(evlist, evsel, cpu_map_idx, thread, id); return 0; } diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 2d0fa02b036f..8999f2cc8ee4 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -118,10 +118,10 @@ u64 perf_evlist__read_format(struct perf_evlist *evlist); void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, u64 id); + int cpu_map_idx, int thread, u64 id); int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, - int cpu, int thread, int fd); + int cpu_map_idx, int thread, int fd); #endif /* __LIBPERF_INTERNAL_EVLIST_H */ -- Gitee