From 632454497dff8044c61068b3a15fbbad974d1f0a Mon Sep 17 00:00:00 2001 From: xiongzhou4 Date: Tue, 12 Sep 2023 11:58:50 +0800 Subject: [PATCH] GCOV: Add value profile support for kernel. GCC inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I80N6W --------------------------------- Backport from openEuler 22.03 LTS SP2: https://gitee.com/openeuler/kernel/pulls/773 This feature adds value profile support for the kernel by changing the GCOV option "-fprofile-arcs" to "-fprofile-generate" when the newly added config "PGO_KERNEL" is set to y. Like GCOV, the symbols required by value profile are migrated from the GCC source code, as they cannot be linked into the kernel. Specifically, from libgcc/libgcov-profiler.c to kernel/gcov/base.c. kernel options: CONFIG_PGO_KERNEL=y Signed-off-by: xiongzhou4 Reviewed-by: Li Yancheng --- Makefile | 7 +- arch/arm64/configs/openeuler_defconfig | 1 + arch/um/Makefile-skas | 5 ++ arch/um/scripts/Makefile.rules | 6 ++ arch/x86/configs/openeuler_defconfig | 1 + arch/x86/um/vdso/Makefile | 5 ++ drivers/scsi/lpfc/Makefile | 4 + kernel/gcov/Makefile | 6 ++ kernel/gcov/base.c | 108 +++++++++++++++++++++++++ kernel/gcov/gcc_4_7.c | 16 ++-- lib/Kconfig.debug | 12 +++ 11 files changed, 164 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 2cad844497b0..2e92f2e50a7b 100644 --- a/Makefile +++ b/Makefile @@ -596,7 +596,12 @@ endif # Defaults to vmlinux, but the arch makefile usually adds further targets all: vmlinux -CFLAGS_GCOV := -fprofile-arcs -ftest-coverage \ +ifeq ($(CONFIG_PGO_KERNEL),y) +CFLAGS_GCOV := -fprofile-generate +else +CFLAGS_GCOV := -fprofile-arcs +endif +CFLAGS_GCOV += -ftest-coverage \ $(call cc-option,-fno-tree-loop-im) \ $(call cc-disable-warning,maybe-uninitialized,) export CFLAGS_GCOV diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 60b1db8e190c..b04256636d4b 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ 
b/arch/arm64/configs/openeuler_defconfig @@ -5803,6 +5803,7 @@ CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# CONFIG_PGO_KERNEL is not set CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y diff --git a/arch/um/Makefile-skas b/arch/um/Makefile-skas index ac35de5316a6..9d7886ff2762 100644 --- a/arch/um/Makefile-skas +++ b/arch/um/Makefile-skas @@ -4,7 +4,12 @@ # GPROF_OPT += -pg + +ifeq ($(CONFIG_PGO_KERNEL),y) +GCOV_OPT += -fprofile-generate -ftest-coverage +else GCOV_OPT += -fprofile-arcs -ftest-coverage +endif CFLAGS-$(CONFIG_GCOV) += $(GCOV_OPT) CFLAGS-$(CONFIG_GPROF) += $(GPROF_OPT) diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules index a4dfa7d7636e..60d01d68a84e 100644 --- a/arch/um/scripts/Makefile.rules +++ b/arch/um/scripts/Makefile.rules @@ -22,6 +22,12 @@ $(USER_OBJS) $(UNPROFILE_OBJS): \ CHECKFLAGS := $(patsubst $(NOSTDINC_FLAGS),,$(CHECKFLAGS)) # The stubs can't try to call mcount or update basic block data +ifeq ($(CONFIG_PGO_KERNEL),y) +define unprofile + $(patsubst -pg,,$(patsubst -fprofile-generate -ftest-coverage,,$(1))) +endef +else define unprofile $(patsubst -pg,,$(patsubst -fprofile-arcs -ftest-coverage,,$(1))) endef +endif diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index b089fee914fb..59baeb2973af 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -7344,6 +7344,7 @@ CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_STACK_VALIDATION=y # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set +# CONFIG_PGO_KERNEL is not set CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 822ccdba93ad..4437b208ec6d 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -50,8 
+50,13 @@ $(vobjs): KBUILD_CFLAGS += $(CFL) # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. # +ifeq ($(CONFIG_PGO_KERNEL),y) +CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-generate -ftest-coverage +CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-generate -ftest-coverage +else CFLAGS_REMOVE_vdso-note.o = -pg -fprofile-arcs -ftest-coverage CFLAGS_REMOVE_um_vdso.o = -pg -fprofile-arcs -ftest-coverage +endif # # The DSO images are built using a special linker script. diff --git a/drivers/scsi/lpfc/Makefile b/drivers/scsi/lpfc/Makefile index 092a971d066b..4381b472e0b0 100644 --- a/drivers/scsi/lpfc/Makefile +++ b/drivers/scsi/lpfc/Makefile @@ -21,7 +21,11 @@ # *******************************************************************/ ###################################################################### +ifeq ($(CONFIG_PGO_KERNEL),y) +ccflags-$(GCOV) := -fprofile-generate -ftest-coverage +else ccflags-$(GCOV) := -fprofile-arcs -ftest-coverage +endif ccflags-$(GCOV) += -O0 ifdef WARNINGS_BECOME_ERRORS diff --git a/kernel/gcov/Makefile b/kernel/gcov/Makefile index ff06d64df397..65f4b1b7eb94 100644 --- a/kernel/gcov/Makefile +++ b/kernel/gcov/Makefile @@ -1,4 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 + +# GCOV must not instrument itself in value profile, or kernel cannot be booted. +ifeq ($(CONFIG_PGO_KERNEL),y) +GCOV_PROFILE := n +endif + ccflags-y := -DSRCTREE='"$(srctree)"' -DOBJTREE='"$(objtree)"' obj-y := base.o fs.o diff --git a/kernel/gcov/base.c b/kernel/gcov/base.c index 9c7c8d5c18f2..8bb4d90e6b10 100644 --- a/kernel/gcov/base.c +++ b/kernel/gcov/base.c @@ -167,3 +167,111 @@ static int __init gcov_init(void) } device_initcall(gcov_init); #endif /* CONFIG_MODULES */ + +#ifdef CONFIG_PGO_KERNEL +/* + * If VALUE is in interval [START, START + STEPS - 1], then increases the + * corresponding counter in COUNTERS. If the VALUE is above or below + * the interval, COUNTERS[STEPS] or COUNTERS[STEPS + 1] is increased + * instead. 
+ */ +void __gcov_interval_profiler(gcov_type *counters, gcov_type value, + int start, unsigned int steps) +{ + gcov_type delta = value - start; + + if (delta < 0) + counters[steps + 1]++; + else if (delta >= steps) + counters[steps]++; + else + counters[delta]++; +} +EXPORT_SYMBOL(__gcov_interval_profiler); + +/* + * If VALUE is a power of two, COUNTERS[1] is incremented. Otherwise + * COUNTERS[0] is incremented. + */ +void __gcov_pow2_profiler(gcov_type *counters, gcov_type value) +{ + if (value == 0 || (value & (value - 1))) + counters[0]++; + else + counters[1]++; +} +EXPORT_SYMBOL(__gcov_pow2_profiler); + +/* + * Tries to determine the most common value among its inputs. Checks if the + * value stored in COUNTERS[0] matches VALUE. If this is the case, COUNTERS[1] + * is incremented. If this is not the case and COUNTERS[1] is not zero, + * COUNTERS[1] is decremented. Otherwise COUNTERS[1] is set to one and + * VALUE is stored to COUNTERS[0]. This algorithm guarantees that if this + * function is called more than 50% of the time with one value, this value + * will be in COUNTERS[0] in the end. + * + * In any case, COUNTERS[2] is incremented. + */ +static inline void __gcov_one_value_profiler_body(gcov_type *counters, + gcov_type value) +{ + if (value == counters[0]) + counters[1]++; + else if (counters[1] == 0) { + counters[1] = 1; + counters[0] = value; + } else + counters[1]--; + + counters[2]++; +} + +void __gcov_one_value_profiler(gcov_type *counters, gcov_type value) +{ + __gcov_one_value_profiler_body(counters, value); +} +EXPORT_SYMBOL(__gcov_one_value_profiler); + +/* + * These two variables are used to actually track caller and callee. + * Discarded __thread keyword as kernel does not support TLS. + * The variables are set directly by GCC instrumented code, so declaration + * here must match one in tree-profile.c. 
+ */ +void *__gcov_indirect_call_callee; +EXPORT_SYMBOL(__gcov_indirect_call_callee); +gcov_type *__gcov_indirect_call_counters; +EXPORT_SYMBOL(__gcov_indirect_call_counters); + +/* + * Tries to determine the most common value among its inputs. + */ +void __gcov_indirect_call_profiler_v2(gcov_type value, void *cur_func) +{ + /* Removed the C++ virtual tables contents as kernel is written in C. */ + if (cur_func == __gcov_indirect_call_callee) + __gcov_one_value_profiler_body(__gcov_indirect_call_counters, + value); +} +EXPORT_SYMBOL(__gcov_indirect_call_profiler_v2); + +/* Counter for first visit of each function. */ +gcov_type __gcov_time_profiler_counter; +EXPORT_SYMBOL(__gcov_time_profiler_counter); + +/* Increase corresponding COUNTER by VALUE. */ +void __gcov_average_profiler(gcov_type *counters, gcov_type value) +{ + counters[0] += value; + counters[1]++; +} +EXPORT_SYMBOL(__gcov_average_profiler); + +/* Bitwise-OR VALUE into COUNTER. */ +void __gcov_ior_profiler(gcov_type *counters, gcov_type value) +{ + *counters |= value; +} +EXPORT_SYMBOL(__gcov_ior_profiler); +#endif diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index ca5e5c0ef853..13fbb4374818 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -297,14 +297,18 @@ struct gcov_info *gcov_info_dup(struct gcov_info *info) for (ct_idx = 0; ct_idx < active; ct_idx++) { cv_size = sizeof(gcov_type) * sci_ptr->num; + /* The situation may exist where cv_size=0 in value + profile. 
*/ + if (cv_size != 0) { + dci_ptr->values = vmalloc(cv_size); - dci_ptr->values = vmalloc(cv_size); + if (!dci_ptr->values) + goto err_free; - if (!dci_ptr->values) - goto err_free; - - dci_ptr->num = sci_ptr->num; - memcpy(dci_ptr->values, sci_ptr->values, cv_size); + dci_ptr->num = sci_ptr->num; + memcpy(dci_ptr->values, sci_ptr->values, + cv_size); + } sci_ptr++; dci_ptr++; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4a78bacd405b..5bf4ad9f95c4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -395,6 +395,18 @@ config DEBUG_FORCE_WEAK_PER_CPU To ensure that generic code follows the above rules, this option forces all percpu variables to be defined as weak. +config PGO_KERNEL + bool "Enable profile guided optimization for kernel" + depends on CC_IS_GCC && GCC_VERSION > 70000 && GCC_VERSION < 80000 + depends on !COMPILE_TEST && DEBUG_KERNEL && DEBUG_FS + select GCOV_KERNEL + select GCOV_PROFILE_ALL + default n + help + This option enables profile guided optimization for kernel. + + If unsure, say N. + endmenu # "Compiler options" config MAGIC_SYSRQ -- Gitee