From 4bdac9041fd1f7ad23b8dd066fc1970dbf9ed38d Mon Sep 17 00:00:00 2001 From: wuying39 <921169248@qq.com> Date: Tue, 26 Aug 2025 10:56:15 +0800 Subject: [PATCH] add enableBpf attr for Python and go && Details_Usage.md --- docs/C_C++_API.md | 2 + docs/Details_Usage.md | 122 +++++++++++++++++++++++++++++++- docs/Go_API.md | 2 + docs/Python_API.md | 2 + go/src/libkperf/kperf/kperf.go | 9 +++ include/pmu.h | 2 +- python/modules/_libkperf/Pmu.py | 38 +++++++--- python/modules/kperf/perror.py | 3 + python/modules/kperf/pmu.py | 9 ++- test/test_perf/CMakeLists.txt | 1 + test/test_perf/test_api.cpp | 30 ++++++++ 11 files changed, 206 insertions(+), 14 deletions(-) diff --git a/docs/C_C++_API.md b/docs/C_C++_API.md index c34322d..03f1482 100644 --- a/docs/C_C++_API.md +++ b/docs/C_C++_API.md @@ -89,6 +89,8 @@ 采集cgroup的个数 * unsigned enableUserAccess 是否直接读取寄存器,仅支持COUNTING模式 + * unsigned enableBpf + 是否基于BPF采集,仅支持COUNTING模式 * 返回值 > 0 初始化成功 返回值 = -1 初始化失败,可通过Perror()查看错误信息 diff --git a/docs/Details_Usage.md b/docs/Details_Usage.md index f55e7bf..6cff999 100644 --- a/docs/Details_Usage.md +++ b/docs/Details_Usage.md @@ -248,8 +248,8 @@ import "fmt" import "time" func main() { - attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, SymbolMode:kperf.ELF, PidList:[]int{0}, CpuList:[]int{-1}, EnableUserAccess:true} - fd, err := kperf.PmuOpen(kperf.COUNT, attr) + attr := kperf.PmuAttr{EvtList:[]string{"cycles"}, PidList:[]int{0}, CpuList:[]int{-1}, EnableUserAccess:true} + fd, err := kperf.PmuOpen(kperf.COUNT, attr) if err != nil { fmt.Printf("perf user access counting open failed : %v\n", err) return @@ -286,6 +286,124 @@ func main() { +#### Bpf Counting + +多线程或cgroup采集场景下消耗大量文件资源描述符,同时较多的上下文切换会导致应用性能劣化。libkperf提供基于内核bpf的counting模式采集功能,此特性在内核v5.10版本后支持。 + +使用该模式采集,请确保内核选项CONFIG_DEBUG_INFO_BTF开启。编译libkperf时需添加'bpf=true'选项启动clang和bpftool工具编译bpf程序,并设置: +- attr.enableBpf=1 +- pidList或cgroupNameList不为空 + +以采集进程为例,以下为完整示例: +
+ 点击查看C++代码示例 + +```c++ +#include <cstdio> +#include <unistd.h> +#include "pcerrc.h" +#include "pmu.h" + +int main() { + PmuAttr attr = {0}; + attr.enableBpf = 1; + + char *evtList[2]; + evtList[0] = "cycles"; + evtList[1] = "branch-misses"; + attr.numEvt = 2; + attr.evtList = evtList; + + int pid = 1; + int pidList[1] = {pid}; // 该pid值替换成对应需要采集应用的pid + attr.pidList = pidList; + attr.numPid = 1; + + int pd = PmuOpen(COUNTING, &attr); + if (pd == -1) { + printf("PmuOpen failed : %s\n", Perror()); + PmuClose(pd); + return 0; + } + PmuEnable(pd); + sleep(1); + PmuDisable(pd); + PmuData *data = nullptr; + int len = PmuRead(pd, &data); + if (len <= 0) { + printf("%s\n", Perror()); + } + for (int i = 0; i < len; i++) { + printf("event:%s pid=%d tid=%d cpu=%d count=%llu\n",data[i].evt,data[i].pid,data[i].tid,data[i].cpu,data[i].count); + } + PmuDataFree(data); + PmuClose(pd); + return 0; +} +``` + +
+ +
+ 点击查看Python代码示例 + +```python +import kperf +import time + +evtList = ["cycles", "branch-misses"] +pidList = [1] # 该pid值替换成对应需要采集应用的pid +pmu_attr = kperf.PmuAttr(evtList=evtList, pidList=pidList, enableBpf=True) +pd = kperf.open(kperf.PmuTaskType.COUNTING, pmu_attr) +if pd == -1: + print(kperf.error()) + exit(1) +kperf.enable(pd) +time.sleep(1) +kperf.disable(pd) +pmu_data = kperf.read(pd) +for data in pmu_data.iter: + print(f"cpu {data.cpu} evt {data.evt} count {data.count}") +``` + +
+ +
+ 点击查看Go代码示例 + +```go +import "libkperf/kperf" +import "fmt" +import "time" + +func main() { + pidList := []int{1} // 该pid值替换成对应需要采集应用的pid + attr := kperf.PmuAttr{EvtList:[]string{"cycles", "branch-misses"}, PidList: pidList, EnableBpf: true} + fd, err := kperf.PmuOpen(kperf.COUNT, attr) + if err != nil { + fmt.Printf("kperf pmuopen sample failed, expect err is nil, but is %v\n", err) + return + } + + kperf.PmuEnable(fd) + time.Sleep(time.Second) + kperf.PmuDisable(fd) + + dataVo, err := kperf.PmuRead(fd) + if err != nil { + fmt.Printf("kperf pmuread failed, expect err is nil, but is %v\n", err) + return + } + for _, o := range dataVo.GoData { + fmt.Printf("cpu=%v evt=%v count=%v\n", o.Cpu, o.Evt, o.Count) + } + kperf.PmuDataFree(dataVo) + kperf.PmuClose(fd) +} +``` + +
+ ### Sampling libkperf提供Sampling模式,类似于perf record的如下命令: ``` diff --git a/docs/Go_API.md b/docs/Go_API.md index 5fbdaf8..30e38e9 100644 --- a/docs/Go_API.md +++ b/docs/Go_API.md @@ -80,6 +80,8 @@ func PmuOpen(collectType C.enum_PmuTaskType, attr PmuAttr) (int, error) 采集cgroup的个数 * EnableUserAccess bool 是否直接读取寄存器,仅支持COUNTING模式 + * EnableBpf bool + 是否基于BPF采集,仅支持COUNTING模式 * 返回值是int,error, 如果error不等于nil,则返回的int值为对应采集任务ID diff --git a/docs/Python_API.md b/docs/Python_API.md index 936ea4f..cf6e3df 100644 --- a/docs/Python_API.md +++ b/docs/Python_API.md @@ -83,6 +83,8 @@ kperf.open(collector_type: kperf.PmuTaskType, pmu_attr: kperf.PmuAttr) 采集cgroup的个数 * enableUserAccess 是否直接读取寄存器,仅支持COUNTING模式 + * enableBpf + 是否基于BPF采集,仅支持COUNTING模式 * 返回值是int值 fd > 0 成功初始化 fd == -1 初始化失败,可通过 kperf.error()查看错误信息 diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 217e506..3c0812a 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -80,6 +80,10 @@ void SetEnableUserAccess(struct PmuAttr* attr, unsigned enableUserAccess) { attr->enableUserAccess = enableUserAccess; } +void SetEnableBpf(struct PmuAttr* attr, unsigned enableBpf) { + attr->enableBpf = enableBpf; +} + struct PmuData* IPmuRead(int fd, int* len) { struct PmuData* pmuData = NULL; *len = PmuRead(fd, &pmuData); @@ -345,6 +349,7 @@ type PmuAttr struct { BlockedSample bool // This indicates whether the blocked sample mode is enabled. 
In this mode, both on Cpu and off Cpu data is collected CgroupNameList []string // cgroup name list, if not user cgroup function, this field will be nullptr.if use cgroup function,use the cgroup name in the cgroupList to apply all event in the Event list EnableUserAccess bool // enable user access counting for current process + EnableBpf bool // enable bpf mode for counting } type CpuTopology struct { @@ -592,6 +597,10 @@ func ToCPmuAttr(attr PmuAttr) (*C.struct_PmuAttr, int) { C.SetEnableUserAccess(cAttr, C.uint(1)) } + if attr.EnableBpf { + C.SetEnableBpf(cAttr, C.uint(1)) + } + return cAttr, 0 } diff --git a/include/pmu.h b/include/pmu.h index 0361455..dd64250 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -176,7 +176,7 @@ struct PmuAttr { // enable user access counting for current process unsigned enableUserAccess : 1; - // enable bpf mode for counting + // enable bpf mode for counting unsigned enableBpf : 1; }; diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index cdb0042..d90039a 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -80,16 +80,22 @@ class CtypesPmuAttr(ctypes.Structure): unsigned freq; // sample frequency }; unsigned useFreq : 1; - unsigned excludeUser : 1; // don't count user - unsigned excludeKernel : 1; // don't count kernel + unsigned excludeUser : 1; // don't count user + unsigned excludeKernel : 1; // don't count kernel enum SymbolMode symbolMode; // refer to comments of SymbolMode - unsigned callStack : 1; // collect complete call stack - unsigned blockedSample : 1; // enable blocked sample + unsigned callStack : 1; // collect complete call stack + unsigned blockedSample : 1; // enable blocked sample // SPE related fields. 
enum SpeFilter dataFilter; // spe data filter enum SpeEventFilter evFilter; // spe event filter - unsigned long minLatency; // collect only samples with latency or higher - unsigned includeNewFork : 1; // include new fork thread + + unsigned long minLatency; // collect only samples with latency or higher + unsigned includeNewFork : 1; // include new fork thread + unsigned long branchSampleFilter; // if the filter mode is set, branch_sample_stack data is collected in sampling mode + char** cgroupNameList; // cgroup list + unsigned numCgroup; // length of cgroup list + unsigned enableUserAccess : 1; // enable user access counting for current process + unsigned enableBpf : 1; // enable bpf mode for counting }; """ @@ -116,7 +122,8 @@ class CtypesPmuAttr(ctypes.Structure): ('branchSampleFilter', ctypes.c_ulong), ('cgroupNameList', ctypes.POINTER(ctypes.c_char_p)), ('numCgroup', ctypes.c_uint), - ('enableUserAccess', ctypes.c_uint, 1) + ('enableUserAccess', ctypes.c_uint, 1), + ('enableBpf', ctypes.c_uint, 1) ] def __init__(self, @@ -139,6 +146,7 @@ class CtypesPmuAttr(ctypes.Structure): cgroupNameList=None, numCgroup=0, enableUserAccess=False, + enableBpf=False, *args, **kw): super(CtypesPmuAttr, self).__init__(*args, **kw) @@ -201,7 +209,7 @@ class CtypesPmuAttr(ctypes.Structure): self.blockedSample = blockedSample self.includeNewFork = includeNewFork self.enableUserAccess = enableUserAccess - + self.enableBpf = enableBpf class PmuAttr(object): __slots__ = ['__c_pmu_attr'] @@ -224,7 +232,8 @@ class PmuAttr(object): includeNewFork=False, branchSampleFilter=0, cgroupNameList=None, - enableUserAccess=False): + enableUserAccess=False, + enableBpf=False): self.__c_pmu_attr = CtypesPmuAttr( evtList=evtList, @@ -244,7 +253,8 @@ class PmuAttr(object): includeNewFork=includeNewFork, branchSampleFilter=branchSampleFilter, cgroupNameList=cgroupNameList, - enableUserAccess=enableUserAccess + enableUserAccess=enableUserAccess, + enableBpf=enableBpf ) @property @@ -255,6 +265,14 @@ 
class PmuAttr(object): def enableUserAccess(self, enableUserAccess): self.c_pmu_attr.enableUserAccess = int(enableUserAccess) + @property + def enableBpf(self): + return bool(self.c_pmu_attr.enableBpf) + + @enableBpf.setter + def enableBpf(self, enableBpf): + self.c_pmu_attr.enableBpf = int(enableBpf) + @property def c_pmu_attr(self): return self.__c_pmu_attr diff --git a/python/modules/kperf/perror.py b/python/modules/kperf/perror.py index 1efaabd..304f548 100644 --- a/python/modules/kperf/perror.py +++ b/python/modules/kperf/perror.py @@ -121,6 +121,9 @@ class Error: LIBPERF_ERR_ENABLE_USER_ACCESS_FAILED = 1075 LIBPERF_ERR_ALLOCATE_REGISTER_FAILED = 1076 LIBPERF_ERR_CHECK_USER_ACCESS = 1077 + LIBPERF_ERR_COUNTER_INDEX_IS_ZERO = 1078 + LIBPERF_ERR_BPF_ACT_FAILED = 1079 + LIBPERF_ERR_INVALID_BPF_PARAM = 1080 UNKNOWN_ERROR = 9999 diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index a37523f..73a25b6 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -263,8 +263,13 @@ class PmuAttr(_libkperf.PmuAttr): # SPE related fields: dataFilter: spe data filter. Refer to comments of SpeFilter. evFilter: spe event filter. Refer to comments of SpeEventFilter. + minLatency: collect only samples with latency or higher. includeNewFork: In count mode, enable it you can get the new child thread count, default is disabled. + branchSampleFilter: if the filter mode is set, branch_sample_stack data is collected in sampling mode + cgroupNameList: cgroup name list, can not assigned with pidList. + enableUserAccess: In count mode, enable read the register directly to collect data + enableBpf: In count mode, enable bpf to collect data. 
""" def __init__(self, evtList = None, @@ -284,7 +289,8 @@ class PmuAttr(_libkperf.PmuAttr): includeNewFork = False, branchSampleFilter = 0, cgroupNameList = None, - enableUserAccess = False): + enableUserAccess = False, + enableBpf = False): super(PmuAttr, self).__init__( evtList=evtList, pidList=pidList, @@ -304,6 +310,7 @@ class PmuAttr(_libkperf.PmuAttr): branchSampleFilter=branchSampleFilter, cgroupNameList=cgroupNameList, enableUserAccess=enableUserAccess, + enableBpf=enableBpf, ) diff --git a/test/test_perf/CMakeLists.txt b/test/test_perf/CMakeLists.txt index 21c3677..751171a 100644 --- a/test/test_perf/CMakeLists.txt +++ b/test/test_perf/CMakeLists.txt @@ -6,6 +6,7 @@ include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/analyzer/metric) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/decoder) if (BPF) include_directories(${CMAKE_CURRENT_LIST_DIR}/../../pmu/bpf) + add_compile_definitions(BPF_ENABLED) endif() include_directories(${PROJECT_TOP_DIR}/include) add_compile_options(-g) diff --git a/test/test_perf/test_api.cpp b/test/test_perf/test_api.cpp index e816642..aec0db5 100644 --- a/test/test_perf/test_api.cpp +++ b/test/test_perf/test_api.cpp @@ -767,3 +767,33 @@ TEST_F(TestAPI, InvalidUserAccessAttr) pd = PmuOpen(COUNTING, &attr); ASSERT_EQ(pd, -1); } + +TEST_F(TestAPI, InvalidBpfAttr) +{ + PmuAttr attr = {0}; + char *evtList[1]; + evtList[0] = (char *)"cycles"; + attr.evtList = evtList; + attr.numEvt = 1; + int pidList[1] = {0}; + attr.pidList = pidList; + attr.numPid = 1; +#ifdef BPF_ENABLED + attr.enableBpf = 1; + pd = PmuOpen(COUNTING, &attr); + ASSERT_NE(pd, -1); + pd = PmuOpen(SAMPLING, &attr); + ASSERT_EQ(pd, -1); + EvtAttr groupId[1] = {1}; + attr.evtAttr = groupId; + attr.numGroup = 1; + pd = PmuOpen(COUNTING, &attr); + ASSERT_EQ(pd, -1); +#else + pd = PmuOpen(COUNTING, &attr); + ASSERT_NE(pd, -1); + attr.enableBpf = 1; + pd = PmuOpen(COUNTING, &attr); + ASSERT_EQ(pd, -1); +#endif +} -- Gitee