diff --git a/include/pmu.h b/include/pmu.h index 45508c3c216980b2313bad6dcf1cd6d1f0a6a16f..afe85bb8362d09b40f784089fcaabdc00739029f 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -63,6 +63,15 @@ enum SpeEventFilter { SPE_EVENT_MISPREDICTED = 0x80, // mispredict }; +enum SymbolMode { + // Do not resolve symbols. The stack field in PmuData will be set to NULL. + NO_SYMBOL_RESOLVE = 0, + // Resolve elf only. Fields except lineNum and fileName in Symbol will be valid. + RESOLVE_ELF = 1, + // Resolve elf and dwarf. All fields in Symbol will be valid. + RESOLVE_ELF_DWARF = 2 +}; + struct PmuAttr { char** evtList; // event list unsigned numEvt; // length of event list @@ -78,6 +87,7 @@ struct PmuAttr { unsigned useFreq : 1; unsigned excludeUser : 1; // don't count user unsigned excludeKernel : 1; // don't count kernel + enum SymbolMode symbolMode; // refer to comments of SymbolMode // SPE related fields. enum SpeFilter dataFilter; // spe data filter diff --git a/pmu/evt.h b/pmu/evt.h index 65411366de8c5c9765ed566be73a415dba6ed6eb..a9e259888d3aa4822acf3a5179d4821fa4a00f04 100644 --- a/pmu/evt.h +++ b/pmu/evt.h @@ -48,6 +48,11 @@ public: virtual int MapPerfAttr() = 0; + void SetSymbolMode(const SymbolMode &symMode) + { + this->symMode = symMode; + } + int GetFd() const { return fd; @@ -60,6 +65,7 @@ protected: pid_t pid; struct PmuEvt* evt; ProcMap &procMap; + SymbolMode symMode = NO_SYMBOL_RESOLVE; }; int PerfEventOpen(struct perf_event_attr* attr, pid_t pid, int cpu, int groupFd, unsigned long flags); __u64 ReadOnce(__u64 *head); diff --git a/pmu/evt_list.cpp b/pmu/evt_list.cpp index c7ee07f45f1dc7439c34d7b6eed4cd3129908add..e3885be51fad894d0438a3d540fadf20cabe359a 100644 --- a/pmu/evt_list.cpp +++ b/pmu/evt_list.cpp @@ -95,6 +95,7 @@ int KUNPENG_PMU::EvtList::Init() if (perfEvt == nullptr) { continue; } + perfEvt->SetSymbolMode(symMode); auto err = perfEvt->Init(); if (err != SUCCESS) { return err; diff --git a/pmu/evt_list.h b/pmu/evt_list.h index 
ec1173558b613776e4a476d66372fc38f0adb1f5..47ef4b2e28602e9b2aaca7b38350bd8e3d3c8dd8 100644 --- a/pmu/evt_list.h +++ b/pmu/evt_list.h @@ -32,8 +32,8 @@ class EvtList { public: using ProcPtr = std::shared_ptr; using CpuPtr = std::shared_ptr; - EvtList(std::vector &cpuList, std::vector &pidList, std::shared_ptr pmuEvt) - : cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt) + EvtList(const SymbolMode &symbolMode, std::vector &cpuList, std::vector &pidList, std::shared_ptr pmuEvt) + : symMode(symbolMode), cpuList(cpuList), pidList(pidList), pmuEvt(pmuEvt) { this->numCpu = this->cpuList.size(); this->numPid = this->pidList.size(); @@ -80,6 +80,7 @@ private: std::set fdList; int64_t ts = 0; std::unordered_map procMap; + SymbolMode symMode = NO_SYMBOL_RESOLVE; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/pmu.cpp b/pmu/pmu.cpp index 44d85d7c4379f94429f60a3c994166380dd49fb6..e2b0a461c17aa3bbc9abfd914293f3607e7f133d 100644 --- a/pmu/pmu.cpp +++ b/pmu/pmu.cpp @@ -247,6 +247,7 @@ int PmuOpen(enum PmuTaskType collectType, struct PmuAttr *attr) return -1; } + KUNPENG_PMU::PmuList::GetInstance()->SetSymbolMode(pd, attr->symbolMode); err = KUNPENG_PMU::PmuList::GetInstance()->Register(pd, taskAttr.get()); if (err != SUCCESS) { PmuList::GetInstance()->Close(pd); diff --git a/pmu/pmu_list.cpp b/pmu/pmu_list.cpp index 2db23f939cb10dcf7c324321404b060f50aa49c1..0d1c0a71163160ca59d75354be1270b595290353 100644 --- a/pmu/pmu_list.cpp +++ b/pmu/pmu_list.cpp @@ -42,7 +42,7 @@ namespace KUNPENG_PMU { int PmuList::Register(const int pd, PmuTaskAttr* taskParam) { - if (taskParam->pmuEvt->collectType != COUNTING) { + if (GetSymbolMode(pd) != NO_SYMBOL_RESOLVE && taskParam->pmuEvt->collectType != COUNTING) { SymResolverInit(); SymResolverRecordKernel(); } @@ -77,7 +77,7 @@ namespace KUNPENG_PMU { } fdNum += cpuTopoList.size() + procTopoList.size(); std::shared_ptr evtList = - std::make_shared(cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); + std::make_shared(GetSymbolMode(pd), 
cpuTopoList, procTopoList, pmuTaskAttrHead->pmuEvt); InsertEvtList(pd, evtList); pmuTaskAttrHead = pmuTaskAttrHead->next; } @@ -156,6 +156,8 @@ namespace KUNPENG_PMU { // Read data from prev sampling, // and store data in . auto &evtData = GetDataList(pd); + evtData.pd = pd; + evtData.collectType = static_cast(GetTaskType(pd)); auto ts = GetCurrentTime(); auto eventList = GetEvtList(pd); for (auto item : eventList) { @@ -349,14 +351,20 @@ namespace KUNPENG_PMU { void PmuList::FillStackInfo(EventData &eventData) { + auto symMode = symModeList[eventData.pd]; + if (symMode == NO_SYMBOL_RESOLVE) { + return; + } // Parse dwarf and elf info of each pid and get stack trace for each pmu data. for (size_t i = 0; i < eventData.data.size(); ++i) { auto &pmuData = eventData.data[i]; auto &ipsData = eventData.sampleIps[i]; - if (eventData.collectType == SPE_SAMPLING) { + if (symMode == RESOLVE_ELF) { SymResolverRecordModuleNoDwarf(pmuData.pid); - } else { + } else if (symMode == RESOLVE_ELF_DWARF) { SymResolverRecordModule(pmuData.pid); + } else { + continue; } if (pmuData.stack == nullptr) { pmuData.stack = StackToHash(pmuData.pid, ipsData.ips.data(), ipsData.ips.size()); @@ -626,4 +634,15 @@ namespace KUNPENG_PMU { return SUCCESS; } + void PmuList::SetSymbolMode(const int pd, const SymbolMode &mode) + { + lock_guard lg(dataListMtx); + symModeList[pd] = mode; + } + + SymbolMode PmuList::GetSymbolMode(const unsigned pd) + { + lock_guard lg(dataListMtx); + return symModeList[pd]; + } } \ No newline at end of file diff --git a/pmu/pmu_list.h b/pmu/pmu_list.h index 90f588b6c8129c65199991030c6439aa9df2f1eb..6dae608d4d2b02659eefc28a10e468aaa5404746 100644 --- a/pmu/pmu_list.h +++ b/pmu/pmu_list.h @@ -62,6 +62,7 @@ public: bool IsPdAlive(const int pd) const; void FreeData(PmuData* pmuData); int GetTaskType(const int pd) const; + void SetSymbolMode(const int pd, const SymbolMode &mode); int NewPd(); @@ -113,6 +114,7 @@ private: static void AggregateData(const std::vector& evData, 
std::vector& newEvData); void AggregateUncoreData(const unsigned pd, const std::vector &evData, std::vector &newEvData); std::vector& GetPreviousData(const unsigned pd); + SymbolMode GetSymbolMode(const unsigned pd); static std::mutex pmuListMtx; static std::mutex dataListMtx; @@ -146,6 +148,8 @@ private: // Value: spe sampling cpu list. std::unordered_map> speCpuList; unsigned maxPd = 0; + + std::unordered_map symModeList; }; } // namespace KUNPENG_PMU #endif diff --git a/pmu/sampler.cpp b/pmu/sampler.cpp index bbaa9914380955b7cc15a2eb9ac52cfc8267d92c..f8ad789527632933ebb858d17755ef461c447ce4 100644 --- a/pmu/sampler.cpp +++ b/pmu/sampler.cpp @@ -129,9 +129,11 @@ void KUNPENG_PMU::PerfSampler::RawSampleProcess( return; } KUNPENG_PMU::PerfRawSample *sample = (KUNPENG_PMU::PerfRawSample *)event->sample.array; - // Copy ips from ring buffer and get stack info later. - for (__u64 i = 0; i < sample->nr; ++i) { - ips->ips.push_back(sample->ips[i]); + if (symMode != NO_SYMBOL_RESOLVE) { + // Copy ips from ring buffer and get stack info later. 
+ for (__u64 i = 0; i < sample->nr; ++i) { + ips->ips.push_back(sample->ips[i]); + } } current->cpu = static_cast(sample->cpu); current->pid = static_cast(sample->pid); diff --git a/test/test_perf/case/CMakeLists.txt b/test/test_perf/case/CMakeLists.txt index bce7eda280c8ef8dfdb224edd75fafddabca3b29..dcba66b92777f75e85753194dcc3057aa5e7f024 100644 --- a/test/test_perf/case/CMakeLists.txt +++ b/test/test_perf/case/CMakeLists.txt @@ -8,5 +8,9 @@ foreach(source_file IN LISTS source_files) get_filename_component(target_name ${source_file} NAME_WE) add_executable("${target_name}" "${source_file}") + if (${target_name} STREQUAL "vectorized_loop") + target_compile_options(${target_name} PRIVATE -g -O3) + endif() + target_link_libraries("${target_name}" pthread numa) endforeach() diff --git a/test/test_perf/case/vectorized_loop.cpp b/test/test_perf/case/vectorized_loop.cpp new file mode 100644 index 0000000000000000000000000000000000000000..076d6b1259ff14c92d0904cb68e3468f675517f2 --- /dev/null +++ b/test/test_perf/case/vectorized_loop.cpp @@ -0,0 +1,12 @@ +int main() +{ + int len = 1000000000; + int *a = new int[len]; + int *b = new int[len]; + int sum = 0; + for (int i = 0; i < len; ++i) { + sum += a[i] + b[i]; + } + + return sum; +} \ No newline at end of file diff --git a/test/test_perf/test_api.cpp b/test/test_perf/test_api.cpp index 6c63e89de1de7ae695aedbca6791d317e56f23e4..84fe470563830fa842cca3cefc37663461694099 100644 --- a/test/test_perf/test_api.cpp +++ b/test/test_perf/test_api.cpp @@ -120,6 +120,7 @@ protected: attr.numCpu = numCpu; attr.freq = 1000; attr.useFreq = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; return attr; } @@ -134,6 +135,7 @@ protected: attr.dataFilter = SPE_DATA_ALL; attr.evFilter = SPE_EVENT_RETIRED; attr.minLatency = 0x40; + attr.symbolMode = RESOLVE_ELF_DWARF; return attr; } @@ -186,6 +188,55 @@ TEST_F(TestAPI, SampleReadSuccess) ASSERT_TRUE(HasExpectSource(data, len)); } +TEST_F(TestAPI, SampleNoSymbol) +{ + auto attr = GetPmuAttribute(); + 
attr.symbolMode = NO_SYMBOL_RESOLVE; + pd = PmuOpen(SAMPLING, &attr); + int ret = PmuCollect(pd, 1000, collectInterval); + int len = PmuRead(pd, &data); + EXPECT_TRUE(data != nullptr); + + for (int i = 0; i< len ;++i) { + ASSERT_EQ(data[i].stack, nullptr); + } +} + +TEST_F(TestAPI, SampleOnlyElf) +{ + auto attr = GetPmuAttribute(); + attr.symbolMode = RESOLVE_ELF; + pd = PmuOpen(SAMPLING, &attr); + int ret = PmuCollect(pd, 1000, collectInterval); + int len = PmuRead(pd, &data); + EXPECT_TRUE(data != nullptr); + + for (int i = 0; i< len ;++i) { + ASSERT_NE(data[i].stack, nullptr); + auto stack = data[i].stack; + while (stack) { + if (stack->symbol) { + ASSERT_EQ(stack->symbol->lineNum, 0); + } + stack = stack->next; + } + } +} + +TEST_F(TestAPI, SpeNoSymbol) +{ + auto attr = GetSpeAttribute(); + attr.symbolMode = NO_SYMBOL_RESOLVE; + pd = PmuOpen(SPE_SAMPLING, &attr); + int ret = PmuCollect(pd, 1000, collectInterval); + int len = PmuRead(pd, &data); + EXPECT_TRUE(data != nullptr); + + for (int i = 0; i< len ;++i) { + ASSERT_EQ(data[i].stack, nullptr); + } +} + TEST_F(TestAPI, SpeInitSuccess) { auto attr = GetSpeAttribute(); diff --git a/test/test_perf/test_count.cpp b/test/test_perf/test_count.cpp index 06231d36a7b88efdd84a8199b79bf14006f15147..1fda6973fbcf8ee12d92a2d7bd6ddcac86ad1d9c 100644 --- a/test/test_perf/test_count.cpp +++ b/test/test_perf/test_count.cpp @@ -164,6 +164,40 @@ TEST_F(TestCount, NumaFluxWr) ASSERT_GE(cntSum, (1024 * 256 * 4 * 64) / 32); } +TEST_F(TestCount, AggregateUncoreEvents) +{ + // Test aggregate of uncore events. 
+ + char* aggreUncore[1] = {"hisi_sccl1_ddrc/flux_rd/"}; + char* uncoreList[4] = {"hisi_sccl1_ddrc0/flux_rd/", "hisi_sccl1_ddrc1/flux_rd/", "hisi_sccl1_ddrc2/flux_rd/", "hisi_sccl1_ddrc3/flux_rd/"}; + PmuAttr attr = {0}; + attr.evtList = aggreUncore; + attr.numEvt = 1; + int pd1 = PmuOpen(COUNTING, &attr); + attr.evtList = uncoreList; + attr.numEvt = 4; + int pd2 = PmuOpen(COUNTING, &attr); + PmuEnable(pd1); + PmuEnable(pd2); + sleep(2); + PmuDisable(pd1); + PmuDisable(pd2); + + PmuData *data1 = nullptr; + int len1 = PmuRead(pd1, &data1); + ASSERT_EQ(len1, 1); + PmuData *data2 = nullptr; + int len2 = PmuRead(pd2, &data2); + ASSERT_EQ(len2, 4); + + uint64_t aggreCnt = data1[0].count; + unsigned long uncoreSum = 0; + for (int i = 0; i < len2; ++i) { + uncoreSum += data2[i].count; + } + ASSERT_NEAR(aggreCnt, uncoreSum, uncoreSum * 0.5); +} + TEST_F(TestCount, PwritevFile) { // Test data of tracepoint syscalls:sys_enter_pwritev. @@ -189,6 +223,17 @@ TEST_F(TestCount, PwritevFile) ASSERT_GT(data->count, 0); } +TEST_F(TestCount, RawEventCycles) +{ + // Test whether raw event is the same as named event. + string cycles = "cycles"; + string cyclesRaw = "r11"; + vector evts = {cycles, cyclesRaw}; + auto evtMap = CollectProcessEvent("simple", evts); + ASSERT_EQ(evtMap.size(), evts.size()); + ASSERT_NEAR(evtMap[cycles], evtMap[cyclesRaw], evtMap[cyclesRaw] * relativeErr); +} + TEST_F(TestCount, BranchMissRatio) { // Check branch miss ratio of two cases. @@ -244,9 +289,22 @@ TEST_F(TestCount, LLCacheMissRatio) auto evtMap = CollectProcessEvent("cross_socket_access", evts); ASSERT_EQ(evtMap.size(), evts.size()); auto missRatio1 = (double)evtMap[cacheMiss]/evtMap[cache]; - ASSERT_GT(missRatio1, 0.2); + ASSERT_GT(missRatio1, 0.1); evtMap = CollectProcessEvent("in_node_access", evts); auto missRatio2 = (double)evtMap[cacheMiss]/evtMap[cache]; ASSERT_LT(missRatio2, 0.01); ASSERT_GT(missRatio1, missRatio2); } + +TEST_F(TestCount, SimdRatio) +{ + // Test ASE_SPEC and INST_SPEC. 
+ // Run a case with vectorized loop which has many simd instructions. + string aseSpec = "r74"; + string instSpec = "r1b"; + vector evts = {aseSpec, instSpec}; + auto evtMap = CollectProcessEvent("vectorized_loop", evts); + ASSERT_EQ(evtMap.size(), evts.size()); + auto simdRatio = (double)evtMap[aseSpec]/evtMap[instSpec]; + ASSERT_GT(simdRatio, 0.1); +} \ No newline at end of file diff --git a/test/test_perf/test_pmu.cpp b/test/test_perf/test_pmu.cpp index 7538f5f25158bf5bd7fd6efd8264ad03f45f9c7a..7fa272e4e5307bc89b17692d775dd7ab7375f351 100644 --- a/test/test_perf/test_pmu.cpp +++ b/test/test_perf/test_pmu.cpp @@ -43,6 +43,7 @@ protected: attr.numCpu = 0; attr.freq = 1000; attr.useFreq = 1; + attr.symbolMode = RESOLVE_ELF_DWARF; return attr; } diff --git a/test/test_perf/test_spe.cpp b/test/test_perf/test_spe.cpp index 40fb40db2d363c7742ce0ef7a6b58edb4e0e7f79..fb72258a65f5eff0da9bbd792046703a9a6cb73f 100644 --- a/test/test_perf/test_spe.cpp +++ b/test/test_perf/test_spe.cpp @@ -44,6 +44,7 @@ protected: attr.dataFilter = SPE_DATA_ALL; attr.evFilter = SPE_EVENT_RETIRED; attr.minLatency = 0x40; + attr.symbolMode = RESOLVE_ELF_DWARF; return attr; }