diff --git a/example/case/falsesharing_demo.c b/example/case/falsesharing_demo.c
new file mode 100644
index 0000000000000000000000000000000000000000..969d320fed142f6627bc8345c028207e8a965fd9
--- /dev/null
+++ b/example/case/falsesharing_demo.c
@@ -0,0 +1,86 @@
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE /* for sched_setaffinity, sched_getaffinity and the CPU_* macros */
+#endif
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+
+#define EXE_TIME 9999999900LL
+#define NUM_THREADS 2
+
+/*
+ * Each worker updates its own element; the array is aligned so that arr[0] and
+ * arr[1] sit in the same cache line, which is the false sharing this demo provokes.
+ * unsigned: the counters wrap around on purpose (EXE_TIME exceeds INT_MAX).
+ * volatile: keeps every load and store in the loop when built with optimization.
+ */
+volatile unsigned int arr[32] __attribute__((aligned(64)));
+
+/* Pin the calling thread to cpu_num and report when the pin took effect. */
+static void pin_to_cpu(const char *name, int cpu_num)
+{
+    cpu_set_t mask;
+    cpu_set_t get;
+
+    CPU_ZERO(&mask);
+    CPU_SET(cpu_num, &mask);
+    /* perror appends ": <reason>\n" itself, so the prefix carries no newline. */
+    if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
+        perror("warning: could not set CPU affinity");
+    }
+    CPU_ZERO(&get);
+    if (sched_getaffinity(0, sizeof(get), &get) == -1) {
+        perror("warning: could not get CPU affinity");
+    }
+
+    if (CPU_ISSET(cpu_num, &get)) {
+        printf("%s is running on cpu_id: %d\n", name, cpu_num);
+    }
+}
+
+/* Read-modify-write arr[slot] EXE_TIME times. */
+static void hammer(int slot)
+{
+    /* 64-bit counter: EXE_TIME does not fit in an int. */
+    for (long long i = 0; i < EXE_TIME; i++) {
+        arr[slot] = arr[slot] + 1;
+    }
+}
+
+/* Thread entry: pinned to CPU 0, keeps writing arr[0]. */
+void *sum_a(void *arg)
+{
+    (void)arg;
+    pin_to_cpu("sum_a", 0);
+    hammer(0);
+    return NULL;
+}
+
+/* Thread entry: pinned to CPU 1, keeps writing arr[1]. */
+void *inc_b(void *arg)
+{
+    (void)arg;
+    pin_to_cpu("inc_b", 1);
+    hammer(1);
+    return NULL;
+}
+
+int main(void)
+{
+    void *(*const workers[NUM_THREADS])(void *) = { sum_a, inc_b };
+    pthread_t tids[NUM_THREADS];
+
+    /* Every thread gets its own pthread_t slot so that both can be joined. */
+    for (int i = 0; i < NUM_THREADS; i++) {
+        int ret = pthread_create(&tids[i], NULL, workers[i], NULL);
+        if (ret != 0) {
+            printf("pthread_create error: error code %d\n", ret);
+            return -1;
+        }
+    }
+
+    for (int i = 0; i < NUM_THREADS; i++) {
+        pthread_join(tids[i], NULL);
+    }
+    return 0;
+}
diff --git a/example/pmu_datasrc.cpp b/example/pmu_datasrc.cpp new file mode 
100644
index 0000000000000000000000000000000000000000..96c910116c7109661848b339c5f7ae53d1443bd4
--- /dev/null
+++ b/example/pmu_datasrc.cpp
@@ -0,0 +1,269 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved.
+ * libkperf licensed under the Mulan PSL v2.
+ * You can use this software according to the terms and conditions of the Mulan PSL v2.
+ * You may obtain a copy of Mulan PSL v2 at:
+ * http://license.coscl.org.cn/MulanPSL2
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR
+ * PURPOSE.
+ * See the Mulan PSL v2 for more details.
+ * Author: Mr.Li
+ * Create: 2025-10-21
+ * Description: data source analysis for SPE sampling.
+ ******************************************************************************/
+/**
+g++ -g pmu_datasrc.cpp -I ../output/include/ -L ../output/lib/ -lkperf -lsym -O3 -o pmu_datasrc
+cd case
+gcc -o falsesharing_demo falsesharing_demo.c -lpthread
+cd ..
+./pmu_datasrc -d 2 case/falsesharing_demo
+*/
+#include <getopt.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "symbol.h"
+#include "pmu.h"
+#include "pcerrc.h"
+
+// Printable name for each HIP_DATA_SOURCE value carried in PmuDataExt::source.
+static const std::map<int, std::string> HIP_STR_MAP = {
+    {HIP_PEER_CPU, "HIP_PEER_CPU"},
+    {HIP_PEER_CPU_HITM, "HIP_PEER_CPU_HITM"},
+    {HIP_L3, "HIP_L3"},
+    {HIP_L3_HITM, "HIP_L3_HITM"},
+    {HIP_PEER_CLUSTER, "HIP_PEER_CLUSTER"},
+    {HIP_PEER_CLUSTER_HITM, "HIP_PEER_CLUSTER_HITM"},
+    {HIP_REMOTE_SOCKET, "HIP_REMOTE_SOCKET"},
+    {HIP_REMOTE_SOCKET_HITM, "HIP_REMOTE_SOCKET_HITM"},
+    {HIP_LOCAL_MEM, "HIP_LOCAL_MEM"},
+    {HIP_REMOTE_MEM, "HIP_REMOTE_MEM"},
+    {HIP_NC_DEV, "HIP_NC_DEV"},
+    {HIP_L2, "HIP_L2"},
+    {HIP_L2_HITM, "HIP_L2_HITM"},
+    {HIP_L1, "HIP_L1"},
+};
+
+static const char* const USAGE =
+    "usage pmu_datasrc -d 2 -p 10001 or pmu_datasrc -d 2 /home/test/falsesharing_demo";
+
+// The leading '+' makes getopt stop at the first non-option argument, so options
+// that belong to the launched command are not consumed by this tool.
+const char* SHORT_OPS = "+p:d:h";
+const struct option LONG_OPS[] =
+{
+    {"pid", required_argument, nullptr, 'p'},
+    {"duration", required_argument, nullptr, 'd'},
+    {"help", no_argument, nullptr, 'h'},
+    {nullptr, 0, nullptr, 0},
+};
+
+/**
+ * Fork a child process and exec the command held in comms.
+ * @param comms command line to run; comms[0] is the program name.
+ * @return pid of the child in the parent process, -1 if comms is empty or fork fails.
+ */
+int ExecCommand(std::vector<std::string>& comms)
+{
+    if (comms.empty()) {
+        return -1;
+    }
+    pid_t pid = fork();
+    if (pid == -1) {
+        perror("fork failed!");
+        return -1;
+    }
+    if (pid == 0) {
+        // The strings stay owned by comms; argv only borrows pointers for execvp.
+        std::vector<char*> argv;
+        argv.reserve(comms.size() + 1);
+        for (auto& arg : comms) {
+            argv.push_back(&arg[0]);
+        }
+        argv.push_back(nullptr);
+        execvp(argv[0], argv.data());
+        perror("exec commands failed!");
+        // _exit skips atexit handlers and stdio flushing inherited from the parent.
+        _exit(EXIT_FAILURE);
+    }
+    return pid;
+}
+
+/**
+ * Parse the command line and, when no pid was given, launch the trailing command.
+ * @param pid set from -p/--pid, or to the pid of the launched command.
+ * @param duration set from -d/--duration (seconds, must be positive).
+ * @param isLaunch set to true when this function started the target process.
+ * @return 0 on success, -1 on bad arguments, help request or launch failure.
+ */
+int ParseArgv(int argc, char** argv, int& pid, int& duration, bool& isLaunch)
+{
+    int longIndex = 0;
+    int ret;
+    while ((ret = getopt_long(argc, argv, SHORT_OPS, LONG_OPS, &longIndex)) != -1) {
+        switch (ret) {
+            case 'p':
+                try {
+                    pid = std::stoi(optarg);
+                } catch (...) {
+                    std::cout << "pid is number, can't be: " << optarg << std::endl;
+                    return -1;
+                }
+                break;
+            case 'd':
+                try {
+                    duration = std::stoi(optarg);
+                } catch (...) {
+                    std::cout << "duration is number, can't be: " << optarg << std::endl;
+                    return -1;
+                }
+                if (duration <= 0) {
+                    std::cout << "duration must be positive, can't be: " << optarg << std::endl;
+                    return -1;
+                }
+                break;
+            case 'h':
+                std::cout << USAGE << std::endl;
+                return -1;
+            default:
+                return -1;
+        }
+    }
+
+    // optind is the index of the first argument getopt_long did not consume,
+    // i.e. the start of the command to launch.
+    if (pid == -1 && optind < argc) {
+        std::vector<std::string> comms(argv + optind, argv + argc);
+        pid = ExecCommand(comms);
+        if (pid == -1) {
+            return -1;
+        }
+        isLaunch = true;
+    }
+    return 0;
+}
+
+/**
+ * Format a symbol as "<addr> <name>+0x<offset> <file>:<line>" (addr and offset in hex).
+ */
+std::string ParseSymbol(Symbol* sym)
+{
+    // Streaming a null char* is undefined behaviour, so print a placeholder instead.
+    const char* name = sym->symbolName ? sym->symbolName : "??";
+    const char* file = sym->fileName ? sym->fileName : "??";
+    std::stringstream ss;
+    ss << std::hex << sym->addr << " " << name << "+0x" << sym->offset << " " << std::dec << file << ":" << sym->lineNum;
+    return ss.str();
+}
+
+typedef std::pair<std::string, int> SYMBOL_NUM_PAIR;
+
+// Orders symbols by descending sample count.
+bool SortBySymValue(const SYMBOL_NUM_PAIR& t1, const SYMBOL_NUM_PAIR& t2)
+{
+    return t1.second > t2.second;
+}
+
+// Kill and reap the target process, but only if this tool launched it.
+static void StopLaunched(int pid, bool isLaunch)
+{
+    if (!isLaunch) {
+        return;
+    }
+    kill(pid, SIGKILL);
+    waitpid(pid, nullptr, 0);
+}
+
+int main(int argc, char** argv)
+{
+    int pid = -1;
+    int duration = 10;
+    bool isLaunch = false;
+
+    if (ParseArgv(argc, argv, pid, duration, isLaunch) == -1) {
+        return -1;
+    }
+
+    if (pid == -1) {
+        std::cout << USAGE << std::endl;
+        return -1;
+    }
+
+    PmuAttr attr = {};
+    int pidList[1] = {pid};
+    attr.pidList = pidList;
+    attr.numPid = 1;
+    attr.period = 1024;
+    attr.dataFilter = SPE_DATA_ALL;
+    attr.evFilter = SPE_EVENT_RETIRED;
+    attr.symbolMode = SymbolMode::RESOLVE_ELF_DWARF;
+
+    int pd = PmuOpen(SPE_SAMPLING, &attr);
+    if (pd == -1) {
+        std::cout << "kperf pmu open spe failed, err is: " << Perror() << std::endl;
+        StopLaunched(pid, isLaunch);
+        return -1;
+    }
+    PmuEnable(pd);
+    sleep(duration);
+    PmuDisable(pd);
+
+    PmuData* data = nullptr;
+    int len = PmuRead(pd, &data);
+    if (len < 0) {
+        std::cout << "kperf pmu read failed, err is: " << Perror() << std::endl;
+        PmuClose(pd);
+        StopLaunched(pid, isLaunch);
+        return -1;
+    }
+
+    std::map<int, int> sourceList;
+    std::map<int, std::map<std::string, int>> sourceSymList;
+    for (int i = 0; i < len; i++) {
+        const PmuData& o = data[i];
+        if (o.ext == nullptr) {
+            continue;
+        }
+        const int source = o.ext->source;
+        if (HIP_STR_MAP.find(source) == HIP_STR_MAP.end()) {
+            continue;
+        }
+        if (o.stack != nullptr && o.stack->symbol != nullptr) {
+            sourceSymList[source][ParseSymbol(o.stack->symbol)] += 1;
+        }
+        sourceList[source] += 1;
+    }
+
+    for (const auto& item : sourceList) {
+        std::cout << HIP_STR_MAP.at(item.first) << " " << item.second << std::endl;
+        const auto symIt = sourceSymList.find(item.first);
+        if (symIt == sourceSymList.end()) {
+            continue;
+        }
+        std::vector<SYMBOL_NUM_PAIR> sortVec(symIt->second.begin(), symIt->second.end());
+        std::sort(sortVec.begin(), sortVec.end(), SortBySymValue);
+        for (const auto& symItem : sortVec) {
+            std::cout << " " << "|——" << symItem.first << " [" << symItem.second << "]" << std::endl;
+        }
+    }
+
+    if (data != nullptr) {
+        PmuDataFree(data);
+    }
+    PmuClose(pd);
+    StopLaunched(pid, isLaunch);
+    return 0;
+}
diff --git a/go/src/libkperf/kperf/kperf.go b/go/src/libkperf/kperf/kperf.go index 04de53a9e4afafcf68132ad75b210c8a5615153f..c1e7e7c27da89fc74ac4251b4db96c0e286f0613 100644 --- a/go/src/libkperf/kperf/kperf.go +++ b/go/src/libkperf/kperf/kperf.go @@ -31,6 +31,7 @@ struct SpeDataExt { unsigned long va; // virtual address unsigned long event; // event id, which is a bit map of mixed events, event bit is defined in SPE_EVENTS. unsigned short lat; // latency, Number of cycles between the time when an operation is dispatched and the time when the operation is executed. + unsigned short source; // data source, used to record the source of data accessed by a load operation. 
}; struct MetricDataExt { @@ -105,6 +106,7 @@ void IPmuGetSpeDataExt(struct PmuData* data, struct SpeDataExt* speExt) { speExt->va = data->ext->va; speExt->event = data->ext->event; speExt->lat = data->ext->lat; + speExt->source = data->ext->source; } } @@ -221,6 +223,24 @@ var ( SPE_EVENT_MISPREDICTED C.enum_SpeEventFilter = C.SPE_EVENT_MISPREDICTED ) +// spe data source hit, used for determining data source type +var ( + HIP_PEER_CPU uint16 = 0 + HIP_PEER_CPU_HITM uint16 = 1 + HIP_L3 uint16 = 2 + HIP_L3_HITM uint16 = 3 + HIP_PEER_CLUSTER uint16 = 4 + HIP_PEER_CLUSTER_HITM uint16 = 5 + HIP_REMOTE_SOCKET uint16 = 6 + HIP_REMOTE_SOCKET_HITM uint16 = 7 + HIP_LOCAL_MEM uint16 = 8 + HIP_REMOTE_MEM uint16 = 9 + HIP_NC_DEV uint16 = 13 + HIP_L2 uint16 = 16 + HIP_L2_HITM uint16 = 17 + HIP_L1 uint16 = 18 +) + // branch sample type, for pmuAttr.BranchSampleFilter var ( /** @@ -363,6 +383,7 @@ type SpeDataExt struct { Va uint64 // virtual address Event uint64 // event id, which is a bit map of mixed events, events bits Lat uint16 // latency, Number of cycles between the time when an operation is dispatched and the time when the operation is executed + Source uint16 // data source, used to record the source of data accessed by a load operation. 
} type BranchSampleRecord struct { @@ -1295,7 +1316,7 @@ func transferCPmuDataToGoData(cPmuData *C.struct_PmuData, dataLen int, fd int) [ func (data *PmuData) appendSpeExt(pmuData C.struct_PmuData) { speDataExt := C.struct_SpeDataExt{} C.IPmuGetSpeDataExt(&pmuData, &speDataExt) - data.SpeExt = SpeDataExt{Pa:uint64(speDataExt.pa), Va: uint64(speDataExt.va), Event: uint64(speDataExt.event), Lat: uint16(speDataExt.lat)} + data.SpeExt = SpeDataExt{Pa:uint64(speDataExt.pa), Va: uint64(speDataExt.va), Event: uint64(speDataExt.event), Lat: uint16(speDataExt.lat), Source: uint16(speDataExt.source)} } func (data *PmuData) appendSymbols(pmuData C.struct_PmuData) { diff --git a/go/src/libkperf_test/libkperf_test.go b/go/src/libkperf_test/libkperf_test.go index c8c284ceb7f2e547ecfb6c3bfc94975f9562ccf1..9b4d63947c1caf08346cc3d575bfe3bb6e2f1104 100644 --- a/go/src/libkperf_test/libkperf_test.go +++ b/go/src/libkperf_test/libkperf_test.go @@ -109,7 +109,7 @@ func TestSpe(t *testing.T) { for _, o := range dataVo.GoData { t.Logf("================================Get Spe data success================================") t.Logf("spe base info comm=%v, evt=%v, pid=%v, tid=%v, coreId=%v, numaId=%v, sockedId=%v", o.Comm, o.Evt, o.Pid, o.Tid, o.CpuTopo.CoreId, o.CpuTopo.NumaId, o.CpuTopo.SocketId) - t.Logf("spe ext info pa=%v, va=%v, event=%v, latency=%v", o.SpeExt.Pa, o.SpeExt.Va, o.SpeExt.Event, o.SpeExt.Lat) + t.Logf("spe ext info pa=%v, va=%v, event=%v, latency=%v, source=%v", o.SpeExt.Pa, o.SpeExt.Va, o.SpeExt.Event, o.SpeExt.Lat, o.SpeExt.Source) for _, s := range o.Symbols { t.Logf("symbol info module=%v, symbolName=%v, mangleName=%v, addr=%#x, lineNum=%v fileName=%v", s.Module, s.SymbolName, s.MangleName, s.Addr, s.LineNum, s.FileName) } diff --git a/include/pmu.h b/include/pmu.h index 4db13cdae6e0b0bb89ea6ace35c29dbf711237ee..c8115c9358a0cca402ac2d0240fbaa6de3ffcfe8 100644 --- a/include/pmu.h +++ b/include/pmu.h @@ -218,6 +218,23 @@ enum SPE_EVENTS { SPE_EV_EMPTY_PRED = 1 << 18, }; 
+enum HIP_DATA_SOURCE { + HIP_PEER_CPU = 0, + HIP_PEER_CPU_HITM = 1, + HIP_L3 = 2, + HIP_L3_HITM = 3, + HIP_PEER_CLUSTER = 4, + HIP_PEER_CLUSTER_HITM = 5, + HIP_REMOTE_SOCKET = 6, + HIP_REMOTE_SOCKET_HITM = 7, + HIP_LOCAL_MEM = 8, + HIP_REMOTE_MEM = 9, + HIP_NC_DEV = 13, + HIP_L2 = 16, + HIP_L2_HITM = 17, + HIP_L1 = 18, +}; + struct BranchSampleRecord { unsigned long fromAddr; unsigned long toAddr; @@ -233,6 +250,7 @@ struct PmuDataExt { unsigned long va; // virtual address unsigned long event; // event id, which is a bit map of mixed events, event bit is defined in SPE_EVENTS. unsigned short lat; // latency, Number of cycles between the time when an operation is dispatched and the time when the operation is executed. + unsigned short source; // data source, used to record the source of data accessed by a load operation. }; struct { diff --git a/pmu/decoder/arm_spe_decoder.cpp b/pmu/decoder/arm_spe_decoder.cpp index 4313a67dedbac573409fdf47b8c964ed8ba848d6..41a6bb41f0f6e71e0bf03cf5f3f89184cd23df21 100644 --- a/pmu/decoder/arm_spe_decoder.cpp +++ b/pmu/decoder/arm_spe_decoder.cpp @@ -74,6 +74,7 @@ static uint8_t* Get0B01Pkt(uint16_t header, struct SpePacket *pkt, uint8_t *buf) pkt->type = SpePacketType::SPE_PACKET_DATA_SOURCE; buf += sizeof(uint8_t); pkt->payloadSize = 1 << ((header & 0b110000) >> 4); + SetPktPayload(pkt, buf); buf += pkt->payloadSize; } else if ((header >> 2) == 0b011001) { pkt->type = SpePacketType::SPE_PACKET_CONTEXT; @@ -216,6 +217,11 @@ static void DecodeTimestampPkt(struct SpePacket *pkt, struct SpeRecord *record) record->timestamp = pkt->payload; } +static void DecodeDataSrcPkt(struct SpePacket *pkt, struct SpeRecord *record) +{ + record->source = pkt->payload; +} + static void DecodePkt(struct SpePacket *pkt, struct SpeRecord *record) { switch (pkt->type) { @@ -229,6 +235,7 @@ static void DecodePkt(struct SpePacket *pkt, struct SpeRecord *record) DecodeCounterPkt(pkt, record); break; case SpePacketType::SPE_PACKET_DATA_SOURCE: + 
DecodeDataSrcPkt(pkt, record); break; case SpePacketType::SPE_PACKET_END: break; @@ -253,6 +260,7 @@ SpeRecord *SpeGetRecord(uint8_t *buf, uint8_t *end, struct SpeRecord *rec, int * rec->pid = -1; rec->tid = -1; + rec->source = -1; while (buf < end) { if (*remainSize < 1) { break; @@ -265,6 +273,7 @@ SpeRecord *SpeGetRecord(uint8_t *buf, uint8_t *end, struct SpeRecord *rec, int * *remainSize -= 1; rec->pid = -1; rec->tid = -1; + rec->source = -1; } } diff --git a/pmu/spe.h b/pmu/spe.h index d71f63bb5549dd0e1eb0c07ef5e7336621ab84f3..dd23d224b1f7e1a027d476500ee0de8497693dec 100644 --- a/pmu/spe.h +++ b/pmu/spe.h @@ -87,6 +87,7 @@ struct SpeRecord { uint64_t timestamp; uint64_t pc; uint16_t lat; + uint16_t source; }; struct PerfEventSample { diff --git a/pmu/spe_sampler.cpp b/pmu/spe_sampler.cpp index 253839e1bafb24622fcb6a0be5414fa8f8bffab9..1e2f139df3956838842d3db48bc5aecb0a05980c 100644 --- a/pmu/spe_sampler.cpp +++ b/pmu/spe_sampler.cpp @@ -121,6 +121,7 @@ namespace KUNPENG_PMU { current.ext->va = rec->va; current.ext->pa = rec->pa; current.ext->lat = rec->lat; + current.ext->source = rec->source; current.ts = static_cast(rec->timestamp); current.comm = procTopo ? procTopo->comm : nullptr; // Assign pc, which will be parsed to Symbol in PmuRead. 
diff --git a/python/modules/_libkperf/Pmu.py b/python/modules/_libkperf/Pmu.py index cc42c50cc9790daf3f9a6af3d69d0e6c2e3f6e05..b1cc94fcf7f61abc2883538bba35220fe2710c52 100644 --- a/python/modules/_libkperf/Pmu.py +++ b/python/modules/_libkperf/Pmu.py @@ -1088,18 +1088,21 @@ class CytpesSpeDataExt(ctypes.Structure): ('va', ctypes.c_ulong), ('event', ctypes.c_ulong), ('lat', ctypes.c_ushort), + ('source', ctypes.c_ushort), ] def __init__(self, pa=0, va=0, event=0, lat=0, + source=0, *args, **kw): super(CytpesSpeDataExt, self).__init__(*args, **kw) self.pa = ctypes.c_ulong(pa) self.va = ctypes.c_ulong(va) self.event = ctypes.c_ulong(event) self.lat = ctypes.c_ushort(lat) + self.source = ctypes.c_ushort(source) class PmuDataExtUnion(ctypes.Union): _fields_ = [ @@ -1143,6 +1146,10 @@ class PmuDataExt: def lat(self): return self.c_pmu_data_ext.ext.speDataExt.lat + @property + def source(self): + return self.c_pmu_data_ext.ext.speDataExt.source + @property def branchRecords(self): if self.__c_pmu_data_ext.ext.branchRecords.branchRecords: diff --git a/python/modules/kperf/pmu.py b/python/modules/kperf/pmu.py index dfc1d4485eb2fb51aabbe63a73e2008a61c207d9..4ee579e99d44d341d322607e0a22dd0c99bcec75 100644 --- a/python/modules/kperf/pmu.py +++ b/python/modules/kperf/pmu.py @@ -72,6 +72,21 @@ class SpeEvent: SPE_EV_PARTIAL_PRED = 1 << 17 SPE_EV_EMPTY_PRED = 1 << 18 +class HitDataSource: + HIP_PEER_CPU = 0 + HIP_PEER_CPU_HITM = 1 + HIP_L3 = 2 + HIP_L3_HITM = 3 + HIP_PEER_CLUSTER = 4 + HIP_PEER_CLUSTER_HITM = 5 + HIP_REMOTE_SOCKET = 6 + HIP_REMOTE_SOCKET_HITM = 7 + HIP_LOCAL_MEM = 8 + HIP_REMOTE_MEM = 9 + HIP_NC_DEV = 13 + HIP_L2 = 16 + HIP_L2_HITM = 17 + HIP_L1 = 18 class BranchSampleFilter: KPERF_NO_BRANCH_SAMPLE = 0 @@ -609,6 +624,7 @@ __all__ = [ 'SpeFilter', 'SpeEventFilter', 'SpeEvent', + 'HitDataSource', 'SymbolMode', 'PmuAttr', 'PmuDeviceMetric',