2 Star 8 Fork 9

OpenCloudOS/perf-prof

加入 Gitee
与超过 1400万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
perfeval.c 7.63 KB
一键复制 编辑 原始数据 按行查看 历史
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <linux/hashtable.h>
#include <linux/thread_map.h>
#include <monitor.h>
/*
* Performance evaluation
*
* Sampling events can have a performance impact. Events are attached to the CPU, which
* will cause the CPU utilization to increase. Events attached to thread context will
* cause thread utilization to increase, and calls to __perf_event_task_sched_in()/_out()
* will also cause more increases.
*
* In addition, event filtering also causes more increases. The more filter logic operations
* there are, the more increases.
*
* As a general trend, the more events are sampled, the more the utilization increases.
* Therefore, the performance evaluation counts the number of events sampled in the
* cpu/thread context. Above a certain limit, sampling is disabled.
*/
// Number of hash bits: each perfeval hash table has 1 << PERFEVAL_HASHBITS (64) buckets.
#define PERFEVAL_HASHBITS (6)
/*
 * One monitored instance (a cpu or a thread), linked into a hash bucket and
 * looked up by its id.
 */
struct perfeval_node {
struct hlist_node node; // linkage into the bucket's hlist
u32 cpu_tid; // cpu or tid;
u64 samples; // incremented by perfeval_sample() on a match, reset to 0 by perfeval_evaluate()
};
/*
 * Insert @obj into @hashtable under @key: store the key in obj->cpu_tid and link
 * the node at the head of the bucket selected by hash_min(key, PERFEVAL_HASHBITS).
 *
 * The body is wrapped in do { } while (0) so that the two statements behave as a
 * single statement (safe as the unbraced body of an if/for/else). @obj and @key
 * are evaluated more than once, so pass expressions without side effects.
 */
#define perfeval_node_add(hashtable, obj, key) \
    do { \
        (obj)->cpu_tid = (key); \
        hlist_add_head(&(obj)->node, &(hashtable)[hash_min((key), PERFEVAL_HASHBITS)]); \
    } while (0)
/*
 * Walk the bucket that @key hashes to and run the statement that follows the macro
 * for each entry whose cpu_tid equals @key; @obj is the iteration cursor.
 *
 * The expansion ends in a bare `if`, so the caller supplies the body. A `break`
 * inside that body leaves the bucket walk. Because of the trailing `if`, do not
 * follow the body with an `else`: it would bind to the macro's `if`.
 */
#define perfeval_node_find(hashtable, obj, key) \
hlist_for_each_entry(obj, &hashtable[hash_min((key), PERFEVAL_HASHBITS)], node) \
if (obj->cpu_tid == (key))
/*
 * Visit every node in @hashtable: the outer loop steps @bkt over all
 * 1 << PERFEVAL_HASHBITS buckets, the inner loop walks each bucket with @obj
 * as the cursor.
 *
 * The outer loop only continues while obj == NULL, so a `break` in the body
 * (which leaves @obj pointing at the current node) stops the whole traversal.
 * NOTE(review): this relies on hlist_for_each_entry() leaving @obj NULL when a
 * bucket is exhausted; that macro is defined outside this file - confirm.
 */
#define perfeval_node_for_each(hashtable, obj, bkt) \
for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < (1 << PERFEVAL_HASHBITS); (bkt)++) \
hlist_for_each_entry(obj, &hashtable[bkt], node)
/*
 * Account one event against the performance-evaluation tables of @dev.
 *
 * Slot 0 of dev->perfeval is keyed by cpu, slot 1 by tid. For every slot that has
 * a hash table, sampled_events is bumped; if the event's cpu/tid is present in the
 * table, that node's sample counter and the slot's matched_events are bumped too,
 * and nr_ins counts nodes that received their first sample since the last reset.
 *
 * @dev:      device whose perfeval state is updated
 * @event:    the event; PERF_RECORD_DEV events carry cpu/tid as struct fields,
 *            any other type is read from sample.array at the slot's mem_pos
 * @instance: unused here
 */
void perfeval_sample(struct prof_dev *dev, union perf_event *event, int instance)
{
    struct performance_evaluation *pe;
    struct perfeval_node *entry;
    u32 key[2] = {0, 0};
    int slot;

    // Fast path: performance evaluation is not enabled for this device.
    if (likely(!dev->perfeval[0].hashmap && !dev->perfeval[1].hashmap))
        return;

    if (event->header.type == PERF_RECORD_DEV) {
        struct perf_record_dev *rec = (struct perf_record_dev *)event;

        key[0] = rec->cpu;
        key[1] = rec->tid;
    } else {
        void *raw = (void *)event->sample.array;

        // Only read the fields whose table is actually in use.
        for (slot = 0; slot < 2; slot++)
            if (dev->perfeval[slot].hashmap)
                key[slot] = *(u32 *)(raw + dev->perfeval[slot].mem_pos);
    }

    // Count events that occurred in the specified cpu/tid context.
    for (slot = 0; slot < 2; slot++) {
        pe = &dev->perfeval[slot];
        if (!pe->hashmap)
            continue;

        pe->sampled_events++;
        perfeval_node_find(pe->hashmap, entry, key[slot]) {
            // First sample on this instance since the counters were last cleared.
            if (entry->samples++ == 0)
                pe->nr_ins++;
            pe->matched_events++;
            break;
        }
    }
}
/*
 * Report the sampling statistics gathered since the previous call and enforce
 * the sampling limit.
 *
 * For each active slot (0 = cpu, 1 = tid) that saw at least one event, the
 * sampled/matched counts are scaled by 1000 / env->interval and printed. If
 * env->sampling_limit is set and the scaled matched count per hit instance
 * exceeds it, prof_dev_close() is called and the per-instance sample counts are
 * printed as well. All counters of the slot are then reset.
 *
 * @dev: device whose perfeval state is evaluated
 */
void perfeval_evaluate(struct prof_dev *dev)
{
    const char *str[2] = {"cpu", "tid"};
    struct perfeval_node *node;
    int i, j;

    if (likely(!(dev->perfeval[0].hashmap || dev->perfeval[1].hashmap)))
        return;

    for (i = 0; i < 2; i ++) {
        struct performance_evaluation *perfeval = &dev->perfeval[i];

        if (perfeval->hashmap && perfeval->sampled_events) {
            long sampled_interval = perfeval->sampled_events * 1000 / dev->env->interval;
            long matched_interval = perfeval->matched_events * 1000 / dev->env->interval;
            bool disable = false;

            // Exceeding the limit, exit perf-prof.
            // nr_ins is 0 when events were sampled but none matched a monitored
            // cpu/tid; skip the per-instance average then to avoid dividing by zero
            // (nothing matched, so the limit cannot have been exceeded).
            if (dev->env->sampling_limit && perfeval->nr_ins > 0 &&
                matched_interval / perfeval->nr_ins > dev->env->sampling_limit) {
                disable = true;
                // NOTE(review): dev is still dereferenced below and on the next
                // loop iteration; confirm prof_dev_close() does not free it here.
                prof_dev_close(dev);
            }

            print_time(stdout);
            printf("%s: perfeval %s(%s): sampled %lu events matched %lu on %d instances%s\n", dev->prof->name,
                    i == 0 ? "cpus" : "pids", i == 0 ? dev->env->perfeval_cpus : dev->env->perfeval_pids,
                    sampled_interval, matched_interval, perfeval->nr_ins,
                    disable ? ", exceeds the limit, exit" : "");

            // Clear per-instance counters; dump them first when the limit was hit.
            perfeval_node_for_each(perfeval->hashmap, node, j) {
                if (node->samples) {
                    if (disable) {
                        print_time(stdout);
                        printf("%s: perfeval: %s %u sampled %lu events\n", dev->prof->name, str[i],
                                node->cpu_tid, node->samples);
                    }
                    node->samples = 0;
                }
            }

            perfeval->nr_ins = 0;
            perfeval->matched_events = 0;
            perfeval->sampled_events = 0;
        }
    }
}
/*
 * Release the cpu and tid hash tables of @dev.
 *
 * Nothing is done when performance evaluation was never configured
 * (env->interval is 0, or neither perfeval_cpus nor perfeval_pids is set),
 * mirroring the early return in perfeval_init().
 *
 * Each table is a single allocation (bucket heads followed by the nodes), so one
 * free() per slot releases everything. The pointers are reset to NULL afterwards:
 * the rest of this file treats a non-NULL hashmap as "evaluation active", so a
 * stale pointer would lead to use-after-free in perfeval_sample()/_evaluate() or
 * to a double free if perfeval_free() runs again (e.g. after a failed init).
 *
 * @dev: device whose perfeval tables are released
 */
void perfeval_free(struct prof_dev *dev)
{
    struct env *env = dev->env;
    int i;

    if (!env->interval ||
        !(env->perfeval_cpus || env->perfeval_pids))
        return;

    for (i = 0; i < 2; i++) {
        free(dev->perfeval[i].hashmap);   // free(NULL) is a no-op
        dev->perfeval[i].hashmap = NULL;
    }
}
/*
 * Build the performance-evaluation hash tables for @dev.
 *
 * Does nothing and returns 0 unless env->interval is non-zero and at least one of
 * env->perfeval_cpus / env->perfeval_pids is set. For each configured list a hash
 * table is allocated in dev->perfeval[0] (cpus) or dev->perfeval[1] (pids) and one
 * node is inserted per listed cpu/thread.
 *
 * Returns 0 on success. On any failure prints "perfeval init failed" to stderr,
 * drops the cpu/thread maps, calls perfeval_free(dev) and returns -1.
 */
int perfeval_init(struct prof_dev *dev)
{
struct env *env = dev->env;
struct prof_dev *source, *tmp;
struct perf_cpu_map *cpus = NULL;
struct perf_thread_map *threads = NULL;
struct performance_evaluation *perfeval;
struct hlist_head *hash;
struct perfeval_node *node;
int idx, cpu, thread;
int tid_pos = dev->pos.tid_pos;
int cpu_pos = dev->pos.cpu_pos;
// Performance evaluation not requested: leave dev->perfeval untouched.
if (!env->interval ||
!(env->perfeval_cpus || env->perfeval_pids))
return 0;
if (env->perfeval_cpus) {
// A negative cpu_pos fails init; presumably it means the sample has no cpu field.
if (cpu_pos < 0)
goto out_free;
// Check the cpu_pos of the forwarding source device.
// NOTE(review): for_each_source_dev_get() is defined elsewhere; the explicit
// prof_dev_put() before leaving the loop suggests it holds a reference on
// `source` for the duration of each iteration - confirm.
for_each_source_dev_get(source, tmp, dev)
if (source->pos.cpu_pos < 0 ||
cpu_pos != sizeof(u32)+sizeof(u32)+sizeof(u64)+sizeof(u64) /* struct perf_record_dev cpu pos */) {
prof_dev_put(source);
goto out_free;
}
perfeval = &dev->perfeval[0];
// Byte offset from event->sample.array at which perfeval_sample() reads the cpu.
perfeval->mem_pos = cpu_pos;
cpus = perf_cpu_map__new(env->perfeval_cpus);
// Single allocation: 1 << PERFEVAL_HASHBITS bucket heads followed by one node per cpu.
// The map is NULL-checked only after perf_cpu_map__nr(cpus) has been called.
perfeval->hashmap = zalloc((sizeof(struct hlist_head) << PERFEVAL_HASHBITS) +
sizeof(*node) * perf_cpu_map__nr(cpus));
if (!cpus || !perfeval->hashmap)
goto out_free;
hash = perfeval->hashmap;
// The node array starts right after the last bucket head.
node = (void *)(hash + (1 << PERFEVAL_HASHBITS));
__hash_init(hash, 1 << PERFEVAL_HASHBITS);
perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
perfeval_node_add(hash, node, cpu);
node ++;
}
}
if (env->perfeval_pids) {
// A negative tid_pos fails init; presumably it means the sample has no tid field.
if (tid_pos < 0)
goto out_free;
// Check the tid_pos of the forwarding source device.
for_each_source_dev_get(source, tmp, dev)
if (source->pos.tid_pos < 0 ||
tid_pos != sizeof(u32) /* struct perf_record_dev tid pos */) {
prof_dev_put(source);
goto out_free;
}
perfeval = &dev->perfeval[1];
// Offset of the tid: one u32 past tid_pos (presumably skipping the pid that precedes it).
perfeval->mem_pos = tid_pos + sizeof(u32); // tid
threads = thread_map__new_str(env->perfeval_pids, NULL, 0, 0);
// Same layout as the cpu table: bucket heads followed by one node per thread.
perfeval->hashmap = zalloc((sizeof(struct hlist_head) << PERFEVAL_HASHBITS) +
sizeof(*node) * perf_thread_map__nr(threads));
if (!threads || !perfeval->hashmap)
goto out_free;
hash = perfeval->hashmap;
node = (void *)(hash + (1 << PERFEVAL_HASHBITS));
__hash_init(hash, 1 << PERFEVAL_HASHBITS);
perf_thread_map__for_each_thread(thread, idx, threads) {
perfeval_node_add(hash, node, thread);
node ++;
}
}
// The maps were only needed to populate the tables; drop them on both paths.
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
return 0;
out_free:
fprintf(stderr, "perfeval init failed\n");
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
// Releases whichever tables were already allocated above.
perfeval_free(dev);
return -1;
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/OpenCloudOS/perf-prof.git
git@gitee.com:OpenCloudOS/perf-prof.git
OpenCloudOS
perf-prof
perf-prof
main

搜索帮助