From acadd780433f5ce9603f65b39d39b9ce830bea59 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Fri, 10 Nov 2023 11:21:19 +0800 Subject: [PATCH 1/2] anolis: sched: fix percpu account for CPUTIME_*IDLE ANBZ: #6201 Commit 0a1658bedfa7 ("sched/core: add forced idle accounting for cgroups") added the percpu status CPUTIME_FORCEIDLE, but task_group_account_field() adds the time delta to the current cpu instead of the forceidle cpu. As a result, the total sum is correct, but the percpu values are wrong. Fix it by adding the delta to the right cpu. Signed-off-by: Tianchen Ding --- kernel/sched/cputime.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index dd50c054cf54..b8cd46ef54c3 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -233,12 +233,16 @@ void account_idle_time(u64 cputime) */ void __account_sibidle_time(struct task_struct *p, u64 delta, bool fi) { + unsigned int cpu = task_cpu(p); + __schedstat_add(p->se.statistics.core_sibidle_sum, delta); - task_group_account_field(p, CPUTIME_SIBIDLE, delta); + kcpustat_cpu(cpu).cpustat[CPUTIME_SIBIDLE] += delta; + cgroup_account_cputime_field(p, CPUTIME_SIBIDLE, delta); #ifdef CONFIG_SCHED_CORE if (fi) { __schedstat_add(p->se.statistics.core_forceidle_sum, delta); - task_group_account_field(p, CPUTIME_FORCEIDLE, delta); + kcpustat_cpu(cpu).cpustat[CPUTIME_FORCEIDLE] += delta; + cgroup_account_cputime_field(p, CPUTIME_FORCEIDLE, delta); } #endif } -- Gitee From 1813864efb12019d5d265672dc85248144a9a348 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Thu, 9 Nov 2023 14:30:03 +0800 Subject: [PATCH 2/2] anolis: sched: add forceidled time for task and cgroup ANBZ: #6201 Account forceidled time for tasks in procfs (previously there was only forceidle time but no forceidled time). Account forceidled time for cgroups in cpu.sched_cfs_statistics (cgroup v2) or cpuacct.sched_cfs_statistics (cgroup v1): the forceidled time is taken out of "Queue other time" and reported as 
the 6th value "Force idled time". Signed-off-by: Tianchen Ding --- include/linux/sched.h | 4 ++-- kernel/sched/core.c | 12 ++++++++---- kernel/sched/cpuacct.c | 12 ++++++++---- kernel/sched/debug.c | 2 ++ kernel/sched/fair.c | 21 +++++++++++++++------ kernel/sched/sched.h | 20 ++++++++++++++++++++ 6 files changed, 55 insertions(+), 16 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index cf7602442fcc..a1209d7b6d55 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -504,8 +504,8 @@ struct sched_statistics { u64 core_sibidle_sum; #endif - CK_KABI_RESERVE(1) - CK_KABI_RESERVE(2) + CK_KABI_USE(1, unsigned long forceidled_sum) + CK_KABI_USE(2, unsigned long forceidled_sum_base) CK_KABI_RESERVE(3) CK_KABI_RESERVE(4) CK_KABI_RESERVE(5) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5bc3d04a3292..29fdd5c0f8a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10097,7 +10097,7 @@ static int cpu_sched_cfs_show(struct seq_file *sf, void *v) struct task_group *tg = css_tg(seq_css(sf)); struct sched_entity *se; int cpu; - u64 wait_max = 0, wait_sum = 0, wait_sum_other = 0, exec_sum = 0; + u64 wait_max = 0, wait_sum = 0, wait_sum_other = 0, wait_sum_fi = 0, exec_sum = 0; if (!schedstat_enabled()) goto out_show; @@ -10110,16 +10110,20 @@ static int cpu_sched_cfs_show(struct seq_file *sf, void *v) exec_sum += schedstat_val(se->sum_exec_runtime); wait_sum_other += schedstat_val(se->statistics.parent_wait_contrib); + wait_sum_fi += schedstat_val(se->statistics.forceidled_sum); wait_sum += schedstat_val(se->statistics.wait_sum); wait_max = max(wait_max, schedstat_val(se->statistics.wait_max)); } rcu_read_unlock(); out_show: - /* [Serve time] [On CPU time] [Queue other time] [Queue sibling time] [Queue max time] */ - seq_printf(sf, "%lld %lld %lld %lld %lld\n", + /* + * [Serve time] [On CPU time] [Queue other time] + * [Queue sibling time] [Queue max time] [Force idled time] + */ + seq_printf(sf, "%lld %lld %lld %lld 
%lld %lld\n", exec_sum + wait_sum, exec_sum, wait_sum_other, - wait_sum - wait_sum_other, wait_max); + wait_sum - wait_sum_other - wait_sum_fi, wait_max, wait_sum_fi); return 0; } diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 81c2f9452d23..5e7f17a1796a 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -1013,7 +1013,7 @@ static int cpuacct_sched_cfs_show(struct seq_file *sf, void *v) struct task_group *tg = cgroup_tg(cgrp); struct sched_entity *se; int cpu; - u64 wait_max = 0, wait_sum = 0, wait_sum_other = 0, exec_sum = 0; + u64 wait_max = 0, wait_sum = 0, wait_sum_other = 0, wait_sum_fi = 0, exec_sum = 0; u64 expel_sum = 0, steal_high = 0; if (!schedstat_enabled()) @@ -1035,6 +1035,7 @@ static int cpuacct_sched_cfs_show(struct seq_file *sf, void *v) exec_sum += schedstat_val(se->sum_exec_runtime); wait_sum_other += schedstat_val(se->statistics.parent_wait_contrib); + wait_sum_fi += schedstat_val(se->statistics.forceidled_sum); wait_sum += schedstat_val(se->statistics.wait_sum); wait_max = max(wait_max, schedstat_val(se->statistics.wait_max)); @@ -1044,10 +1045,13 @@ static int cpuacct_sched_cfs_show(struct seq_file *sf, void *v) rcu_unlock_show: rcu_read_unlock(); out_show: - /* [Serve time] [On CPU time] [Queue other time] [Queue sibling time] [Queue max time] */ - seq_printf(sf, "%lld %lld %lld %lld %lld\n", + /* + * [Serve time] [On CPU time] [Queue other time] + * [Queue sibling time] [Queue max time] [Force idled time] + */ + seq_printf(sf, "%lld %lld %lld %lld %lld %lld\n", exec_sum + wait_sum, exec_sum, wait_sum_other, - wait_sum - wait_sum_other, wait_max); + wait_sum - wait_sum_other - wait_sum_fi, wait_max, wait_sum_fi); seq_printf(sf, "%lld %lld %lld %lld\n", steal_high, 0llu, expel_sum, 0llu); diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 2a245bcaf7dc..c0eadb324a67 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -451,6 +451,7 @@ static void print_cfs_group_stats(struct seq_file *m, 
int cpu, struct task_group PN_SCHEDSTAT(se->statistics.wait_max); PN_SCHEDSTAT(se->statistics.wait_sum); PN_SCHEDSTAT(se->statistics.parent_wait_contrib); + PN_SCHEDSTAT(se->statistics.forceidled_sum); P_SCHEDSTAT(se->statistics.wait_count); } @@ -988,6 +989,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN_SCHEDSTAT(se.statistics.wait_max); PN_SCHEDSTAT(se.statistics.wait_sum); PN_SCHEDSTAT(se.statistics.parent_wait_contrib); + PN_SCHEDSTAT(se.statistics.forceidled_sum); P_SCHEDSTAT(se.statistics.wait_count); PN_SCHEDSTAT(se.statistics.iowait_sum); P_SCHEDSTAT(se.statistics.iowait_count); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9dd91723104f..03012e0ed6fd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2611,9 +2611,10 @@ static void update_curr_fair(struct rq *rq) static inline void update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u64 wait_start, prev_wait_start; - u64 parent_wait_sum, delta, clock = rq_clock(rq_of(cfs_rq)); + u64 wait_start, prev_wait_start, parent_wait_sum, forceidled_sum, delta; struct sched_entity *pse = parent_entity(se); + struct rq *rq = rq_of(cfs_rq); + u64 clock = rq_clock(rq); if (!schedstat_enabled()) return; @@ -2627,6 +2628,9 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) __schedstat_set(se->statistics.wait_start, wait_start); + forceidled_sum = get_forceidled_sum(rq); + __schedstat_set(se->statistics.forceidled_sum_base, forceidled_sum); + if (!pse) return; @@ -2641,10 +2645,11 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) static inline void update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) { - struct task_struct *p; + u64 parent_wait_sum, forceidled_sum, forceidled_delta, delta; struct sched_entity *pse = parent_entity(se); - u64 parent_wait_sum, clock = rq_clock(rq_of(cfs_rq)); - u64 delta; + struct rq *rq = rq_of(cfs_rq); + u64 clock = rq_clock(rq); + struct 
task_struct *p; if (!schedstat_enabled()) return; @@ -2672,6 +2677,10 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) __schedstat_add(se->statistics.wait_sum, delta); __schedstat_set(se->statistics.wait_start, 0); + forceidled_sum = get_forceidled_sum(rq); + forceidled_delta = forceidled_sum - schedstat_val(se->statistics.forceidled_sum_base); + __schedstat_add(se->statistics.forceidled_sum, forceidled_delta); + if (!pse) return; @@ -2681,7 +2690,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) else delta = 0; parent_wait_sum = schedstat_val(pse->statistics.wait_sum) + delta; - delta = parent_wait_sum - + delta = parent_wait_sum - forceidled_delta - schedstat_val(se->statistics.parent_wait_sum_base); __schedstat_add(se->statistics.parent_wait_contrib, delta); } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b2edbc4bd93e..b95e0e0f14b6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1483,6 +1483,21 @@ static inline bool sched_core_enqueued(struct task_struct *p) return !RB_EMPTY_NODE(&p->core_node); } +static inline u64 get_forceidled_sum(struct rq *rq) +{ + const int cpu = cpu_of(rq); + const struct cpumask *smt_mask = cpu_smt_mask(cpu); + int i; + + /* We assume smt == 2 here. */ + for_each_cpu(i, smt_mask) { + if (i != cpu) + return kcpustat_cpu(i).cpustat[CPUTIME_FORCEIDLE]; + } + + return 0; +} + extern void sched_core_enqueue(struct rq *rq, struct task_struct *p); extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags); @@ -1528,6 +1543,11 @@ static inline bool sched_group_cookie_match(struct rq *rq, { return true; } + +static inline u64 get_forceidled_sum(struct rq *rq) +{ + return 0; +} #endif /* CONFIG_SCHED_CORE */ static inline void lockdep_assert_rq_held(struct rq *rq) -- Gitee