diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index d702ed7d2268f725c5d111da5c500760cd847eba..aa02523f2426fef2506460da1565e10b620a5042 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -328,6 +328,14 @@ struct cgroup_base_stat {
 #endif
 };
 
+struct cgroup_base_stat_task {
+#ifdef CONFIG_SCHED_CORE
+	unsigned long forceidle_task_sum;
+#endif
+#if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
+	unsigned long sibidle_task_sum;
+#endif
+};
 /*
  * rstat - cgroup scalable recursive statistics. Accounting is done
  * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the
@@ -375,6 +383,8 @@ struct cgroup_rstat_cpu {
 	 */
 	struct cgroup *updated_children;	/* terminated by self cgroup */
 	struct cgroup *updated_next;		/* NULL iff not on the list */
+	CK_KABI_EXTEND(struct cgroup_base_stat_task bstat_task)
+	CK_KABI_EXTEND(struct cgroup_base_stat_task last_bstat_task)
 };
 
 struct cgroup_freezer_state {
@@ -534,10 +544,8 @@ struct cgroup {
 	struct kernfs_root *hidden_place;	/* tree to hide cgroup in pool. */
 	struct delayed_work supply_pool_work;
 
-	CK_KABI_RESERVE(1)
-	CK_KABI_RESERVE(2)
-	CK_KABI_RESERVE(3)
-	CK_KABI_RESERVE(4)
+	CK_KABI_USE(1, 2, struct cgroup_base_stat_task last_bstat_task)
+	CK_KABI_USE(3, 4, struct cgroup_base_stat_task bstat_task)
 
 	/* ids of the ancestors at each level including self */
 	u64 ancestor_ids[];
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 3b3a4467e006a6af9869d2fcbfd20b284b14fb53..0d4ba013e80f70352aadf9569bd9471ac77d1edf 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -30,9 +30,11 @@ enum cpu_usage_stat {
 	CPUTIME_GUEST_NICE,
 #ifdef CONFIG_SCHED_CORE
 	CPUTIME_FORCEIDLE,
+	CPUTIME_FORCEIDLE_TASK,
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 	CPUTIME_SIBIDLE,
+	CPUTIME_SIBIDLE_TASK,
 #endif
 	NR_STATS,
 };
@@ -123,7 +125,7 @@ extern void account_process_tick(struct task_struct *, int user);
 extern void account_idle_ticks(unsigned long ticks);
 
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
-extern void __account_sibidle_time(struct task_struct *tsk, u64 delta, bool fi);
+extern void __account_sibidle_time(struct task_struct *tsk, u64 delta, u64 delta_task, bool fi);
 #endif
 
 #endif /* _LINUX_KERNEL_STAT_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b9e025b5c9d0f175da87759c034d4faadddc9ab..643affda5cfb7652d4439032561787387726e9dd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -506,8 +506,16 @@ struct sched_statistics {
 
 	CK_KABI_USE(1, unsigned long forceidled_sum)
 	CK_KABI_USE(2, unsigned long forceidled_sum_base)
+#ifdef CONFIG_SCHED_CORE
+	CK_KABI_USE(3, unsigned long core_forceidle_task_sum)
+#else
 	CK_KABI_RESERVE(3)
+#endif
+#if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
+	CK_KABI_USE(4, unsigned long core_sibidle_task_sum)
+#else
 	CK_KABI_RESERVE(4)
+#endif
 	CK_KABI_RESERVE(5)
 	CK_KABI_RESERVE(6)
 	CK_KABI_RESERVE(7)
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 9419a590a6fce161ede7e35f595238a224d09705..a08b3d8aa087f3ce7ef49242e657d7b35d43ce3c 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -305,6 +305,17 @@ static void cgroup_base_stat_add(struct cgroup_base_stat *dst_bstat,
 #endif
 }
 
+static void cgroup_base_stat_task_add(struct cgroup_base_stat_task *dst_bstat_task,
+				      struct cgroup_base_stat_task *src_bstat_task)
+{
+#ifdef CONFIG_SCHED_CORE
+	dst_bstat_task->forceidle_task_sum += src_bstat_task->forceidle_task_sum;
+#endif
+#if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
+	dst_bstat_task->sibidle_task_sum += src_bstat_task->sibidle_task_sum;
+#endif
+}
+
 static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
 				 struct cgroup_base_stat *src_bstat)
 {
@@ -319,17 +330,30 @@ static void cgroup_base_stat_sub(struct cgroup_base_stat *dst_bstat,
 #endif
 }
 
+static void cgroup_base_stat_task_sub(struct cgroup_base_stat_task *dst_bstat_task,
+				      struct cgroup_base_stat_task *src_bstat_task)
+{
+#ifdef CONFIG_SCHED_CORE
+	dst_bstat_task->forceidle_task_sum -= src_bstat_task->forceidle_task_sum;
+#endif
+#if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
+	dst_bstat_task->sibidle_task_sum -= src_bstat_task->sibidle_task_sum;
+#endif
+}
+
 static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
 {
 	struct cgroup *parent = cgroup_parent(cgrp);
 	struct cgroup_rstat_cpu *rstatc = cgroup_rstat_cpu(cgrp, cpu);
 	struct cgroup_base_stat delta;
+	struct cgroup_base_stat_task delta_task;
 	unsigned seq;
 
 	/* fetch the current per-cpu values */
 	do {
 		seq = __u64_stats_fetch_begin(&rstatc->bsync);
 		delta = rstatc->bstat;
+		delta_task = rstatc->bstat_task;
 	} while (__u64_stats_fetch_retry(&rstatc->bsync, seq));
 
 	/* propagate percpu delta to global */
@@ -337,12 +361,21 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu)
 	cgroup_base_stat_add(&cgrp->bstat, &delta);
 	cgroup_base_stat_add(&rstatc->last_bstat, &delta);
 
+	cgroup_base_stat_task_sub(&delta_task, &rstatc->last_bstat_task);
+	cgroup_base_stat_task_add(&cgrp->bstat_task, &delta_task);
+	cgroup_base_stat_task_add(&rstatc->last_bstat_task, &delta_task);
+
 	/* propagate global delta to parent */
 	if (parent) {
 		delta = cgrp->bstat;
 		cgroup_base_stat_sub(&delta, &cgrp->last_bstat);
 		cgroup_base_stat_add(&parent->bstat, &delta);
 		cgroup_base_stat_add(&cgrp->last_bstat, &delta);
+
+		delta_task = cgrp->bstat_task;
+		cgroup_base_stat_task_sub(&delta_task, &cgrp->last_bstat_task);
+		cgroup_base_stat_task_add(&parent->bstat_task, &delta_task);
+		cgroup_base_stat_task_add(&cgrp->last_bstat_task, &delta_task);
 	}
 }
 
@@ -394,11 +427,17 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
 	case CPUTIME_FORCEIDLE:
 		rstatc->bstat.forceidle_sum += delta_exec;
 		break;
+	case CPUTIME_FORCEIDLE_TASK:
+		rstatc->bstat_task.forceidle_task_sum += delta_exec;
+		break;
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 	case CPUTIME_SIBIDLE:
 		rstatc->bstat.sibidle_sum += delta_exec;
 		break;
+	case CPUTIME_SIBIDLE_TASK:
+		rstatc->bstat_task.sibidle_task_sum += delta_exec;
+		break;
 #endif
 	default:
 		break;
@@ -413,12 +452,14 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp,
  * with how it is done by __cgroup_account_cputime_field for each bit of
  * cpu time attributed to a cgroup.
  */
-static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
+static void root_cgroup_cputime(struct cgroup_base_stat *bstat,
+				struct cgroup_base_stat_task *bstat_task)
 {
 	struct task_cputime *cputime = &bstat->cputime;
 	int i;
 
 	memset(bstat, 0, sizeof(*bstat));
+	memset(bstat_task, 0, sizeof(*bstat_task));
 	for_each_possible_cpu(i) {
 		struct kernel_cpustat kcpustat;
 		u64 *cpustat = kcpustat.cpustat;
@@ -442,9 +483,11 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat)
 
 #ifdef CONFIG_SCHED_CORE
 		bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE];
+		bstat_task->forceidle_task_sum += cpustat[CPUTIME_FORCEIDLE_TASK];
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 		bstat->sibidle_sum += cpustat[CPUTIME_SIBIDLE];
+		bstat_task->sibidle_task_sum += cpustat[CPUTIME_SIBIDLE_TASK];
 #endif
 	}
 }
@@ -454,11 +497,14 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 	u64 usage, utime, stime;
 	struct cgroup_base_stat bstat;
+	struct cgroup_base_stat_task bstat_task;
 #ifdef CONFIG_SCHED_CORE
 	u64 forceidle_time;
+	u64 forceidle_task_time;
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 	u64 sibidle_time;
+	u64 sibidle_task_time;
 #endif
 
 	if (cgroup_parent(cgrp)) {
@@ -468,21 +514,25 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
 				 &utime, &stime);
 #ifdef CONFIG_SCHED_CORE
 		forceidle_time = cgrp->bstat.forceidle_sum;
+		forceidle_task_time = cgrp->bstat_task.forceidle_task_sum;
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 		sibidle_time = cgrp->bstat.sibidle_sum;
+		sibidle_task_time = cgrp->bstat_task.sibidle_task_sum;
 #endif
 		cgroup_rstat_flush_release();
 	} else {
-		root_cgroup_cputime(&bstat);
+		root_cgroup_cputime(&bstat, &bstat_task);
 		usage = bstat.cputime.sum_exec_runtime;
 		utime = bstat.cputime.utime;
 		stime = bstat.cputime.stime;
 #ifdef CONFIG_SCHED_CORE
 		forceidle_time = bstat.forceidle_sum;
+		forceidle_task_time = bstat_task.forceidle_task_sum;
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 		sibidle_time = bstat.sibidle_sum;
+		sibidle_task_time = bstat_task.sibidle_task_sum;
 #endif
 	}
 
@@ -491,9 +541,11 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
 	do_div(stime, NSEC_PER_USEC);
 #ifdef CONFIG_SCHED_CORE
 	do_div(forceidle_time, NSEC_PER_USEC);
+	do_div(forceidle_task_time, NSEC_PER_USEC);
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 	do_div(sibidle_time, NSEC_PER_USEC);
+	do_div(sibidle_task_time, NSEC_PER_USEC);
 #endif
 
 	seq_printf(seq, "usage_usec %llu\n"
@@ -503,8 +555,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
 
 #ifdef CONFIG_SCHED_CORE
 	seq_printf(seq, "core_sched.force_idle_usec %llu\n", forceidle_time);
+	seq_printf(seq, "core_sched.force_idle_task_usec %llu\n", forceidle_task_time);
 #endif
 #if defined(CONFIG_SCHED_ACPU) || defined(CONFIG_SCHED_CORE)
 	seq_printf(seq, "sibidle_usec %llu\n", sibidle_time);
+	seq_printf(seq, "sibidle_task_usec %llu\n", sibidle_task_time);
 #endif
 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1549c9ffcf301f413dd449c1e72e5bba540cb76..c8218558bbb1050e5e6f03311cc4c6e70897205b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -309,6 +309,7 @@ static void __sched_core_flip(bool enabled)
 			cpu_rq(t)->core_enabled = enabled;
 
 		cpu_rq(cpu)->core->core_sibidle_start = 0;
+		cpu_rq(cpu)->core->core_sibidle_start_task = 0;
 
 		sched_core_unlock(cpu, &flags);
 	}
@@ -3968,8 +3969,9 @@ static void update_acpu(struct rq *rq, struct task_struct *prev, struct task_str
 	const int cpu = cpu_of(rq);
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 	u64 now = rq_clock(rq);
-	u64 sibidle_sum, last_update_time;
-	s64 delta, last;
+	u64 now_task = rq_clock_task(rq);
+	u64 sibidle_sum, sibidle_task_sum, last_update_time, last_update_time_task;
+	s64 delta, delta_task, last, last_task;
 	int i;
 
 	if (!static_branch_likely(&acpu_enabled) || !schedstat_enabled())
@@ -4005,29 +4007,44 @@ static void update_acpu(struct rq *rq, struct task_struct *prev, struct task_str
 					   rq_i->last_acpu_update_time);
 			last_update_time = last >= 0 ? rq->last_acpu_update_time :
 					   rq_i->last_acpu_update_time;
+			last_task = (s64)(rq->last_acpu_update_time_task -
+					  rq_i->last_acpu_update_time_task);
+			last_update_time_task = last_task >= 0 ?
+						rq->last_acpu_update_time_task :
+						rq_i->last_acpu_update_time_task;
 			/*
 			 * Sibling may update acpu at the same time, and it's
 			 * timestamp may be newer than this rq.
 			 */
 			delta = now - last_update_time;
 			delta = delta > 0 ? delta : 0;
+			delta_task = now_task - last_update_time_task;
+			delta_task = delta_task > 0 ? delta_task : 0;
 			/* Add the delta to improve accuracy. */
 			sibidle_sum = last >= 0 ? rq->sibidle_sum :
 				      rq_i->acpu_idle_sum;
-			if (curr_i == rq_i->idle)
+			sibidle_task_sum = last_task >= 0 ? rq->sibidle_task_sum :
+					   rq_i->acpu_idle_sum;
+			if (curr_i == rq_i->idle) {
 				sibidle_sum += delta;
+				sibidle_task_sum += delta_task;
+			}
 		}
 	}
 
 	if (prev != rq->idle) {
 		delta = sibidle_sum - rq->sibidle_sum;
 		delta = delta > 0 ? delta : 0;
-		__account_sibidle_time(prev, delta, false);
+		delta_task = sibidle_task_sum - rq->sibidle_task_sum;
+		delta_task = delta_task > 0 ? delta_task : 0;
+		__account_sibidle_time(prev, delta, delta_task, false);
 	}
 	rq->sibidle_sum = sibidle_sum;
+	rq->sibidle_task_sum = sibidle_task_sum;
 out:
 	rq->last_acpu_update_time = now;
+	rq->last_acpu_update_time_task = now_task;
 }
 #else
 static inline void update_acpu(struct rq *rq, struct task_struct *prev, struct task_struct *next)
@@ -5054,6 +5071,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			sched_core_account_sibidle(rq);
 			/* reset after accounting force idle */
 			rq->core->core_sibidle_start = 0;
+			rq->core->core_sibidle_start_task = 0;
 			rq->core->core_sibidle_count = 0;
 			rq->core->core_sibidle_occupation = 0;
 			if (rq->core->core_forceidle_count) {
@@ -5149,6 +5167,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 	if (schedstat_enabled() && rq->core->core_sibidle_count) {
 		rq->core->core_sibidle_start = rq_clock(rq->core);
+		rq->core->core_sibidle_start_task = rq_clock_task(rq->core);
 		rq->core->core_sibidle_occupation = occ;
 	}
@@ -5427,6 +5446,7 @@ static void sched_core_cpu_deactivate(unsigned int cpu)
 	 * have a cookie.
 	 */
 	core_rq->core_sibidle_start = 0;
+	core_rq->core_sibidle_start_task = 0;
 
 	/* install new leader */
 	for_each_cpu(t, smt_mask) {
@@ -8335,6 +8355,7 @@ void __init sched_init(void)
 		rq->core_sibidle_count = 0;
 		rq->core_sibidle_occupation = 0;
 		rq->core_sibidle_start = 0;
+		rq->core_sibidle_start_task = 0;
 		rq->core_id = i;
 
 		rq->cfs.core = &rq->cfs;
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index 11a04a80f57f0efd96e20a1edc22053072824836..33127983aca43cd51132edb2b2b77871d005e007 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -327,6 +327,7 @@ void __sched_core_account_sibidle(struct rq *rq)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
 	u64 delta, now = rq_clock(rq->core);
+	u64 delta_task, now_task = rq_clock_task(rq->core);
 	struct rq *rq_i;
 	struct task_struct *p;
 	int i;
@@ -344,10 +345,12 @@ void __sched_core_account_sibidle(struct rq *rq)
 		goto out;
 
 	delta = now - rq->core->core_sibidle_start;
+	delta_task = now_task - rq->core->core_sibidle_start_task;
 	if (unlikely((s64)delta <= 0))
 		goto out;
 
 	rq->core->core_sibidle_start = now;
+	rq->core->core_sibidle_start_task = now_task;
 
 	if (rq->core->core_sibidle_count > 1 ||
 	    rq->core->core_sibidle_occupation > 1) {
@@ -358,6 +361,8 @@ void __sched_core_account_sibidle(struct rq *rq)
 		 */
 		delta *= rq->core->core_sibidle_count;
 		delta = div_u64(delta, rq->core->core_sibidle_occupation);
+		delta_task *= rq->core->core_sibidle_count;
+		delta_task = div_u64(delta_task, rq->core->core_sibidle_occupation);
 	}
 
 	for_each_cpu(i, smt_mask) {
@@ -371,8 +376,9 @@ void __sched_core_account_sibidle(struct rq *rq)
 		 * Note: this will account sibidle to the current cpu, even
 		 * if it comes from our SMT sibling.
 		 */
-		__account_sibidle_time(p, delta, !!rq->core->core_forceidle_count);
-		account_ht_aware_quota(p, delta);
+		__account_sibidle_time(p, delta, delta_task,
+				       !!rq->core->core_forceidle_count);
+		account_ht_aware_quota(p, delta_task);
 	}
 
 out:;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b8cd46ef54c38f4e062973a6588de5d52e53d22e..6c272d8e29d2a1f6aad79d27c417e5ad1c2447b1 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -231,18 +231,24 @@ void account_idle_time(u64 cputime)
  *
  * REQUIRES: schedstat is enabled.
  */
-void __account_sibidle_time(struct task_struct *p, u64 delta, bool fi)
+void __account_sibidle_time(struct task_struct *p, u64 delta, u64 delta_task, bool fi)
 {
 	unsigned int cpu = task_cpu(p);
 
 	__schedstat_add(p->se.statistics.core_sibidle_sum, delta);
+	__schedstat_add(p->se.statistics.core_sibidle_task_sum, delta_task);
 	kcpustat_cpu(cpu).cpustat[CPUTIME_SIBIDLE] += delta;
+	kcpustat_cpu(cpu).cpustat[CPUTIME_SIBIDLE_TASK] += delta_task;
 	cgroup_account_cputime_field(p, CPUTIME_SIBIDLE, delta);
+	cgroup_account_cputime_field(p, CPUTIME_SIBIDLE_TASK, delta_task);
 #ifdef CONFIG_SCHED_CORE
 	if (fi) {
 		__schedstat_add(p->se.statistics.core_forceidle_sum, delta);
+		__schedstat_add(p->se.statistics.core_forceidle_task_sum, delta_task);
 		kcpustat_cpu(cpu).cpustat[CPUTIME_FORCEIDLE] += delta;
+		kcpustat_cpu(cpu).cpustat[CPUTIME_FORCEIDLE_TASK] += delta_task;
 		cgroup_account_cputime_field(p, CPUTIME_FORCEIDLE, delta);
+		cgroup_account_cputime_field(p, CPUTIME_FORCEIDLE_TASK, delta_task);
 	}
 #endif
 }
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c0eadb324a6730fcd97530f877357dd21563fdb5..86625e7471c2a51b97a9d38831350117080ddfba 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1030,9 +1030,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
 #ifdef CONFIG_SCHED_CORE
 		PN_SCHEDSTAT(se.statistics.core_forceidle_sum);
+		PN_SCHEDSTAT(se.statistics.core_forceidle_task_sum);
 #endif
 #ifdef CONFIG_SCHED_ACPU
 		PN_SCHEDSTAT(se.statistics.core_sibidle_sum);
+		PN_SCHEDSTAT(se.statistics.core_sibidle_task_sum);
 #endif
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0bddfefbed63b19eb6ed76cb26406fb86ecc3fe1..e0e09ddf2c55435cc1382ee29925ecd60670ee08 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1347,9 +1347,18 @@ struct rq {
 #else
 	CK_KABI_RESERVE(3)
 #endif
+#ifdef CONFIG_SCHED_CORE
+	CK_KABI_USE(4, u64 core_sibidle_start_task)
+#else
 	CK_KABI_RESERVE(4)
+#endif
+#ifdef CONFIG_SCHED_ACPU
+	CK_KABI_USE(5, u64 sibidle_task_sum)
+	CK_KABI_USE(6, u64 last_acpu_update_time_task)
+#else
 	CK_KABI_RESERVE(5)
 	CK_KABI_RESERVE(6)
+#endif
 	CK_KABI_RESERVE(7)
 	CK_KABI_RESERVE(8)
 };
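
Usage note: per the seq_printf calls in cgroup_base_stat_cputime_show above, the new task-clock counters surface in the cgroup v2 cpu.stat file as core_sched.force_idle_task_usec and sibidle_task_usec, alongside the existing wall-clock core_sched.force_idle_usec and sibidle_usec keys. Below is a minimal userspace sketch that reads them back; it assumes cgroup v2 is mounted at /sys/fs/cgroup and reads the root cgroup's cpu.stat (the path is illustrative, and the keys only appear on kernels built with the corresponding CONFIG_SCHED_CORE/CONFIG_SCHED_ACPU options).

#include <stdio.h>
#include <string.h>

int main(void)
{
	char key[64];
	unsigned long long val;
	/* Assumption: cgroup2 mounted at /sys/fs/cgroup; any cgroup's
	 * cpu.stat works the same way. */
	FILE *f = fopen("/sys/fs/cgroup/cpu.stat", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* cpu.stat is a flat "key value" file; pick out the task-clock
	 * counters added by this patch. */
	while (fscanf(f, "%63s %llu", key, &val) == 2) {
		if (!strcmp(key, "core_sched.force_idle_task_usec") ||
		    !strcmp(key, "sibidle_task_usec"))
			printf("%s = %llu\n", key, val);
	}
	fclose(f);
	return 0;
}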