diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 42bb0fb18c508e719901b671bddc2520fc480209..597c09d37bf8dc2c36f192a1d70775681b3dec9d 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -168,6 +168,7 @@ CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
 CONFIG_QOS_SCHED_PRIO_LB=y
 CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_SCHED_MM_CID=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index fa3673f2d435ea46f2354774c312dca5875aac97..1d82a72a8ecd3f2d4e3f4706888f9b94312d20cf 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -190,6 +190,7 @@ CONFIG_CGROUP_SCHED=y
 CONFIG_QOS_SCHED=y
 CONFIG_QOS_SCHED_PRIO_LB=y
 CONFIG_FAIR_GROUP_SCHED=y
+CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_SCHED_MM_CID=y
diff --git a/include/linux/sched.h b/include/linux/sched.h
index fe8556ff7fb347330d5bc36d6f9cf0a0111dca2e..9fdd08aa96263bd2f78d9bc88d6ab834504f7c6b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -548,6 +548,11 @@ struct sched_statistics {
     u64 nr_wakeups_preferred_cpus;
     u64 nr_wakeups_force_preferred_cpus;
 #endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    u64 nr_qos_smt_send_ipi;
+    u64 nr_qos_smt_expelled;
+#endif
 #endif /* CONFIG_SCHEDSTATS */
 } ____cacheline_aligned;
@@ -2021,9 +2026,16 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
     __get_task_comm(buf, sizeof(buf), tsk); \
 })
 
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+void qos_smt_check_need_resched(void);
+#endif
+
 #ifdef CONFIG_SMP
 static __always_inline void scheduler_ipi(void)
 {
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_smt_check_need_resched();
+#endif
     /*
      * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
      * TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index fbb99a61f714cbebb91ba9280ce44f812ece32de..4bafb70dfafc6a504d4b5eb2326ab20ebd0477a1 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -268,6 +268,61 @@ TRACE_EVENT(sched_switch,
     __entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+/*
+ * Tracepoint for an offline task being resched:
+ */
+TRACE_EVENT(sched_qos_smt_expel,
+
+    TP_PROTO(struct task_struct *sibling_p, int qos_smt_status),
+
+    TP_ARGS(sibling_p, qos_smt_status),
+
+    TP_STRUCT__entry(
+        __array( char,  sibling_comm,       TASK_COMM_LEN )
+        __field( pid_t, sibling_pid )
+        __field( int,   sibling_qos_status )
+        __field( int,   sibling_cpu )
+    ),
+
+    TP_fast_assign(
+        memcpy(__entry->sibling_comm, sibling_p->comm, TASK_COMM_LEN);
+        __entry->sibling_pid        = sibling_p->pid;
+        __entry->sibling_qos_status = qos_smt_status;
+        __entry->sibling_cpu        = task_cpu(sibling_p);
+    ),
+
+    TP_printk("sibling_comm=%s sibling_pid=%d sibling_qos_status=%d sibling_cpu=%d",
+        __entry->sibling_comm, __entry->sibling_pid, __entry->sibling_qos_status,
+        __entry->sibling_cpu)
+);
+
+/*
+ * Tracepoint for an offline task being expelled:
+ */
+TRACE_EVENT(sched_qos_smt_expelled,
+
+    TP_PROTO(struct task_struct *p, int qos_smt_status),
+
+    TP_ARGS(p, qos_smt_status),
+
+    TP_STRUCT__entry(
+        __array( char,  comm,   TASK_COMM_LEN )
+        __field( pid_t, pid )
+        __field( int,   qos_status )
+    ),
+
+    TP_fast_assign(
+        memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+        __entry->pid = p->pid;
+        __entry->qos_status = qos_smt_status;
+    ),
+
+    TP_printk("comm=%s pid=%d qos_status=%d",
+        __entry->comm, __entry->pid, __entry->qos_status)
+);
+#endif
+
 /*
  * Tracepoint for a task being migrated:
  */
diff --git a/init/Kconfig b/init/Kconfig
index a9394a15e0d15ac8179070ab10b941de851cee19..869eea4108d0b5a2c5bdadf29b338c28baab69ac 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1031,6 +1031,15 @@ config QOS_SCHED
 
       If in doubt, say N.
 
+config QOS_SCHED_SMT_EXPELLER
+    bool "Qos smt expeller"
+    depends on SCHED_SMT
+    depends on QOS_SCHED
+    default n
+    help
+      This feature enables online tasks to expel offline tasks on the
+      SMT sibling CPUs, so that online tasks occupy CPU resources exclusively.
+
 config QOS_SCHED_PRIO_LB
     bool "Priority load balance for Qos scheduler"
     depends on QOS_SCHED
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index eee2d05dc90afdb76c261042ee953d5d6b894f75..ebec719e204c174269a831f9680222b9ce7a9072 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1046,6 +1046,11 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
     }
 #endif
 
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    P_SCHEDSTAT(nr_qos_smt_send_ipi);
+    P_SCHEDSTAT(nr_qos_smt_expelled);
+#endif
+
     avg_atom = p->se.sum_exec_runtime;
     if (nr_switches)
         avg_atom = div64_ul(avg_atom, nr_switches);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 53a075b087c63f5ebe605a6ba692f1524eaa7be8..318258ea011e989937785112f28e373587166da3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -62,6 +62,10 @@
 #include
 #endif
 
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+#include
+#endif
+
 #ifdef CONFIG_SCHED_STEAL
 #include "sparsemask.h"
 #endif
@@ -149,6 +153,11 @@ unsigned int sysctl_offline_wait_interval = 100;  /* in ms */
 static int one_thousand = 1000;
 static int hundred_thousand = 100000;
 static int unthrottle_qos_cfs_rqs(int cpu);
+static bool qos_smt_expelled(int this_cpu);
+#endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static DEFINE_PER_CPU(int, qos_smt_status);
 #endif
 
 #ifdef CONFIG_QOS_SCHED_PRIO_LB
@@ -5823,6 +5832,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
     struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
     struct sched_entity *se;
     long task_delta, idle_task_delta, dequeue = 1;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    long qos_idle_delta;
+#endif
 
     raw_spin_lock(&cfs_b->lock);
     /* This will start the period timer if necessary */
@@ -5854,6 +5866,10 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
     task_delta = cfs_rq->h_nr_running;
     idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
+
     for_each_sched_entity(se) {
         struct cfs_rq *qcfs_rq = cfs_rq_of(se);
         /* throttled entity or throttle-on-deactivate */
@@ -5867,6 +5883,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
         qcfs_rq->h_nr_running -= task_delta;
         qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
 
         if (qcfs_rq->load.weight) {
             /* Avoid re-evaluating load for this entity: */
@@ -5889,6 +5908,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
         qcfs_rq->h_nr_running -= task_delta;
         qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
     }
 
     /* At this point se is NULL and we are at root level*/
@@ -5915,6 +5937,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
     struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
     struct sched_entity *se;
     long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    long qos_idle_delta;
+#endif
 
     se = cfs_rq->tg->se[cpu_of(rq)];
 
@@ -5957,6 +5982,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
     task_delta = cfs_rq->h_nr_running;
     idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
     for_each_sched_entity(se) {
         struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 
@@ -5969,6 +5997,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
         qcfs_rq->h_nr_running += task_delta;
         qcfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qcfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
         /* end evaluation on encountering a throttled cfs_rq */
         if (cfs_rq_throttled(qcfs_rq))
@@ -5986,6 +6017,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
         qcfs_rq->h_nr_running += task_delta;
         qcfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qcfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
         /* end evaluation on encountering a throttled cfs_rq */
         if (cfs_rq_throttled(qcfs_rq))
@@ -6807,6 +6841,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     struct cfs_rq *cfs_rq;
     struct sched_entity *se = &p->se;
     int idle_h_nr_running = task_has_idle_policy(p);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    int qos_idle_h_nr_running = task_has_qos_idle_policy(p);
+#endif
     int task_new = !(flags & ENQUEUE_WAKEUP);
     unsigned int prev_nr = rq->cfs.h_nr_running;
 
@@ -6834,6 +6871,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
         cfs_rq->h_nr_running++;
         cfs_rq->idle_h_nr_running += idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
+#endif
 
         if (cfs_rq_is_idle(cfs_rq))
             idle_h_nr_running = 1;
@@ -6854,7 +6894,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
         cfs_rq->h_nr_running++;
         cfs_rq->idle_h_nr_running += idle_h_nr_running;
-
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
+#endif
         if (cfs_rq_is_idle(cfs_rq))
             idle_h_nr_running = 1;
 
@@ -6904,6 +6946,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
     struct sched_entity *se = &p->se;
     int task_sleep = flags & DEQUEUE_SLEEP;
     int idle_h_nr_running = task_has_idle_policy(p);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    int qos_idle_h_nr_running = task_has_qos_idle_policy(p);
+#endif
     unsigned int prev_nr = rq->cfs.h_nr_running;
     bool was_sched_idle = sched_idle_rq(rq);
 
@@ -6915,6 +6960,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
         cfs_rq->h_nr_running--;
         cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
+#endif
 
         if (cfs_rq_is_idle(cfs_rq))
             idle_h_nr_running = 1;
@@ -6947,7 +6995,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
         cfs_rq->h_nr_running--;
         cfs_rq->idle_h_nr_running -= idle_h_nr_running;
-
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
+#endif
         if (cfs_rq_is_idle(cfs_rq))
             idle_h_nr_running = 1;
 
@@ -8616,6 +8666,16 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 }
 
 #ifdef CONFIG_QOS_SCHED
+static inline bool qos_timer_is_activated(int cpu)
+{
+    return hrtimer_active(per_cpu_ptr(&qos_overload_timer, cpu));
+}
+
+static inline void cancel_qos_timer(int cpu)
+{
+    hrtimer_cancel(per_cpu_ptr(&qos_overload_timer, cpu));
+}
+
 static inline bool is_offline_task(struct task_struct *p)
 {
     return task_group(p)->qos_level == -1;
 }
@@ -8628,6 +8688,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
     struct rq *rq = rq_of(cfs_rq);
     struct sched_entity *se;
     long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    long qos_idle_delta;
+#endif
     unsigned int prev_nr = cfs_rq->h_nr_running;
 
     se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
@@ -8639,6 +8702,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
     task_delta = cfs_rq->h_nr_running;
     idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
     for_each_sched_entity(se) {
         struct cfs_rq *qcfs_rq = cfs_rq_of(se);
         /* throttled entity or throttle-on-deactivate */
@@ -8649,6 +8715,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
         qcfs_rq->h_nr_running -= task_delta;
         qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
 
         if (qcfs_rq->load.weight) {
             /* Avoid re-evaluating load for this entity: */
@@ -8671,6 +8740,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
         qcfs_rq->h_nr_running -= task_delta;
         qcfs_rq->idle_h_nr_running -= idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
+#endif
     }
 
     /* At this point se is NULL and we are at root level*/
@@ -8679,7 +8751,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         overload_clear(rq);
 
 done:
-    if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
+    if (!qos_timer_is_activated(cpu_of(rq)))
         start_qos_hrtimer(cpu_of(rq));
 
     cfs_rq->throttled = QOS_THROTTLED;
@@ -8694,6 +8766,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
     struct sched_entity *se;
     unsigned int prev_nr = cfs_rq->h_nr_running;
     long task_delta, idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    long qos_idle_delta;
+#endif
 
     se = cfs_rq->tg->se[cpu_of(rq)];
 
@@ -8726,6 +8801,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
     task_delta = cfs_rq->h_nr_running;
     idle_task_delta = cfs_rq->idle_h_nr_running;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
+#endif
     for_each_sched_entity(se) {
         if (se->on_rq)
             break;
@@ -8735,6 +8813,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
         cfs_rq->h_nr_running += task_delta;
         cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
         if (cfs_rq_throttled(cfs_rq))
             goto unthrottle_throttle;
@@ -8748,6 +8829,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
         cfs_rq->h_nr_running += task_delta;
         cfs_rq->idle_h_nr_running += idle_task_delta;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+        cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
+#endif
 
         /* end evaluation on encountering a throttled cfs_rq */
         if (cfs_rq_throttled(cfs_rq))
@@ -8761,10 +8845,6 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 unthrottle_throttle:
     assert_list_leaf_cfs_rq(rq);
-
-    /* Determine whether we need to wake up potentially idle CPU: */
-    if (rq->curr == rq->idle && rq->cfs.nr_running)
-        resched_curr(rq);
 }
 
 static int __unthrottle_qos_cfs_rqs(int cpu)
@@ -8786,11 +8866,10 @@ static int __unthrottle_qos_cfs_rqs(int cpu)
 static int unthrottle_qos_cfs_rqs(int cpu)
 {
     int res;
-
     res = __unthrottle_qos_cfs_rqs(cpu);
-    if (res)
-        hrtimer_cancel(&(per_cpu(qos_overload_timer, cpu)));
+    if (qos_timer_is_activated(cpu) && !qos_smt_expelled(cpu))
+        cancel_qos_timer(cpu);
 
     return res;
 }
@@ -8850,8 +8929,13 @@ static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
     struct rq *rq = this_rq();
 
     rq_lock_irqsave(rq, &rf);
-    if (__unthrottle_qos_cfs_rqs(smp_processor_id()))
-        __this_cpu_write(qos_cpu_overload, 1);
+    __unthrottle_qos_cfs_rqs(smp_processor_id());
+    __this_cpu_write(qos_cpu_overload, 1);
+
+    /* Determine whether we need to wake up a potentially idle CPU. */
+    if (rq->curr == rq->idle && rq->cfs.nr_running)
+        resched_curr(rq);
+
     rq_unlock_irqrestore(rq, &rf);
 
     return HRTIMER_NORESTART;
 }
@@ -8891,6 +8975,177 @@ static void qos_schedule_throttle(struct task_struct *p)
     }
 }
 
+#ifndef CONFIG_QOS_SCHED_SMT_EXPELLER
+static bool qos_smt_expelled(int this_cpu)
+{
+    return false;
+}
+#endif
+
+#endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+DEFINE_STATIC_KEY_TRUE(qos_smt_expell_switch);
+
+static int __init qos_sched_smt_noexpell_setup(char *__unused)
+{
+    static_branch_disable(&qos_smt_expell_switch);
+    return 1;
+}
+__setup("nosmtexpell", qos_sched_smt_noexpell_setup);
+
+static bool qos_smt_check_siblings_status(int this_cpu)
+{
+    int cpu;
+
+    if (!sched_smt_active())
+        return false;
+
+    for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+        if (cpu == this_cpu)
+            continue;
+
+        if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE)
+            return true;
+    }
+
+    return false;
+}
+
+static bool qos_sched_idle_cpu(int this_cpu)
+{
+    struct rq *rq = cpu_rq(this_cpu);
+
+    return unlikely(rq->nr_running == rq->cfs.qos_idle_h_nr_running &&
+            rq->nr_running);
+}
+
+static bool qos_smt_expelled(int this_cpu)
+{
+    if (!static_branch_likely(&qos_smt_expell_switch))
+        return false;
+
+    /*
+     * The qos_smt_status of the sibling CPU is online and the current CPU
+     * only has offline tasks enqueued, so there is no suitable task and
+     * pick_next_task_fair() returns NULL.
+     */
+    if (qos_smt_check_siblings_status(this_cpu) && qos_sched_idle_cpu(this_cpu))
+        return true;
+
+    return false;
+}
+
+static bool qos_smt_update_status(struct task_struct *p)
+{
+    int status = QOS_LEVEL_OFFLINE;
+
+    if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE)
+        status = QOS_LEVEL_ONLINE;
+
+    if (__this_cpu_read(qos_smt_status) == status)
+        return false;
+
+    __this_cpu_write(qos_smt_status, status);
+
+    return true;
+}
+
+static void qos_smt_send_ipi(int this_cpu)
+{
+    int cpu;
+    struct rq *rq = NULL;
+
+    if (!sched_smt_active())
+        return;
+
+    for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+        if (cpu == this_cpu)
+            continue;
+
+        rq = cpu_rq(cpu);
+
+        /*
+         * There are two cases where the current CPU need not send a scheduler IPI:
+         * a) the qos_smt_status of the sibling CPU is online;
+         * b) the cfs.h_nr_running of the sibling CPU is 0.
+         */
+        if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE ||
+            rq->cfs.h_nr_running == 0)
+            continue;
+
+        schedstat_inc(current->stats.nr_qos_smt_send_ipi);
+        smp_send_reschedule(cpu);
+    }
+}
+
+static void qos_smt_expel(int this_cpu, struct task_struct *p)
+{
+    if (!static_branch_likely(&qos_smt_expell_switch))
+        return;
+
+    if (qos_smt_update_status(p))
+        qos_smt_send_ipi(this_cpu);
+}
+
+static inline bool qos_smt_enabled(void)
+{
+    if (!static_branch_likely(&qos_smt_expell_switch))
+        return false;
+
+    if (!sched_smt_active())
+        return false;
+
+    return true;
+}
+
+static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
+{
+    int cpu;
+
+    if (!qos_smt_enabled())
+        return false;
+
+    for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+        if (cpu == this_cpu)
+            continue;
+
+        /*
+         * There are two cases that rely on setting need_resched to drive
+         * away the offline task:
+         * a) the qos_smt_status of the sibling CPU is online and the task on the current CPU is offline;
+         * b) the qos_smt_status of the sibling CPU is offline, the current task is the idle task,
+         *    and the current CPU only has SCHED_IDLE tasks enqueued.
+         */
+        if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE &&
+            task_group(current)->qos_level < QOS_LEVEL_ONLINE) {
+            trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
+            return true;
+        }
+
+        if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE &&
+            rq->curr == rq->idle && qos_sched_idle_cpu(this_cpu)) {
+            trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu));
+            return true;
+        }
+    }
+
+    return false;
+}
+
+void qos_smt_check_need_resched(void)
+{
+    struct rq *rq = this_rq();
+    int this_cpu = rq->cpu;
+
+    if (test_tsk_need_resched(current))
+        return;
+
+    if (_qos_smt_check_need_resched(this_cpu, rq)) {
+        set_tsk_need_resched(current);
+        set_preempt_need_resched();
+    }
+}
 #endif
 
 #ifdef CONFIG_SMP
@@ -8934,14 +9189,36 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
     struct task_struct *p;
     int new_tasks;
     unsigned long time;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    int this_cpu = rq->cpu;
+#endif
 
 again:
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    if (qos_smt_expelled(this_cpu) && !__this_cpu_read(qos_cpu_overload)) {
+        __this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE);
+
+        if (!qos_timer_is_activated(this_cpu))
+            start_qos_hrtimer(this_cpu);
+
+        schedstat_inc(rq->curr->stats.nr_qos_smt_expelled);
+        trace_sched_qos_smt_expelled(rq->curr, per_cpu(qos_smt_status, this_cpu));
+        return NULL;
+    }
+#endif
+
     if (!sched_fair_runnable(rq))
         goto idle;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-    if (!prev || prev->sched_class != &fair_sched_class)
-        goto simple;
+    if (!prev || prev->sched_class != &fair_sched_class) {
+#ifdef CONFIG_QOS_SCHED
+        if (cfs_rq->idle_h_nr_running != 0 && rq->online)
+            goto qos_simple;
+        else
+#endif
+            goto simple;
+    }
 
     /*
      * Because of the set_next_buddy() in dequeue_task_fair() it is rather
@@ -9025,6 +9302,34 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
     }
 
     goto done;
+
+#ifdef CONFIG_QOS_SCHED
+qos_simple:
+    if (prev)
+        put_prev_task(rq, prev);
+
+    do {
+        se = pick_next_entity(cfs_rq, NULL);
+        if (check_qos_cfs_rq(group_cfs_rq(se))) {
+            cfs_rq = &rq->cfs;
+            if (!cfs_rq->nr_running)
+                goto idle;
+            continue;
+        }
+
+        cfs_rq = group_cfs_rq(se);
+    } while (cfs_rq);
+
+    p = task_of(se);
+
+    while (se) {
+        set_next_entity(cfs_rq_of(se), se);
+        se = parent_entity(se);
+    }
+
+    goto done;
+#endif
+
 simple:
 #endif
     if (prev)
@@ -9062,6 +9367,10 @@ done: __maybe_unused;
     qos_schedule_throttle(p);
 #endif
 
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_smt_expel(this_cpu, p);
+#endif
+
     return p;
 
 idle:
@@ -9102,7 +9411,8 @@ done: __maybe_unused;
         goto again;
     }
 
-    __this_cpu_write(qos_cpu_overload, 0);
+    if (!qos_smt_expelled(cpu_of(rq)))
+        __this_cpu_write(qos_cpu_overload, 0);
 #endif
     /*
      * rq is about to be idle, check if we need to update the
@@ -9110,6 +9420,10 @@ done: __maybe_unused;
      */
     update_idle_rq_clock_pelt(rq);
 
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    qos_smt_expel(this_cpu, NULL);
+#endif
+
     return NULL;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9b5e6ce74cec4dcdfae3db4722e53a68814aa9e9..4b679122d26f639ba3cc32cec382c84fe069d962 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -660,6 +660,14 @@ struct cfs_rq {
 #if defined(CONFIG_QOS_SCHED)
     struct list_head qos_throttled_list;
 #endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+    union {
+        unsigned int qos_idle_h_nr_running; /* qos_level:-1 */
+        unsigned long qos_idle_h_nr_running_padding;
+    };
+#endif
+
 };
 
 static inline int rt_bandwidth_enabled(void)
@@ -1432,6 +1440,11 @@ do { \
 } while (0)
 
 #ifdef CONFIG_QOS_SCHED
+enum task_qos_level {
+    QOS_LEVEL_OFFLINE = -1,
+    QOS_LEVEL_ONLINE = 0,
+    QOS_LEVEL_MAX
+};
 void init_qos_hrtimer(int cpu);
 #endif
 
@@ -3305,6 +3318,20 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 }
 #endif
 
+#ifdef CONFIG_QOS_SCHED
+static inline int qos_idle_policy(int policy)
+{
+    return policy == QOS_LEVEL_OFFLINE;
+}
+#endif
+
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static inline int task_has_qos_idle_policy(struct task_struct *p)
+{
+    return qos_idle_policy(task_group(p)->qos_level) && p->policy == SCHED_IDLE;
+}
+#endif
+
 extern void swake_up_all_locked(struct swait_queue_head *q);
 extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
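
Note: the expel decision above boils down to a small per-CPU state machine: each CPU publishes QOS_LEVEL_ONLINE or QOS_LEVEL_OFFLINE from pick_next_task_fair(), and a CPU whose SMT sibling is online and whose own runqueue holds only offline (qos_level == -1, SCHED_IDLE) tasks returns no task. The stand-alone C sketch below is only an illustration of that decision under simplified assumptions; NR_CPUS, struct cpu_state, the cpu[] array and smt_sibling() are hypothetical user-space stand-ins for the kernel's per-CPU qos_smt_status, runqueue counters and cpu_smt_mask(), not part of the patch.

/*
 * Illustrative user-space model of the QOS SMT expeller decision.
 * This is NOT kernel code; it only mirrors the shape of
 * qos_sched_idle_cpu() and qos_smt_expelled() from the patch above.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

enum qos_level { QOS_LEVEL_OFFLINE = -1, QOS_LEVEL_ONLINE = 0 };

struct cpu_state {
    enum qos_level qos_smt_status;      /* status last published by the pick path */
    unsigned int nr_running;            /* total runnable tasks on this CPU */
    unsigned int qos_idle_h_nr_running; /* runnable offline (qos_level == -1) tasks */
};

static struct cpu_state cpu[NR_CPUS];

/* Pair CPUs (0,1) and (2,3) as SMT siblings in this toy topology. */
static int smt_sibling(int c)
{
    return c ^ 1;
}

/* Mirrors qos_sched_idle_cpu(): every runnable task on this CPU is offline. */
static bool qos_sched_idle_cpu(int c)
{
    return cpu[c].nr_running &&
           cpu[c].nr_running == cpu[c].qos_idle_h_nr_running;
}

/*
 * Mirrors qos_smt_expelled(): pick no task when the sibling runs online
 * work and this CPU only has offline tasks queued.
 */
static bool qos_smt_expelled(int c)
{
    return cpu[smt_sibling(c)].qos_smt_status == QOS_LEVEL_ONLINE &&
           qos_sched_idle_cpu(c);
}

int main(void)
{
    /* CPU0 picked an online task; CPU1 has only offline tasks queued. */
    cpu[0].qos_smt_status = QOS_LEVEL_ONLINE;
    cpu[1].qos_smt_status = QOS_LEVEL_OFFLINE;
    cpu[1].nr_running = 2;
    cpu[1].qos_idle_h_nr_running = 2;
    printf("CPU1 expelled: %s\n", qos_smt_expelled(1) ? "yes" : "no");

    /* Once CPU0 stops running online work, CPU1 may run its offline tasks again. */
    cpu[0].qos_smt_status = QOS_LEVEL_OFFLINE;
    printf("CPU1 expelled: %s\n", qos_smt_expelled(1) ? "yes" : "no");
    return 0;
}

In the real patch this check sits at the top of pick_next_task_fair() and is paired with the need_resched IPI path, so an expelled sibling re-evaluates its pick as soon as the online side changes status.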