From 590300b1c6590896b862a44d30fc52f2b84b4b14 Mon Sep 17 00:00:00 2001 From: gaochao Date: Wed, 26 Jan 2022 20:04:00 +0800 Subject: [PATCH] sched:Remove AOSP sched features and adapt WALT ohos inclusion category: feature issue: #I4SA1E CVE: NA Signed-off-by: Hu Zhaodong ------------------------------------------- Make some modifications for linux-5.10 kernel patch 1. Remove AOSP sched features 2. Fix conflicts with WALT Signed-off-by: gaochao --- linux-5.10/rk3568_patch/kernel.patch | 2364 ++------------------------ 1 file changed, 96 insertions(+), 2268 deletions(-) mode change 100644 => 100755 linux-5.10/rk3568_patch/kernel.patch diff --git a/linux-5.10/rk3568_patch/kernel.patch b/linux-5.10/rk3568_patch/kernel.patch old mode 100644 new mode 100755 index 77f663c..fe7615d --- a/linux-5.10/rk3568_patch/kernel.patch +++ b/linux-5.10/rk3568_patch/kernel.patch @@ -119803,26 +119803,10 @@ index e363ae04a..f1327e7fe 100644 list_del(&priv->node); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c -index ebee0ad55..9a7538886 100644 +index ebee0ad55..56fecf202 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c -@@ -16,6 +16,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -387,6 +388,7 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy, - CPUFREQ_POSTCHANGE, freqs); - - cpufreq_stats_record_transition(policy, freqs->new); -+ cpufreq_times_record_transition(policy, freqs->new); - policy->cur = freqs->new; - } - } -@@ -688,8 +690,12 @@ static ssize_t show_##file_name \ +@@ -688,8 +688,12 @@ static ssize_t show_##file_name \ return sprintf(buf, "%u\n", policy->object); \ } @@ -119836,23 +119820,7 @@ index ebee0ad55..9a7538886 100644 show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -@@ -1484,6 +1490,7 @@ static int cpufreq_online(unsigned int cpu) - goto out_destroy_policy; - - cpufreq_stats_create_table(policy); -+ cpufreq_times_create_policy(policy); - - write_lock_irqsave(&cpufreq_driver_lock, flags); - list_add(&policy->policy_list, &cpufreq_policy_list); -@@ -2518,7 +2525,6 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, - ret = cpufreq_start_governor(policy); - if (!ret) { - pr_debug("governor change\n"); -- sched_cpufreq_governor_change(policy, old_gov); - return 0; - } - cpufreq_exit_governor(policy); -@@ -2536,6 +2542,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, +@@ -2536,6 +2540,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, return ret; } @@ -1548687,7 +1548655,7 @@ index 2d152571a..c333d13b3 100644 bool "Audio Class 1.0" depends on USB_CONFIGFS diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c -index a8704e649..5d07dc572 100644 +index 8bec0cbf8..3789c3291 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -13,6 +13,7 @@ @@ -1548802,7 +1548770,7 @@ index a8704e649..5d07dc572 100644 } return le16_to_cpu(bos->wTotalLength); -@@ -2046,7 +2077,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) +@@ -2058,7 +2089,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) return value; } @@ -1548811,7 +1548779,7 @@ index a8704e649..5d07dc572 100644 { struct usb_composite_dev *cdev = get_gadget_data(gadget); unsigned long flags; -@@ -2063,6 +2094,23 @@ void composite_disconnect(struct usb_gadget *gadget) +@@ -2075,6 +2106,23 @@ void composite_disconnect(struct 
usb_gadget *gadget) spin_unlock_irqrestore(&cdev->lock, flags); } @@ -1548835,7 +1548803,7 @@ index a8704e649..5d07dc572 100644 /*-------------------------------------------------------------------------*/ static ssize_t suspended_show(struct device *dev, struct device_attribute *attr, -@@ -2383,7 +2431,7 @@ static const struct usb_gadget_driver composite_driver_template = { +@@ -2395,7 +2443,7 @@ static const struct usb_gadget_driver composite_driver_template = { .unbind = composite_unbind, .setup = composite_setup, @@ -1604105,75 +1604073,8 @@ index 000000000..07c563a42 +#endif + +#endif /* __LINUX_CLK_ROCKCHIP_H_ */ -diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h -index acbad3b36..54ee25053 100644 ---- a/include/linux/cpufreq.h -+++ b/include/linux/cpufreq.h -@@ -1023,14 +1023,6 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) - } - #endif - --#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) --void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -- struct cpufreq_governor *old_gov); --#else --static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -- struct cpufreq_governor *old_gov) { } --#endif -- - extern void arch_freq_prepare_all(void); - extern unsigned int arch_freq_get_on_cpu(int cpu); - -diff --git a/include/linux/cpufreq_times.h b/include/linux/cpufreq_times.h -new file mode 100755 -index 000000000..38272a5f3 ---- /dev/null -+++ b/include/linux/cpufreq_times.h -@@ -0,0 +1,42 @@ -+/* drivers/cpufreq/cpufreq_times.c -+ * -+ * Copyright (C) 2018 Google, Inc. -+ * -+ * This software is licensed under the terms of the GNU General Public -+ * License version 2, as published by the Free Software Foundation, and -+ * may be copied, distributed, and modified under those terms. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ */ -+ -+#ifndef _LINUX_CPUFREQ_TIMES_H -+#define _LINUX_CPUFREQ_TIMES_H -+ -+#include -+#include -+ -+#ifdef CONFIG_CPU_FREQ_TIMES -+void cpufreq_task_times_init(struct task_struct *p); -+void cpufreq_task_times_alloc(struct task_struct *p); -+void cpufreq_task_times_exit(struct task_struct *p); -+int proc_time_in_state_show(struct seq_file *m, struct pid_namespace *ns, -+ struct pid *pid, struct task_struct *p); -+void cpufreq_acct_update_power(struct task_struct *p, u64 cputime); -+void cpufreq_times_create_policy(struct cpufreq_policy *policy); -+void cpufreq_times_record_transition(struct cpufreq_policy *policy, -+ unsigned int new_freq); -+#else -+static inline void cpufreq_task_times_init(struct task_struct *p) {} -+static inline void cpufreq_task_times_alloc(struct task_struct *p) {} -+static inline void cpufreq_task_times_exit(struct task_struct *p) {} -+static inline void cpufreq_acct_update_power(struct task_struct *p, -+ u64 cputime) {} -+static inline void cpufreq_times_create_policy(struct cpufreq_policy *policy) {} -+static inline void cpufreq_times_record_transition( -+ struct cpufreq_policy *policy, unsigned int new_freq) {} -+#endif /* CONFIG_CPU_FREQ_TIMES */ -+#endif /* _LINUX_CPUFREQ_TIMES_H */ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h -index 04c20de66..2713db839 100644 +index 04c20de66..047f449d3 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -15,6 +15,7 @@ @@ -1604184,16 +1604085,7 @@ index 04c20de66..2713db839 100644 #include #ifdef CONFIG_CPUSETS -@@ -55,8 +56,6 @@ extern void cpuset_init_smp(void); - extern void cpuset_force_rebuild(void); - extern void cpuset_update_active_cpus(void); - extern void cpuset_wait_for_hotplug(void); --extern void cpuset_read_lock(void); --extern void cpuset_read_unlock(void); - extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); - extern void cpuset_cpus_allowed_fallback(struct task_struct *p); - extern nodemask_t cpuset_mems_allowed(struct task_struct *p); -@@ -162,6 +161,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) +@@ -162,6 +163,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } @@ -1604202,13 +1604094,7 @@ index 04c20de66..2713db839 100644 #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } -@@ -178,13 +179,10 @@ static inline void cpuset_update_active_cpus(void) - - static inline void cpuset_wait_for_hotplug(void) { } - --static inline void cpuset_read_lock(void) { } --static inline void cpuset_read_unlock(void) { } -- +@@ -184,7 +187,7 @@ static inline void cpuset_read_unlock(void) { } static inline void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask) { @@ -1604217,7 +1604103,7 @@ index 04c20de66..2713db839 100644 } static inline void cpuset_cpus_allowed_fallback(struct task_struct *p) -@@ -280,6 +278,8 @@ static inline bool read_mems_allowed_retry(unsigned int seq) +@@ -280,6 +283,8 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } @@ -1607897,40 +1607783,10 @@ index 000000000..39c497774 + +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h -index d42f4addc..560e5955a 100644 +index d42f4addc..1f36daefa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -718,6 +718,10 @@ struct task_struct { - struct uclamp_se uclamp[UCLAMP_CNT]; - #endif - -+#ifdef CONFIG_HOTPLUG_CPU -+ struct list_head percpu_kthread_node; -+#endif -+ - #ifdef CONFIG_PREEMPT_NOTIFIERS - /* List of struct preempt_notifier: */ 
- struct hlist_head preempt_notifiers; -@@ -897,6 +901,10 @@ struct task_struct { - u64 stimescaled; - #endif - u64 gtime; -+#ifdef CONFIG_CPU_FREQ_TIMES -+ u64 *time_in_state; -+ unsigned int max_state; -+#endif - struct prev_cputime prev_cputime; - #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN - struct vtime vtime; -@@ -1008,6 +1016,7 @@ struct task_struct { - raw_spinlock_t pi_lock; - - struct wake_q_node wake_q; -+ int wake_q_count; - - #ifdef CONFIG_RT_MUTEXES - /* PI waiters blocked on a rt_mutex held by this task: */ -@@ -1228,7 +1237,7 @@ struct task_struct { +@@ -1228,7 +1228,7 @@ struct task_struct { u64 timer_slack_ns; u64 default_timer_slack_ns; @@ -1607939,43 +1607795,6 @@ index d42f4addc..560e5955a 100644 unsigned int kasan_depth; #endif -@@ -1668,9 +1677,20 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) - - extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); - extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); -+ -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+extern bool cpupri_check_rt(void); -+#else -+static inline bool cpupri_check_rt(void) -+{ -+ return false; -+} -+#endif -+ - #ifdef CONFIG_SMP - extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); - extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); -+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p); - #else - static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - { -diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h -index 9a62ffdd2..89fb9b027 100644 ---- a/include/linux/sched/hotplug.h -+++ b/include/linux/sched/hotplug.h -@@ -8,7 +8,11 @@ - - extern int sched_cpu_starting(unsigned int cpu); - extern int sched_cpu_activate(unsigned int cpu); -+extern int sched_cpus_activate(struct cpumask *cpus); - extern int sched_cpu_deactivate(unsigned int cpu); -+extern int sched_cpus_deactivate_nosync(struct cpumask *cpus); -+extern int sched_cpu_drain_rq(unsigned int cpu); -+extern void sched_cpu_drain_rq_wait(unsigned int cpu); - - #ifdef CONFIG_HOTPLUG_CPU - extern int sched_cpu_dying(unsigned int cpu); diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 657640015..fd534cbfa 100644 --- a/include/linux/sched/signal.h @@ -1607988,26 +1607807,6 @@ index 657640015..fd534cbfa 100644 /* * Types defining task->signal and task->sighand and APIs using them: -diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h -index 26a2013ac..1e05e5669 100644 ---- a/include/linux/sched/wake_q.h -+++ b/include/linux/sched/wake_q.h -@@ -38,6 +38,7 @@ - struct wake_q_head { - struct wake_q_node *first; - struct wake_q_node **lastp; -+ int count; - }; - - #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) -@@ -49,6 +50,7 @@ static inline void wake_q_init(struct wake_q_head *head) - { - head->first = WAKE_Q_TAIL; - head->lastp = &head->first; -+ head->count = 0; - } - - static inline bool wake_q_empty(struct wake_q_head *head) diff --git a/include/linux/sched/xacct.h b/include/linux/sched/xacct.h index c078f0a94..9544c9d9d 100644 --- a/include/linux/sched/xacct.h @@ -1608561,38 +1608360,6 @@ index b56e1dedc..ba4246721 100644 struct mac_device_info *(*setup)(void *priv); void *bsp_priv; struct clk *stmmac_clk; -diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h -index 63ea9aff3..ddafb3cf4 100644 ---- a/include/linux/stop_machine.h -+++ 
b/include/linux/stop_machine.h -@@ -28,6 +28,16 @@ struct cpu_stop_work { - struct cpu_stop_done *done; - }; - -+/* -+ * Structure to determine completion condition and record errors. May -+ * be shared by works on different cpus. -+ */ -+struct cpu_stop_done { -+ atomic_t nr_todo; /* nr left to execute */ -+ int ret; /* collected return value */ -+ struct completion completion; /* fired if nr_todo reaches 0 */ -+}; -+ - int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); - int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg); - bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, -@@ -35,6 +45,10 @@ bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, - void stop_machine_park(int cpu); - void stop_machine_unpark(int cpu); - void stop_machine_yield(const struct cpumask *cpumask); -+int stop_one_cpu_async(unsigned int cpu, cpu_stop_fn_t fn, void *arg, -+ struct cpu_stop_work *work_buf, -+ struct cpu_stop_done *done); -+void cpu_stop_work_wait(struct cpu_stop_work *work_buf); - - #else /* CONFIG_SMP */ - diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 176d9454e..aa4b4114b 100644 --- a/include/linux/thermal.h @@ -1610748,7 +1610515,7 @@ index ad16f7731..f302ab6db 100644 /* This part must be outside protection */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h -index c96a4337a..bcd7f1fbb 100644 +index c96a4337a..372a06791 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -198,6 +198,7 @@ TRACE_EVENT(sched_migrate_task, @@ -1610775,37 +1610542,6 @@ index c96a4337a..bcd7f1fbb 100644 ); DECLARE_EVENT_CLASS(sched_process_template, -@@ -402,6 +405,30 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay)); - -+/* -+ * Tracepoint for recording the cause of uninterruptible sleep. -+ */ -+TRACE_EVENT(sched_blocked_reason, -+ -+ TP_PROTO(struct task_struct *tsk), -+ -+ TP_ARGS(tsk), -+ -+ TP_STRUCT__entry( -+ __field( pid_t, pid ) -+ __field( void*, caller ) -+ __field( bool, io_wait ) -+ ), -+ -+ TP_fast_assign( -+ __entry->pid = tsk->pid; -+ __entry->caller = (void *)get_wchan(tsk); -+ __entry->io_wait = tsk->in_iowait; -+ ), -+ -+ TP_printk("pid=%d iowait=%d caller=%pS", __entry->pid, __entry->io_wait, __entry->caller) -+); -+ - /* - * Tracepoint for accounting runtime (time the task is executing - * on a CPU). 
diff --git a/include/trace/events/thermal_ipa_power.h b/include/trace/events/thermal_ipa_power.h new file mode 100755 index 000000000..a3a932c75 @@ -1617757,7 +1617493,7 @@ index 4215c2ff6..0c26757ea 100644 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c -index 1805c682ccc3..622f344d03fc 100644 +index 1805c682c..622f344d0 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -513,7 +513,8 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, @@ -1617771,28 +1617507,19 @@ index 1805c682ccc3..622f344d03fc 100644 put_cred(tcred); if (ret) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c -index c33d70215079..19adc148f755 100644 +index c33d70215..bf2f359f9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c -@@ -334,26 +334,15 @@ static struct cpuset top_cpuset = { +@@ -334,6 +334,8 @@ static struct cpuset top_cpuset = { * guidelines for accessing subsystem state in kernel/cgroup.c */ --DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem); -- --void cpuset_read_lock(void) --{ -- percpu_down_read(&cpuset_rwsem); --} -- --void cpuset_read_unlock(void) --{ -- percpu_up_read(&cpuset_rwsem); --} -- +static DEFINE_MUTEX(cpuset_mutex); - static DEFINE_SPINLOCK(callback_lock); ++ + DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem); + void cpuset_read_lock(void) +@@ -351,9 +353,9 @@ static DEFINE_SPINLOCK(callback_lock); static struct workqueue_struct *cpuset_migrate_mm_wq; /* @@ -1617804,7 +1617531,7 @@ index c33d70215079..19adc148f755 100644 static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn); static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq); -@@ -373,18 +362,29 @@ static inline bool is_in_v2_mode(void) +@@ -373,18 +375,29 @@ static inline bool is_in_v2_mode(void) } /* @@ -1617840,7 +1617567,7 @@ index c33d70215079..19adc148f755 100644 cs = parent_cs(cs); if (unlikely(!cs)) { /* -@@ -394,11 +394,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) +@@ -394,11 +407,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask) * cpuset's effective_cpus is on its way to be * identical to cpu_online_mask. 
*/ @@ -1617857,7 +1617584,7 @@ index c33d70215079..19adc148f755 100644 } /* -@@ -489,6 +491,9 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) +@@ -489,6 +504,9 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) if (cs && !zalloc_cpumask_var(pmask4, GFP_KERNEL)) goto free_three; @@ -1617867,7 +1617594,7 @@ index c33d70215079..19adc148f755 100644 return 0; free_three: -@@ -939,7 +944,7 @@ static void rebuild_root_domains(void) +@@ -939,7 +957,7 @@ static void rebuild_root_domains(void) struct cpuset *cs = NULL; struct cgroup_subsys_state *pos_css; @@ -1617876,7 +1617603,7 @@ index c33d70215079..19adc148f755 100644 lockdep_assert_cpus_held(); lockdep_assert_held(&sched_domains_mutex); -@@ -999,8 +1004,7 @@ static void rebuild_sched_domains_locked(void) +@@ -999,8 +1017,7 @@ static void rebuild_sched_domains_locked(void) struct cpuset *cs; int ndoms; @@ -1617886,7 +1617613,7 @@ index c33d70215079..19adc148f755 100644 /* * If we have raced with CPU hotplug, return early to avoid -@@ -1051,12 +1055,18 @@ static void rebuild_sched_domains_locked(void) +@@ -1051,12 +1068,18 @@ static void rebuild_sched_domains_locked(void) void rebuild_sched_domains(void) { get_online_cpus(); @@ -1617907,7 +1617634,7 @@ index c33d70215079..19adc148f755 100644 /** * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed -@@ -1072,7 +1082,7 @@ static void update_tasks_cpumask(struct cpuset *cs) +@@ -1072,7 +1095,7 @@ static void update_tasks_cpumask(struct cpuset *cs) css_task_iter_start(&cs->css, 0, &it); while ((task = css_task_iter_next(&it))) @@ -1617916,7 +1617643,7 @@ index c33d70215079..19adc148f755 100644 css_task_iter_end(&it); } -@@ -1096,8 +1106,7 @@ static void compute_effective_cpumask(struct cpumask *new_cpus, +@@ -1096,8 +1119,7 @@ static void compute_effective_cpumask(struct cpumask *new_cpus, cpumask_and(new_cpus, new_cpus, cs->cpus_requested); cpumask_and(new_cpus, new_cpus, cpu_active_mask); } else { @@ -1617926,7 +1617653,7 @@ index c33d70215079..19adc148f755 100644 } } -@@ -1162,7 +1171,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, +@@ -1162,7 +1184,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, int new_prs; bool part_error = false; /* Partition error? */ @@ -1617935,7 +1617662,7 @@ index c33d70215079..19adc148f755 100644 /* * The parent must be a partition root. 
-@@ -1542,13 +1551,12 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, +@@ -1542,13 +1564,12 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, if (!cpumask_subset(trialcs->cpus_requested, top_cpuset.cpus_requested)) return -EINVAL; @@ -1617951,7 +1617678,7 @@ index c33d70215079..19adc148f755 100644 retval = validate_change(cs, trialcs); if (retval < 0) return retval; -@@ -2158,7 +2166,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) +@@ -2158,7 +2179,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); cs = css_cs(css); @@ -1617960,7 +1617687,7 @@ index c33d70215079..19adc148f755 100644 /* allow moving tasks into an empty cpuset if on default hierarchy */ ret = -ENOSPC; -@@ -2182,7 +2190,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) +@@ -2182,7 +2203,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) cs->attach_in_progress++; ret = 0; out_unlock: @@ -1617969,7 +1617696,7 @@ index c33d70215079..19adc148f755 100644 return ret; } -@@ -2192,9 +2200,9 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) +@@ -2192,9 +2213,9 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); @@ -1617981,7 +1617708,7 @@ index c33d70215079..19adc148f755 100644 } /* -@@ -2217,22 +2225,20 @@ static void cpuset_attach(struct cgroup_taskset *tset) +@@ -2217,22 +2238,20 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); @@ -1618010,7 +1617737,7 @@ index c33d70215079..19adc148f755 100644 cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, task); -@@ -2271,7 +2277,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) +@@ -2271,7 +2290,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) if (!cs->attach_in_progress) wake_up(&cpuset_attach_wq); @@ -1618019,7 +1617746,7 @@ index c33d70215079..19adc148f755 100644 } /* The various types of files and directories in a cpuset file system */ -@@ -2303,7 +2309,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, +@@ -2303,7 +2322,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = 0; get_online_cpus(); @@ -1618028,7 +1617755,7 @@ index c33d70215079..19adc148f755 100644 if (!is_cpuset_online(cs)) { retval = -ENODEV; goto out_unlock; -@@ -2339,7 +2345,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, +@@ -2339,7 +2358,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: @@ -1618037,7 +1617764,7 @@ index c33d70215079..19adc148f755 100644 put_online_cpus(); return retval; } -@@ -2352,7 +2358,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, +@@ -2352,7 +2371,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, int retval = -ENODEV; get_online_cpus(); @@ -1618046,7 +1617773,7 @@ index c33d70215079..19adc148f755 100644 if (!is_cpuset_online(cs)) goto out_unlock; -@@ -2365,7 +2371,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, +@@ -2365,7 +2384,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, break; } out_unlock: @@ -1618055,7 +1617782,7 @@ index c33d70215079..19adc148f755 100644 put_online_cpus(); return retval; } -@@ -2406,7 +2412,7 @@ static 
ssize_t cpuset_write_resmask(struct kernfs_open_file *of, +@@ -2406,7 +2425,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, flush_work(&cpuset_hotplug_work); get_online_cpus(); @@ -1618064,7 +1617791,7 @@ index c33d70215079..19adc148f755 100644 if (!is_cpuset_online(cs)) goto out_unlock; -@@ -2430,7 +2436,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, +@@ -2430,7 +2449,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, free_cpuset(trialcs); out_unlock: @@ -1618073,7 +1617800,7 @@ index c33d70215079..19adc148f755 100644 put_online_cpus(); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); -@@ -2563,13 +2569,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, +@@ -2563,13 +2582,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, css_get(&cs->css); get_online_cpus(); @@ -1618089,7 +1617816,7 @@ index c33d70215079..19adc148f755 100644 put_online_cpus(); css_put(&cs->css); return retval ?: nbytes; -@@ -2777,7 +2783,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) +@@ -2777,7 +2796,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) return 0; get_online_cpus(); @@ -1618098,7 +1617825,7 @@ index c33d70215079..19adc148f755 100644 set_bit(CS_ONLINE, &cs->flags); if (is_spread_page(parent)) -@@ -2829,7 +2835,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) +@@ -2829,7 +2848,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpumask_copy(cs->effective_cpus, parent->cpus_allowed); spin_unlock_irq(&callback_lock); out_unlock: @@ -1618107,7 +1617834,7 @@ index c33d70215079..19adc148f755 100644 put_online_cpus(); return 0; } -@@ -2850,7 +2856,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) +@@ -2850,7 +2869,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) struct cpuset *cs = css_cs(css); get_online_cpus(); @@ -1618116,7 +1617843,7 @@ index c33d70215079..19adc148f755 100644 if (is_partition_root(cs)) update_prstate(cs, 0); -@@ -2869,7 +2875,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) +@@ -2869,7 +2888,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) cpuset_dec(); clear_bit(CS_ONLINE, &cs->flags); @@ -1618125,7 +1617852,7 @@ index c33d70215079..19adc148f755 100644 put_online_cpus(); } -@@ -2882,7 +2888,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) +@@ -2882,7 +2901,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { @@ -1618134,7 +1617861,7 @@ index c33d70215079..19adc148f755 100644 spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { -@@ -2895,7 +2901,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) +@@ -2895,7 +2914,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) } spin_unlock_irq(&callback_lock); @@ -1618143,7 +1617870,7 @@ index c33d70215079..19adc148f755 100644 } /* -@@ -2905,10 +2911,10 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) +@@ -2905,10 +2924,10 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) */ static void cpuset_fork(struct task_struct *task) { @@ -1618155,7 +1617882,7 @@ index c33d70215079..19adc148f755 100644 task->mems_allowed = current->mems_allowed; } -@@ -2937,7 +2943,6 @@ struct cgroup_subsys cpuset_cgrp_subsys = { +@@ -2937,7 +2956,6 @@ struct cgroup_subsys cpuset_cgrp_subsys = { int __init cpuset_init(void) { @@ -1618163,7 
+1617890,7 @@ index c33d70215079..19adc148f755 100644 BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL)); -@@ -3012,7 +3017,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, +@@ -3012,7 +3030,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, is_empty = cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed); @@ -1618172,7 +1617899,7 @@ index c33d70215079..19adc148f755 100644 /* * Move tasks to the nearest ancestor with execution resources, -@@ -3022,7 +3027,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, +@@ -3022,7 +3040,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, if (is_empty) remove_tasks_in_empty_cpuset(cs); @@ -1618181,7 +1617908,7 @@ index c33d70215079..19adc148f755 100644 } static void -@@ -3072,14 +3077,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) +@@ -3072,14 +3090,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) retry: wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); @@ -1618198,7 +1617925,7 @@ index c33d70215079..19adc148f755 100644 goto retry; } -@@ -3151,7 +3156,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) +@@ -3151,7 +3169,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems, cpus_updated, mems_updated); @@ -1618207,7 +1617934,7 @@ index c33d70215079..19adc148f755 100644 } /** -@@ -3170,7 +3175,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) +@@ -3170,7 +3188,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) * Note that CPU offlining during suspend is ignored. We don't modify * cpusets across suspend/resume cycles at all. */ @@ -1618216,7 +1617943,7 @@ index c33d70215079..19adc148f755 100644 { static cpumask_t new_cpus; static nodemask_t new_mems; -@@ -3181,7 +3186,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) +@@ -3181,7 +3199,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) if (on_dfl && !alloc_cpumasks(NULL, &tmp)) ptmp = &tmp; @@ -1618225,7 +1617952,7 @@ index c33d70215079..19adc148f755 100644 /* fetch the available cpus/mems and find out which changed how */ cpumask_copy(&new_cpus, cpu_active_mask); -@@ -3238,7 +3243,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) +@@ -3238,7 +3256,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) update_tasks_nodemask(&top_cpuset); } @@ -1618234,7 +1617961,7 @@ index c33d70215079..19adc148f755 100644 /* if cpus or mems changed, we need to propagate to descendants */ if (cpus_updated || mems_updated) { -@@ -3282,6 +3287,7 @@ void cpuset_wait_for_hotplug(void) +@@ -3282,6 +3300,7 @@ void cpuset_wait_for_hotplug(void) { flush_work(&cpuset_hotplug_work); } @@ -1618242,7 +1617969,7 @@ index c33d70215079..19adc148f755 100644 /* * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY]. -@@ -3337,11 +3343,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) +@@ -3337,11 +3356,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); @@ -1618256,7 +1617983,7 @@ index c33d70215079..19adc148f755 100644 /** * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe. 
* @tsk: pointer to task_struct with which the scheduler is struggling -@@ -3356,9 +3362,17 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) +@@ -3356,9 +3375,17 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) void cpuset_cpus_allowed_fallback(struct task_struct *tsk) { @@ -1618286,17 +1618013,10 @@ index 08236798d..081d026f1 100644 }; +EXPORT_SYMBOL_GPL(freezer_cgrp_subsys); diff --git a/kernel/cpu.c b/kernel/cpu.c -index 67c22941b..5add449f2 100644 +index 67c22941b..45aa725b6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c -@@ -32,12 +32,15 @@ - #include - #include - #include -+#include - #include - - #include +@@ -38,6 +38,8 @@ #define CREATE_TRACE_POINTS #include @@ -1618305,7 +1618025,7 @@ index 67c22941b..5add449f2 100644 #include "smpboot.h" /** -@@ -273,11 +276,13 @@ void cpu_maps_update_begin(void) +@@ -273,11 +275,13 @@ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } @@ -1618319,7 +1618039,7 @@ index 67c22941b..5add449f2 100644 /* * If set, cpu_up and cpu_down will return -EBUSY and do nothing. -@@ -1046,7 +1051,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, +@@ -1046,7 +1050,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int prev_state, ret = 0; @@ -1618328,318 +1618048,6 @@ index 67c22941b..5add449f2 100644 return -EBUSY; if (!cpu_present(cpu)) -@@ -1144,6 +1149,217 @@ int remove_cpu(unsigned int cpu) - } - EXPORT_SYMBOL_GPL(remove_cpu); - -+extern bool dl_cpu_busy(unsigned int cpu); -+ -+int __pause_drain_rq(struct cpumask *cpus) -+{ -+ unsigned int cpu; -+ int err = 0; -+ -+ /* -+ * Disabling preemption avoids that one of the stopper, started from -+ * sched_cpu_drain_rq(), blocks firing draining for the whole cpumask. -+ */ -+ preempt_disable(); -+ for_each_cpu(cpu, cpus) { -+ err = sched_cpu_drain_rq(cpu); -+ if (err) -+ break; -+ } -+ preempt_enable(); -+ -+ return err; -+} -+ -+void __wait_drain_rq(struct cpumask *cpus) -+{ -+ unsigned int cpu; -+ -+ for_each_cpu(cpu, cpus) -+ sched_cpu_drain_rq_wait(cpu); -+} -+ -+int pause_cpus(struct cpumask *cpus) -+{ -+ int err = 0; -+ int cpu; -+ u64 start_time = 0; -+ -+ start_time = sched_clock(); -+ -+ cpu_maps_update_begin(); -+ -+ if (cpu_hotplug_disabled) { -+ err = -EBUSY; -+ goto err_cpu_maps_update; -+ } -+ -+ /* Pausing an already inactive CPU isn't an error */ -+ cpumask_and(cpus, cpus, cpu_active_mask); -+ -+ for_each_cpu(cpu, cpus) { -+ if (!cpu_online(cpu) || dl_cpu_busy(cpu) || -+ get_cpu_device(cpu)->offline_disabled == true) { -+ err = -EBUSY; -+ goto err_cpu_maps_update; -+ } -+ } -+ -+ if (cpumask_weight(cpus) >= num_active_cpus()) { -+ err = -EBUSY; -+ goto err_cpu_maps_update; -+ } -+ -+ if (cpumask_empty(cpus)) -+ goto err_cpu_maps_update; -+ -+ /* -+ * Lazy migration: -+ * -+ * We do care about how fast a CPU can go idle and stay this in this -+ * state. If we try to take the cpus_write_lock() here, we would have -+ * to wait for a few dozens of ms, as this function might schedule. -+ * However, we can, as a first step, flip the active mask and migrate -+ * anything currently on the run-queue, to give a chance to the paused -+ * CPUs to reach quickly an idle state. There's a risk meanwhile for -+ * another CPU to observe an out-of-date active_mask or to incompletely -+ * update a cpuset. 
Both problems would be resolved later in the slow -+ * path, which ensures active_mask synchronization, triggers a cpuset -+ * rebuild and migrate any task that would have escaped the lazy -+ * migration. -+ */ -+ for_each_cpu(cpu, cpus) -+ set_cpu_active(cpu, false); -+ err = __pause_drain_rq(cpus); -+ if (err) { -+ __wait_drain_rq(cpus); -+ for_each_cpu(cpu, cpus) -+ set_cpu_active(cpu, true); -+ goto err_cpu_maps_update; -+ } -+ -+ /* -+ * Slow path deactivation: -+ * -+ * Now that paused CPUs are most likely idle, we can go through a -+ * complete scheduler deactivation. -+ * -+ * The cpu_active_mask being already set and cpus_write_lock calling -+ * synchronize_rcu(), we know that all preempt-disabled and RCU users -+ * will observe the updated value. -+ */ -+ cpus_write_lock(); -+ -+ __wait_drain_rq(cpus); -+ -+ cpuhp_tasks_frozen = 0; -+ -+ if (sched_cpus_deactivate_nosync(cpus)) { -+ err = -EBUSY; -+ goto err_cpus_write_unlock; -+ } -+ -+ err = __pause_drain_rq(cpus); -+ __wait_drain_rq(cpus); -+ if (err) { -+ for_each_cpu(cpu, cpus) -+ sched_cpu_activate(cpu); -+ goto err_cpus_write_unlock; -+ } -+ -+ /* -+ * Even if living on the side of the regular HP path, pause is using -+ * one of the HP step (CPUHP_AP_ACTIVE). This should be reflected on the -+ * current state of the CPU. -+ */ -+ for_each_cpu(cpu, cpus) { -+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); -+ -+ st->state = CPUHP_AP_ACTIVE - 1; -+ st->target = st->state; -+ } -+ -+err_cpus_write_unlock: -+ cpus_write_unlock(); -+err_cpu_maps_update: -+ cpu_maps_update_done(); -+ -+ trace_cpuhp_pause(cpus, start_time, 1); -+ -+ return err; -+} -+EXPORT_SYMBOL_GPL(pause_cpus); -+ -+int resume_cpus(struct cpumask *cpus) -+{ -+ unsigned int cpu; -+ int err = 0; -+ u64 start_time = 0; -+ -+ start_time = sched_clock(); -+ -+ cpu_maps_update_begin(); -+ -+ if (cpu_hotplug_disabled) { -+ err = -EBUSY; -+ goto err_cpu_maps_update; -+ } -+ -+ /* Resuming an already active CPU isn't an error */ -+ cpumask_andnot(cpus, cpus, cpu_active_mask); -+ -+ for_each_cpu(cpu, cpus) { -+ if (!cpu_online(cpu)) { -+ err = -EBUSY; -+ goto err_cpu_maps_update; -+ } -+ } -+ -+ if (cpumask_empty(cpus)) -+ goto err_cpu_maps_update; -+ -+ for_each_cpu(cpu, cpus) -+ set_cpu_active(cpu, true); -+ -+ if (err) -+ goto err_cpu_maps_update; -+ -+ /* Lazy Resume. Build domains immediately instead of scheduling -+ * a workqueue. This is so that the cpu can pull load when -+ * sent a load balancing kick. -+ */ -+ cpuset_hotplug_workfn(NULL); -+ -+ cpus_write_lock(); -+ -+ cpuhp_tasks_frozen = 0; -+ -+ if (sched_cpus_activate(cpus)) { -+ err = -EBUSY; -+ goto err_cpus_write_unlock; -+ } -+ -+ /* -+ * see pause_cpus. -+ */ -+ for_each_cpu(cpu, cpus) { -+ struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); -+ -+ st->state = CPUHP_ONLINE; -+ st->target = st->state; -+ } -+ -+err_cpus_write_unlock: -+ cpus_write_unlock(); -+err_cpu_maps_update: -+ cpu_maps_update_done(); -+ -+ trace_cpuhp_pause(cpus, start_time, 0); -+ -+ return err; -+} -+EXPORT_SYMBOL_GPL(resume_cpus); -+ - void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) - { - unsigned int cpu; -@@ -1238,6 +1454,25 @@ void cpuhp_online_idle(enum cpuhp_state state) - complete_ap_thread(st, true); - } - -+static int switch_to_rt_policy(void) -+{ -+ struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; -+ unsigned int policy = current->policy; -+ -+ if (policy == SCHED_NORMAL) -+ /* Switch to SCHED_FIFO from SCHED_NORMAL. 
*/ -+ return sched_setscheduler_nocheck(current, SCHED_FIFO, ¶m); -+ else -+ return 1; -+} -+ -+static int switch_to_fair_policy(void) -+{ -+ struct sched_param param = { .sched_priority = 0 }; -+ -+ return sched_setscheduler_nocheck(current, SCHED_NORMAL, ¶m); -+} -+ - /* Requires cpu_add_remove_lock to be held */ - static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) - { -@@ -1302,6 +1537,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) - static int cpu_up(unsigned int cpu, enum cpuhp_state target) - { - int err = 0; -+ int switch_err; - - if (!cpu_possible(cpu)) { - pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", -@@ -1312,9 +1548,22 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target) - return -EINVAL; - } - -+ -+ /* -+ * CPU hotplug operations consists of many steps and each step -+ * calls a callback of core kernel subsystem. CPU hotplug-in -+ * operation may get preempted by other CFS tasks and whole -+ * operation of cpu hotplug in CPU gets delayed. Switch the -+ * current task to SCHED_FIFO from SCHED_NORMAL, so that -+ * hotplug in operation may complete quickly in heavy loaded -+ * conditions and new CPU will start handle the workload. -+ */ -+ -+ switch_err = switch_to_rt_policy(); -+ - err = try_online_node(cpu_to_node(cpu)); - if (err) -- return err; -+ goto switch_out; - - cpu_maps_update_begin(); - -@@ -1330,6 +1579,14 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target) - err = _cpu_up(cpu, 0, target); - out: - cpu_maps_update_done(); -+switch_out: -+ if (!switch_err) { -+ switch_err = switch_to_fair_policy(); -+ if (switch_err) -+ pr_err("Hotplug policy switch err=%d Task %s pid=%d\n", -+ switch_err, current->comm, current->pid); -+ } -+ - return err; - } - -@@ -1465,6 +1722,7 @@ void __weak arch_thaw_secondary_cpus_end(void) - void thaw_secondary_cpus(void) - { - int cpu, error; -+ struct device *cpu_device; - - /* Allow everyone to use the CPU hotplug again */ - cpu_maps_update_begin(); -@@ -1482,6 +1740,12 @@ void thaw_secondary_cpus(void) - trace_suspend_resume(TPS("CPU_ON"), cpu, false); - if (!error) { - pr_info("CPU%d is up\n", cpu); -+ cpu_device = get_cpu_device(cpu); -+ if (!cpu_device) -+ pr_err("%s: failed to get cpu%d device\n", -+ __func__, cpu); -+ else -+ kobject_uevent(&cpu_device->kobj, KOBJ_ONLINE); - continue; - } - pr_warn("Error taking CPU%d up: %d\n", cpu, error); diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index e2999a070..79cb6d063 100644 --- a/kernel/irq/generic-chip.c @@ -1619380,14 +1618788,13 @@ index af6f23d8b..bce629531 100644 { /* The boot cpu is always logical cpu 0 */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index e4551d173..e2d31233a 100644 +index e4551d173..caca179f6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -41,8 +41,17 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); +@@ -41,6 +41,13 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_overutilized_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_cfs_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_util_est_se_tp); EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp); -+EXPORT_TRACEPOINT_SYMBOL_GPL(sched_switch); +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_waking); +#ifdef CONFIG_SCHEDSTATS +EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_sleep); @@ -1619397,74 +1618804,8 @@ index e4551d173..e2d31233a 100644 +#endif DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); -+EXPORT_SYMBOL_GPL(runqueues); - - #ifdef CONFIG_SCHED_DEBUG - 
/* -@@ -57,6 +66,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); - const_debug unsigned int sysctl_sched_features = - #include "features.h" - 0; -+EXPORT_SYMBOL_GPL(sysctl_sched_features); - #undef SCHED_FEAT - #endif - -@@ -197,6 +207,7 @@ struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) - cpu_relax(); - } - } -+EXPORT_SYMBOL_GPL(__task_rq_lock); - - /* - * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. -@@ -239,6 +250,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) - cpu_relax(); - } - } -+EXPORT_SYMBOL_GPL(task_rq_lock); - /* - * RQ-clock updating methods: -@@ -319,6 +331,7 @@ void update_rq_clock(struct rq *rq) - rq->clock += delta; - update_rq_clock_task(rq, delta); - } -+EXPORT_SYMBOL_GPL(update_rq_clock); - - static inline void - rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func) -@@ -531,6 +544,7 @@ static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task) - */ - *head->lastp = node; - head->lastp = &node->next; -+ head->count++; - return true; - } - -@@ -587,12 +601,14 @@ void wake_up_q(struct wake_q_head *head) - /* Task can safely be re-inserted now: */ - node = node->next; - task->wake_q.next = NULL; -+ task->wake_q_count = head->count; - - /* - * wake_up_process() executes a full barrier, which pairs with - * the queueing in wake_q_add() so as not to miss wakeups. - */ - wake_up_process(task); -+ task->wake_q_count = 0; - put_task_struct(task); - } - } -@@ -627,6 +643,7 @@ void resched_curr(struct rq *rq) - else - trace_sched_wake_idle_without_ipi(cpu); - } -+EXPORT_SYMBOL_GPL(resched_curr); - - void resched_cpu(int cpu) - { -@@ -654,7 +671,7 @@ int get_nohz_timer_target(void) +@@ -654,7 +661,7 @@ int get_nohz_timer_target(void) int i, cpu = smp_processor_id(), default_cpu = -1; struct sched_domain *sd; @@ -1619473,7 +1618814,7 @@ index e4551d173..e2d31233a 100644 if (!idle_cpu(cpu)) return cpu; default_cpu = cpu; -@@ -674,8 +691,25 @@ int get_nohz_timer_target(void) +@@ -674,8 +681,25 @@ int get_nohz_timer_target(void) } } @@ -1619501,159 +1618842,7 @@ index e4551d173..e2d31233a 100644 cpu = default_cpu; unlock: rcu_read_unlock(); -@@ -927,6 +961,7 @@ static struct uclamp_se uclamp_default[UCLAMP_CNT]; - * * An admin modifying the cgroup cpu.uclamp.{min, max} - */ - DEFINE_STATIC_KEY_FALSE(sched_uclamp_used); -+EXPORT_SYMBOL_GPL(sched_uclamp_used); - - /* Integer rounded range for each bucket */ - #define UCLAMP_BUCKET_DELTA DIV_ROUND_CLOSEST(SCHED_CAPACITY_SCALE, UCLAMP_BUCKETS) -@@ -1120,6 +1155,7 @@ unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id) - - return (unsigned long)uc_eff.value; - } -+EXPORT_SYMBOL_GPL(uclamp_eff_value); - - /* - * When a task is enqueued on a rq, the clamp bucket currently defined by the -@@ -1424,17 +1460,24 @@ int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - static int uclamp_validate(struct task_struct *p, - const struct sched_attr *attr) - { -- unsigned int lower_bound = p->uclamp_req[UCLAMP_MIN].value; -- unsigned int upper_bound = p->uclamp_req[UCLAMP_MAX].value; -+ int util_min = p->uclamp_req[UCLAMP_MIN].value; -+ int util_max = p->uclamp_req[UCLAMP_MAX].value; - -- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) -- lower_bound = attr->sched_util_min; -- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) -- upper_bound = attr->sched_util_max; -+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { -+ util_min = attr->sched_util_min; - -- if (lower_bound > upper_bound) 
-- return -EINVAL; -- if (upper_bound > SCHED_CAPACITY_SCALE) -+ if (util_min + 1 > SCHED_CAPACITY_SCALE + 1) -+ return -EINVAL; -+ } -+ -+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { -+ util_max = attr->sched_util_max; -+ -+ if (util_max + 1 > SCHED_CAPACITY_SCALE + 1) -+ return -EINVAL; -+ } -+ -+ if (util_min != -1 && util_max != -1 && util_min > util_max) - return -EINVAL; - - /* -@@ -1449,20 +1492,41 @@ static int uclamp_validate(struct task_struct *p, - return 0; - } - -+static bool uclamp_reset(const struct sched_attr *attr, -+ enum uclamp_id clamp_id, -+ struct uclamp_se *uc_se) -+{ -+ /* Reset on sched class change for a non user-defined clamp value. */ -+ if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)) && -+ !uc_se->user_defined) -+ return true; -+ -+ /* Reset on sched_util_{min,max} == -1. */ -+ if (clamp_id == UCLAMP_MIN && -+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && -+ attr->sched_util_min == -1) { -+ return true; -+ } -+ -+ if (clamp_id == UCLAMP_MAX && -+ attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && -+ attr->sched_util_max == -1) { -+ return true; -+ } -+ -+ return false; -+} -+ - static void __setscheduler_uclamp(struct task_struct *p, - const struct sched_attr *attr) - { - enum uclamp_id clamp_id; - -- /* -- * On scheduling class change, reset to default clamps for tasks -- * without a task-specific value. -- */ - for_each_clamp_id(clamp_id) { - struct uclamp_se *uc_se = &p->uclamp_req[clamp_id]; -+ unsigned int value; - -- /* Keep using defined clamps across class changes */ -- if (uc_se->user_defined) -+ if (!uclamp_reset(attr, clamp_id, uc_se)) - continue; - - /* -@@ -1470,21 +1534,25 @@ static void __setscheduler_uclamp(struct task_struct *p, - * at runtime. - */ - if (unlikely(rt_task(p) && clamp_id == UCLAMP_MIN)) -- __uclamp_update_util_min_rt_default(p); -+ value = sysctl_sched_uclamp_util_min_rt_default; - else -- uclamp_se_set(uc_se, uclamp_none(clamp_id), false); -+ value = uclamp_none(clamp_id); -+ -+ uclamp_se_set(uc_se, value, false); - - } - - if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP))) - return; - -- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) { -+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN && -+ attr->sched_util_min != -1) { - uclamp_se_set(&p->uclamp_req[UCLAMP_MIN], - attr->sched_util_min, true); - } - -- if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX) { -+ if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MAX && -+ attr->sched_util_max != -1) { - uclamp_se_set(&p->uclamp_req[UCLAMP_MAX], - attr->sched_util_max, true); - } -@@ -1603,6 +1671,7 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags) - - p->on_rq = TASK_ON_RQ_QUEUED; - } -+EXPORT_SYMBOL_GPL(activate_task); - - void deactivate_task(struct rq *rq, struct task_struct *p, int flags) - { -@@ -1610,6 +1679,7 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags) - - dequeue_task(rq, p, flags); - } -+EXPORT_SYMBOL_GPL(deactivate_task); - - static inline int __normal_prio(int policy, int rt_prio, int nice) - { -@@ -1702,6 +1772,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) - if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr)) - rq_clock_skip_update(rq); - } -+EXPORT_SYMBOL_GPL(check_preempt_curr); - - #ifdef CONFIG_SMP - -@@ -1717,7 +1788,10 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +@@ -1717,7 +1741,10 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) if (is_per_cpu_kthread(p)) return cpu_online(cpu); @@ -1619665,185 
+1618854,7 @@ index e4551d173..e2d31233a 100644 } /* -@@ -1869,24 +1943,19 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - } - - /* -- * Change a given task's CPU affinity. Migrate the thread to a -- * proper CPU and schedule it away if the CPU it's executing on -- * is removed from the allowed bitmask. -- * -- * NOTE: the caller must have a valid reference to the task, the -- * task must not exit() & deallocate itself prematurely. The -- * call is not atomic; no spinlocks may be held. -+ * Called with both p->pi_lock and rq->lock held; drops both before returning. - */ --static int __set_cpus_allowed_ptr(struct task_struct *p, -- const struct cpumask *new_mask, bool check) -+static int __set_cpus_allowed_ptr_locked(struct task_struct *p, -+ const struct cpumask *new_mask, -+ bool check, -+ struct rq *rq, -+ struct rq_flags *rf) - { - const struct cpumask *cpu_valid_mask = cpu_active_mask; -+ const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p); - unsigned int dest_cpu; -- struct rq_flags rf; -- struct rq *rq; - int ret = 0; - -- rq = task_rq_lock(p, &rf); - update_rq_clock(rq); - - if (p->flags & PF_KTHREAD) { -@@ -1894,6 +1963,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - * Kernel threads are allowed on online && !active CPUs - */ - cpu_valid_mask = cpu_online_mask; -+ } else if (!cpumask_subset(new_mask, cpu_allowed_mask)) { -+ ret = -EINVAL; -+ goto out; - } - - /* -@@ -1938,7 +2010,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - if (task_running(rq, p) || p->state == TASK_WAKING) { - struct migration_arg arg = { p, dest_cpu }; - /* Need help from migration thread: drop lock and wait. */ -- task_rq_unlock(rq, p, &rf); -+ task_rq_unlock(rq, p, rf); - stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); - return 0; - } else if (task_on_rq_queued(p)) { -@@ -1946,20 +2018,105 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, - * OK, since we're going to drop the lock immediately - * afterwards anyway. - */ -- rq = move_queued_task(rq, &rf, p, dest_cpu); -+ rq = move_queued_task(rq, rf, p, dest_cpu); - } - out: -- task_rq_unlock(rq, p, &rf); -+ task_rq_unlock(rq, p, rf); - - return ret; - } - -+/* -+ * Change a given task's CPU affinity. Migrate the thread to a -+ * proper CPU and schedule it away if the CPU it's executing on -+ * is removed from the allowed bitmask. -+ * -+ * NOTE: the caller must have a valid reference to the task, the -+ * task must not exit() & deallocate itself prematurely. The -+ * call is not atomic; no spinlocks may be held. -+ */ -+static int __set_cpus_allowed_ptr(struct task_struct *p, -+ const struct cpumask *new_mask, bool check) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(p, &rf); -+ return __set_cpus_allowed_ptr_locked(p, new_mask, check, rq, &rf); -+} -+ - int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) - { - return __set_cpus_allowed_ptr(p, new_mask, false); - } - EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); - -+/* -+ * Change a given task's CPU affinity to the intersection of its current -+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask. -+ * If the resulting mask is empty, leave the affinity unchanged and return -+ * -EINVAL. 
-+ */ -+static int restrict_cpus_allowed_ptr(struct task_struct *p, -+ struct cpumask *new_mask, -+ const struct cpumask *subset_mask) -+{ -+ struct rq_flags rf; -+ struct rq *rq; -+ -+ rq = task_rq_lock(p, &rf); -+ if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) { -+ task_rq_unlock(rq, p, &rf); -+ return -EINVAL; -+ } -+ -+ return __set_cpus_allowed_ptr_locked(p, new_mask, false, rq, &rf); -+} -+ -+/* -+ * Restrict a given task's CPU affinity so that it is a subset of -+ * task_cpu_possible_mask(). If the resulting mask is empty, we warn and -+ * walk up the cpuset hierarchy until we find a suitable mask. -+ */ -+void force_compatible_cpus_allowed_ptr(struct task_struct *p) -+{ -+ cpumask_var_t new_mask; -+ const struct cpumask *override_mask = task_cpu_possible_mask(p); -+ -+ alloc_cpumask_var(&new_mask, GFP_KERNEL); -+ -+ /* -+ * __migrate_task() can fail silently in the face of concurrent -+ * offlining of the chosen destination CPU, so take the hotplug -+ * lock to ensure that the migration succeeds. -+ */ -+ cpus_read_lock(); -+ if (!cpumask_available(new_mask)) -+ goto out_set_mask; -+ -+ if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask)) -+ goto out_free_mask; -+ -+ /* -+ * We failed to find a valid subset of the affinity mask for the -+ * task, so override it based on its cpuset hierarchy. -+ */ -+ cpuset_cpus_allowed(p, new_mask); -+ override_mask = new_mask; -+ -+out_set_mask: -+ if (printk_ratelimit()) { -+ printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n", -+ task_pid_nr(p), p->comm, -+ cpumask_pr_args(override_mask)); -+ } -+ -+ WARN_ON(set_cpus_allowed_ptr(p, override_mask)); -+out_free_mask: -+ cpus_read_unlock(); -+ free_cpumask_var(new_mask); -+} -+ - void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - { - #ifdef CONFIG_SCHED_DEBUG -@@ -2011,8 +2168,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) - - __set_task_cpu(p, new_cpu); - } -+EXPORT_SYMBOL_GPL(set_task_cpu); - --#ifdef CONFIG_NUMA_BALANCING - static void __migrate_swap_task(struct task_struct *p, int cpu) - { - if (task_on_rq_queued(p)) { -@@ -2127,7 +2284,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p, - out: - return ret; - } --#endif /* CONFIG_NUMA_BALANCING */ -+EXPORT_SYMBOL_GPL(migrate_swap); - - /* - * wait_task_inactive - wait for a thread to unschedule. 
-@@ -2327,10 +2484,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) +@@ -2327,10 +2354,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) } fallthrough; case possible: @@ -1619855,7 +1618866,7 @@ index e4551d173..e2d31233a 100644 case fail: BUG(); break; -@@ -2502,6 +2658,9 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, +@@ -2502,6 +2528,9 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, { int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; @@ -1619865,15 +1618876,7 @@ index e4551d173..e2d31233a 100644 lockdep_assert_held(&rq->lock); if (p->sched_contributes_to_load) -@@ -2647,6 +2806,7 @@ void wake_up_if_idle(int cpu) - out: - rcu_read_unlock(); - } -+EXPORT_SYMBOL_GPL(wake_up_if_idle); - - bool cpus_share_cache(int this_cpu, int that_cpu) - { -@@ -2871,6 +3031,19 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +@@ -2871,6 +2900,19 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (!(p->state & state)) goto unlock; @@ -1619893,7 +1618896,7 @@ index e4551d173..e2d31233a 100644 trace_sched_waking(p); /* We're going to change ->state: */ -@@ -4805,7 +4978,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) +@@ -4805,7 +4847,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void) int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags, void *key) { @@ -1619902,55 +1618905,7 @@ index e4551d173..e2d31233a 100644 return try_to_wake_up(curr->private, mode, wake_flags); } EXPORT_SYMBOL(default_wake_function); -@@ -5128,6 +5301,7 @@ int available_idle_cpu(int cpu) - - return 1; - } -+EXPORT_SYMBOL_GPL(available_idle_cpu); - - /** - * idle_task - return the idle task for a given CPU. -@@ -5306,9 +5480,6 @@ static int __sched_setscheduler(struct task_struct *p, - return retval; - } - -- if (pi) -- cpuset_read_lock(); -- - /* - * Make sure no PI-waiters arrive (or leave) while we are - * changing the priority of the task: -@@ -5383,8 +5554,6 @@ static int __sched_setscheduler(struct task_struct *p, - if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { - policy = oldpolicy = -1; - task_rq_unlock(rq, p, &rf); -- if (pi) -- cpuset_read_unlock(); - goto recheck; - } - -@@ -5449,10 +5618,8 @@ static int __sched_setscheduler(struct task_struct *p, - preempt_disable(); - task_rq_unlock(rq, p, &rf); - -- if (pi) { -- cpuset_read_unlock(); -+ if (pi) - rt_mutex_adjust_pi(p); -- } - - /* Run balance callbacks after we've adjusted the PI chain: */ - balance_callback(rq); -@@ -5462,8 +5629,6 @@ static int __sched_setscheduler(struct task_struct *p, - - unlock: - task_rq_unlock(rq, p, &rf); -- if (pi) -- cpuset_read_unlock(); - return retval; - } - -@@ -5502,16 +5667,19 @@ int sched_setscheduler(struct task_struct *p, int policy, +@@ -5502,16 +5544,19 @@ int sched_setscheduler(struct task_struct *p, int policy, { return _sched_setscheduler(p, policy, param, true); } @@ -1619970,7 +1618925,7 @@ index e4551d173..e2d31233a 100644 /** * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. 
-@@ -5531,6 +5699,7 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, +@@ -5531,6 +5576,7 @@ int sched_setscheduler_nocheck(struct task_struct *p, int policy, { return _sched_setscheduler(p, policy, param, false); } @@ -1619978,49 +1618933,7 @@ index e4551d173..e2d31233a 100644 /* * SCHED_FIFO is a broken scheduler model; that is, it is fundamentally -@@ -5592,14 +5761,9 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) - rcu_read_lock(); - retval = -ESRCH; - p = find_process_by_pid(pid); -- if (likely(p)) -- get_task_struct(p); -- rcu_read_unlock(); -- -- if (likely(p)) { -+ if (p != NULL) - retval = sched_setscheduler(p, policy, &lparam); -- put_task_struct(p); -- } -+ rcu_read_unlock(); - - return retval; - } -@@ -5889,6 +6053,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) - cpumask_var_t cpus_allowed, new_mask; - struct task_struct *p; - int retval; -+ int skip = 0; - - rcu_read_lock(); - -@@ -6721,11 +6886,14 @@ static struct task_struct *__pick_migrate_task(struct rq *rq) - * Called with rq->lock held even though we'er in stop_machine() and - * there's no concurrency possible, we hold the required locks anyway - * because of lock validation efforts. -+ * -+ * force: if false, the function will skip CPU pinned kthreads. - */ --static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) -+static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf, bool force) - { - struct rq *rq = dead_rq; -- struct task_struct *next, *stop = rq->stop; -+ struct task_struct *next, *tmp, *stop = rq->stop; -+ LIST_HEAD(percpu_kthreads); - struct rq_flags orf = *rf; - int dest_cpu; - -@@ -6747,6 +6915,11 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) +@@ -6747,6 +6793,11 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) */ update_rq_clock(rq); @@ -1620032,559 +1618945,11 @@ index e4551d173..e2d31233a 100644 for (;;) { /* * There's this thread running, bail when that's the only -@@ -6757,6 +6930,20 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) - - next = __pick_migrate_task(rq); - -+ /* -+ * Argh ... no iterator for tasks, we need to remove the -+ * kthread from the run-queue to continue. -+ */ -+ if (!force && is_per_cpu_kthread(next)) { -+ INIT_LIST_HEAD(&next->percpu_kthread_node); -+ list_add(&next->percpu_kthread_node, &percpu_kthreads); -+ -+ /* DEQUEUE_SAVE not used due to move_entity in rt */ -+ deactivate_task(rq, next, -+ DEQUEUE_NOCLOCK); -+ continue; -+ } -+ - /* - * Rules for changing task_struct::cpus_mask are holding - * both pi_lock and rq->lock, such that holding either -@@ -6775,7 +6962,14 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) - * changed the task, WARN if weird stuff happened, because in - * that case the above rq->lock drop is a fail too. - */ -- if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { -+ if (task_rq(next) != rq || !task_on_rq_queued(next)) { -+ /* -+ * In the !force case, there is a hole between -+ * rq_unlock() and rq_relock(), where another CPU might -+ * not observe an up to date cpu_active_mask and try to -+ * move tasks around. 
-+ */ -+ WARN_ON(force); - raw_spin_unlock(&next->pi_lock); - continue; - } -@@ -6792,8 +6986,50 @@ static void migrate_tasks(struct rq *dead_rq, struct rq_flags *rf) - raw_spin_unlock(&next->pi_lock); - } - -+ list_for_each_entry_safe(next, tmp, &percpu_kthreads, -+ percpu_kthread_node) { -+ -+ /* ENQUEUE_RESTORE not used due to move_entity in rt */ -+ activate_task(rq, next, ENQUEUE_NOCLOCK); -+ list_del(&next->percpu_kthread_node); -+ } -+ - rq->stop = stop; - } -+ -+static int drain_rq_cpu_stop(void *data) -+{ -+ struct rq *rq = this_rq(); -+ struct rq_flags rf; -+ -+ rq_lock_irqsave(rq, &rf); -+ migrate_tasks(rq, &rf, false); -+ rq_unlock_irqrestore(rq, &rf); -+ -+ return 0; -+} -+ -+int sched_cpu_drain_rq(unsigned int cpu) -+{ -+ struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); -+ struct cpu_stop_done *rq_drain_done = &(cpu_rq(cpu)->drain_done); -+ -+ if (idle_cpu(cpu)) { -+ rq_drain->done = NULL; -+ return 0; -+ } -+ -+ return stop_one_cpu_async(cpu, drain_rq_cpu_stop, NULL, rq_drain, -+ rq_drain_done); -+} -+ -+void sched_cpu_drain_rq_wait(unsigned int cpu) -+{ -+ struct cpu_stop_work *rq_drain = &(cpu_rq(cpu)->drain); -+ -+ if (rq_drain->done) -+ cpu_stop_work_wait(rq_drain); -+} - #endif /* CONFIG_HOTPLUG_CPU */ - - void set_rq_online(struct rq *rq) -@@ -6909,22 +7145,32 @@ int sched_cpu_activate(unsigned int cpu) - } - rq_unlock_irqrestore(rq, &rf); - -+ update_max_interval(); -+ - return 0; - } - --int sched_cpu_deactivate(unsigned int cpu) -+int sched_cpus_activate(struct cpumask *cpus) -+{ -+ unsigned int cpu; -+ -+ for_each_cpu(cpu, cpus) { -+ if (sched_cpu_activate(cpu)) { -+ for_each_cpu_and(cpu, cpus, cpu_active_mask) -+ sched_cpu_deactivate(cpu); -+ -+ return -EBUSY; -+ } -+ } -+ -+ return 0; -+} -+ -+int _sched_cpu_deactivate(unsigned int cpu) - { - int ret; - - set_cpu_active(cpu, false); -- /* -- * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -- * users of this state to go away such that all new such users will -- * observe it. -- * -- * Do sync before park smpboot threads to take care the rcu boost case. -- */ -- synchronize_rcu(); - - #ifdef CONFIG_SCHED_SMT - /* -@@ -6943,6 +7189,46 @@ int sched_cpu_deactivate(unsigned int cpu) - return ret; - } - sched_domains_numa_masks_clear(cpu); -+ -+ update_max_interval(); -+ -+ return 0; -+} -+ -+int sched_cpu_deactivate(unsigned int cpu) -+{ -+ int ret = _sched_cpu_deactivate(cpu); -+ -+ if (ret) -+ return ret; -+ -+ /* -+ * We've cleared cpu_active_mask, wait for all preempt-disabled and RCU -+ * users of this state to go away such that all new such users will -+ * observe it. -+ * -+ * Do sync before park smpboot threads to take care the rcu boost case. 
-+ */ -+ synchronize_rcu(); -+ -+ return 0; -+} -+ -+int sched_cpus_deactivate_nosync(struct cpumask *cpus) -+{ -+ unsigned int cpu; -+ -+ for_each_cpu(cpu, cpus) { -+ if (_sched_cpu_deactivate(cpu)) { -+ for_each_cpu(cpu, cpus) { -+ if (!cpu_active(cpu)) -+ sched_cpu_activate(cpu); -+ } -+ -+ return -EBUSY; -+ } -+ } -+ - return 0; - } - -@@ -6951,7 +7237,6 @@ static void sched_rq_cpu_starting(unsigned int cpu) - struct rq *rq = cpu_rq(cpu); - - rq->calc_load_update = calc_load_update; -- update_max_interval(); - } - - int sched_cpu_starting(unsigned int cpu) -@@ -6975,12 +7260,11 @@ int sched_cpu_dying(unsigned int cpu) - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } -- migrate_tasks(rq, &rf); -+ migrate_tasks(rq, &rf, true); - BUG_ON(rq->nr_running != 1); - rq_unlock_irqrestore(rq, &rf); - - calc_load_migrate(rq); -- update_max_interval(); - nohz_balance_exit_idle(rq); - hrtick_clear(rq); - return 0; -@@ -7003,6 +7287,7 @@ void __init sched_init_smp(void) - /* Move init over to a non-isolated CPU */ - if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0) - BUG(); -+ - sched_init_granularity(); - - init_sched_rt_class(); -@@ -7038,7 +7323,9 @@ int in_sched_functions(unsigned long addr) - * Every task in system belongs to this group at bootup. - */ - struct task_group root_task_group; -+EXPORT_SYMBOL_GPL(root_task_group); - LIST_HEAD(task_groups); -+EXPORT_SYMBOL_GPL(task_groups); - - /* Cacheline aligned slab cache for task_group */ - static struct kmem_cache *task_group_cache __read_mostly; -@@ -7860,6 +8147,27 @@ static int cpu_uclamp_max_show(struct seq_file *sf, void *v) - cpu_uclamp_print(sf, UCLAMP_MAX); - return 0; - } -+ -+static int cpu_uclamp_ls_write_u64(struct cgroup_subsys_state *css, -+ struct cftype *cftype, u64 ls) -+{ -+ struct task_group *tg; -+ -+ if (ls > 1) -+ return -EINVAL; -+ tg = css_tg(css); -+ tg->latency_sensitive = (unsigned int) ls; -+ -+ return 0; -+} -+ -+static u64 cpu_uclamp_ls_read_u64(struct cgroup_subsys_state *css, -+ struct cftype *cft) -+{ -+ struct task_group *tg = css_tg(css); -+ -+ return (u64) tg->latency_sensitive; -+} - #endif /* CONFIG_UCLAMP_TASK_GROUP */ - - #ifdef CONFIG_FAIR_GROUP_SCHED -@@ -8228,6 +8536,12 @@ static struct cftype cpu_legacy_files[] = { - .seq_show = cpu_uclamp_max_show, - .write = cpu_uclamp_max_write, - }, -+ { -+ .name = "uclamp.latency_sensitive", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_uclamp_ls_read_u64, -+ .write_u64 = cpu_uclamp_ls_write_u64, -+ }, - #endif - { } /* Terminate */ - }; -@@ -8409,6 +8723,12 @@ static struct cftype cpu_files[] = { - .seq_show = cpu_uclamp_max_show, - .write = cpu_uclamp_max_write, - }, -+ { -+ .name = "uclamp.latency_sensitive", -+ .flags = CFTYPE_NOT_ON_ROOT, -+ .read_u64 = cpu_uclamp_ls_read_u64, -+ .write_u64 = cpu_uclamp_ls_write_u64, -+ }, - #endif - { } /* terminate */ - }; -diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c -index 7c2fe50fd..3d5f5a80b 100644 ---- a/kernel/sched/cpufreq.c -+++ b/kernel/sched/cpufreq.c -@@ -75,3 +75,4 @@ bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy) - (policy->dvfs_possible_from_any_cpu && - rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data))); - } -+EXPORT_SYMBOL_GPL(cpufreq_this_cpu_can_update); -diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c -index 5e39da0ae..7421b2317 100644 ---- a/kernel/sched/cpufreq_schedutil.c -+++ b/kernel/sched/cpufreq_schedutil.c -@@ -277,6 +277,7 @@ unsigned long schedutil_cpu_util(int 
cpu, unsigned long util_cfs, - - return min(max, util); - } -+EXPORT_SYMBOL_GPL(schedutil_cpu_util); - - static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) - { -@@ -903,36 +904,3 @@ struct cpufreq_governor *cpufreq_default_governor(void) - #endif - - cpufreq_governor_init(schedutil_gov); -- --#ifdef CONFIG_ENERGY_MODEL --extern bool sched_energy_update; --extern struct mutex sched_energy_mutex; -- --static void rebuild_sd_workfn(struct work_struct *work) --{ -- mutex_lock(&sched_energy_mutex); -- sched_energy_update = true; -- rebuild_sched_domains(); -- sched_energy_update = false; -- mutex_unlock(&sched_energy_mutex); --} --static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); -- --/* -- * EAS shouldn't be attempted without sugov, so rebuild the sched_domains -- * on governor changes to make sure the scheduler knows about it. -- */ --void sched_cpufreq_governor_change(struct cpufreq_policy *policy, -- struct cpufreq_governor *old_gov) --{ -- if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) { -- /* -- * When called from the cpufreq_register_driver() path, the -- * cpu_hotplug_lock is already held, so use a work item to -- * avoid nested locking in rebuild_sched_domains(). -- */ -- schedule_work(&rebuild_sd_work); -- } -- --} --#endif -diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c -index 0033731a0..cb1153194 100644 ---- a/kernel/sched/cpupri.c -+++ b/kernel/sched/cpupri.c -@@ -41,8 +41,29 @@ static int convert_prio(int prio) - return cpupri; - } - -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+/** -+ * drop_nopreempt_cpus - remove likely nonpreemptible cpus from the mask -+ * @lowest_mask: mask with selected CPUs (non-NULL) -+ */ -+static void -+drop_nopreempt_cpus(struct cpumask *lowest_mask) -+{ -+ unsigned int cpu = cpumask_first(lowest_mask); -+ while (cpu < nr_cpu_ids) { -+ /* unlocked access */ -+ struct task_struct *task = READ_ONCE(cpu_rq(cpu)->curr); -+ if (task_may_not_preempt(task, cpu)) { -+ cpumask_clear_cpu(cpu, lowest_mask); -+ } -+ cpu = cpumask_next(cpu, lowest_mask); -+ } -+} -+#endif -+ - static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p, -- struct cpumask *lowest_mask, int idx) -+ struct cpumask *lowest_mask, int idx, -+ bool drop_nopreempts) - { - struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; - int skip = 0; -@@ -78,6 +99,12 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p, - - if (lowest_mask) { - cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); -+ cpumask_and(lowest_mask, lowest_mask, cpu_active_mask); -+ -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+ if (drop_nopreempts) -+ drop_nopreempt_cpus(lowest_mask); -+#endif - - /* - * We have to ensure that we have at least one bit -@@ -123,12 +150,16 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p, - { - int task_pri = convert_prio(p->prio); - int idx, cpu; -+ bool drop_nopreempts = task_pri <= MAX_RT_PRIO; - - BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); - -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+retry: -+#endif - for (idx = 0; idx < task_pri; idx++) { - -- if (!__cpupri_find(cp, p, lowest_mask, idx)) -+ if (!__cpupri_find(cp, p, lowest_mask, idx, drop_nopreempts)) - continue; - - if (!lowest_mask || !fitness_fn) -@@ -150,6 +181,17 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p, - return 1; - } - -+ /* -+ * If we can't find any non-preemptible cpu's, retry so we can -+ * find the lowest priority target and avoid priority inversion. 
-+ */ -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+ if (drop_nopreempts) { -+ drop_nopreempts = false; -+ goto retry; -+ } -+#endif -+ - /* - * If we failed to find a fitting lowest_mask, kick off a new search - * but without taking into account any fitness criteria this time. -@@ -172,6 +214,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p, - - return 0; - } -+EXPORT_SYMBOL_GPL(cpupri_find_fitness); - - /** - * cpupri_set - update the CPU priority setting -@@ -290,3 +333,16 @@ void cpupri_cleanup(struct cpupri *cp) - for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) - free_cpumask_var(cp->pri_to_cpu[i].mask); - } -+ -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+/* -+ * cpupri_check_rt - check if CPU has a RT task -+ * should be called from rcu-sched read section. -+ */ -+bool cpupri_check_rt(void) -+{ -+ int cpu = raw_smp_processor_id(); -+ -+ return cpu_rq(cpu)->rd->cpupri.cpu_to_pri[cpu] > CPUPRI_NORMAL; -+} -+#endif -diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c -index 5a55d2300..1e1edea6a 100644 ---- a/kernel/sched/cputime.c -+++ b/kernel/sched/cputime.c -@@ -2,6 +2,7 @@ - /* - * Simple CPU accounting cgroup controller - */ -+#include - #include "sched.h" - - #ifdef CONFIG_IRQ_TIME_ACCOUNTING -@@ -18,6 +19,7 @@ - * compromise in place of having locks on each irq in account_system_time. - */ - DEFINE_PER_CPU(struct irqtime, cpu_irqtime); -+EXPORT_PER_CPU_SYMBOL_GPL(cpu_irqtime); - - static int sched_clock_irqtime; - -@@ -129,6 +131,9 @@ void account_user_time(struct task_struct *p, u64 cputime) - - /* Account for user time used */ - acct_account_cputime(p); -+ -+ /* Account power usage for user time */ -+ cpufreq_acct_update_power(p, cputime); - } - - /* -@@ -173,6 +178,9 @@ void account_system_index_time(struct task_struct *p, - - /* Account for system time used */ - acct_account_cputime(p); -+ -+ /* Account power usage for system time */ -+ cpufreq_acct_update_power(p, cputime); - } - - /* -@@ -460,6 +468,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) - *ut = cputime.utime; - *st = cputime.stime; - } -+EXPORT_SYMBOL_GPL(thread_group_cputime_adjusted); - - #else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */ - -diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c -index 8255267ce..cdcd87b3d 100644 ---- a/kernel/sched/deadline.c -+++ b/kernel/sched/deadline.c -@@ -2381,9 +2381,13 @@ void dl_add_task_root_domain(struct task_struct *p) - struct rq *rq; - struct dl_bw *dl_b; - -- rq = task_rq_lock(p, &rf); -- if (!dl_task(p)) -- goto unlock; -+ raw_spin_lock_irqsave(&p->pi_lock, rf.flags); -+ if (!dl_task(p)) { -+ raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); -+ return; -+ } -+ -+ rq = __task_rq_lock(p, &rf); - - dl_b = &rq->rd->dl_bw; - raw_spin_lock(&dl_b->lock); -@@ -2392,7 +2396,6 @@ void dl_add_task_root_domain(struct task_struct *p) - - raw_spin_unlock(&dl_b->lock); - --unlock: - task_rq_unlock(rq, p, &rf); - } - -diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 70a578272..9c95334d8 100644 ---- a/kernel/sched/debug.c -+++ b/kernel/sched/debug.c -@@ -48,10 +48,11 @@ static unsigned long nsec_low(unsigned long long nsec) - #define SCHED_FEAT(name, enabled) \ - #name , - --static const char * const sched_feat_names[] = { -+const char * const sched_feat_names[] = { - #include "features.h" - }; - -+EXPORT_SYMBOL_GPL(sched_feat_names); - #undef SCHED_FEAT - - static int sched_feat_show(struct seq_file *m, void *v) -@@ -79,6 +80,7 @@ static int sched_feat_show(struct seq_file *m, void *v) - struct static_key 
sched_feat_keys[__SCHED_FEAT_NR] = { - #include "features.h" - }; -+EXPORT_SYMBOL_GPL(sched_feat_keys); - - #undef SCHED_FEAT - diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index c004e3b89..61d807022 100644 +index c004e3b89..fbd91b8e1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -36,6 +36,7 @@ - * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds) - */ - unsigned int sysctl_sched_latency = 6000000ULL; -+EXPORT_SYMBOL_GPL(sysctl_sched_latency); - static unsigned int normalized_sysctl_sched_latency = 6000000ULL; - - /* -@@ -57,6 +58,7 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L +@@ -57,6 +57,7 @@ enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_L * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds) */ unsigned int sysctl_sched_min_granularity = 750000ULL; @@ -1620592,203 +1618957,7 @@ index c004e3b89..61d807022 100644 static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL; /* -@@ -991,6 +993,7 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) - } - - trace_sched_stat_blocked(tsk, delta); -+ trace_sched_blocked_reason(tsk); - - /* - * Blocking time is in units of nanosecs, so shift by -@@ -4424,8 +4427,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) - resched_curr(rq_of(cfs_rq)); - } - --static void --set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) -+void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) - { - /* 'current' is not kept within the tree. */ - if (se->on_rq) { -@@ -4456,6 +4458,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) - - se->prev_sum_exec_runtime = se->sum_exec_runtime; - } -+EXPORT_SYMBOL_GPL(set_next_entity); -+ - - static int - wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); -@@ -6614,20 +6618,34 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd) - * other use-cases too. So, until someone finds a better way to solve this, - * let's keep things simple by re-using the existing slow path. - */ --static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) -+static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync) - { - unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; - struct root_domain *rd = cpu_rq(smp_processor_id())->rd; -+ int max_spare_cap_cpu_ls = prev_cpu, best_idle_cpu = -1; -+ unsigned long max_spare_cap_ls = 0, target_cap; - unsigned long cpu_cap, util, base_energy = 0; -+ bool boosted, latency_sensitive = false; -+ unsigned int min_exit_lat = UINT_MAX; - int cpu, best_energy_cpu = prev_cpu; -+ struct cpuidle_state *idle; - struct sched_domain *sd; - struct perf_domain *pd; -+ int new_cpu = INT_MAX; - - rcu_read_lock(); - pd = rcu_dereference(rd->pd); - if (!pd || READ_ONCE(rd->overutilized)) - goto fail; - -+ cpu = smp_processor_id(); -+ if (sync && cpu_rq(cpu)->nr_running == 1 && -+ cpumask_test_cpu(cpu, p->cpus_ptr) && -+ task_fits_capacity(p, capacity_of(cpu))) { -+ rcu_read_unlock(); -+ return cpu; -+ } -+ - /* - * Energy-aware wake-up happens on the lowest sched_domain starting - * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu. 
-@@ -6638,10 +6656,13 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) - if (!sd) - goto fail; - -- sync_entity_load_avg(&p->se); - if (!task_util_est(p)) - goto unlock; - -+ latency_sensitive = uclamp_latency_sensitive(p); -+ boosted = uclamp_boosted(p); -+ target_cap = boosted ? 0 : ULONG_MAX; -+ - for (; pd; pd = pd->next) { - unsigned long cur_delta, spare_cap, max_spare_cap = 0; - unsigned long base_energy_pd; -@@ -6672,7 +6693,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) - continue; - - /* Always use prev_cpu as a candidate. */ -- if (cpu == prev_cpu) { -+ if (!latency_sensitive && cpu == prev_cpu) { - prev_delta = compute_energy(p, prev_cpu, pd); - prev_delta -= base_energy_pd; - best_delta = min(best_delta, prev_delta); -@@ -6686,10 +6707,34 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) - max_spare_cap = spare_cap; - max_spare_cap_cpu = cpu; - } -+ -+ if (!latency_sensitive) -+ continue; -+ -+ if (idle_cpu(cpu)) { -+ cpu_cap = capacity_orig_of(cpu); -+ if (boosted && cpu_cap < target_cap) -+ continue; -+ if (!boosted && cpu_cap > target_cap) -+ continue; -+ idle = idle_get_state(cpu_rq(cpu)); -+ if (idle && idle->exit_latency > min_exit_lat && -+ cpu_cap == target_cap) -+ continue; -+ -+ if (idle) -+ min_exit_lat = idle->exit_latency; -+ target_cap = cpu_cap; -+ best_idle_cpu = cpu; -+ } else if (spare_cap > max_spare_cap_ls) { -+ max_spare_cap_ls = spare_cap; -+ max_spare_cap_cpu_ls = cpu; -+ } - } - - /* Evaluate the energy impact of using this CPU. */ -- if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) { -+ if (!latency_sensitive && max_spare_cap_cpu >= 0 && -+ max_spare_cap_cpu != prev_cpu) { - cur_delta = compute_energy(p, max_spare_cap_cpu, pd); - cur_delta -= base_energy_pd; - if (cur_delta < best_delta) { -@@ -6701,6 +6746,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) - unlock: - rcu_read_unlock(); - -+ if (latency_sensitive) -+ return best_idle_cpu >= 0 ? best_idle_cpu : max_spare_cap_cpu_ls; -+ - /* - * Pick the best CPU if prev_cpu cannot be used, or if it saves at - * least 6% of the energy used by prev_cpu. -@@ -6744,7 +6792,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f - record_wakee(p); - - if (sched_energy_enabled()) { -- new_cpu = find_energy_efficient_cpu(p, prev_cpu); -+ new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync); - if (new_cpu >= 0) - return new_cpu; - new_cpu = prev_cpu; -@@ -7038,8 +7086,8 @@ struct task_struct * - pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) - { - struct cfs_rq *cfs_rq = &rq->cfs; -- struct sched_entity *se; -- struct task_struct *p; -+ struct sched_entity *se = NULL; -+ struct task_struct *p = NULL; - int new_tasks; - - again: -@@ -7368,7 +7416,8 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p) - * rewrite all of this once again.] 
- */ - --static unsigned long __read_mostly max_load_balance_interval = HZ/10; -+unsigned long __read_mostly max_load_balance_interval = HZ/10; -+EXPORT_SYMBOL_GPL(max_load_balance_interval); - - enum fbq_type { regular, remote, all }; - -@@ -7449,6 +7498,7 @@ struct lb_env { - enum fbq_type fbq_type; - enum migration_type migration_type; - struct list_head tasks; -+ struct rq_flags *src_rq_rf; - }; - - /* -@@ -9635,6 +9685,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, - - more_balance: - rq_lock_irqsave(busiest, &rf); -+ env.src_rq_rf = &rf; - update_rq_clock(busiest); - - /* -@@ -9944,6 +9995,7 @@ static int active_load_balance_cpu_stop(void *data) - * about DST_PINNED. - */ - .flags = LBF_DST_PINNED, -+ .src_rq_rf = &rf, - }; - - schedstat_inc(sd->alb_count); -@@ -9979,7 +10031,7 @@ static DEFINE_SPINLOCK(balancing); - */ - void update_max_interval(void) - { -- max_load_balance_interval = HZ*num_online_cpus()/10; -+ max_load_balance_interval = HZ*num_active_cpus()/10; - } - - /* -@@ -10323,9 +10375,20 @@ void nohz_balance_enter_idle(int cpu) +@@ -10323,9 +10324,20 @@ void nohz_balance_enter_idle(int cpu) SCHED_WARN_ON(cpu != smp_processor_id()); @@ -1620824,7 +1618993,7 @@ index d2a655643..b5837e277 100644 long calc_load_fold_active(struct rq *this_rq, long adjust) { diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c -index 2c613e1cf..cd54fab42 100644 +index 2c613e1cf..e2890b677 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -28,6 +28,42 @@ @@ -1620879,29 +1619048,8 @@ index 2c613e1cf..cd54fab42 100644 return val; } -@@ -306,6 +342,7 @@ int __update_load_avg_blocked_se(u64 now, struct sched_entity *se) - - return 0; - } -+EXPORT_SYMBOL_GPL(__update_load_avg_blocked_se); - - int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se) - { -diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c -index d50a31ece..24f189ff2 100644 ---- a/kernel/sched/psi.c -+++ b/kernel/sched/psi.c -@@ -748,7 +748,7 @@ static void psi_group_change(struct psi_group *group, int cpu, - - static struct psi_group *iterate_groups(struct task_struct *task, void **iter) - { --#ifdef CONFIG_CGROUPS -+#if defined CONFIG_CGROUPS && defined CONFIG_PSI_PER_CGROUP_ACCT - struct cgroup *cgroup = NULL; - - if (!*iter) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c -index dae1e8eaa..24f859339 100644 +index dae1e8eaa..6e3c9ba2e 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1377,6 +1377,27 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) @@ -1620932,7 +1619080,7 @@ index dae1e8eaa..24f859339 100644 /* * Adding/removing a task to/from a priority array: */ -@@ -1384,13 +1405,15 @@ static void +@@ -1385,6 +1406,7 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) { struct sched_rt_entity *rt_se = &p->rt; @@ -1620940,8 +1619088,9 @@ index dae1e8eaa..24f859339 100644 if (flags & ENQUEUE_WAKEUP) rt_se->timeout = 0; - +@@ -1392,7 +1414,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags); + walt_inc_cumulative_runnable_avg(rq, p); - if (!task_current(rq, p) && p->nr_cpus_allowed > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1 && @@ -1620949,46 +1619098,19 @@ index dae1e8eaa..24f859339 100644 enqueue_pushable_task(rq, p); } -@@ -1441,12 +1464,38 @@ static void yield_task_rt(struct rq *rq) - #ifdef CONFIG_SMP - static int find_lowest_rq(struct task_struct *task); - -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+/* -+ * Return whether the task on 
the given cpu is currently non-preemptible -+ * while handling a potentially long softint, or if the task is likely -+ * to block preemptions soon because it is a ksoftirq thread that is -+ * handling slow softints. -+ */ -+bool -+task_may_not_preempt(struct task_struct *task, int cpu) -+{ -+ __u32 softirqs = per_cpu(active_softirqs, cpu) | -+ __IRQ_STAT(cpu, __softirq_pending); -+ -+ struct task_struct *cpu_ksoftirqd = per_cpu(ksoftirqd, cpu); -+ return ((softirqs & LONG_SOFTIRQ_MASK) && -+ (task == cpu_ksoftirqd || -+ task_thread_info(task)->preempt_count & SOFTIRQ_MASK)); -+} -+EXPORT_SYMBOL_GPL(task_may_not_preempt); -+#endif /* CONFIG_RT_SOFTINT_OPTIMIZATION */ -+ - static int - select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) +@@ -1446,7 +1469,11 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) { struct task_struct *curr; struct rq *rq; + struct rq *this_cpu_rq; bool test; + int target_cpu = -1; -+ bool may_not_preempt; + bool sync = !!(flags & WF_SYNC); + int this_cpu; /* For anything but wake ups, just return the task_cpu */ if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) -@@ -1456,9 +1505,16 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) +@@ -1456,6 +1483,8 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) rcu_read_lock(); curr = READ_ONCE(rq->curr); /* unlocked access */ @@ -1620996,28 +1619118,11 @@ index dae1e8eaa..24f859339 100644 + this_cpu_rq = cpu_rq(this_cpu); /* -- * If the current task on @p's runqueue is an RT task, then -+ * If the current task on @p's runqueue is a softirq task, -+ * it may run without preemption for a time that is -+ * ill-suited for a waiting RT task. Therefore, try to -+ * wake this RT task on another runqueue. -+ * -+ * Also, if the current task on @p's runqueue is an RT task, then - * try to see if we can wake this RT task up on another - * runqueue. Otherwise simply start this RT task - * on its current runqueue. -@@ -1483,9 +1539,19 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) - * requirement of the task - which is only important on heterogeneous - * systems like big.LITTLE. - */ -- test = curr && -- unlikely(rt_task(curr)) && -- (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio); -+ may_not_preempt = task_may_not_preempt(curr, cpu); -+ test = (curr && (may_not_preempt || -+ (unlikely(rt_task(curr)) && -+ (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio)))); -+ + * If the current task on @p's runqueue is an RT task, then +@@ -1487,6 +1516,15 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) + unlikely(rt_task(curr)) && + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio); + + /* + * Respect the sync flag as long as the task can run on this CPU. + */ @@ -1621026,52 +1619131,10 @@ index dae1e8eaa..24f859339 100644 + cpu = this_cpu; + goto out_unlock; + } - ++ if (test || !rt_task_fits_capacity(p, cpu)) { int target = find_lowest_rq(p); -@@ -1498,11 +1564,14 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) - goto out_unlock; - /* -- * Don't bother moving it if the destination CPU is -+ * If cpu is non-preemptible, prefer remote cpu -+ * even if it's running a higher-prio task. -+ * Otherwise: Don't bother moving it if the destination CPU is - * not running a lower priority task. 
- */ - if (target != -1 && -- p->prio < cpu_rq(target)->rt.highest_prio.curr) -+ (may_not_preempt || -+ p->prio < cpu_rq(target)->rt.highest_prio.curr)) - cpu = target; - } - -@@ -1682,7 +1751,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) - * Return the highest pushable rq's task, which is suitable to be executed - * on the CPU, NULL otherwise - */ --static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) -+struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) - { - struct plist_head *head = &rq->rt.pushable_tasks; - struct task_struct *p; -@@ -1697,6 +1766,7 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) - - return NULL; - } -+EXPORT_SYMBOL_GPL(pick_highest_pushable_task); - - static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); - -@@ -1733,6 +1803,8 @@ static int find_lowest_rq(struct task_struct *task) - if (!ret) - return -1; /* No targets found */ - -+ cpu = task_cpu(task); -+ - /* - * At this point we have built a mask of CPUs representing the - * lowest priority tasks in the system. Now we want to elect diff --git a/kernel/sched/sched-pelt.h b/kernel/sched/sched-pelt.h index c529706be..92a6875bc 100644 --- a/kernel/sched/sched-pelt.h @@ -1621109,66 +1619172,10 @@ index c529706be..92a6875bc 100644 +#define LOAD_AVG_PERIOD pelt_load_avg_period +#define LOAD_AVG_MAX pelt_load_avg_max diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index 08db8e095..5109a91a6 100644 +index 08db8e095..23029e657 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -431,6 +431,8 @@ struct task_group { - struct uclamp_se uclamp_req[UCLAMP_CNT]; - /* Effective clamp values used for a task group */ - struct uclamp_se uclamp[UCLAMP_CNT]; -+ /* Latency-sensitive flag used for a task group */ -+ unsigned int latency_sensitive; - #endif - - }; -@@ -842,6 +844,7 @@ extern void sched_put_rd(struct root_domain *rd); - #ifdef HAVE_RT_PUSH_IPI - extern void rto_push_irq_work_func(struct irq_work *work); - #endif -+extern struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu); - #endif /* CONFIG_SMP */ - - #ifdef CONFIG_UCLAMP_TASK -@@ -1048,6 +1051,11 @@ struct rq { - unsigned int ttwu_local; - #endif - -+#ifdef CONFIG_HOTPLUG_CPU -+ struct cpu_stop_work drain; -+ struct cpu_stop_done drain_done; -+#endif -+ - #ifdef CONFIG_CPU_IDLE - /* Must be inspected within a rcu lock section */ - struct cpuidle_state *idle_state; -@@ -1374,8 +1382,6 @@ enum numa_faults_stats { - }; - extern void sched_setnuma(struct task_struct *p, int node); - extern int migrate_task_to(struct task_struct *p, int cpu); --extern int migrate_swap(struct task_struct *p, struct task_struct *t, -- int cpu, int scpu); - extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p); - #else - static inline void -@@ -1386,6 +1392,8 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p) - - #ifdef CONFIG_SMP - -+extern int migrate_swap(struct task_struct *p, struct task_struct *t, -+ int cpu, int scpu); - static inline void - queue_balance_callback(struct rq *rq, - struct callback_head *head, -@@ -1652,6 +1660,8 @@ static __always_inline bool static_branch_##name(struct static_key *key) \ - #undef SCHED_FEAT - - extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; -+extern const char * const sched_feat_names[__SCHED_FEAT_NR]; -+ - #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) - - #else /* !CONFIG_JUMP_LABEL */ -@@ -1778,6 +1788,8 @@ extern 
const u32 sched_prio_to_wmult[40]; +@@ -1778,6 +1778,8 @@ extern const u32 sched_prio_to_wmult[40]; #define ENQUEUE_MIGRATED 0x00 #endif @@ -1621177,81 +1619184,8 @@ index 08db8e095..5109a91a6 100644 #define RETRY_TASK ((void *)-1UL) struct sched_class { -@@ -1901,6 +1913,7 @@ extern void trigger_load_balance(struct rq *rq); - - extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); - -+extern unsigned long __read_mostly max_load_balance_interval; - #endif - - #ifdef CONFIG_CPU_IDLE -@@ -2456,6 +2469,11 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, - return clamp(util, min_util, max_util); - } - -+static inline bool uclamp_boosted(struct task_struct *p) -+{ -+ return uclamp_eff_value(p, UCLAMP_MIN) > 0; -+} -+ - /* - * When uclamp is compiled in, the aggregation at rq level is 'turned off' - * by default in the fast path and only gets turned on once userspace performs -@@ -2476,12 +2494,36 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, - return util; - } - -+static inline bool uclamp_boosted(struct task_struct *p) -+{ -+ return false; -+} -+ - static inline bool uclamp_is_used(void) - { - return false; - } - #endif /* CONFIG_UCLAMP_TASK */ - -+#ifdef CONFIG_UCLAMP_TASK_GROUP -+static inline bool uclamp_latency_sensitive(struct task_struct *p) -+{ -+ struct cgroup_subsys_state *css = task_css(p, cpu_cgrp_id); -+ struct task_group *tg; -+ -+ if (!css) -+ return false; -+ tg = container_of(css, struct task_group, css); -+ -+ return tg->latency_sensitive; -+} -+#else -+static inline bool uclamp_latency_sensitive(struct task_struct *p) -+{ -+ return false; -+} -+#endif /* CONFIG_UCLAMP_TASK_GROUP */ -+ - #ifdef arch_scale_freq_capacity - # ifndef arch_scale_freq_invariant - # define arch_scale_freq_invariant() true -@@ -2644,3 +2686,15 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) - - void swake_up_all_locked(struct swait_queue_head *q); - void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); -+ -+/* -+ * task_may_not_preempt - check whether a task may not be preemptible soon -+ */ -+#ifdef CONFIG_RT_SOFTINT_OPTIMIZATION -+extern bool task_may_not_preempt(struct task_struct *task, int cpu); -+#else -+static inline bool task_may_not_preempt(struct task_struct *task, int cpu) -+{ -+ return false; -+} -+#endif /* CONFIG_RT_SOFTINT_OPTIMIZATION */ diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c -index 004e9505f..a351e79fd 100644 +index 004e9505f..44b74cdb2 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -5,6 +5,9 @@ @@ -1621264,34 +1619198,6 @@ index 004e9505f..a351e79fd 100644 /* Protected by sched_domains_mutex: */ static cpumask_var_t sched_domains_tmpmask; -@@ -343,7 +346,6 @@ static void sched_energy_set(bool has_eas) - */ - #define EM_MAX_COMPLEXITY 2048 - --extern struct cpufreq_governor schedutil_gov; - static bool build_perf_domains(const struct cpumask *cpu_map) - { - int i, nr_pd = 0, nr_ps = 0, nr_cpus = cpumask_weight(cpu_map); -@@ -377,19 +379,6 @@ static bool build_perf_domains(const struct cpumask *cpu_map) - if (find_pd(pd, i)) - continue; - -- /* Do not attempt EAS if schedutil is not being used. 
*/ -- policy = cpufreq_cpu_get(i); -- if (!policy) -- goto free; -- gov = policy->governor; -- cpufreq_cpu_put(policy); -- if (gov != &schedutil_gov) { -- if (rd->pd) -- pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n", -- cpumask_pr_args(cpu_map)); -- goto free; -- } -- - /* Create the new pd and add it to the local list. */ - tmp = pd_init(i); - if (!tmp) diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 21005b980..6cdf93634 100644 --- a/kernel/sched/wait.c @@ -1621325,84 +1619231,6 @@ index 21005b980..6cdf93634 100644 { /* Pairs with the smp_store_mb() in wait_woken(). */ smp_mb(); /* C */ -diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c -index 890b79cf0..c65cfb7f7 100644 ---- a/kernel/stop_machine.c -+++ b/kernel/stop_machine.c -@@ -22,16 +22,7 @@ - #include - #include - #include -- --/* -- * Structure to determine completion condition and record errors. May -- * be shared by works on different cpus. -- */ --struct cpu_stop_done { -- atomic_t nr_todo; /* nr left to execute */ -- int ret; /* collected return value */ -- struct completion completion; /* fired if nr_todo reaches 0 */ --}; -+#include - - /* the actual stopper, one per every possible cpu, enabled on online cpus */ - struct cpu_stopper { -@@ -370,6 +361,55 @@ bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, - *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; - return cpu_stop_queue_work(cpu, work_buf); - } -+EXPORT_SYMBOL_GPL(stop_one_cpu_nowait); -+ -+/** -+ * stop_one_cpu_async - stop a cpu and wait for completion in a separated -+ * function: stop_wait_work() -+ * @cpu: cpu to stop -+ * @fn: function to execute -+ * @arg: argument to @fn -+ * @work_buf: pointer to cpu_stop_work structure -+ * -+ * CONTEXT: -+ * Might sleep. -+ * -+ * RETURNS: -+ * 0 if cpu_stop_work was queued successfully and @fn will be called. -+ * ENOENT if @fn(@arg) was not executed because @cpu was offline. -+ */ -+int stop_one_cpu_async(unsigned int cpu, cpu_stop_fn_t fn, void *arg, -+ struct cpu_stop_work *work_buf, -+ struct cpu_stop_done *done) -+{ -+ cpu_stop_init_done(done, 1); -+ -+ work_buf->done = done; -+ work_buf->fn = fn; -+ work_buf->arg = arg; -+ -+ if (cpu_stop_queue_work(cpu, work_buf)) -+ return 0; -+ -+ work_buf->done = NULL; -+ -+ return -ENOENT; -+} -+ -+/** -+ * cpu_stop_work_wait - wait for a stop initiated by stop_one_cpu_async(). -+ * @work_buf: pointer to cpu_stop_work structure -+ * -+ * CONTEXT: -+ * Might sleep. -+ */ -+void cpu_stop_work_wait(struct cpu_stop_work *work_buf) -+{ -+ struct cpu_stop_done *done = work_buf->done; -+ -+ wait_for_completion(&done->completion); -+ work_buf->done = NULL; -+} - - static bool queue_stop_cpus_work(const struct cpumask *cpumask, - cpu_stop_fn_t fn, void *arg, diff --git a/make-ohos.sh b/make-ohos.sh new file mode 100755 index 000000000..e737baa28 -- Gitee
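
Illustrative appendix (not part of the patch, kept after the trailer so the diff above still applies verbatim): the pelt.c and sched-pelt.h hunks above replace the compile-time LOAD_AVG_PERIOD/LOAD_AVG_MAX constants with the runtime variables pelt_load_avg_period and pelt_load_avg_max, so the PELT half-life can be chosen per platform instead of being fixed at build time. The small standalone userspace sketch below (file name and build line are hypothetical) shows where those paired values come from: each half-life implies a decay factor y with y^halflife = 0.5, and LOAD_AVG_MAX is the geometric sum 1024 * (1 + y + y^2 + ...). The floating-point sum lands slightly above the kernel's generated constants (roughly 47788 vs. 47742 for the default 32 ms half-life, with correspondingly smaller sums for 16 ms and 8 ms), because the kernel derives its tables with truncated 32-bit fixed-point arithmetic.

/* pelt_halflife.c -- illustration only, not kernel code. Build: cc pelt_halflife.c -lm */
#include <math.h>
#include <stdio.h>

int main(void)
{
	const int halflives[] = { 32, 16, 8 };	/* candidate PELT half-lives, in ms */

	for (int i = 0; i < 3; i++) {
		int hl = halflives[i];
		/* decay factor y chosen so that y^hl == 0.5 */
		double y = pow(0.5, 1.0 / hl);
		/* LOAD_AVG_MAX ~= 1024 * (1 + y + y^2 + ...) = 1024 / (1 - y) */
		double max = 1024.0 / (1.0 - y);

		printf("half-life %2d ms: LOAD_AVG_PERIOD=%2d LOAD_AVG_MAX~=%.0f\n",
		       hl, hl, max);
	}
	return 0;
}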