From 17f3f0be6369223df0dcf473bd15def1981357bb Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Wed, 26 Jun 2024 10:35:05 +0800 Subject: [PATCH 1/4] sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy stable inclusion from stable-v6.6.54 commit 5a4f8de92dd7ddaa7ce33aa804cbad254b844b8f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAZ3K2 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=5a4f8de92dd7ddaa7ce33aa804cbad254b844b8f -------------------------------- [ Upstream commit faa42d29419def58d3c3e5b14ad4037f0af3b496 ] Consider the following cgroup: root | ------------------------ | | normal_cgroup idle_cgroup | | SCHED_IDLE task_A SCHED_NORMAL task_B According to the cgroup hierarchy, A should preempt B. But current check_preempt_wakeup_fair() treats cgroup se and task separately, so B will preempt A unexpectedly. Unify the wakeup logic by {c,p}se_is_idle only. This makes SCHED_IDLE of a task a relative policy that is effective only within its own cgroup, similar to the behavior of NICE. Also fix se_is_idle() definition when !CONFIG_FAIR_GROUP_SCHED. 
Fixes: 304000390f88 ("sched: Cgroup SCHED_IDLE support") Signed-off-by: Tianchen Ding Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Josh Don Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/20240626023505.1332596-1-dtcccc@linux.alibaba.com Signed-off-by: Sasha Levin Conflicts: kernel/sched/fair.c Signed-off-by: Wen Zhiwei --- kernel/sched/fair.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f1cd57e70f1f..086dd0dc1b06 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -675,7 +675,7 @@ static int cfs_rq_is_idle(struct cfs_rq *cfs_rq) static int se_is_idle(struct sched_entity *se) { - return 0; + return task_has_idle_policy(task_of(se)); } #endif /* CONFIG_FAIR_GROUP_SCHED */ @@ -9317,16 +9317,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (test_tsk_need_resched(curr)) return; - /* Idle tasks are by definition preempted by non-idle tasks. */ - if (unlikely(task_has_idle_policy(curr)) && - likely(!task_has_idle_policy(p))) - goto preempt; - - /* - * Batch and idle tasks do not preempt non-idle tasks (their preemption - * is driven by the tick): - */ - if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION)) + if (!sched_feat(WAKEUP_PREEMPTION)) return; find_matching_se(&se, &pse); @@ -9336,7 +9327,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ pse_is_idle = se_is_idle(pse); /* - * Preempt an idle group in favor of a non-idle group (and don't preempt + * Preempt an idle entity in favor of a non-idle entity (and don't preempt * in the inverse case). */ if (cse_is_idle && !pse_is_idle) @@ -9344,9 +9335,14 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ if (cse_is_idle != pse_is_idle) return; + /* + * BATCH and IDLE tasks do not preempt others. 
+ */ + if (unlikely(p->policy != SCHED_NORMAL)) + return; + cfs_rq = cfs_rq_of(se); update_curr(cfs_rq); - /* * XXX pick_eevdf(cfs_rq) != se ? */ -- Gitee From 3f7973aa40d1668d2df3b6322a9700eb9d0fa69e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 9 Aug 2024 17:52:55 +0200 Subject: [PATCH 2/4] xen: allow mapping ACPI data using a different physical address stable inclusion from stable-v6.6.54 commit adbb44539b56f15dc94701a43aa21ded41635c99 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAZ3K2 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=adbb44539b56f15dc94701a43aa21ded41635c99 -------------------------------- commit 9221222c717dbddac1e3c49906525475d87a3a44 upstream. When running as a Xen PV dom0 the system needs to map ACPI data of the host using host physical addresses, while those addresses can conflict with the guest physical addresses of the loaded linux kernel. The same problem might apply in case a PV guest is configured to use the host memory map. This conflict can be solved by mapping the ACPI data to a different guest physical address, but mapping the data via acpi_os_ioremap() must still be possible using the host physical address, as this address might be generated by AML when referencing some of the ACPI data. When configured to support running as a Xen PV domain, have an implementation of acpi_os_ioremap() being aware of the possibility to need above mentioned translation of a host physical address to the guest physical address. This modification requires to #include linux/acpi.h in some sources which need to include asm/acpi.h directly. 
Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Greg Kroah-Hartman Conflicts: arch/x86/include/asm/acpi.h arch/x86/kernel/acpi/boot.c arch/x86/kernel/jailhouse.c arch/x86/kernel/mmconf-fam10h_64.c arch/x86/kernel/smpboot.c arch/x86/kernel/x86_init.c arch/x86/xen/p2m.c arch/x86/xen/setup.c Signed-off-by: Wen Zhiwei --- arch/x86/include/asm/acpi.h | 8 +++++++ arch/x86/kernel/acpi/boot.c | 11 ++++++++++ arch/x86/kernel/jailhouse.c | 1 + arch/x86/kernel/mmconf-fam10h_64.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/x86_init.c | 1 + arch/x86/xen/p2m.c | 35 ++++++++++++++++++++++++++++++ arch/x86/xen/setup.c | 2 +- 8 files changed, 59 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 32369a488849..e3a6de3e2677 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -166,6 +166,14 @@ void acpi_generic_reduced_hw_init(void); void x86_default_set_root_pointer(u64 addr); u64 x86_default_get_root_pointer(void); +#ifdef CONFIG_XEN_PV +/* A Xen PV domain needs a special acpi_os_ioremap() handling. 
*/ +extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, + acpi_size size); +void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +#define acpi_os_ioremap acpi_os_ioremap +#endif + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c55c0ef47a18..04a7d8cf5d55 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1901,3 +1901,14 @@ u64 x86_default_get_root_pointer(void) { return boot_params.acpi_rsdp_addr; } + +#ifdef CONFIG_XEN_PV +void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +{ + return ioremap_cache(phys, size); +} + +void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys, acpi_size size) = + x86_acpi_os_ioremap; +EXPORT_SYMBOL_GPL(acpi_os_ioremap); +#endif diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 578d16fc040f..5481c7c5db30 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index c94dec6a1834..1f54eedc3015 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed3e07b878f1..590e62066ca3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 3f0718b4a7d2..268627a17cf0 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6cb8dae768fa..5b02bba600cf 100644 --- a/arch/x86/xen/p2m.c +++ 
b/arch/x86/xen/p2m.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include @@ -836,6 +837,34 @@ void __init xen_do_remap_nonram(void) pr_info("Remapped %u non-RAM page(s)\n", remapped); } +#ifdef CONFIG_ACPI +/* + * Xen variant of acpi_os_ioremap() taking potentially remapped non-RAM + * regions into account. Entries of xen_nonram_remap[] are scanned in order. + * Any attempt to map an area crossing a remap boundary will produce a + * WARN() splat. + * phys is related to remap->maddr on input and will be rebased to remap->paddr. + */ +static void __iomem *xen_acpi_os_ioremap(acpi_physical_address phys, + acpi_size size) +{ + unsigned int i; + const struct nonram_remap *remap = xen_nonram_remap; + + for (i = 0; i < nr_nonram_remap; i++, remap++) { + if (phys + size > remap->maddr && + phys < remap->maddr + remap->size) { + WARN_ON(phys < remap->maddr || + phys + size > remap->maddr + remap->size); + phys += remap->paddr - remap->maddr; + break; + } + } + + return x86_acpi_os_ioremap(phys, size); +} +#endif /* CONFIG_ACPI */ + /* * Add a new non-RAM remap entry. * In case of no free entry found, just crash the system. @@ -850,6 +879,12 @@ void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, BUG(); } +#ifdef CONFIG_ACPI + /* Switch to the Xen acpi_os_ioremap() variant. 
*/ + if (nr_nonram_remap == 0) + acpi_os_ioremap = xen_acpi_os_ioremap; +#endif + xen_nonram_remap[nr_nonram_remap].maddr = maddr; xen_nonram_remap[nr_nonram_remap].paddr = paddr; xen_nonram_remap[nr_nonram_remap].size = size; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1a426d7c1d0d..dc822124cacb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -15,12 +15,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include -- Gitee From c4f98fd6611c2d4b8f893e75d16881a6f9e2b82e Mon Sep 17 00:00:00 2001 From: D Scott Phillips Date: Tue, 27 Aug 2024 14:17:01 -0700 Subject: [PATCH 3/4] arm64: errata: Enable the AC03_CPU_38 workaround for ampere1a stable inclusion from stable-v6.6.54 commit 0e6774ec012bf8bf28eac2a00478d7e0639a3a8f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAZ3K2 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=0e6774ec012bf8bf28eac2a00478d7e0639a3a8f -------------------------------- commit db0d8a84348b876df7c4276f0cbce5df3b769f5f upstream. The ampere1a cpu is affected by erratum AC04_CPU_10 which is the same bug as AC03_CPU_38. Add ampere1a to the AC03_CPU_38 workaround midr list. 
Cc: Signed-off-by: D Scott Phillips Acked-by: Oliver Upton Link: https://lore.kernel.org/r/20240827211701.2216719-1-scott@os.amperecomputing.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman Conflicts: Documentation/arch/arm64/silicon-errata.rst arch/arm64/Kconfig arch/arm64/include/asm/cputype.h arch/arm64/kernel/cpu_errata.c Signed-off-by: Wen Zhiwei --- Documentation/arch/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/cpu_errata.c | 10 +++++++++- 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index bc1c1ba2fe56..efd57b87f096 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -54,6 +54,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Ampere | AmpereOne | AC03_CPU_38 | AMPERE_ERRATUM_AC03_CPU_38 | +----------------+-----------------+-----------------+-----------------------------+ +| Ampere | AmpereOne AC04 | AC04_CPU_10 | AMPERE_ERRATUM_AC03_CPU_38 | ++----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f917ba077fb4..49a179046d15 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -427,7 +427,7 @@ config AMPERE_ERRATUM_AC03_CPU_38 default y help This option adds an alternative code sequence to work around Ampere - erratum AC03_CPU_38 on AmpereOne. + errata AC03_CPU_38 and AC04_CPU_10 on AmpereOne. 
The affected design reports FEAT_HAFDBS as not implemented in ID_AA64MMFR1_EL1.HAFDBS, but (V)TCR_ELx.{HA,HD} are not RES0 diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 30a437f4d292..5c1cadeb032d 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -152,6 +152,7 @@ #define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ @@ -229,6 +230,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) #define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 46813132a09f..f4533d0ba363 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -526,6 +526,14 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { }; #endif +#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 +static const struct midr_range erratum_ac03_cpu_38_list[] = { + MIDR_ALL_VERSIONS(MIDR_AMPERE1), + MIDR_ALL_VERSIONS(MIDR_AMPERE1A), + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -852,7 +860,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "AmpereOne erratum AC03_CPU_38", .capability = ARM64_WORKAROUND_AMPERE_AC03_CPU_38, - ERRATA_MIDR_ALL_VERSIONS(MIDR_AMPERE1), + ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list), }, #endif #ifdef 
CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH -- Gitee From fd6e67529e95d8bd73ff3b88a7a2ec45a75078c0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 4 Oct 2023 11:38:53 +0100 Subject: [PATCH 4/4] btrfs: update comment for struct btrfs_inode::lock stable inclusion from stable-v6.6.54 commit 971d03cd457a1c701bf4cc0d299f8a48d0f49037 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAZ3K2 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=971d03cd457a1c701bf4cc0d299f8a48d0f49037 -------------------------------- [ Upstream commit 68539bd0e73b457f88a9d00cabb6533ec8582dc9 ] Update the comment for the lock named "lock" in struct btrfs_inode because it does not mention that the fields "delalloc_bytes", "defrag_bytes", "csum_bytes", "outstanding_extents" and "disk_i_size" are also protected by that lock. Also add a comment on top of each field protected by this lock to mention that the lock protects them. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Stable-dep-of: 7ee85f5515e8 ("btrfs: fix race setting file private on concurrent lseek using same fd") Signed-off-by: Sasha Levin Conflicts: fs/btrfs/btrfs_inode.h Signed-off-by: Wen Zhiwei --- fs/btrfs/btrfs_inode.h | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 095ab3e4303a..ec6679a538c1 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -82,8 +82,9 @@ struct btrfs_inode { /* * Lock for counters and all fields used to determine if the inode is in * the log or not (last_trans, last_sub_trans, last_log_commit, - * logged_trans), to access/update new_delalloc_bytes and to update the - * VFS' inode number of bytes used. + * logged_trans), to access/update delalloc_bytes, new_delalloc_bytes, + * defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to + * update the VFS' inode number of bytes used. 
* Also protects setting struct file::private_data. */ spinlock_t lock; @@ -107,7 +108,7 @@ struct btrfs_inode { * Counters to keep track of the number of extent item's we may use due * to delalloc and such. outstanding_extents is the number of extent * items we think we'll end up using, and reserved_extents is the number - * of extent items we've reserved metadata for. + * of extent items we've reserved metadata for. Protected by 'lock'. */ unsigned outstanding_extents; @@ -131,28 +132,31 @@ struct btrfs_inode { u64 generation; /* - * transid of the trans_handle that last modified this inode + * ID of the transaction handle that last modified this inode. + * Protected by 'lock'. */ u64 last_trans; /* - * transid that last logged this inode + * ID of the transaction that last logged this inode. + * Protected by 'lock'. */ u64 logged_trans; /* - * log transid when this inode was last modified + * Log transaction ID when this inode was last modified. + * Protected by 'lock'. */ int last_sub_trans; - /* a local copy of root's last_log_commit */ + /* A local copy of root's last_log_commit. Protected by 'lock'. */ int last_log_commit; union { /* * Total number of bytes pending delalloc, used by stat to * calculate the real block usage of the file. This is used - * only for files. + * only for files. Protected by 'lock'. */ u64 delalloc_bytes; /* @@ -170,7 +174,7 @@ struct btrfs_inode { * Total number of bytes pending delalloc that fall within a file * range that is either a hole or beyond EOF (and no prealloc extent * exists in the range). This is always <= delalloc_bytes and this - * is used only for files. + * is used only for files. Protected by 'lock'. */ u64 new_delalloc_bytes; /* @@ -181,15 +185,15 @@ struct btrfs_inode { }; /* - * total number of bytes pending defrag, used by stat to check whether - * it needs COW. + * Total number of bytes pending defrag, used by stat to check whether + * it needs COW. Protected by 'lock'. 
*/ u64 defrag_bytes; /* - * the size of the file stored in the metadata on disk. data=ordered + * The size of the file stored in the metadata on disk. data=ordered * means the in-memory i_size might be larger than the size on disk - * because not all the blocks are written yet. + * because not all the blocks are written yet. Protected by 'lock'. */ u64 disk_i_size; @@ -223,7 +227,7 @@ struct btrfs_inode { /* * Number of bytes outstanding that are going to need csums. This is - * used in ENOSPC accounting. + * used in ENOSPC accounting. Protected by 'lock'. */ u64 csum_bytes; -- Gitee