From fefb231d4e1c5e86c81932a17b44f29ebcee15bb Mon Sep 17 00:00:00 2001 From: Ferry Meng Date: Tue, 28 Oct 2025 19:25:03 +0800 Subject: [PATCH 1/9] The naming of xdragon seems to have no practical significance, so delete it. ANBZ: #26358 commit 7f85351c43b38e38ebe3b2a2fdf3789b7ba9725a upstream anolis: virtio-blk: rename virtio-blk extra features negotiation function Signed-off-by: Ferry Meng Reviewed-by: Jingbo Xu Reviewed-by: Gao Xiang Link: https://gitee.com/anolis/cloud-kernel/pulls/5925 --- drivers/block/virtio_blk_ext.c | 80 +++++++++++++++++----------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/block/virtio_blk_ext.c b/drivers/block/virtio_blk_ext.c index f661f09d3150..8d72e2c396f4 100644 --- a/drivers/block/virtio_blk_ext.c +++ b/drivers/block/virtio_blk_ext.c @@ -17,29 +17,29 @@ #define VIRTIO_PCI_GUEST_VNDR_SPEC_FEATURES 0x10 -/* xdragon vsc */ +/* vsc */ #define PCI_CAP_ID_VNDR 0x09 /* Vendor specific */ -#define PCI_XDRAGON_VSC_CFGTYPE 0xff +#define PCI_VSC_CFGTYPE 0xff -/* xdragon vsec */ +/* vsec */ #define PCI_EXT_CAP_ID_VNDR 0x0B -#define PCI_EXP_XDRAGON_VSEC_CFGTYPE 0xff -#define XDRAGON_VSEC_VERSION 2 - -#define XDRAGON_XVCS_MAGIC 0x53435658 -#define XDRAGON_XVCS_VSF_KEY "xvcs-vsf" -#define XDRAGON_XVCS_VERSION 1 -#define XDRAGON_XVCS_NUM_MAX 32U -#define XDRAGON_XVCS_KEY_MAX 16 - -#define XDRAGON_XVCS_O_MAGIC 0 -#define XDRAGON_XVCS_O_VER 4 -#define XDRAGON_XVCS_O_ADDR 12 -#define XDRAGON_XVCS_O_F_CNT 16 -#define XDRAGON_XVCS_O_CUR 16 -#define XDRAGON_XVCS_O_NEXT 20 -#define XDRAGON_XVCS_O_VSF 32 -static void xdragon_read_xvcs(struct pci_dev *d, u32 pos, +#define PCI_EXP_VSEC_CFGTYPE 0xff +#define VSEC_VERSION 2 + +#define XVCS_MAGIC 0x53435658 +#define XVCS_VSF_KEY "xvcs-vsf" +#define XVCS_VERSION 1 +#define XVCS_NUM_MAX 32U +#define XVCS_KEY_MAX 16 + +#define XVCS_O_MAGIC 0 +#define XVCS_O_VER 4 +#define XVCS_O_ADDR 12 +#define XVCS_O_F_CNT 16 +#define XVCS_O_CUR 16 +#define XVCS_O_NEXT 20 +#define XVCS_O_VSF 32 
+static void read_xvcs(struct pci_dev *d, u32 pos, u32 cap_len, u32 addr, u32 num, void *data) { u32 idx, where; @@ -51,54 +51,54 @@ static void xdragon_read_xvcs(struct pci_dev *d, u32 pos, } } -static int xdragon_vcs_find_vsf_bar0_offset(struct pci_dev *dev, uint32_t cap_len, +static int vcs_find_vsf_bar0_offset(struct pci_dev *dev, uint32_t cap_len, uint32_t pos, u32 *bar0_offset) { - u8 buf[XDRAGON_XVCS_KEY_MAX+1]; + u8 buf[XVCS_KEY_MAX+1]; u32 where; u32 idx, num; u32 reg; /* check xvcs magic */ - xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_MAGIC, sizeof(reg), &reg); - if (reg != XDRAGON_XVCS_MAGIC) { + read_xvcs(dev, pos, cap_len, XVCS_O_MAGIC, sizeof(reg), &reg); + if (reg != XVCS_MAGIC) { pr_err("%s: xvcs magic 0x%x not match\n", __func__, reg); return -1; } /* check xvcs version */ - xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_VER, sizeof(reg), &reg); - if (reg != XDRAGON_XVCS_VERSION) { + read_xvcs(dev, pos, cap_len, XVCS_O_VER, sizeof(reg), &reg); + if (reg != XVCS_VERSION) { pr_err("%s: xvcs version 0x%x not match\n", __func__, reg); return -1; } /* xvcs feat block addr */ - xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_ADDR, sizeof(reg), &reg); + read_xvcs(dev, pos, cap_len, XVCS_O_ADDR, sizeof(reg), &reg); where = reg; /* xvcs feat cnt */ - xdragon_read_xvcs(dev, pos, cap_len, XDRAGON_XVCS_O_F_CNT, sizeof(reg), &reg); + read_xvcs(dev, pos, cap_len, XVCS_O_F_CNT, sizeof(reg), &reg); num = reg; - for (idx = 0; (idx < min(XDRAGON_XVCS_NUM_MAX, num)) && (where > 0); idx++) { + for (idx = 0; (idx < min(XVCS_NUM_MAX, num)) && (where > 0); idx++) { memset(buf, 0, sizeof(buf)); /* self addr check */ - xdragon_read_xvcs(dev, pos, cap_len, - where + XDRAGON_XVCS_O_CUR, sizeof(reg), &reg); + read_xvcs(dev, pos, cap_len, + where + XVCS_O_CUR, sizeof(reg), &reg); if (reg != where) return -1; /* check key */ - xdragon_read_xvcs(dev, pos, cap_len, where, XDRAGON_XVCS_KEY_MAX, buf); + read_xvcs(dev, pos, cap_len, where, XVCS_KEY_MAX, buf); /* found vsf */ - if (strncmp(buf, 
XDRAGON_XVCS_VSF_KEY, sizeof(XDRAGON_XVCS_VSF_KEY)) == 0) { - xdragon_read_xvcs(dev, pos, cap_len, where + XDRAGON_XVCS_O_VSF, + if (strncmp(buf, XVCS_VSF_KEY, sizeof(XVCS_VSF_KEY)) == 0) { + read_xvcs(dev, pos, cap_len, where + XVCS_O_VSF, sizeof(reg), &reg); *bar0_offset = reg; return 0; } /* next vcs feat */ - xdragon_read_xvcs(dev, pos, cap_len, - where + XDRAGON_XVCS_O_NEXT, sizeof(reg), &reg); + read_xvcs(dev, pos, cap_len, + where + XVCS_O_NEXT, sizeof(reg), &reg); where = reg; } pr_err("%s: vsf offset not found\n", __func__); @@ -118,7 +118,7 @@ int virtblk_get_ext_feature_bar(struct virtio_device *vdev, u32 *bar_offset) vsec > 0; vsec = pci_find_next_capability(dev, vsec, PCI_CAP_ID_VNDR)) { pci_read_config_byte(dev, vsec + offsetof(struct virtio_pci_cap, cfg_type), &type); - if (type == PCI_XDRAGON_VSC_CFGTYPE) { + if (type == PCI_VSC_CFGTYPE) { pci_read_config_byte(dev, vsec + offsetof(struct virtio_pci_cap, cap_len), &len); cap_len = len; @@ -134,10 +134,10 @@ int virtblk_get_ext_feature_bar(struct virtio_device *vdev, u32 *bar_offset) PCI_EXT_CAP_ID_VNDR))) { pci_read_config_word(dev, vsec + 0x4, &val); /* vsec found */ - if (val == PCI_EXP_XDRAGON_VSEC_CFGTYPE) { + if (val == PCI_EXP_VSEC_CFGTYPE) { /* get vsec cap len */ pci_read_config_word(dev, vsec + 0x6, &val); - if ((val & 0xF) != XDRAGON_VSEC_VERSION) + if ((val & 0xF) != VSEC_VERSION) continue; cap_len = (val >> 4) & (0xFFF); found = true; @@ -146,7 +146,7 @@ int virtblk_get_ext_feature_bar(struct virtio_device *vdev, u32 *bar_offset) } } - return found ? xdragon_vcs_find_vsf_bar0_offset(dev, cap_len, vsec, bar_offset) : -1; + return found ? 
vcs_find_vsf_bar0_offset(dev, cap_len, vsec, bar_offset) : -1; } int virtblk_get_ext_feature(void __iomem *ioaddr, u32 *host_features) -- Gitee From d7714319cd171be5ed9cb90e3c93e1a3c74a2fea Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 7 Jun 2023 15:54:33 +0800 Subject: [PATCH 2/9] x86/tsc: Extend watchdog check exemption to 4-Sockets platform ANBZ: #26358 commit 233756a640be811efae33763db718fe29753b1e9 upstream. There were reports again that the tsc clocksource on 4 sockets x86 servers was wrongly judged as 'unstable' by 'jiffies' and other watchdogs, and disabled [1][2]. Commit b50db7095fe0 ("x86/tsc: Disable clocksource watchdog for TSC on qualified platorms") was introduced to deal with these false alarms of tsc unstable issues, covering qualified platforms for 2 sockets or smaller ones. And from history of chasing TSC issues, Thomas and Peter only saw real TSC synchronization issue on 8 socket machines. So extend the exemption to 4 sockets to fix the issue. Rui also proposed another way to disable 'jiffies' as clocksource watchdog [3], which can also solve the problem in [1] in an architecture independent way, but can't cure the problem in [2], whose watchdog is HPET or PMTIMER, while 'jiffies' is mostly used as watchdog in boot phase. 'nr_online_nodes' is known to be inaccurate for cases like platform with cpu-less memory nodes, sub numa cluster enabled, fakenuma, kernel cmdline parameter 'maxcpus=', etc. The harmful case is the 'maxcpus' one which could possibly underestimate the package number, and disable the watchdog, but the bright side is that it is mostly for debug usage. All these will be addressed in other patches, as discussed in thread [4]. [1]. https://lore.kernel.org/all/9d3bf570-3108-0336-9c52-9bee15767d29@huawei.com/ [2]. https://lore.kernel.org/lkml/06df410c-2177-4671-832f-339cff05b1d9@paulmck-laptop/ [3]. https://lore.kernel.org/all/bd5b97f89ab2887543fc262348d1c7cafcaae536.camel@intel.com/ [4]. 
https://lore.kernel.org/all/20221021062131.1826810-1-feng.tang@intel.com/ Hygon-SIG: commit 233756a640be upstream x86/tsc: Extend watchdog check exemption to 4-Sockets platform Reported-by: Yu Liao Reported-by: Paul E. McKenney Signed-off-by: Feng Tang Signed-off-by: Paul E. McKenney [ Wenhui Fan: amend commit log ] Signed-off-by: Wenhui Fan Cc: hygon-arch@list.openanolis.cn Reviewed-by: Xiaochen Shen Reviewed-by: Guanghui Feng Reviewed-by: Guixin Liu Link: https://gitee.com/anolis/cloud-kernel/pulls/5911 --- arch/x86/kernel/tsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 13d1a0ac8916..a535756f456f 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1216,7 +1216,7 @@ static void __init check_system_tsc_reliable(void) if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && boot_cpu_has(X86_FEATURE_TSC_ADJUST) && - nr_online_nodes <= 2) + nr_online_nodes <= 4) tsc_disable_clocksource_watchdog(); } -- Gitee From 7200e2abd2f6594a340f997fc1e35908d1825048 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 29 Jul 2024 10:12:02 +0800 Subject: [PATCH 3/9] x86/tsc: Use topology_max_packages() to get package number ANBZ: #26358 commit b4bac279319d3082eb42f074799c7b18ba528c71 upstream. Commit b50db7095fe0 ("x86/tsc: Disable clocksource watchdog for TSC on qualified platorms") was introduced to solve problem that sometimes TSC clocksource is wrongly judged as unstable by watchdog like 'jiffies', HPET, etc. In it, the hardware package number is a key factor for judging whether to disable the watchdog for TSC, and 'nr_online_nodes' was chosen due to, at that time (kernel v5.1x), it is available in early boot phase before registering 'tsc-early' clocksource, where all non-boot CPUs are not brought up yet. 
Dave and Rui pointed out there are many cases in which 'nr_online_nodes' is cheated and not accurate, like: * SNC (sub-numa cluster) mode enabled * numa emulation (numa=fake=8 etc.) * numa=off * platforms with CPU-less HBM nodes, CPU-less Optane memory nodes. * 'maxcpus=' cmdline setup, where chopped CPUs could be onlined later * 'nr_cpus=', 'possible_cpus=' cmdline setup, where chopped CPUs can not be onlined after boot The SNC case is the most user-visible case, as many CSP (Cloud Service Provider) enable this feature in their server fleets. When SNC3 enabled, a 2 socket machine will appear to have 6 NUMA nodes, and get impacted by the issue in reality. Thomas' recent patchset of refactoring x86 topology code improves topology_max_packages() greatly, by making it more accurate and available in early boot phase, which works well in most of the above cases. The only exceptions are 'nr_cpus=' and 'possible_cpus=' setup, which may under-estimate the package number. As during topology setup, the boot CPU iterates through all enumerated APIC IDs and either accepts or rejects the APIC ID. For accepted IDs, it figures out which bits of the ID map to the package number. It tracks which package numbers have been seen in a bitmap. topology_max_packages() just returns the number of bits set in that bitmap. 'nr_cpus=' and 'possible_cpus=' can cause more APIC IDs to be rejected and can artificially lower the number of bits in the package bitmap and thus topology_max_packages(). This means that, for example, a system with 8 physical packages might reject all the CPUs on 6 of those packages and be left with only 2 packages and 2 bits set in the package bitmap. It needs the TSC watchdog, but would disable it anyway. This isn't ideal, but it only happens for debug-oriented options. This is fixable by tracking the package numbers for rejected CPUs. But it's not worth the trouble for debugging. So use topology_max_packages() to replace nr_online_nodes(). 
Hygon-SIG: commit b4bac279319d upstream x86/tsc: Use topology_max_packages() to get package number. Reported-by: Dave Hansen Signed-off-by: Feng Tang Signed-off-by: Thomas Gleixner Reviewed-by: Waiman Long [ Wenhui Fan: amend commit log ] Signed-off-by: Wenhui Fan Cc: hygon-arch@list.openanolis.cn Link: https://lore.kernel.org/all/20240729021202.180955-1-feng.tang@intel.com Closes: https://lore.kernel.org/lkml/a4860054-0f16-6513-f121-501048431086@intel.com/ Reviewed-by: Xiaochen Shen Reviewed-by: Guanghui Feng Reviewed-by: Guixin Liu Link: https://gitee.com/anolis/cloud-kernel/pulls/5911 --- arch/x86/kernel/tsc.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index a535756f456f..0cd1a8b1202a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -27,6 +27,7 @@ #include #include #include +#include #include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ @@ -1208,15 +1209,12 @@ static void __init check_system_tsc_reliable(void) * - TSC which does not stop in C-States * - the TSC_ADJUST register which allows to detect even minimal * modifications - * - not more than two sockets. As the number of sockets cannot be - * evaluated at the early boot stage where this has to be - * invoked, check the number of online memory nodes as a - * fallback solution which is an reasonable estimate. 
+ * - not more than four packages */ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && boot_cpu_has(X86_FEATURE_TSC_ADJUST) && - nr_online_nodes <= 4) + topology_max_packages() <= 4) tsc_disable_clocksource_watchdog(); } -- Gitee From 8706c8fcd06d9d9f24a742bbecc11b5dd0dc84d8 Mon Sep 17 00:00:00 2001 From: Wenhui Fan Date: Tue, 28 Oct 2025 14:11:03 +0800 Subject: [PATCH 4/9] anolis: EDAC/amd64: The width of hash value is 2 bits for Hygon family 18h model 6h processors ANBZ: #26358 It has 2 bits hash value when hash enabled for hygon family 18h model 6h. Hygon-SIG: commit none hygon anolis: EDAC/amd64: The width of hash value is 2 bits for Hygon family 18h model 6h processors Signed-off-by: Wenhui Fan Cc: hygon-arch@list.openanolis.cn Reviewed-by: Guixin Liu Link: https://gitee.com/anolis/cloud-kernel/pulls/5912 --- drivers/edac/amd64_edac.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8c73fa46d9c3..f3d358eefe22 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -891,7 +891,11 @@ static int hygon_umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, case 5: intlv_num_chan = 3; break; case 7: intlv_num_chan = 4; break; - case 8: intlv_num_chan = 1; + case 8: + if (boot_cpu_data.x86_model >= 0x6) + intlv_num_chan = 2; + else + intlv_num_chan = 1; hash_enabled = true; break; default: @@ -1035,10 +1039,16 @@ static int hygon_umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, (ctx.ret_addr >> 30) ^ cs_id; - hashed_bit &= BIT(0); - - if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) - ctx.ret_addr ^= BIT(intlv_addr_bit); + if (boot_cpu_data.x86_model >= 0x6) { + hashed_bit &= 0x3; + if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & 0x3)) + ctx.ret_addr = (ctx.ret_addr & ~((u64)3 << intlv_addr_bit)) | + (hashed_bit << intlv_addr_bit); + } else { + hashed_bit &= BIT(0); + if (hashed_bit != 
((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) + ctx.ret_addr ^= BIT(intlv_addr_bit); + } } /* The channel hashing process. */ -- Gitee From 8f450714093edf9c3c62e6b4fadb67ac8b6c87bd Mon Sep 17 00:00:00 2001 From: Zhiguang Ni Date: Mon, 27 Oct 2025 16:49:00 +0000 Subject: [PATCH 5/9] anolis: x86/csv: Enhance CMA memory usage reporting for CSV3 ANBZ: #26358 Hygon-SIG: commit none hygon x86/csv: Enhance CMA memory usage reporting for CSV3 Provide more detailed reporting of CMA memory usage for CSV3. Main changes: 1. Display memory usage of CSV3 guest NPT page tables. 2. Display memory usage of CSV3 private memory. 3. Remove the "free" memory field and add "csv used" memory information. 4.Get CSV3 meminfo by "/sys/kernel/mm/csv3_cma/mem_info". This improvement helps to better observe and analyze memory allocation behavior for CSV3 guests. Signed-off-by: Zhiguang Ni Cc: hygon-arch@list.openanolis.cn Reviewed-by: Guixin Liu Link: https://gitee.com/anolis/cloud-kernel/pulls/5919 --- arch/x86/include/asm/csv.h | 5 +++ arch/x86/kvm/svm/csv.c | 16 ++++++++++ arch/x86/mm/csv.c | 62 +++++++++++++++++++++++++----------- drivers/crypto/ccp/sev-dev.c | 4 +++ 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/csv.h b/arch/x86/include/asm/csv.h index 4bb2c837db96..cdaff971bcff 100644 --- a/arch/x86/include/asm/csv.h +++ b/arch/x86/include/asm/csv.h @@ -23,6 +23,11 @@ struct csv_mem { extern struct csv_mem *csv_smr; extern unsigned int csv_smr_num; +#ifdef CONFIG_SYSFS +extern atomic_long_t csv3_npt_size; +extern atomic_long_t csv3_pri_mem; +extern unsigned long csv3_meta; +#endif /* CONFIG_SYSFS */ void __init early_csv_reserve_mem(void); phys_addr_t csv_alloc_from_contiguous(size_t size, nodemask_t *nodes_allowed, diff --git a/arch/x86/kvm/svm/csv.c b/arch/x86/kvm/svm/csv.c index 0e115ceb9f49..12383937f0a1 100644 --- a/arch/x86/kvm/svm/csv.c +++ b/arch/x86/kvm/svm/csv.c @@ -104,6 +104,11 @@ struct kvm_csv_info { bool kvm_ext_valid; /* if @kvm_ext 
field is valid */ u32 kvm_ext; /* extensions supported by KVM */ u32 inuse_ext; /* extensions inused by current VM */ + +#ifdef CONFIG_SYSFS + unsigned long npt_size; + unsigned long pri_mem; +#endif /* CONFIG_SYSFS */ }; struct kvm_svm_csv { @@ -459,6 +464,12 @@ static int csv3_set_guest_private_memory(struct kvm *kvm, struct kvm_sev_cmd *ar list_splice(&tmp_list, &csv->smr_list); +#ifdef CONFIG_SYSFS + csv->npt_size = ALIGN(nr_pages * 9, 1UL << smr_entry_shift); + csv->pri_mem = ALIGN((nr_pages << PAGE_SHIFT), 1UL << smr_entry_shift); + atomic_long_add(csv->npt_size, &csv3_npt_size); + atomic_long_add(csv->pri_mem, &csv3_pri_mem); +#endif /* CONFIG_SYSFS */ goto done; e_free_smr: @@ -1685,6 +1696,11 @@ static void csv_vm_destroy(struct kvm *kvm) kfree(smr); } } + +#ifdef CONFIG_SYSFS + atomic_long_sub(csv->npt_size, &csv3_npt_size); + atomic_long_sub(csv->pri_mem, &csv3_pri_mem); +#endif /* CONFIG_SYSFS */ } } diff --git a/arch/x86/mm/csv.c b/arch/x86/mm/csv.c index e0df64e8ca84..b27d6303a34f 100644 --- a/arch/x86/mm/csv.c +++ b/arch/x86/mm/csv.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -88,6 +89,23 @@ EXPORT_SYMBOL_GPL(csv_smr); unsigned int csv_smr_num; EXPORT_SYMBOL_GPL(csv_smr_num); +#ifdef CONFIG_SYSFS +/* + * Global counters exposed via sysfs /sys. Updated atomically during VM creation/destruction. + * csv3_npt_size: total size of NPT tables allocated. + * csv3_pri_mem: total private memory allocated for CSV guests. + * csv3_meta: metadata overhead for CSV memory regions. 
+ */ +atomic_long_t csv3_npt_size = ATOMIC_LONG_INIT(0); +EXPORT_SYMBOL_GPL(csv3_npt_size); + +atomic_long_t csv3_pri_mem = ATOMIC_LONG_INIT(0); +EXPORT_SYMBOL_GPL(csv3_pri_mem); + +unsigned long csv3_meta; +EXPORT_SYMBOL_GPL(csv3_meta); +#endif + struct csv_cma { int nid; int fast; @@ -97,7 +115,7 @@ struct csv_cma { struct cma_array { unsigned long count; unsigned int index; - atomic64_t csv_free_size; + atomic64_t csv_used_size; struct csv_cma csv_cma[]; }; @@ -170,7 +188,7 @@ void __init csv_cma_reserve_mem(void) array->count = 0; array->index = 0; - atomic64_set(&array->csv_free_size, 0); + atomic64_set(&array->csv_used_size, 0); csv_contiguous_pernuma_area[node] = array; for (i = 0; i < count; i++) { @@ -187,8 +205,6 @@ void __init csv_cma_reserve_mem(void) break; } - atomic64_add(CSV_CMA_SIZE, &array->csv_free_size); - if (start > cma_get_base(csv_cma->cma) || !start) start = cma_get_base(csv_cma->cma); @@ -343,7 +359,7 @@ phys_addr_t csv_alloc_from_contiguous(size_t size, nodemask_t *nodes_allowed, } success: - atomic64_sub(PAGE_ALIGN(size), &array->csv_free_size); + atomic64_add(PAGE_ALIGN(size), &array->csv_used_size); phys_addr = page_to_phys(page); clflush_cache_range(__va(phys_addr), size); @@ -366,7 +382,7 @@ void csv_release_to_contiguous(phys_addr_t pa, size_t size) csv_cma->fast = 1; cma_release(csv_cma->cma, page, PAGE_ALIGN(size) >> PAGE_SHIFT); array = csv_contiguous_pernuma_area[csv_cma->nid]; - atomic64_add(PAGE_ALIGN(size), &array->csv_free_size); + atomic64_sub(PAGE_ALIGN(size), &array->csv_used_size); } } } @@ -377,50 +393,60 @@ EXPORT_SYMBOL_GPL(csv_release_to_contiguous); /* * The "free_size" file where the free size of csv cma is read from. 
*/ -static ssize_t free_size_show(struct kobject *kobj, +static ssize_t mem_info_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int node; int offset = 0; - unsigned long free_size, total_free_size = 0; + unsigned long csv_used_size, total_used_size = 0; unsigned long csv_size, total_csv_size = 0; + unsigned long npt_size, pri_mem; struct cma_array *array = NULL; for_each_node_state(node, N_ONLINE) { array = csv_contiguous_pernuma_area[node]; if (array == NULL) { csv_size = 0; - free_size = 0; + csv_used_size = 0; offset += snprintf(buf + offset, PAGE_SIZE - offset, "Node%d:\n", node); offset += snprintf(buf + offset, PAGE_SIZE - offset, - " total: %8lu MiB\n", csv_size); + " total cma size:%12lu MiB\n", csv_size); offset += snprintf(buf + offset, PAGE_SIZE - offset, - " free: %8lu MiB\n", free_size); + " csv3 used:%17lu MiB\n", csv_used_size); continue; } - free_size = atomic64_read(&array->csv_free_size); + csv_used_size = atomic64_read(&array->csv_used_size); csv_size = array->count * CSV_CMA_SIZE; offset += snprintf(buf + offset, PAGE_SIZE - offset, "Node%d:\n", node); offset += snprintf(buf + offset, PAGE_SIZE - offset, - " total: %8lu MiB\n", csv_size >> 20); + " total cma size:%12lu MiB\n", csv_size >> 20); offset += snprintf(buf + offset, PAGE_SIZE - offset, - " free: %8lu MiB\n", free_size >> 20); - total_free_size += free_size; + " csv3 used:%17lu MiB\n", csv_used_size >> 20); + total_used_size += csv_used_size; total_csv_size += csv_size; } + npt_size = atomic_long_read(&csv3_npt_size) >> 20; + pri_mem = atomic_long_read(&csv3_pri_mem) >> 20; + offset += snprintf(buf + offset, PAGE_SIZE - offset, "All Nodes:\n"); offset += snprintf(buf + offset, PAGE_SIZE - offset, - " total: %8lu MiB\n", total_csv_size >> 20); + " total cma size:%12lu MiB\n", total_csv_size >> 20); + offset += snprintf(buf + offset, PAGE_SIZE - offset, + " csv3 used:%17lu MiB\n", total_used_size >> 20); + offset += snprintf(buf + offset, PAGE_SIZE - offset, + " npt 
table:%15lu MiB\n", npt_size); + offset += snprintf(buf + offset, PAGE_SIZE - offset, + " csv3 private memory:%5lu MiB\n", pri_mem); offset += snprintf(buf + offset, PAGE_SIZE - offset, - " free: %8lu MiB\n", total_free_size >> 20); + " meta data:%15lu MiB\n", csv3_meta >> 20); return offset; } -static struct kobj_attribute csv_cma_attr = __ATTR(free_size, 0444, free_size_show, NULL); +static struct kobj_attribute csv_cma_attr = __ATTR(mem_info, 0444, mem_info_show, NULL); /* * Create a group of attributes so that we can create and destroy them all diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e7ee77f1427b..893be9421ae8 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1785,6 +1785,10 @@ static int csv_platform_cmd_set_secure_memory_region(int *error) csv_release_to_contiguous(cmd_set_smcr->base_address, 1UL << CSV_MR_ALIGN_BITS); } +#ifdef CONFIG_SYSFS + else + csv3_meta += cmd_set_smcr->size; +#endif /* CONFIG_SYSFS */ e_free_cmd_set_smcr: kfree((void *)cmd_set_smcr); -- Gitee From 2616288147c3ad5f5f8f0445e198373affaca21f Mon Sep 17 00:00:00 2001 From: Cruz Zhao Date: Wed, 22 Oct 2025 10:20:45 +0800 Subject: [PATCH 6/9] anolis: sched: support to set absolute_expel to task and cgroup ANBZ: #26358 In certain scenarios, it is not necessary to set all highclass tasks as absolute_expeller, so we support setting absolute_expeller for both tasks and cgroups. - For tasks: Use the prctl syscall: prctl(PR_SET_IDENTITY, pid, ID_ABSOLUTE_EXPELLER, NULL, NULL) - For cgroups: echo 3 > /sys/fs/cgroup/$cgroup/cpu.bvt_warp_ns It should be noted that enabling ID_ABSOLUTE_EXPEL is not required when setting absolute_expeller for tasks or cgroups. If ID_ABSOLUTE_EXPEL is enabled, however, all highclass tasks will automatically be set as absolute_expeller. 
Signed-off-by: Cruz Zhao Reviewed-by: Tianchen Ding Link: https://gitee.com/anolis/cloud-kernel/pulls/5898 --- kernel/sched/fair.c | 60 ++++++++++++++++++++++++++++++++++++++------ kernel/sched/sched.h | 3 +++ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c8369f32a4ed..e9548bfd2bd4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -624,6 +624,7 @@ enum { TYPE_NORMAL, TYPE_LS, TYPE_STRICT, + TYPE_MOST_STRICT, }; static DEFINE_MUTEX(identity_mutex); @@ -640,6 +641,18 @@ static inline bool is_expeller(struct sched_entity *se) return test_identity(se, ID_SMT_EXPELLER); } +static inline bool +is_absolute_expeller(struct sched_entity *se) +{ + return test_identity(se, ID_ABSOLUTE_EXPELLER); +} + +static inline bool +task_is_absolute_expeller(struct task_struct *p) +{ + return p && test_identity(&p->se, ID_ABSOLUTE_EXPELLER); +} + static inline bool is_idle_saver(struct sched_entity *se) { return test_identity(se, ID_IDLE_SAVER); @@ -683,6 +696,9 @@ static inline bool need_expel(int this_cpu, bool *expel_by_smt_sibling) if (sched_feat(ID_ABSOLUTE_EXPEL) && rq->nr_high_running) return true; + if (rq->nr_absolute_expeller) + return true; + return false; } @@ -1171,6 +1187,10 @@ id_update_make_up(struct task_group *tg, struct rq *rq, struct cfs_rq *cfs_rq, if (__is_underclass(se)) rq->nr_under_make_up += coefficient * cfs_rq->nr_tasks; + + if (is_absolute_expeller(se)) + rq->nr_absolute_expeller_make_up += + coefficient * cfs_rq->nr_tasks; } static __always_inline void @@ -1182,15 +1202,17 @@ id_commit_make_up(struct rq *rq, bool commit) if (commit) { rq->nr_high_running += rq->nr_high_make_up; rq->nr_under_running += rq->nr_under_make_up; + rq->nr_absolute_expeller += rq->nr_absolute_expeller_make_up; } rq->nr_high_make_up = 0; rq->nr_under_make_up = 0; + rq->nr_absolute_expeller_make_up = 0; } #endif static __always_inline void -id_update_nr_running(struct task_group *tg, struct rq *rq, long 
delta) +id_update_nr_running(struct task_group *tg, struct task_struct *p, struct rq *rq, long delta) { struct sched_entity *se; @@ -1207,6 +1229,17 @@ id_update_nr_running(struct task_group *tg, struct rq *rq, long delta) if (__is_underclass(se)) rq->nr_under_running += delta; + + if (is_absolute_expeller(se)) + rq->nr_absolute_expeller += delta; + + /* + * If a task was set absolute_expeller by prctl syscall, count for it separately, + * so that no errors will occur when both the task and the task group are set to + * absolute_expeller at the same time. + */ + if (task_is_absolute_expeller(p)) + rq->nr_absolute_expeller += delta > 0 ? 1 : -1; } static inline bool id_regard_as_idle(struct rq *rq) @@ -1523,7 +1556,7 @@ static void __update_identity(struct task_group *tg, int flags) __dequeue_entity(cfs_rq, se); hierarchy_update_nr_expel_immune(se, -ei_delta); if (!throttled) - id_update_nr_running(tg, rq, -delta); + id_update_nr_running(tg, NULL, rq, -delta); update_curr(cfs_rq); se->vruntime -= __id_min_vruntime(cfs_rq, se); @@ -1538,7 +1571,7 @@ static void __update_identity(struct task_group *tg, int flags) __enqueue_entity(cfs_rq, se); hierarchy_update_nr_expel_immune(se, ei_delta); if (!throttled) - id_update_nr_running(tg, rq, delta); + id_update_nr_running(tg, NULL, rq, delta); update_min_vruntime(cfs_rq); } @@ -1592,6 +1625,8 @@ static void __update_task_identity(struct task_struct *p, int flags) rq->nr_high_running--; if (__is_underclass(se)) rq->nr_under_running--; + if (task_is_absolute_expeller(p)) + rq->nr_absolute_expeller--; update_curr(cfs_rq); se->vruntime -= __id_min_vruntime(cfs_rq, se); @@ -1609,6 +1644,8 @@ static void __update_task_identity(struct task_struct *p, int flags) rq->nr_high_running++; if (__is_underclass(se)) rq->nr_under_running++; + if (task_is_absolute_expeller(p)) + rq->nr_absolute_expeller++; update_min_vruntime(cfs_rq); } @@ -1661,6 +1698,9 @@ int update_bvt_warp_ns(struct task_group *tg, s64 val) case TYPE_STRICT: flags = 
ID_HIGHCLASS | ID_IDLE_SEEKER | ID_SMT_EXPELLER; break; + case TYPE_MOST_STRICT: + flags = ID_HIGHCLASS | ID_IDLE_SEEKER | ID_SMT_EXPELLER | ID_ABSOLUTE_EXPELLER; + break; default: ret = -ERANGE; goto unlock; @@ -1695,8 +1735,10 @@ int update_identity(struct task_group *tg, struct task_struct *p, s64 val) /* * Tasks stuck in root group can update their id_flags. + * Tasks can be set and clear absolute_expeller. */ - if (p && !(p->flags & PF_NO_SETAFFINITY)) + if (p && !(p->flags & PF_NO_SETAFFINITY) && + val != ID_ABSOLUTE_EXPELLER && val != 0) return -EINVAL; mutex_lock(&identity_mutex); @@ -1987,11 +2029,13 @@ id_entity_before(struct sched_entity *a, struct sched_entity *b) { bool a_is_underclass = __is_underclass(a); bool b_is_underclass = __is_underclass(b); + struct rq *rq = rq_of((cfs_rq_of(a))); if (group_identity_disabled()) return entity_before(a, b); - if (sched_feat(ID_ABSOLUTE_EXPEL) && a_is_underclass != b_is_underclass) + if ((sched_feat(ID_ABSOLUTE_EXPEL) || rq->nr_absolute_expeller) && + a_is_underclass != b_is_underclass) return b_is_underclass; return (s64)(__id_vruntime(a) - __id_vruntime(b)) < 0; @@ -2475,7 +2519,7 @@ id_commit_make_up(struct rq *rq, bool commit) #endif static __always_inline void -id_update_nr_running(struct task_group *tg, struct rq *rq, long delta) +id_update_nr_running(struct task_group *tg, struct task_struct *p, struct rq *rq, long delta) { } @@ -8394,7 +8438,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ add_nr_running(rq, 1); - id_update_nr_running(task_group(p), rq, 1); + id_update_nr_running(task_group(p), p, rq, 1); /* * Since new tasks are assigned an initial util_avg equal to @@ -8523,7 +8567,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ sub_nr_running(rq, 1); - id_update_nr_running(task_group(p), rq, -1); + id_update_nr_running(task_group(p), p, 
rq, -1); /* balance early to pull high priority tasks */ if (unlikely(!was_sched_idle && sched_idle_rq(rq))) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 088213f8ba09..84e1ccef2c69 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -738,6 +738,7 @@ static inline bool group_identity_disabled(void); #define ID_SMT_EXPELLER 0x0004 #define ID_IDLE_SAVER 0x0008 #define ID_IDLE_SEEKER 0x0010 +#define ID_ABSOLUTE_EXPELLER 0x0020 #define IDENTITY_FLAGS_MASK 0x00ff /* @@ -1335,8 +1336,10 @@ struct rq { unsigned int nr_high_running; unsigned int nr_under_running; unsigned int nr_expel_immune; + unsigned int nr_absolute_expeller; long nr_high_make_up; long nr_under_make_up; + long nr_absolute_expeller_make_up; bool smt_expeller; bool smt_expellee; bool on_expel; -- Gitee From 359eda9a16b3c6afc4434baaec9b327cf8b8f2a2 Mon Sep 17 00:00:00 2001 From: Cruz Zhao Date: Mon, 20 Oct 2025 16:25:59 +0800 Subject: [PATCH 7/9] anolis: sched: fix expel_score() ANBZ: #26358 Expel_score should be the account of expellee tasks, which should be calculated using the following formula: rq->cfs.h_nr_running - rq->nr_expel_immune Fixes: 4901599bcf78 ("ck: sched: rescue the expellee on migration") Signed-off-by: Cruz Zhao Reviewed-by: Tianchen Ding Link: https://gitee.com/anolis/cloud-kernel/pulls/5891 --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e9548bfd2bd4..620f50defbe4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -863,7 +863,7 @@ static inline unsigned long expel_score(struct rq *rq) if (!sched_feat(ID_RESCUE_EXPELLEE)) return 0; - return rq->cfs.nr_running - rq->nr_expel_immune; + return rq->cfs.h_nr_running - rq->nr_expel_immune; } #else static inline bool expellee_only(struct rq *rq) -- Gitee From e1fa2c444b1a62c16e7b21f34104916e6bc68ee8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:34 -0700 Subject: [PATCH 8/9] net: use 
WARN_ON_ONCE() in sk_stream_kill_queues() ANBZ: #26358 commit c59f02f848672f92bcea90306240822239d68049 upstream. sk_stream_kill_queues() has three checks which have been useful to detect kernel bugs in the past. However they are potentially a problem because they could flood the syslog. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Xingrui Yi Reviewed-by: D. Wythe Link: https://gitee.com/anolis/cloud-kernel/pulls/5958 --- net/core/stream.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/stream.c b/net/core/stream.c index 422ee97e4f2b..faec050c5325 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -203,12 +203,12 @@ void sk_stream_kill_queues(struct sock *sk) skb_queue_purge(&sk->sk_error_queue); /* Next, the write queue. */ - WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); + WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue)); /* Account for returned memory. */ sk_mem_reclaim(sk); - WARN_ON(sk->sk_wmem_queued); + WARN_ON_ONCE(sk->sk_wmem_queued); /* It is _impossible_ for the backlog to contain anything * when we get here. All user references to this socket -- Gitee From 65d9e1404814223f72c9e410f32bc73601962545 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2022 09:04:33 -0700 Subject: [PATCH 9/9] net: use WARN_ON_ONCE() in inet_sock_destruct() ANBZ: #26358 commit 3e7f2b8d30883f27ab1157bf3f23f30f1a07bf69 upstream. inet_sock_destruct() has four warnings which have been useful to point to kernel bugs in the past. However they are potentially a problem because they could flood the syslog. Signed-off-by: Eric Dumazet Signed-off-by: Jakub Kicinski Signed-off-by: Xingrui Yi Reviewed-by: D. 
Wythe Link: https://gitee.com/anolis/cloud-kernel/pulls/5958 --- net/ipv4/af_inet.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ce889306752b..70ca1c63b30c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -151,10 +151,10 @@ void inet_sock_destruct(struct sock *sk) return; } - WARN_ON(atomic_read(&sk->sk_rmem_alloc)); - WARN_ON(refcount_read(&sk->sk_wmem_alloc)); - WARN_ON(sk->sk_wmem_queued); - WARN_ON(sk->sk_forward_alloc); + WARN_ON_ONCE(atomic_read(&sk->sk_rmem_alloc)); + WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc)); + WARN_ON_ONCE(sk->sk_wmem_queued); + WARN_ON_ONCE(sk->sk_forward_alloc); kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); -- Gitee