From 8e8b9254ebeabab4409e535c1920e6780d9f9a47 Mon Sep 17 00:00:00 2001 From: hr567 Date: Fri, 14 Jun 2024 08:17:22 +0000 Subject: [PATCH 01/11] anolis: mem: add devmem.enable_readonly parameter ANBZ: #12923 Added a kernel parameter to turn off the ability to write to /dev/mem. Signed-off-by: hr567 Reviewed-by: zhongjiang-ali --- drivers/char/mem.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 94c2b556cf97..c2953f7924cc 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -94,6 +94,14 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } #endif +static bool devmem_readonly; +static int __init devmem_enable_readonly(char *val) +{ + devmem_readonly = true; + return 0; +} +early_param("devmem.enable_readonly", devmem_enable_readonly); + #ifndef unxlate_dev_mem_ptr #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr void __weak unxlate_dev_mem_ptr(phys_addr_t phys, void *addr) @@ -213,6 +221,9 @@ static ssize_t write_mem(struct file *file, const char __user *buf, if (!valid_phys_addr_range(p, count)) return -EFAULT; + if (devmem_readonly) + return -EPERM; + written = 0; #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED @@ -375,6 +386,11 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma) size_t size = vma->vm_end - vma->vm_start; phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + if (devmem_readonly) { + vma->vm_flags &= ~(VM_WRITE | VM_SHARED); + vma_set_page_prot(vma); + } + /* Does it even fit in phys_addr_t? */ if (offset >> PAGE_SHIFT != vma->vm_pgoff) return -EINVAL; -- Gitee From dacedd575cabf4a1148220cbbc22f56647ffdc37 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Sun, 24 Nov 2024 22:09:44 +0800 Subject: [PATCH 02/11] anolis: mm: skip set_zone_contiguous when online/offline pages ANBZ: #12923 set_zone_contiguous will take a long of time to scan the whole zone, and the time will increasing as the zone size increase. This is not unexpected for user to wait long time for hotplug memory. hence we remove the set_zone_contiguous in the hotplug/ unhotplug path, it will result in that compact will take more time in pageblock_pfn_to_page, but that is not the critical path. Signed-off-by: zhongjiang-ali Signed-off-by: hr567 Reviewed-by: Baolin Wang --- include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 4 ++++ mm/page_alloc.c | 5 +++++ 3 files changed, 10 insertions(+) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 657e0511ed4e..c180c10d0305 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -197,6 +197,7 @@ extern u64 max_mem_size; extern int memhp_online_type_from_str(const char *str); +extern bool skip_set_contiguous; /* Default online_type (MMOP_*) when new memory blocks are added. */ extern int memhp_default_online_type; /* If movable_node boot option specified */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9f2bc27d7f18..7e665909d301 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -118,6 +118,10 @@ MODULE_PARM_DESC(auto_movable_ratio, "Set the maximum ratio of MOVABLE:KERNEL memory in the system " "in percent for \"auto-movable\" online policy. 
Default: 301"); +bool skip_set_contiguous __read_mostly; +module_param(skip_set_contiguous, bool, 0644); +MODULE_PARM_DESC(skip_set_contiguous, "Do not set zone contiguous when online/offline pages"); + /* * memory_hotplug.auto_movable_numa_aware: consider numa node stats */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f4233ca7252e..7e4e017e4416 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1895,6 +1895,11 @@ void set_zone_contiguous(struct zone *zone) unsigned long block_start_pfn = zone->zone_start_pfn; unsigned long block_end_pfn; +#ifdef CONFIG_MEMORY_HOTPLUG + if (skip_set_contiguous) + return; +#endif + block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages); for (; block_start_pfn < zone_end_pfn(zone); block_start_pfn = block_end_pfn, -- Gitee From f81ed56e119ded01fa82ad773b0016876e82d056 Mon Sep 17 00:00:00 2001 From: hr567 Date: Wed, 24 Jul 2024 02:21:15 +0000 Subject: [PATCH 03/11] anolis: mm: downgrade memory hotplug logging to debug ANBZ: #12923 Optimize memory offline logging. Reduce the amount of log printing to reduce the pressure of log storage and analysis. Signed-off-by: hr567 --- mm/memory_hotplug.c | 2 +- mm/page_alloc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7e665909d301..593106ca92dc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1883,7 +1883,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, /* Mark all sections offline and remove free pages from the buddy. */ __offline_isolated_pages(start_pfn, end_pfn); - pr_info("Offlined Pages %ld\n", nr_pages); + pr_debug("Offlined Pages %ld\n", nr_pages); /* * The memory sections are marked offline, and the pageblock flags diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7e4e017e4416..86e1fe5de0b5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -9325,7 +9325,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, 0)) { - pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n", + pr_debug_ratelimited("%s: [%lx, %lx) PFNs busy\n", __func__, outer_start, end); ret = -EBUSY; goto done; -- Gitee From 6d43eefd59e0f5dfdea10452bf2750dcb73dc2aa Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Tue, 31 Oct 2023 15:37:31 +0800 Subject: [PATCH 04/11] anolis: hookers: fix an warning when hooker was built-in ANBZ: #12923 hooker will fails to find the cr_pinning when hooker was built-in because it has defined as static. hence we need to fix it and keep the compatibility. Signed-off-by: zhongjiang-ali Acked-by: Zelin Deng Acked-by: Zeng Jiahao --- arch/x86/include/asm/tlbflush.h | 2 ++ arch/x86/kernel/cpu/common.c | 7 +++++++ net/hookers/hookers.c | 4 +--- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 8c87a2e0b660..0421a7a368a2 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -18,6 +18,8 @@ void __flush_tlb_all(void); #define TLB_FLUSH_ALL -1UL void cr4_update_irqsoff(unsigned long set, unsigned long clear); +bool cr_pinning_enabled(void); + unsigned long cr4_read_shadow(void); /* Set in this cpu's CR4. 
*/ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b58b5ba4a70f..8deb567c92a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -442,8 +442,15 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); + static unsigned long cr4_pinned_bits __ro_after_init; +bool cr_pinning_enabled(void) +{ + return static_key_enabled(&cr_pinning); +} +EXPORT_SYMBOL(cr_pinning_enabled); + void native_write_cr0(unsigned long val) { unsigned long bits_missing = 0; diff --git a/net/hookers/hookers.c b/net/hookers/hookers.c index dbdae64985dc..6e87d48c8c36 100644 --- a/net/hookers/hookers.c +++ b/net/hookers/hookers.c @@ -263,12 +263,10 @@ EXPORT_SYMBOL_GPL(hooker_uninstall); #if defined(CONFIG_X86) static inline unsigned int hookers_clear_cr0(void) { - struct static_key *orig_key; unsigned int cr0 = read_cr0(); unsigned long val = cr0 & 0xfffeffff; - orig_key = (struct static_key *)kallsyms_lookup_name("cr_pinning"); - if (!orig_key || !static_key_enabled(orig_key)) + if (!cr_pinning_enabled()) write_cr0(val); else asm volatile("mov %0,%%cr0" : "+r" (val) : : "memory"); -- Gitee From 46193aab30781acb843fc9cd0a2f17b56e682eb4 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Wed, 1 Nov 2023 10:45:16 +0800 Subject: [PATCH 05/11] anolis: make vtoa module can be built-in ANBZ: #12923 Database need install some module at boot stage to work for rund. It is easier to make them built in the kernel. Signed-off-by: zhongjiang-ali Acked-by: Xunlei Pang --- net/vtoa/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vtoa/Makefile b/net/vtoa/Makefile index 57d59fac3015..7da0d9920b17 100644 --- a/net/vtoa/Makefile +++ b/net/vtoa/Makefile @@ -1,2 +1,2 @@ -obj-m = vtoa.o +obj-$(CONFIG_VTOA) = vtoa.o vtoa-objs := vtoa_main.o vtoa_ctl.o -- Gitee From 35c5392ac60b1d81b75e98b234c91d1c85eef14a Mon Sep 17 00:00:00 2001 From: Fupan Li Date: Fri, 10 Mar 2023 15:34:46 +0800 Subject: [PATCH 06/11] anolis: oom: notify the victim's memcg's oom event when system OOM occur ANBZ: #12923 When a system OOM occur, and select a victim to kill, it's better to notify the victim's memcg's OOM event, thus, the container's cgroup OOM event watcher would be notified and mark the container's exit reason as "OOM". 
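As a purely illustrative sketch of the consumer side (not part of this patch; the
cgroup v1 mount point and container path below are assumptions), a watcher that
listens on the victim memcg's OOM notifications through the standard
memory.oom_control eventfd mechanism would now also wake up when a system-wide
OOM kills a task belonging to that memcg:

	/* hypothetical user-space watcher; error handling omitted for brevity */
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/eventfd.h>
	#include <unistd.h>

	int main(void)
	{
		const char *cg = "/sys/fs/cgroup/memory/mycontainer"; /* assumed path */
		char path[256], buf[64];
		int ocfd, ecfd, efd;
		uint64_t cnt;

		/* fd of the memcg's memory.oom_control file */
		snprintf(path, sizeof(path), "%s/memory.oom_control", cg);
		ocfd = open(path, O_RDONLY);

		/* eventfd that the kernel will signal on OOM events */
		efd = eventfd(0, 0);

		/* register "<eventfd> <oom_control fd>" with cgroup.event_control */
		snprintf(path, sizeof(path), "%s/cgroup.event_control", cg);
		ecfd = open(path, O_WRONLY);
		snprintf(buf, sizeof(buf), "%d %d", efd, ocfd);
		write(ecfd, buf, strlen(buf));

		/* blocks until the memcg reports an OOM event */
		read(efd, &cnt, sizeof(cnt));
		printf("OOM event received\n");
		return 0;
	}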
Signed-off-by: Fupan Li Signed-off-by: zhongjiang-ali Acked-by: Xu Yu --- include/linux/oom.h | 1 + mm/memcontrol.c | 3 +-- mm/oom_kill.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 4623f66ceb31..e52bc3749154 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -118,6 +118,7 @@ long oom_badness(struct task_struct *p, extern bool out_of_memory(struct oom_control *oc); extern void exit_oom_victim(void); +extern void mem_cgroup_oom_notify(struct mem_cgroup *memcg); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fd92b8ebfb1c..285b2f644847 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -187,7 +187,6 @@ struct mem_cgroup_event { }; static void mem_cgroup_threshold(struct mem_cgroup *memcg); -static void mem_cgroup_oom_notify(struct mem_cgroup *memcg); static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value); static int memory_min_show(struct seq_file *m, void *v); @@ -5683,7 +5682,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) return 0; } -static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) +void mem_cgroup_oom_notify(struct mem_cgroup *memcg) { struct mem_cgroup *iter; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 8de061ffaaf4..d63abf27fd22 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1021,6 +1021,9 @@ static void oom_kill_process(struct oom_control *oc, const char *message) struct mem_cgroup *oom_group; static DEFINE_RATELIMIT_STATE(oom_global_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); +#ifdef CONFIG_MEMCG + struct mem_cgroup *memcg; +#endif /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -1042,6 +1045,16 @@ static void oom_kill_process(struct oom_control *oc, const char *message) else if (!is_memcg_oom(oc) && __ratelimit(&oom_global_rs)) dump_global_header(oc, victim); +#ifdef CONFIG_MEMCG + rcu_read_lock(); + memcg = mem_cgroup_from_task(victim); + if (memcg != NULL && memcg != root_mem_cgroup && !is_memcg_oom(oc)) { + css_get(&memcg->css); + mem_cgroup_oom_notify(memcg); + css_put(&memcg->css); + } + rcu_read_unlock(); +#endif /* * Do we need to kill the entire memory cgroup? * Or even one of the ancestor memory cgroups? -- Gitee From 63e46e99a9cc89df15c6f02c5ff0d256300ceb42 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Tue, 9 Apr 2024 20:25:42 +0800 Subject: [PATCH 07/11] anolis: virtio-mem: make memmap on movable memory can be offlined ANBZ: #12923 online memory will allocate memory for struct page from normal/dma32 zone by virtio-mem. it will trigger oom when online memory will take up a lot of kernel memory. It will fails to offline the memmap memory in movable node when memmap_on_memory enable, which will result in memory waste. The patch will finish the function to offline the memmap when the other memory of memory memblock has been offlined, the memmap also will offlined. 
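For context: the self-hosted memmap path exercised here relies on the existing
memory_hotplug module parameter being enabled, for example with a guest command
line along the lines of

	memory_hotplug.memmap_on_memory=1 memhp_default_state=online_movable

(an assumed example; neither parameter is introduced by this patch). With that in
place, virtio-mem in SBM mode requests MHP_MEMMAP_ON_MEMORY, and the vmemmap
subblocks at the start of a memory block can be unplugged together with the rest
of the block instead of being left behind in the movable zone.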
Signed-off-by: hr567 Signed-off-by: zhongjiang-ali --- arch/x86/mm/init_64.c | 7 +++ drivers/base/memory.c | 15 ++++++ drivers/virtio/virtio_mem.c | 105 +++++++++++++++++++++++++++++------- include/linux/memory.h | 1 + include/linux/mm.h | 46 +++++++++++++++- mm/hugetlb_vmemmap.c | 2 +- mm/memory_hotplug.c | 5 +- mm/sparse-vmemmap.c | 55 ++++++++++++++----- mm/sparse.c | 3 +- 9 files changed, 203 insertions(+), 36 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6a432e596159..d765d199de1e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1215,6 +1215,13 @@ void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + /* + * altmap range has been offline succeeded and altmap vmemmap + * has replaced with allocated memory from buddy. + */ + if (altmap && !altmap->alloc) + altmap = NULL; + __remove_pages(start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 67eef614d792..a82db349a3a8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -593,6 +593,21 @@ struct memory_block *find_memory_block(struct mem_section *section) return find_memory_block_by_id(block_id); } +unsigned long get_memory_block_vmemmap_pages(unsigned long block_id) +{ + struct memory_block *mem = find_memory_block_by_id(block_id); + unsigned long nr_vmemmap_size; + + if (!mem || !mem->nr_vmemmap_pages) + return 0; + + nr_vmemmap_size = mem->nr_vmemmap_pages << PAGE_SHIFT; + put_device(&mem->dev); + + return nr_vmemmap_size; +} +EXPORT_SYMBOL(get_memory_block_vmemmap_pages); + static struct attribute *memory_memblk_attrs[] = { &dev_attr_phys_index.attr, &dev_attr_state.attr, diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index e0b0633c5c81..f14f3dad0a21 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -613,6 +613,7 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, uint64_t size) { int rc; + mhp_t mhp_flags = MHP_MERGE_RESOURCE | MHP_NID_IS_MGID; /* * When force-unloading the driver and we still have memory added to @@ -629,8 +630,10 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, addr + size - 1); /* Memory might get onlined immediately. 
*/ atomic64_add(size, &vm->offline_size); - rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, - MHP_MERGE_RESOURCE | MHP_NID_IS_MGID); + /* only support memmap_on_memory on sbm scenario */ + if (vm->in_sbm) + mhp_flags |= MHP_MEMMAP_ON_MEMORY; + rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, mhp_flags); if (rc) { atomic64_sub(size, &vm->offline_size); dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); @@ -870,13 +873,13 @@ static void virtio_mem_sbm_notify_online(struct virtio_mem *vm, } static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, - unsigned long mb_id) + unsigned long mb_id, unsigned long nr_sb) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; int sb_id; - for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { + for (sb_id = nr_sb; sb_id < vm->sbm.sbs_per_mb; sb_id++) { if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + @@ -942,19 +945,21 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, const unsigned long size = PFN_PHYS(mhp->nr_pages); int rc = NOTIFY_OK; unsigned long id; + unsigned long nr_vmemmap_size = 0; if (!virtio_mem_overlaps_range(vm, start, size)) return NOTIFY_DONE; if (vm->in_sbm) { id = virtio_mem_phys_to_mb_id(start); + nr_vmemmap_size = get_memory_block_vmemmap_pages(id); + /* - * In SBM, we add memory in separate memory blocks - we expect - * it to be onlined/offlined in the same granularity. Bail out - * if this ever changes. + * In SBM, we add memory in separate memory blocks, but vmemmap page + * can be added to the start of memory block, we still expect to + * online/offline the whole memory blocks in that case. */ - if (WARN_ON_ONCE(size != memory_block_size_bytes() || - !IS_ALIGNED(start, memory_block_size_bytes()))) + if (WARN_ON_ONCE(!IS_ALIGNED(start, vm->sbm.sb_size))) return NOTIFY_BAD; } else { id = virtio_mem_phys_to_bb_id(vm, start); @@ -986,7 +991,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, } vm->hotplug_active = true; if (vm->in_sbm) - virtio_mem_sbm_notify_going_offline(vm, id); + virtio_mem_sbm_notify_going_offline(vm, id, + nr_vmemmap_size / vm->sbm.sb_size); else virtio_mem_bbm_notify_going_offline(vm, id, mhp->start_pfn, @@ -1007,7 +1013,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, if (vm->in_sbm) virtio_mem_sbm_notify_offline(vm, id); - atomic64_add(size, &vm->offline_size); + atomic64_add(size + nr_vmemmap_size, &vm->offline_size); /* * Trigger the workqueue. Now that we have some offline memory, * maybe we can handle pending unplug requests. @@ -1022,7 +1028,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, if (vm->in_sbm) virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn); - atomic64_sub(size, &vm->offline_size); + atomic64_sub(size + nr_vmemmap_size, &vm->offline_size); /* * Start adding more memory once we onlined half of our * threshold. Don't trigger if it's possibly due to our actipn @@ -1141,12 +1147,20 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) * Try to allocate a range, marking pages fake-offline, effectively * fake-offlining them. 
*/ -static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) +static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages, bool map) { const bool is_movable = page_zonenum(pfn_to_page(pfn)) == ZONE_MOVABLE; int rc, retry_count; + /* + * map means the subblock represent the struct page of movable zone. + * the range of vmemmap pages will remap to new page to keep the + * page information for offline_pages. + */ + if (is_movable && map) + return 0; + /* * TODO: We want an alloc_contig_range() mode that tries to allocate * harder (e.g., dealing with temporarily pinned pages, PCP), especially @@ -1892,22 +1906,46 @@ static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, * * Will modify the state of the memory block. */ +static void virtio_mem_init_section(unsigned long start_pfn, unsigned long nr_pages) +{ + struct mem_section *ms; + unsigned long section_nr = pfn_to_section_nr(start_pfn); + struct page *memmap = pfn_to_page(start_pfn); + + ms = __nr_to_section(section_nr); + ms->section_mem_map = sparse_encode_mem_map(memmap, section_nr) + | SECTION_MARKED_PRESENT | SECTION_IS_ONLINE | SECTION_HAS_MEM_MAP; +} + static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, unsigned long mb_id, int sb_id, - int count) + int count, bool map) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); unsigned long start_pfn; + unsigned long block_addr = virtio_mem_mb_id_to_phys(mb_id); + unsigned long block_nr_pages = PFN_DOWN(memory_block_size_bytes()); + unsigned long block_start_pfn = PFN_DOWN(block_addr); + unsigned long block_start = (unsigned long)pfn_to_page(block_start_pfn); + unsigned long block_end = block_start + block_nr_pages * sizeof(struct page); + unsigned long altmap_pfn; int rc; + LIST_HEAD(vmemmap_pages); - start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->sbm.sb_size); + start_pfn = PFN_DOWN(block_addr + sb_id * vm->sbm.sb_size); - rc = virtio_mem_fake_offline(start_pfn, nr_pages); + rc = virtio_mem_fake_offline(start_pfn, nr_pages, map); if (rc) return rc; + if (map) { + /* Make sure that memblock has rebuild the page mapping */ + if (vmemmap_remap_alloc(block_start, block_end, + VIRTIO_MEMMAP_COPY, GFP_KERNEL, &vmemmap_pages)) + return -ENOMEM; + } + /* Try to unplug the allocated memory */ rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) { @@ -1916,6 +1954,17 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, return rc; } + if (map) { + /* Make sure that memblock has rebuild the page mapping */ + if (vmemmap_remap_alloc(block_start, block_end, + VIRTIO_MEMMAP_RESTORE, GFP_KERNEL, &vmemmap_pages)) + return -ENOMEM; + + for (altmap_pfn = block_start_pfn; altmap_pfn < block_start_pfn + nr_pages; + altmap_pfn += PAGES_PER_SECTION) + virtio_mem_init_section(altmap_pfn, PAGES_PER_SECTION); + } + switch (old_state) { case VIRTIO_MEM_SBM_MB_KERNEL: virtio_mem_sbm_set_mb_state(vm, mb_id, @@ -1945,12 +1994,16 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, uint64_t *nb_sb) { int rc, sb_id; + bool map = false; + unsigned long nr_vmemmap_size = get_memory_block_vmemmap_pages(mb_id); + unsigned long nr_vmemmap_sbs = nr_vmemmap_size / vm->sbm.sb_size; + int count_vmemmap = 0; /* If possible, try to unplug the complete block in one shot. 
*/ if (*nb_sb >= vm->sbm.sbs_per_mb && virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, - vm->sbm.sbs_per_mb); + vm->sbm.sbs_per_mb, map); if (!rc) { *nb_sb -= vm->sbm.sbs_per_mb; goto unplugged; @@ -1967,7 +2020,15 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, if (sb_id < 0) break; - rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); + if (nr_vmemmap_size && sb_id < nr_vmemmap_sbs && + virtio_mem_sbm_test_sb_unplugged(vm, mb_id, nr_vmemmap_sbs, + vm->sbm.sbs_per_mb - nr_vmemmap_sbs)) { + map = true; + count_vmemmap++; + continue; + } + + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1, map); if (rc == -EBUSY) continue; else if (rc) @@ -1975,6 +2036,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, *nb_sb -= 1; } + /* unplug the vmemmap of the whole memblock if it exists. */ + if (map) + virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id + 1, count_vmemmap, map); + unplugged: /* * Once all subblocks of a memory block were unplugged, offline and @@ -2114,7 +2179,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, if (!page) continue; - rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION, false); if (rc) { end_pfn = pfn; goto rollback_safe_unplug; diff --git a/include/linux/memory.h b/include/linux/memory.h index cbcc43ad2b97..4ddc3b960ae9 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -136,6 +136,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size); extern void memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); extern struct memory_block *find_memory_block(struct mem_section *); +extern unsigned long get_memory_block_vmemmap_pages(unsigned long block_id); typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); extern int walk_memory_blocks(unsigned long start, unsigned long size, void *arg, walk_memory_blocks_func_t func); diff --git a/include/linux/mm.h b/include/linux/mm.h index e09f994c61b0..5531b4543dbc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1789,6 +1789,43 @@ extern int mlock_fixup(struct vm_area_struct *vma, */ #define ZAP_ZEROPAGE 0x01 +/* + * virtio use the following special value to remapping memmap. + */ +#define VIRTIO_MEMMAP_RESTORE (-1UL) +#define VIRTIO_MEMMAP_COPY (-2UL) +#ifdef CONFIG_VIRTIO_MEM +static inline bool virtio_is_use_memmap(unsigned long addr) +{ + return addr == VIRTIO_MEMMAP_RESTORE || addr == VIRTIO_MEMMAP_COPY; +} + +static inline bool virtio_memmap_restore(unsigned long addr) +{ + return addr == VIRTIO_MEMMAP_RESTORE; +} + +static inline bool virtio_memmap_copy(unsigned long addr) +{ + return addr == VIRTIO_MEMMAP_COPY; +} +#else +static inline bool virtio_is_use_memmap(unsigned long addr) +{ + return false; +} + +static inline bool virtio_memmap_restore(unsigned long addr) +{ + return false; +} + +static inline bool virtio_memmap_copy(unsigned long addr) +{ + return false; +} +#endif + /* * Parameter block passed down to zap_pte_range in exceptional cases. 
*/ @@ -3207,12 +3244,19 @@ static inline void print_vma_addr(char *prefix, unsigned long rip) int vmemmap_remap_free(unsigned long start, unsigned long end, unsigned long reuse); int vmemmap_remap_alloc(unsigned long start, unsigned long end, - unsigned long reuse, gfp_t gfp_mask); + unsigned long reuse, gfp_t gfp_mask, struct list_head *pages); +#else +static inline int vmemmap_remap_alloc(unsigned long start, unsigned long end, unsigned long reuse, + gfp_t gfp_mask, struct list_head *pages) +{ + return -EINVAL; +} #endif void *sparse_buffer_alloc(unsigned long size); struct page * __populate_section_memmap(unsigned long pfn, unsigned long nr_pages, int nid, struct vmem_altmap *altmap); +unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum); void pmd_init(void *addr); void pud_init(void *addr); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 9167fcf8ed63..caf8a1bef280 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -254,7 +254,7 @@ int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) * discarded vmemmap pages must be allocated and remapping. */ ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse, - GFP_KERNEL | __GFP_NORETRY | __GFP_NOKFENCE); + GFP_KERNEL | __GFP_NORETRY | __GFP_NOKFENCE, NULL); if (!ret) { ClearHPageVmemmapOptimized(head); static_branch_dec(&hugetlb_optimize_vmemmap_key); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 593106ca92dc..c5dc6843ba2c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1498,6 +1498,7 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn, struct zone *zone = NULL; struct page *page; int i; + for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1); pfn < end_pfn; pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) { @@ -2171,8 +2172,10 @@ static int try_offline_memory_block(struct memory_block *mem, void *arg) * Default is MMOP_OFFLINE - change it only if offlining succeeded, * so try_reonline_memory_block() can do the right thing. */ - if (!rc) + if (!rc) { **online_types = online_type; + mem->nr_vmemmap_pages = 0; + } (*online_types)++; /* Ignore if already offline. */ diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index adfddcc93a37..8ea9e10f8026 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -59,8 +59,8 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) pmd_t __pmd; int i; unsigned long addr = start; - struct page *page = pmd_page(*pmd); pte_t *pgtable = pte_alloc_one_kernel(&init_mm); + unsigned long pfn = pmd_pfn(*pmd); if (!pgtable) return -ENOMEM; @@ -71,7 +71,7 @@ static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) pte_t entry, *pte; pgprot_t pgprot = PAGE_KERNEL; - entry = mk_pte(page + i, pgprot); + entry = pfn_pte(pfn + i, pgprot); pte = pte_offset_kernel(&__pmd, addr); set_pte_at(&init_mm, addr, pte, entry); } @@ -114,7 +114,7 @@ static void vmemmap_pte_range(pmd_t *pmd, unsigned long addr, * The reuse_page is found 'first' in table walk before we start * remapping (which is calling @walk->remap_pte). 
*/ - if (!walk->reuse_page) { + if (!virtio_is_use_memmap(walk->reuse_addr) && !walk->reuse_page) { walk->reuse_page = pte_page(*pte); /* * Because the reuse address is part of the range that we are @@ -288,22 +288,44 @@ static void vmemmap_restore_pte(pte_t *pte, unsigned long addr, { pgprot_t pgprot = PAGE_KERNEL; struct page *page; - void *to; + void *to, *src; + unsigned long pfn; + pte_t entry; - BUG_ON(pte_page(*pte) != walk->reuse_page); + if (!virtio_is_use_memmap(walk->reuse_addr)) + BUG_ON(pte_page(*pte) != walk->reuse_page); page = list_first_entry(walk->vmemmap_pages, struct page, lru); list_del(&page->lru); to = page_to_virt(page); - copy_page(to, (void *)walk->reuse_addr); - reset_struct_pages(to); + if (!virtio_is_use_memmap(walk->reuse_addr)) { + copy_page(to, (void *)walk->reuse_addr); + reset_struct_pages(to); + entry = mk_pte(page, pgprot); + } else { + if (virtio_memmap_restore(walk->reuse_addr)) { + pfn = __pa(to) >> PAGE_SHIFT; + entry = pfn_pte(pfn, pgprot); + } else { + if (virtio_memmap_copy(walk->reuse_addr)) { + pfn = pte_pfn(*pte); + src = __va(__pfn_to_phys(pfn)); + + copy_page(to, src); + list_add_tail(&page->lru, walk->vmemmap_pages); + return; + } + /* trigger an warnning when walk in here now */ + WARN_ON(1); + } + } /* * Makes sure that preceding stores to the page contents become visible * before the set_pte_at() write. */ smp_wmb(); - set_pte_at(&init_mm, addr, pte, mk_pte(page, pgprot)); + set_pte_at(&init_mm, addr, pte, entry); } /** @@ -404,7 +426,7 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, * Return: %0 on success, negative error code otherwise. */ int vmemmap_remap_alloc(unsigned long start, unsigned long end, - unsigned long reuse, gfp_t gfp_mask) + unsigned long reuse, gfp_t gfp_mask, struct list_head *altmap_pages) { LIST_HEAD(vmemmap_pages); struct vmemmap_remap_walk walk = { @@ -414,17 +436,26 @@ int vmemmap_remap_alloc(unsigned long start, unsigned long end, }; /* See the comment in the vmemmap_remap_free(). */ - BUG_ON(start - reuse != PAGE_SIZE); + if (!virtio_is_use_memmap(reuse)) + BUG_ON(start - reuse != PAGE_SIZE); + + if (virtio_is_use_memmap(reuse)) + walk.vmemmap_pages = altmap_pages; - if (alloc_vmemmap_page_list(start, end, gfp_mask, &vmemmap_pages)) + if (list_empty(walk.vmemmap_pages) && alloc_vmemmap_page_list(start, + end, gfp_mask, walk.vmemmap_pages)) return -ENOMEM; mmap_read_lock(&init_mm); - vmemmap_remap_range(reuse, end, &walk); + if (virtio_is_use_memmap(reuse)) + vmemmap_remap_range(start, end, &walk); + else + vmemmap_remap_range(reuse, end, &walk); mmap_read_unlock(&init_mm); return 0; } +EXPORT_SYMBOL(vmemmap_remap_alloc); #endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ /* diff --git a/mm/sparse.c b/mm/sparse.c index 1fa994760f4c..fc13230c1921 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -304,7 +304,7 @@ static void __init memblocks_present(void) * the identity pfn - section_mem_map will return the actual * physical page frame number. 
*/ -static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) +unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) { unsigned long coded_mem_map = (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); @@ -312,6 +312,7 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p BUG_ON(coded_mem_map & ~SECTION_MAP_MASK); return coded_mem_map; } +EXPORT_SYMBOL(sparse_encode_mem_map); #ifdef CONFIG_MEMORY_HOTPLUG /* -- Gitee From b498a9e801bf25909f1ba26898c69dce37cda055 Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Thu, 9 May 2024 16:37:51 +0800 Subject: [PATCH 08/11] anolis: virtio-mem: restrict offline memmap on movable in x86_64 ANBZ: #12923 Currently, remapping vmemmap depends on hugepage vmemmap optimization. because it reuse the some logical, but it disable the feautre in arm64, hence we also need to restrict it in x86_64. Meanwhile, the patch also restrict the supported memmap on movable will not impact the original local. Signed-off-by: zhongjiang-ali Signed-off-by: hr567 --- drivers/dax/kmem.c | 2 +- drivers/virtio/virtio_mem.c | 11 +++++++---- drivers/xen/balloon.c | 2 +- include/linux/memory_hotplug.h | 4 ++-- mm/memory_hotplug.c | 14 +++++++++----- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index e8532ab519f5..4bb1a077e54a 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -130,7 +130,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) * this as RAM automatically. */ rc = add_memory_driver_managed(data->mgid, range.start, - range_len(&range), kmem_name, MHP_NID_IS_MGID); + range_len(&range), kmem_name, MHP_NID_IS_MGID, 0); if (rc) { dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f14f3dad0a21..ef1be18bfb28 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -613,7 +613,7 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, uint64_t size) { int rc; - mhp_t mhp_flags = MHP_MERGE_RESOURCE | MHP_NID_IS_MGID; + mhp_t extra_flags = 0; /* * When force-unloading the driver and we still have memory added to @@ -630,10 +630,13 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, addr + size - 1); /* Memory might get onlined immediately. 
*/ atomic64_add(size, &vm->offline_size); +#ifdef CONFIG_X86_64 /* only support memmap_on_memory on sbm scenario */ if (vm->in_sbm) - mhp_flags |= MHP_MEMMAP_ON_MEMORY; - rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, mhp_flags); + extra_flags |= MHP_MEMMAP_ON_MEMORY; +#endif + rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, + MHP_MERGE_RESOURCE | MHP_NID_IS_MGID, extra_flags); if (rc) { atomic64_sub(size, &vm->offline_size); dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); @@ -1940,7 +1943,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, return rc; if (map) { - /* Make sure that memblock has rebuild the page mapping */ + /* Make sure that memblock will record the page mapping */ if (vmemmap_remap_alloc(block_start, block_end, VIRTIO_MEMMAP_COPY, GFP_KERNEL, &vmemmap_pages)) return -ENOMEM; diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 8db96b5e7253..c79072df4f87 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -336,7 +336,7 @@ static enum bp_state reserve_additional_memory(void) mutex_unlock(&balloon_mutex); /* add_memory_resource() requires the device_hotplug lock */ lock_device_hotplug(); - rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE); + rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE, 0); unlock_device_hotplug(); mutex_lock(&balloon_mutex); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index c180c10d0305..874abfbf215f 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -332,10 +332,10 @@ extern void __ref free_area_init_core_hotplug(struct pglist_data *pgdat); extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags); extern int add_memory_resource(int nid, struct resource *resource, - mhp_t mhp_flags); + mhp_t mhp_flags, mhp_t extra_flags); extern int add_memory_driver_managed(int nid, u64 start, u64 size, const char *resource_name, - mhp_t mhp_flags); + mhp_t mhp_flags, mhp_t extra_flags); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap, int migratetype); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c5dc6843ba2c..990d14e1be4b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1303,7 +1303,8 @@ bool mhp_supports_memmap_on_memory(unsigned long size) * * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) +int __ref add_memory_resource(int nid, struct resource *res, + mhp_t mhp_flags, mhp_t extra_flags) { struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; struct vmem_altmap mhp_altmap = {}; @@ -1344,7 +1345,9 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) /* * Self hosted memmap array */ - if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { + if ((mhp_flags & MHP_MEMMAP_ON_MEMORY) || + ((extra_flags & MHP_MEMMAP_ON_MEMORY) && + mhp_memmap_on_memory())) { if (!mhp_supports_memmap_on_memory(size)) { ret = -EINVAL; goto error; @@ -1418,7 +1421,7 @@ int __ref __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags) if (IS_ERR(res)) return PTR_ERR(res); - ret = add_memory_resource(nid, res, mhp_flags); + ret = add_memory_resource(nid, res, mhp_flags, 0); if (ret < 0) release_memory_resource(res); return ret; @@ -1458,7 +1461,8 @@ EXPORT_SYMBOL_GPL(add_memory); * 
"System RAM ($DRIVER)". */ int add_memory_driver_managed(int nid, u64 start, u64 size, - const char *resource_name, mhp_t mhp_flags) + const char *resource_name, + mhp_t mhp_flags, mhp_t extra_flags) { struct resource *res; int rc; @@ -1476,7 +1480,7 @@ int add_memory_driver_managed(int nid, u64 start, u64 size, goto out_unlock; } - rc = add_memory_resource(nid, res, mhp_flags); + rc = add_memory_resource(nid, res, mhp_flags, extra_flags); if (rc < 0) release_memory_resource(res); -- Gitee From 5dcb911f1d73e8cf20ca24f0736e607453145c2b Mon Sep 17 00:00:00 2001 From: zhongjiang-ali Date: Thu, 20 Jun 2024 14:34:31 +0800 Subject: [PATCH 09/11] anolis: virtio-mem: Make struct page can be offlined in normal zone ANBZ: #12923 memmap_on_memory will allocate struct page in the front of the onlined memory block, but we failed to offline it when unplug the memory. hence we will also unplug the memory when the rest of memory block is offlined. therefore, we failed to decrease the total request size when unplug the struct page, it will result in the more memory will be offlined. Signed-off-by: zhongjiang-ali --- drivers/virtio/virtio_mem.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index ef1be18bfb28..e0ae99cf39fb 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -892,13 +892,13 @@ static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, } static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, - unsigned long mb_id) + unsigned long mb_id, unsigned long nr_sb) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; int sb_id; - for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { + for (sb_id = nr_sb; sb_id < vm->sbm.sbs_per_mb; sb_id++) { if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + @@ -1049,7 +1049,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, if (!vm->hotplug_active) break; if (vm->in_sbm) - virtio_mem_sbm_notify_cancel_offline(vm, id); + virtio_mem_sbm_notify_cancel_offline(vm, id, + nr_vmemmap_size / vm->sbm.sb_size); else virtio_mem_bbm_notify_cancel_offline(vm, id, mhp->start_pfn, @@ -1152,8 +1153,6 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) */ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages, bool map) { - const bool is_movable = page_zonenum(pfn_to_page(pfn)) == - ZONE_MOVABLE; int rc, retry_count; /* @@ -1161,7 +1160,7 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages, bo * the range of vmemmap pages will remap to new page to keep the * page information for offline_pages. */ - if (is_movable && map) + if (map) return 0; /* @@ -1177,7 +1176,7 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages, bo if (rc == -ENOMEM) /* whoops, out of memory */ return rc; - else if (rc && !is_movable) + else if (rc && page_zonenum(pfn_to_page(pfn)) != ZONE_MOVABLE) break; else if (rc) continue; @@ -2040,8 +2039,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, } /* unplug the vmemmap of the whole memblock if it exists. 
*/ - if (map) + if (map) { virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id + 1, count_vmemmap, map); + *nb_sb -= count_vmemmap; + } unplugged: /* -- Gitee From b4718c55136813da6addff0219c260451bbb17e3 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Fri, 22 Jan 2021 17:21:46 +0800 Subject: [PATCH 10/11] virtio-blk: support per-device queue depth ANBZ: #12923 commit d1e9aa9c34a776d43de819d3d334833aff5cca5a upstream. module parameter 'virtblk_queue_depth' was firstly introduced for testing/benchmarking purposes described in commit fc4324b4597c ("virtio-blk: base queue-depth on virtqueue ringsize or module param"). And currently 'virtblk_queue_depth' is used as a saved value for the first probed device. Since we have different virtio-blk devices which have different capabilities, it requires that we support per-device queue depth instead of per-module. So defaultly use vq free elements if module parameter 'virtblk_queue_depth' is not set. Signed-off-by: Joseph Qi Acked-by: Jason Wang Link: https://lore.kernel.org/r/1611307306-71067-1-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Signed-off-by: Yifei Zhang --- drivers/block/virtio_blk.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index d2a7da2831d9..00f6b6301ffb 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -856,6 +856,7 @@ static int virtblk_probe(struct virtio_device *vdev) u32 v, blk_size, max_size, sg_elems, opt_io_size; u16 min_io_size; u8 physical_block_exp, alignment_offset; + unsigned int queue_depth; if (!vdev->config->get) { dev_err(&vdev->dev, "%s failure: config access disabled\n", @@ -907,16 +908,18 @@ static int virtblk_probe(struct virtio_device *vdev) } /* Default queue sizing is to fill the ring. */ - if (!virtblk_queue_depth) { - virtblk_queue_depth = vblk->vqs[0].vq->num_free; + if (likely(!virtblk_queue_depth)) { + queue_depth = vblk->vqs[0].vq->num_free; /* ... but without indirect descs, we use 2 descs per req */ if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) - virtblk_queue_depth /= 2; + queue_depth /= 2; + } else { + queue_depth = virtblk_queue_depth; } memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); vblk->tag_set.ops = &virtio_mq_ops; - vblk->tag_set.queue_depth = virtblk_queue_depth; + vblk->tag_set.queue_depth = queue_depth; vblk->tag_set.numa_node = NUMA_NO_NODE; vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; vblk->tag_set.cmd_size = -- Gitee From 914d78bd56b7cb4ed64f8aa25137957bf1341a56 Mon Sep 17 00:00:00 2001 From: Simon Guo Date: Tue, 10 Sep 2024 16:53:16 +0800 Subject: [PATCH 11/11] anolis: mm: hugetext: add macro definition for hugetext flags ANBZ: #12923 This patch adds hugetext flags definition. There is no functionality change. 
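For reference, the macro values are identical to the numeric literals they
replace, so the user-visible encoding is unchanged:

	BIT_MASK_THP_TEXT_FILE         0x1   /* file-backed text */
	BIT_MASK_THP_TEXT_ANON         0x2   /* anonymous text */
	BIT_MASK_THP_TEXT_FILE_DIRECT  0x4   /* only valid together with 0x1 */
	BIT_MASK_THP_TEXT_ALL          0x7

e.g. writing 7 to the hugetext_enabled control (or passing 7 through the
boot-time setup_hugetext() hook, assuming the usual hugetext= command-line
wiring) still enables all three modes, exactly as 0x01 | 0x02 | 0x04 did before.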
Signed-off-by: Simon Guo --- include/linux/huge_mm.h | 12 ++++++++++++ mm/huge_memory.c | 26 +++++++++++++------------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 72c8cf8662f5..9d637b1cabe5 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -97,6 +97,18 @@ enum transparent_hugepage_flag { #endif }; +#ifdef CONFIG_HUGETEXT +#define BIT_SHIFT_THP_TEXT_FILE 0 +#define BIT_SHIFT_THP_TEXT_ANON 1 +#define BIT_SHIFT_THP_TEXT_FILE_DIRECT 2 + +#define BIT_MASK_THP_TEXT_FILE (1 << BIT_SHIFT_THP_TEXT_FILE) +#define BIT_MASK_THP_TEXT_ANON (1 << BIT_SHIFT_THP_TEXT_ANON) +#define BIT_MASK_THP_TEXT_FILE_DIRECT (1 << BIT_SHIFT_THP_TEXT_FILE_DIRECT) +#define BIT_MASK_THP_TEXT_ALL (BIT_MASK_THP_TEXT_FILE | \ + BIT_MASK_THP_TEXT_ANON | BIT_MASK_THP_TEXT_FILE_DIRECT) +#endif + struct kobject; struct kobj_attribute; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8111b975b751..c1ea2c7deaaf 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -342,11 +342,11 @@ static ssize_t hugetext_enabled_show(struct kobject *kobj, int val = 0; if (test_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_ENABLED_FLAG, &transparent_hugepage_flags)) - val |= 0x01; + val |= BIT_MASK_THP_TEXT_FILE; if (test_bit(TRANSPARENT_HUGEPAGE_ANON_TEXT_ENABLED_FLAG, &transparent_hugepage_flags)) - val |= 0x02; + val |= BIT_MASK_THP_TEXT_ANON; if (test_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_DIRECT_FLAG, &transparent_hugepage_flags)) - val |= 0x04; + val |= BIT_MASK_THP_TEXT_FILE_DIRECT; return sprintf(buf, "%d\n", val); } @@ -361,29 +361,29 @@ static ssize_t hugetext_enabled_store(struct kobject *kobj, return -EINVAL; ret = kstrtoul(buf, 0, &val); - if (ret < 0 || val > 7) + if (ret < 0 || val > BIT_MASK_THP_TEXT_ALL) return -EINVAL; /* FILE_TEXT_DIRECT depends on FILE_TEXT_ENABLED */ - if ((val & 0x4) && !(val & 0x1)) + if ((val & BIT_MASK_THP_TEXT_FILE_DIRECT) && !(val & BIT_MASK_THP_TEXT_FILE)) return -EINVAL; ret = count; - if (val & 0x01) + if (val & BIT_MASK_THP_TEXT_FILE) set_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); else clear_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); - if (val & 0x02) + if (val & BIT_MASK_THP_TEXT_ANON) set_bit(TRANSPARENT_HUGEPAGE_ANON_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); else clear_bit(TRANSPARENT_HUGEPAGE_ANON_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); - if (val & 0x04) + if (val & BIT_MASK_THP_TEXT_FILE_DIRECT) set_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_DIRECT_FLAG, &transparent_hugepage_flags); else @@ -753,30 +753,30 @@ static int __init setup_hugetext(char *str) goto out; err = kstrtoul(str, 0, &val); - if (err < 0 || val > 7) + if (err < 0 || val > BIT_MASK_THP_TEXT_ALL) goto out; /* FILE_TEXT_DIRECT depends on FILE_TEXT_ENABLED */ - if ((val & 0x4) && !(val & 0x1)) { + if ((val & BIT_MASK_THP_TEXT_FILE_DIRECT) && !(val & BIT_MASK_THP_TEXT_FILE)) { err = -EINVAL; goto out; } - if (val & 0x01) + if (val & BIT_MASK_THP_TEXT_FILE) set_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); else clear_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); - if (val & 0x02) + if (val & BIT_MASK_THP_TEXT_ANON) set_bit(TRANSPARENT_HUGEPAGE_ANON_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); else clear_bit(TRANSPARENT_HUGEPAGE_ANON_TEXT_ENABLED_FLAG, &transparent_hugepage_flags); - if (val & 0x04) + if (val & BIT_MASK_THP_TEXT_FILE_DIRECT) set_bit(TRANSPARENT_HUGEPAGE_FILE_TEXT_DIRECT_FLAG, 
&transparent_hugepage_flags); else -- Gitee