From 49400f3493719a772b4611503947b1a73b722627 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Thu, 14 Dec 2023 14:29:35 +0100 Subject: [PATCH 01/16] riscv: Use hugepage mappings for vmemmap ANBZ: #25257 commit ff172d4818ad32dba433dae189e36684e43c5c74 upstream. This will allow better TLB utilization and then should be more performant. Before: ---[ vmemmap start ]--- 0xffff8d8002000000-0xffff8d8012000000 0x000000046ec00000 256M PTE . .. .. D A G . . W R V ---[ vmemmap end ]--- After: ---[ vmemmap start ]--- 0xffff8d8002000000-0xffff8d8012000000 0x000000046ec00000 256M PMD . .. .. D A G . . W R V ---[ vmemmap end ]--- Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231214132935.212864-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/init.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index be2305f625c9..8dcb1ee3445c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1534,10 +1534,29 @@ void __init misc_mem_init(void) } #ifdef CONFIG_SPARSEMEM_VMEMMAP +void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, + unsigned long addr, unsigned long next) +{ + pmd_set_huge(pmd, virt_to_phys(p), PAGE_KERNEL); +} + +int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node, + unsigned long addr, unsigned long next) +{ + vmemmap_verify((pte_t *)pmdp, node, addr, next); + return 1; +} + int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { - return vmemmap_populate_basepages(start, end, node, NULL); + /* + * Note that SPARSEMEM_VMEMMAP is only selected for rv64 and that we + * can't use hugepage mappings for 2-level page table because in case of + * memory hotplug, we are not able to update all the page tables with + * the new PMDs. + */ + return vmemmap_populate_hugepages(start, end, node, NULL); } #endif -- Gitee From 64afac331e77f1bf97e805b4c35c9a10c258114e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:44 +0200 Subject: [PATCH 02/16] riscv: mm: Properly forward vmemmap_populate() altmap parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit e3ecf2fdc8f39a898f9e06481e935b460a097e10 upstream. Make sure that the altmap parameter is properly passed on to vmemmap_populate_hugepages(). Reviewed-by: Alexandre Ghiti Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-2-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 8dcb1ee3445c..7870a5048d44 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1556,7 +1556,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, * memory hotplug, we are not able to update all the page tables with * the new PMDs. */ - return vmemmap_populate_hugepages(start, end, node, NULL); + return vmemmap_populate_hugepages(start, end, node, altmap); } #endif -- Gitee From eb5737bd7daeff42ea5d91fce73f84a6ebea8297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:45 +0200 Subject: [PATCH 03/16] riscv: mm: Pre-allocate vmemmap/direct map/kasan PGD entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 66673099f734fd6512055fee353b5c81dafec216 upstream. The RISC-V port copies the PGD table from init_mm/swapper_pg_dir to all userland page tables, which means that if the PGD level table is changed, other page tables has to be updated as well. Instead of having the PGD changes ripple out to all tables, the synchronization can be avoided by pre-allocating the PGD entries/pages at boot, avoiding the synchronization all together. This is currently done for the bpf/modules, and vmalloc PGD regions. Extend this scheme for the PGD regions touched by memory hotplugging. Prepare the RISC-V port for memory hotplug by pre-allocate vmemmap/direct map/kasan entries at the PGD level. This will roughly waste ~128 (plus 32 if KASAN is enabled) worth of 4K pages when memory hotplugging is enabled in the kernel configuration. Reviewed-by: Alexandre Ghiti Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-3-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/include/asm/kasan.h | 4 ++-- arch/riscv/mm/init.c | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index 0b85e363e778..e6a0071bdb56 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -6,8 +6,6 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_KASAN - /* * The following comment was copied from arm64: * KASAN_SHADOW_START: beginning of the kernel virtual addresses. @@ -34,6 +32,8 @@ */ #define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK) #define KASAN_SHADOW_END MODULES_LOWEST_VADDR + +#ifdef CONFIG_KASAN #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7870a5048d44..cee9eb413554 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -1610,10 +1611,18 @@ static void __init preallocate_pgd_pages_range(unsigned long start, unsigned lon panic("Failed to pre-allocate %s pages for %s area\n", lvl, area); } +#define PAGE_END KASAN_SHADOW_START + void __init pgtable_cache_init(void) { preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc"); if (IS_ENABLED(CONFIG_MODULES)) preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules"); + if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) { + preallocate_pgd_pages_range(VMEMMAP_START, VMEMMAP_END, "vmemmap"); + preallocate_pgd_pages_range(PAGE_OFFSET, PAGE_END, "direct map"); + if (IS_ENABLED(CONFIG_KASAN)) + preallocate_pgd_pages_range(KASAN_SHADOW_START, KASAN_SHADOW_END, "kasan"); + } } #endif -- Gitee From a59f47dd1a510e14636ae94e1c69ac2ee5552450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:46 +0200 Subject: [PATCH 04/16] riscv: mm: Change attribute from __init to __meminit for page functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit fe122b89da670be155f3474c0cb28af2fbcd2ecc upstream. Prepare for memory hotplugging support by changing from __init to __meminit for the page table functions that are used by the upcoming architecture specific callbacks. Changing the __init attribute to __meminit, avoids that the functions are removed after init. The __meminit attribute makes sure the functions are kept in the kernel text post init, but only if memory hotplugging is enabled for the build. Reviewed-by: Alexandre Ghiti Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-4-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/include/asm/mmu.h | 4 +-- arch/riscv/include/asm/pgtable.h | 2 +- arch/riscv/mm/init.c | 56 ++++++++++++++------------------ 3 files changed, 28 insertions(+), 34 deletions(-) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index fcc7e64e6870..ba911fea92d1 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -33,8 +33,8 @@ typedef struct { /* Lock the pointer masking mode because this mm is multithreaded */ #define MM_CONTEXT_LOCK_PMLEN 0 -void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, - phys_addr_t sz, pgprot_t prot); +void __meminit create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, + pgprot_t prot); #endif /* __ASSEMBLY__ */ #endif /* _ASM_RISCV_MMU_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 93533f9e74ee..dd10b20253da 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -169,7 +169,7 @@ struct pt_alloc_ops { #endif }; -extern struct pt_alloc_ops pt_ops __initdata; +extern struct pt_alloc_ops pt_ops __meminitdata; #ifdef CONFIG_MMU /* Number of PGD entries that a user-mode program can use */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index cee9eb413554..5346d2cecff5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -304,7 +304,7 @@ static void __init setup_bootmem(void) } #ifdef CONFIG_MMU -struct pt_alloc_ops pt_ops __initdata; +struct pt_alloc_ops pt_ops __meminitdata; pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; @@ -366,7 +366,7 @@ static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa) return (pte_t *)set_fixmap_offset(FIX_PTE, pa); } -static inline pte_t *__init get_pte_virt_late(phys_addr_t pa) +static inline pte_t *__meminit get_pte_virt_late(phys_addr_t pa) { return (pte_t *) __va(pa); } @@ -385,7 +385,7 @@ static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va) return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); } -static phys_addr_t __init alloc_pte_late(uintptr_t va) +static phys_addr_t __meminit alloc_pte_late(uintptr_t va) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); @@ -393,9 +393,8 @@ static phys_addr_t __init alloc_pte_late(uintptr_t va) return __pa((pte_t *)ptdesc_address(ptdesc)); } -static void __init create_pte_mapping(pte_t *ptep, - uintptr_t va, phys_addr_t pa, - phys_addr_t sz, pgprot_t prot) +static void __meminit create_pte_mapping(pte_t *ptep, uintptr_t va, phys_addr_t pa, phys_addr_t sz, + pgprot_t prot) { uintptr_t pte_idx = pte_index(va); @@ -449,7 +448,7 @@ static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa) return (pmd_t *)set_fixmap_offset(FIX_PMD, pa); } -static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) +static pmd_t *__meminit get_pmd_virt_late(phys_addr_t pa) { return (pmd_t *) __va(pa); } @@ -466,7 +465,7 @@ static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); } -static phys_addr_t __init alloc_pmd_late(uintptr_t va) +static phys_addr_t __meminit alloc_pmd_late(uintptr_t va) { struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); @@ -474,9 +473,9 @@ static phys_addr_t __init alloc_pmd_late(uintptr_t va) return __pa((pmd_t *)ptdesc_address(ptdesc)); } -static void __init create_pmd_mapping(pmd_t *pmdp, - uintptr_t va, phys_addr_t pa, - phys_addr_t sz, pgprot_t prot) +static void __meminit create_pmd_mapping(pmd_t *pmdp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) { pte_t *ptep; phys_addr_t pte_phys; @@ -512,7 +511,7 @@ static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa) return (pud_t *)set_fixmap_offset(FIX_PUD, pa); } -static pud_t *__init get_pud_virt_late(phys_addr_t pa) +static pud_t *__meminit get_pud_virt_late(phys_addr_t pa) { return (pud_t *)__va(pa); } @@ -530,7 +529,7 @@ static phys_addr_t __init alloc_pud_fixmap(uintptr_t va) return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); } -static phys_addr_t alloc_pud_late(uintptr_t va) +static phys_addr_t __meminit alloc_pud_late(uintptr_t va) { unsigned long vaddr; @@ -550,7 +549,7 @@ static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa) return (p4d_t *)set_fixmap_offset(FIX_P4D, pa); } -static p4d_t *__init get_p4d_virt_late(phys_addr_t pa) +static p4d_t *__meminit get_p4d_virt_late(phys_addr_t pa) { return (p4d_t *)__va(pa); } @@ -568,7 +567,7 @@ static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va) return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); } -static phys_addr_t alloc_p4d_late(uintptr_t va) +static phys_addr_t __meminit alloc_p4d_late(uintptr_t va) { unsigned long vaddr; @@ -577,9 +576,8 @@ static phys_addr_t alloc_p4d_late(uintptr_t va) return __pa(vaddr); } -static void __init create_pud_mapping(pud_t *pudp, - uintptr_t va, phys_addr_t pa, - phys_addr_t sz, pgprot_t prot) +static void __meminit create_pud_mapping(pud_t *pudp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, + pgprot_t prot) { pmd_t *nextp; phys_addr_t next_phys; @@ -604,9 +602,8 @@ static void __init create_pud_mapping(pud_t *pudp, create_pmd_mapping(nextp, va, pa, sz, prot); } -static void __init create_p4d_mapping(p4d_t *p4dp, - uintptr_t va, phys_addr_t pa, - phys_addr_t sz, pgprot_t prot) +static void __meminit create_p4d_mapping(p4d_t *p4dp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, + pgprot_t prot) { pud_t *nextp; phys_addr_t next_phys; @@ -662,9 +659,8 @@ static void __init create_p4d_mapping(p4d_t *p4dp, #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) #endif /* __PAGETABLE_PMD_FOLDED */ -void __init create_pgd_mapping(pgd_t *pgdp, - uintptr_t va, phys_addr_t pa, - phys_addr_t sz, pgprot_t prot) +void __meminit create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phys_addr_t sz, + pgprot_t prot) { pgd_next_t *nextp; phys_addr_t next_phys; @@ -689,8 +685,7 @@ void __init create_pgd_mapping(pgd_t *pgdp, create_pgd_next_mapping(nextp, va, pa, sz, prot); } -static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va, - phys_addr_t size) +static uintptr_t __meminit best_map_size(phys_addr_t pa, uintptr_t va, phys_addr_t size) { if (debug_pagealloc_enabled()) return PAGE_SIZE; @@ -726,7 +721,7 @@ asmlinkage void __init __copy_data(void) #endif #ifdef CONFIG_STRICT_KERNEL_RWX -static __init pgprot_t pgprot_from_va(uintptr_t va) +static __meminit pgprot_t pgprot_from_va(uintptr_t va) { if (is_va_kernel_text(va)) return PAGE_KERNEL_READ_EXEC; @@ -753,7 +748,7 @@ void mark_rodata_ro(void) debug_checkwx(); } #else -static __init pgprot_t pgprot_from_va(uintptr_t va) +static __meminit pgprot_t pgprot_from_va(uintptr_t va) { if (IS_ENABLED(CONFIG_64BIT) && !is_kernel_mapping(va)) return PAGE_KERNEL; @@ -1236,9 +1231,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pt_ops_set_fixmap(); } -static void __init create_linear_mapping_range(phys_addr_t start, - phys_addr_t end, - uintptr_t fixed_map_size) +static void __meminit create_linear_mapping_range(phys_addr_t start, phys_addr_t end, + uintptr_t fixed_map_size) { phys_addr_t pa; uintptr_t va, map_size; -- Gitee From 58e1e72f486becbe6d86a4e2ae64a10cc102d00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:47 +0200 Subject: [PATCH 05/16] riscv: mm: Refactor create_linear_mapping_range() for memory hot add MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 007480fe84a9963b6deaa6dceb7039eec03ac007 upstream. Add a parameter to the direct map setup function, so it can be used in arch_add_memory() later. Reviewed-by: Alexandre Ghiti Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-5-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/init.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 5346d2cecff5..01492a347619 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1232,7 +1232,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) } static void __meminit create_linear_mapping_range(phys_addr_t start, phys_addr_t end, - uintptr_t fixed_map_size) + uintptr_t fixed_map_size, const pgprot_t *pgprot) { phys_addr_t pa; uintptr_t va, map_size; @@ -1243,7 +1243,7 @@ static void __meminit create_linear_mapping_range(phys_addr_t start, phys_addr_t best_map_size(pa, va, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, - pgprot_from_va(va)); + pgprot ? *pgprot : pgprot_from_va(va)); } } @@ -1285,22 +1285,19 @@ static void __init create_linear_mapping_page_table(void) __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); - create_linear_mapping_range(start, end, 0); + create_linear_mapping_range(start, end, 0, NULL); } #ifdef CONFIG_STRICT_KERNEL_RWX - create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0); - create_linear_mapping_range(krodata_start, - krodata_start + krodata_size, 0); + create_linear_mapping_range(ktext_start, ktext_start + ktext_size, 0, NULL); + create_linear_mapping_range(krodata_start, krodata_start + krodata_size, 0, NULL); memblock_clear_nomap(ktext_start, ktext_size); memblock_clear_nomap(krodata_start, krodata_size); #endif #ifdef CONFIG_KFENCE - create_linear_mapping_range(kfence_pool, - kfence_pool + KFENCE_POOL_SIZE, - PAGE_SIZE); + create_linear_mapping_range(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, PAGE_SIZE, NULL); memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); #endif -- Gitee From cf762089722ce4683819719c295a3ee6be131df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:48 +0200 Subject: [PATCH 06/16] riscv: mm: Add pfn_to_kaddr() implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 6e6c5e21b8cbe94b89d2e848afad4e6fe2a7abd8 upstream. The pfn_to_kaddr() function is used by KASAN's memory hotplugging path. Add the missing function to the RISC-V port, so that it can be built with MHP and CONFIG_KASAN. Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240605114100.315918-6-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/include/asm/page.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index cd189b20bd41..ae8541cf6577 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -185,6 +185,11 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); unsigned long kaslr_offset(void); +static __always_inline void *pfn_to_kaddr(unsigned long pfn) +{ + return __va(pfn << PAGE_SHIFT); +} + #endif /* __ASSEMBLY__ */ #define virt_addr_valid(vaddr) ({ \ -- Gitee From f972242966e831a6c62d268bbdaa106565496b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:49 +0200 Subject: [PATCH 07/16] riscv: mm: Add memory hotplugging support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit c75a74f4ba19c904c0ae1e011ae2568449409ae4 upstream. For an architecture to support memory hotplugging, a couple of callbacks needs to be implemented: arch_add_memory() This callback is responsible for adding the physical memory into the direct map, and call into the memory hotplugging generic code via __add_pages() that adds the corresponding struct page entries, and updates the vmemmap mapping. arch_remove_memory() This is the inverse of the callback above. vmemmap_free() This function tears down the vmemmap mappings (if CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the backing vmemmap pages. Note that for persistent memory, an alternative allocator for the backing pages can be used; The vmem_altmap. This means that when the backing pages are cleared, extra care is needed so that the correct deallocation method is used. arch_get_mappable_range() This functions returns the PA range that the direct map can map. Used by the MHP internals for sanity checks. The page table unmap/teardown functions are heavily based on code from the x86 tree. The same remove_pgd_mapping() function is used in both vmemmap_free() and arch_remove_memory(), but in the latter function the backing pages are not removed. Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240605114100.315918-7-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/init.c | 267 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 267 insertions(+) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 01492a347619..ece882f78572 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1617,3 +1617,270 @@ void __init pgtable_cache_init(void) } } #endif + +#ifdef CONFIG_MEMORY_HOTPLUG +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ + struct page *page = pmd_page(*pmd); + struct ptdesc *ptdesc = page_ptdesc(page); + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (!pte_none(*pte)) + return; + } + + pagetable_dtor(ptdesc); + if (PageReserved(page)) + free_reserved_page(page); + else + pagetable_free(ptdesc); + pmd_clear(pmd); +} + +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +{ + struct page *page = pud_page(*pud); + struct ptdesc *ptdesc = page_ptdesc(page); + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (!pmd_none(*pmd)) + return; + } + + pagetable_dtor(ptdesc); + if (PageReserved(page)) + free_reserved_page(page); + else + pagetable_free(ptdesc); + pud_clear(pud); +} + +static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) +{ + struct page *page = p4d_page(*p4d); + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (!pud_none(*pud)) + return; + } + + if (PageReserved(page)) + free_reserved_page(page); + else + free_pages((unsigned long)page_address(page), 0); + p4d_clear(p4d); +} + +static void __meminit free_vmemmap_storage(struct page *page, size_t size, + struct vmem_altmap *altmap) +{ + int order = get_order(size); + + if (altmap) { + vmem_altmap_free(altmap, size >> PAGE_SHIFT); + return; + } + + if (PageReserved(page)) { + unsigned int nr_pages = 1 << order; + + while (nr_pages--) + free_reserved_page(page++); + return; + } + + free_pages((unsigned long)page_address(page), order); +} + +static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end, + bool is_vmemmap, struct vmem_altmap *altmap) +{ + unsigned long next; + pte_t *ptep, pte; + + for (; addr < end; addr = next) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + ptep = pte_base + pte_index(addr); + pte = ptep_get(ptep); + if (!pte_present(*ptep)) + continue; + + pte_clear(&init_mm, addr, ptep); + if (is_vmemmap) + free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap); + } +} + +static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end, + bool is_vmemmap, struct vmem_altmap *altmap) +{ + unsigned long next; + pte_t *pte_base; + pmd_t *pmdp, pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + pmdp = pmd_base + pmd_index(addr); + pmd = pmdp_get(pmdp); + if (!pmd_present(pmd)) + continue; + + if (pmd_leaf(pmd)) { + pmd_clear(pmdp); + if (is_vmemmap) + free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap); + continue; + } + + pte_base = (pte_t *)pmd_page_vaddr(*pmdp); + remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap); + free_pte_table(pte_base, pmdp); + } +} + +static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end, + bool is_vmemmap, struct vmem_altmap *altmap) +{ + unsigned long next; + pud_t *pudp, pud; + pmd_t *pmd_base; + + for (; addr < end; addr = next) { + next = pud_addr_end(addr, end); + pudp = pud_base + pud_index(addr); + pud = pudp_get(pudp); + if (!pud_present(pud)) + continue; + + if (pud_leaf(pud)) { + if (pgtable_l4_enabled) { + pud_clear(pudp); + if (is_vmemmap) + free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap); + } + continue; + } + + pmd_base = pmd_offset(pudp, 0); + remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap); + + if (pgtable_l4_enabled) + free_pmd_table(pmd_base, pudp); + } +} + +static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end, + bool is_vmemmap, struct vmem_altmap *altmap) +{ + unsigned long next; + p4d_t *p4dp, p4d; + pud_t *pud_base; + + for (; addr < end; addr = next) { + next = p4d_addr_end(addr, end); + p4dp = p4d_base + p4d_index(addr); + p4d = p4dp_get(p4dp); + if (!p4d_present(p4d)) + continue; + + if (p4d_leaf(p4d)) { + if (pgtable_l5_enabled) { + p4d_clear(p4dp); + if (is_vmemmap) + free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap); + } + continue; + } + + pud_base = pud_offset(p4dp, 0); + remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap); + + if (pgtable_l5_enabled) + free_pud_table(pud_base, p4dp); + } +} + +static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap, + struct vmem_altmap *altmap) +{ + unsigned long addr, next; + p4d_t *p4d_base; + pgd_t *pgd; + + for (addr = va; addr < end; addr = next) { + next = pgd_addr_end(addr, end); + pgd = pgd_offset_k(addr); + + if (!pgd_present(*pgd)) + continue; + + if (pgd_leaf(*pgd)) + continue; + + p4d_base = p4d_offset(pgd, 0); + remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap); + } + + flush_tlb_all(); +} + +static void __meminit remove_linear_mapping(phys_addr_t start, u64 size) +{ + unsigned long va = (unsigned long)__va(start); + unsigned long end = (unsigned long)__va(start + size); + + remove_pgd_mapping(va, end, false, NULL); +} + +struct range arch_get_mappable_range(void) +{ + struct range mhp_range; + + mhp_range.start = __pa(PAGE_OFFSET); + mhp_range.end = __pa(PAGE_END - 1); + return mhp_range; +} + +int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) +{ + int ret = 0; + + create_linear_mapping_range(start, start + size, 0, ¶ms->pgprot); + ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params); + if (ret) { + remove_linear_mapping(start, size); + goto out; + } + + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + + out: + flush_tlb_all(); + return ret; +} + +void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +{ + __remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap); + remove_linear_mapping(start, size); + flush_tlb_all(); +} + +void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap) +{ + remove_pgd_mapping(start, end, true, altmap); +} +#endif /* CONFIG_MEMORY_HOTPLUG */ -- Gitee From 6203747adbe9167cce58b75e63ce586265a77778 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:50 +0200 Subject: [PATCH 08/16] riscv: mm: Take memory hotplug read-lock during kernel page table dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 37992b7f1097ba79ca75ba5a26ddcf0f54f91a08 upstream. During memory hot remove, the ptdump functionality can end up touching stale data. Avoid any potential crashes (or worse), by holding the memory hotplug read-lock while traversing the page table. This change is analogous to arm64's commit bf2b59f60ee1 ("arm64/mm: Hold memory hotplug lock while walking for kernel page table dump"). Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-8-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/ptdump.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index e9090b38f811..dbc2baa95ead 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -351,7 +352,9 @@ void ptdump_check_wx(void) static int ptdump_show(struct seq_file *m, void *v) { + get_online_mems(); ptdump_walk(m, m->private); + put_online_mems(); return 0; } -- Gitee From 35d2f6549a4aef8b303917e820fc598d36dab37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:51 +0200 Subject: [PATCH 09/16] riscv: Enable memory hotplugging for RISC-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit f8c2a240556eb4fcfcd3ee8e5ececce5be1ae734 upstream. Enable ARCH_ENABLE_MEMORY_HOTPLUG and ARCH_ENABLE_MEMORY_HOTREMOVE for RISC-V. Reviewed-by: Alexandre Ghiti Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-9-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 2ce16e7090af..6b44768c3ada 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -19,6 +19,8 @@ config RISCV select ACPI_SPCR_TABLE if ACPI select ARCH_DMA_DEFAULT_COHERENT select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION + select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP + select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT @@ -45,6 +47,7 @@ config RISCV select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_VDSO_DATA select ARCH_KEEP_MEMBLOCK + select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if 64BIT && MMU select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_STACKWALK -- Gitee From 7575b00a9c20b23a4bdfe3b8a685bdd80958d586 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:52 +0200 Subject: [PATCH 10/16] virtio-mem: Enable virtio-mem for RISC-V MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 0546d7043e55becec78fa262f2e84c76a96f6e52 upstream. Now that RISC-V has memory hotplugging support, virtio-mem can be used on the platform. Acked-by: David Hildenbrand Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-10-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- drivers/virtio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index da89d498d14e..3d81d3f848a1 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -117,7 +117,7 @@ config VIRTIO_BALLOON config VIRTIO_MEM tristate "Virtio mem driver" - depends on X86_64 || ARM64 || SW64 + depends on X86_64 || ARM64 || SW64 || RISCV depends on VIRTIO depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE -- Gitee From d9d2178b7a77a459b230c954d384087b01c9a7c2 Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 21 Sep 2023 10:50:20 +0800 Subject: [PATCH 11/16] riscv: Improve PTDUMP to show RSW with non-zero value ANBZ: #25257 commit d5d2c264d33b9acf1a0216861942176ee3569258 upstream. RSW field can be used to encode 2 bits of software defined information. Currently, PTDUMP only prints "RSW" when its value is 1 or 3. To fix this issue and improve the debugging experience with PTDUMP, we redefine _PAGE_SPECIAL to its original value and use _PAGE_SOFT as the RSW mask, allow it to print the RSW with any non-zero value. This patch also removes the val from the struct prot_bits as it is no longer needed. Signed-off-by: Yu Chien Peter Lin Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230921025022.3989723-2-peterlin@andestech.com Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/include/asm/pgtable-bits.h | 4 +-- arch/riscv/mm/ptdump.c | 35 ++++++++++++--------------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index f896708e8331..179bd4afece4 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -16,9 +16,9 @@ #define _PAGE_GLOBAL (1 << 5) /* Global */ #define _PAGE_ACCESSED (1 << 6) /* Set by hardware on any access */ #define _PAGE_DIRTY (1 << 7) /* Set by hardware on any write */ -#define _PAGE_SOFT (1 << 8) /* Reserved for software */ +#define _PAGE_SOFT (3 << 8) /* Reserved for software */ -#define _PAGE_SPECIAL _PAGE_SOFT +#define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index dbc2baa95ead..165d47442efe 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -130,7 +130,6 @@ static struct ptd_mm_info efi_ptd_info = { /* Page Table Entry */ struct prot_bits { u64 mask; - u64 val; const char *set; const char *clear; }; @@ -138,47 +137,38 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { .mask = _PAGE_SOFT, - .val = _PAGE_SOFT, - .set = "RSW", - .clear = " ", + .set = "RSW(%d)", + .clear = " .. ", }, { .mask = _PAGE_DIRTY, - .val = _PAGE_DIRTY, .set = "D", .clear = ".", }, { .mask = _PAGE_ACCESSED, - .val = _PAGE_ACCESSED, .set = "A", .clear = ".", }, { .mask = _PAGE_GLOBAL, - .val = _PAGE_GLOBAL, .set = "G", .clear = ".", }, { .mask = _PAGE_USER, - .val = _PAGE_USER, .set = "U", .clear = ".", }, { .mask = _PAGE_EXEC, - .val = _PAGE_EXEC, .set = "X", .clear = ".", }, { .mask = _PAGE_WRITE, - .val = _PAGE_WRITE, .set = "W", .clear = ".", }, { .mask = _PAGE_READ, - .val = _PAGE_READ, .set = "R", .clear = ".", }, { .mask = _PAGE_PRESENT, - .val = _PAGE_PRESENT, .set = "V", .clear = ".", } @@ -209,15 +199,20 @@ static void dump_prot(struct pg_state *st) unsigned int i; for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { - const char *s; - - if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) - s = pte_bits[i].set; - else - s = pte_bits[i].clear; + char s[7]; + unsigned long val; + + val = st->current_prot & pte_bits[i].mask; + if (val) { + if (pte_bits[i].mask == _PAGE_SOFT) + sprintf(s, pte_bits[i].set, val >> 8); + else + sprintf(s, "%s", pte_bits[i].set); + } else { + sprintf(s, "%s", pte_bits[i].clear); + } - if (s) - pt_dump_seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } } -- Gitee From cb664d71a628cf29da24c6823b972b754b8eaaa4 Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 21 Sep 2023 10:50:21 +0800 Subject: [PATCH 12/16] riscv: Introduce PBMT field to PTDUMP ANBZ: #25257 commit 0713ff337173884bcaf163e44cadd6a56bc8193a upstream. This patch introduces the PBMT field to the PTDUMP, so it can display the memory attributes for NC or IO. Signed-off-by: Yu Chien Peter Lin Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230921025022.3989723-3-peterlin@andestech.com Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/ptdump.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 165d47442efe..7a18f4f54824 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -136,6 +136,12 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { +#ifdef CONFIG_64BIT + .mask = _PAGE_MTMASK_SVPBMT, + .set = "MT(%s)", + .clear = " .. ", + }, { +#endif .mask = _PAGE_SOFT, .set = "RSW(%d)", .clear = " .. ", @@ -206,6 +212,16 @@ static void dump_prot(struct pg_state *st) if (val) { if (pte_bits[i].mask == _PAGE_SOFT) sprintf(s, pte_bits[i].set, val >> 8); +#ifdef CONFIG_64BIT + else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { + if (val == _PAGE_NOCACHE_SVPBMT) + sprintf(s, pte_bits[i].set, "NC"); + else if (val == _PAGE_IO_SVPBMT) + sprintf(s, pte_bits[i].set, "IO"); + else + sprintf(s, pte_bits[i].set, "??"); + } +#endif else sprintf(s, "%s", pte_bits[i].set); } else { -- Gitee From d1255cf973e76aa2179277d0a875e2ffb9ef16db Mon Sep 17 00:00:00 2001 From: Yu Chien Peter Lin Date: Thu, 21 Sep 2023 10:50:22 +0800 Subject: [PATCH 13/16] riscv: Introduce NAPOT field to PTDUMP ANBZ: #25257 commit 015c3c370472290e37143d1387c000db7ec273ad upstream. This patch introduces the NAPOT field to PTDUMP, allowing it to display the letter "N" for pages that have the 63rd bit set. Signed-off-by: Yu Chien Peter Lin Reviewed-by: Alexandre Ghiti Tested-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20230921025022.3989723-4-peterlin@andestech.com Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/mm/ptdump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 7a18f4f54824..1516265ea3e6 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -137,6 +137,10 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { #ifdef CONFIG_64BIT + .mask = _PAGE_NAPOT, + .set = "N", + .clear = ".", + }, { .mask = _PAGE_MTMASK_SVPBMT, .set = "MT(%s)", .clear = " .. ", -- Gitee From f8c9970ab045da9a122ff93ae55220a10ca7d111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:53 +0200 Subject: [PATCH 14/16] riscv: mm: Add support for ZONE_DEVICE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 216e04bf1e4d933fe8338e486a9dff93c208601e upstream. ZONE_DEVICE pages need DEVMAP PTEs support to function (ARCH_HAS_PTE_DEVMAP). Claim another RSW (reserved for software) bit in the PTE for DEVMAP mark, add the corresponding helpers, and enable ARCH_HAS_PTE_DEVMAP for riscv64. Reviewed-by: Alexandre Ghiti Signed-off-by: Björn Töpel Link: https://lore.kernel.org/r/20240605114100.315918-11-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable-64.h | 20 ++++++++++++++++++++ arch/riscv/include/asm/pgtable-bits.h | 1 + arch/riscv/include/asm/pgtable.h | 17 +++++++++++++++++ 4 files changed, 39 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6b44768c3ada..7c468a667ca5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -37,6 +37,7 @@ config RISCV select ARCH_HAS_MMIOWB select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API + select ARCH_HAS_PTE_DEVMAP if 64BIT && MMU select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU select ARCH_HAS_SET_MEMORY if MMU diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index b99bd66107a6..2705aeaeaad4 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -400,4 +400,24 @@ static inline struct page *pgd_page(pgd_t pgd) #define p4d_offset p4d_offset p4d_t *p4d_offset(pgd_t *pgd, unsigned long address); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pte_devmap(pte_t pte); +static inline pte_t pmd_pte(pmd_t pmd); + +static inline int pmd_devmap(pmd_t pmd) +{ + return pte_devmap(pmd_pte(pmd)); +} + +static inline int pud_devmap(pud_t pud) +{ + return 0; +} + +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} +#endif + #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 179bd4afece4..a8f5205cea54 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -19,6 +19,7 @@ #define _PAGE_SOFT (3 << 8) /* Reserved for software */ #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ +#define _PAGE_DEVMAP (1 << 9) /* RSW, devmap */ #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index dd10b20253da..d9ae2f9fdcc3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -393,6 +393,13 @@ static inline int pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } +#ifdef CONFIG_ARCH_HAS_PTE_DEVMAP +static inline int pte_devmap(pte_t pte) +{ + return pte_val(pte) & _PAGE_DEVMAP; +} +#endif + /* static inline pte_t pte_rdprotect(pte_t pte) */ static inline pte_t pte_wrprotect(pte_t pte) @@ -434,6 +441,11 @@ static inline pte_t pte_mkspecial(pte_t pte) return __pte(pte_val(pte) | _PAGE_SPECIAL); } +static inline pte_t pte_mkdevmap(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_DEVMAP); +} + static inline pte_t pte_mkhuge(pte_t pte) { return pte; @@ -731,6 +743,11 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pte_pmd(pte_mkdirty(pmd_pte(pmd))); } +static inline pmd_t pmd_mkdevmap(pmd_t pmd) +{ + return pte_pmd(pte_mkdevmap(pmd_pte(pmd))); +} + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { -- Gitee From 309c3335a552e35aa04f603f0b9b49b8d87ab9b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Wed, 5 Jun 2024 13:40:54 +0200 Subject: [PATCH 15/16] riscv: Enable DAX VMEMMAP optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #25257 commit 4705c1571ad39d9469321d2817faf4c4b78ddffb upstream. Now that DAX is usable, enable the DAX VMEMMAP optimization as well. Signed-off-by: Björn Töpel Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20240605114100.315918-12-bjorn@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Anmeng Zhang --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7c468a667ca5..9626d0d487e2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -67,6 +67,7 @@ config RISCV select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL + select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE select ARCH_WEAK_RELEASE_ACQUIRE if ARCH_USE_QUEUED_SPINLOCKS -- Gitee From c3b33d2679b4aa4837939cb6f0002dc314b3a085 Mon Sep 17 00:00:00 2001 From: Anmeng Zhang Date: Mon, 22 Sep 2025 15:31:33 +0800 Subject: [PATCH 16/16] anolis: riscv: configs: Enable Memory Hot(Un)Plug support ANBZ: #25257 Enable CONFIG_MEMORY_HOTPLUG CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE CONFIG_VIRTIO_MEM CONFIG_MEMORY_HOTREMOVE by default for RISC-V. Signed-off-by: Anmeng Zhang --- anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG | 1 - .../L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE | 1 - anolis/configs/L0-MANDATORY/riscv/CONFIG_VIRTIO_MEM | 1 - anolis/configs/L1-RECOMMEND/riscv/CONFIG_MEMORY_HOTREMOVE | 1 - 4 files changed, 4 deletions(-) delete mode 100644 anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG delete mode 100644 anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE delete mode 100644 anolis/configs/L0-MANDATORY/riscv/CONFIG_VIRTIO_MEM delete mode 100644 anolis/configs/L1-RECOMMEND/riscv/CONFIG_MEMORY_HOTREMOVE diff --git a/anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG b/anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG deleted file mode 100644 index 19580d50e40b..000000000000 --- a/anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_MEMORY_HOTPLUG is not set diff --git a/anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE b/anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE deleted file mode 100644 index e7fe50c39b78..000000000000 --- a/anolis/configs/L0-MANDATORY/riscv/CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set diff --git a/anolis/configs/L0-MANDATORY/riscv/CONFIG_VIRTIO_MEM b/anolis/configs/L0-MANDATORY/riscv/CONFIG_VIRTIO_MEM deleted file mode 100644 index a04c19497763..000000000000 --- a/anolis/configs/L0-MANDATORY/riscv/CONFIG_VIRTIO_MEM +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_VIRTIO_MEM is not set diff --git a/anolis/configs/L1-RECOMMEND/riscv/CONFIG_MEMORY_HOTREMOVE b/anolis/configs/L1-RECOMMEND/riscv/CONFIG_MEMORY_HOTREMOVE deleted file mode 100644 index 9fcd5f374e75..000000000000 --- a/anolis/configs/L1-RECOMMEND/riscv/CONFIG_MEMORY_HOTREMOVE +++ /dev/null @@ -1 +0,0 @@ -# CONFIG_MEMORY_HOTREMOVE is not set -- Gitee