From 5742c4bea7a51b1ba1a3dc457738cfcf8dd5a2e0 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Tue, 15 Oct 2024 10:39:44 +0800 Subject: [PATCH 01/15] purgeable memory kernel support Signed-off-by: ma_yulong --- fs/proc/meminfo.c | 31 ++- fs/proc/task_mmu.c | 12 ++ include/linux/mm.h | 18 ++ include/linux/mm_inline.h | 4 + include/linux/mm_types.h | 4 + include/linux/mmzone.h | 21 +- include/linux/page-flags.h | 9 + include/trace/events/mmflags.h | 11 +- kernel/fork.c | 10 +- mm/Kconfig | 23 +++ mm/Makefile | 2 + mm/memory.c | 40 +++- mm/mmap.c | 15 ++ mm/purgeable.c | 348 +++++++++++++++++++++++++++++++++ mm/purgeable_ashmem_trigger.c | 134 +++++++++++++ mm/rmap.c | 28 +++ mm/vmscan.c | 100 +++++++++- mm/vmstat.c | 8 + 18 files changed, 807 insertions(+), 11 deletions(-) create mode 100755 mm/purgeable.c create mode 100755 mm/purgeable_ashmem_trigger.c diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 45af9a989d40..875306ca2b39 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -17,6 +17,9 @@ #ifdef CONFIG_CMA #include #endif +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include #include "internal.h" @@ -40,6 +43,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) unsigned long pages[NR_LRU_LISTS]; unsigned long sreclaimable, sunreclaim; int lru; + unsigned long nr_purg_active = 0; + unsigned long nr_purg_inactive = 0; +#ifdef CONFIG_MEM_PURGEABLE + unsigned long nr_purg_pined = 0; +#endif si_meminfo(&i); si_swapinfo(&i); @@ -53,6 +61,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) pages[lru] = global_node_page_state(NR_LRU_BASE + lru); +#ifdef CONFIG_MEM_PURGEABLE + nr_purg_active = pages[LRU_ACTIVE_PURGEABLE]; + nr_purg_inactive = pages[LRU_INACTIVE_PURGEABLE]; + purg_pages_info(NULL, &nr_purg_pined); + nr_purg_pined = min(nr_purg_pined, nr_purg_active + nr_purg_inactive); +#endif + available = si_mem_available(); sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B); sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B); @@ -64,13 +79,25 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Cached: ", cached); show_val_kb(m, "SwapCached: ", total_swapcache_pages()); show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] + - pages[LRU_ACTIVE_FILE]); +#ifdef CONFIG_MEM_PURGEABLE + pages[LRU_ACTIVE_FILE] + + nr_purg_active); +#else + pages[LRU_ACTIVE_FILE]); +#endif + show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] + - pages[LRU_INACTIVE_FILE]); + pages[LRU_INACTIVE_FILE] + + nr_purg_inactive); show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]); show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]); show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]); show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]); +#ifdef CONFIG_MEM_PURGEABLE + show_val_kb(m, "Active(purg): ", nr_purg_active); + show_val_kb(m, "Inactive(purg): ", nr_purg_inactive); + show_val_kb(m, "Pined(purg): ", nr_purg_pined); +#endif show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]); show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK)); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index bd8285811728..415a5bef7424 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -20,6 +20,9 @@ #include #include #include +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include @@ -33,6 +36,11 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, 
total_vm, hiwater_rss, total_rss; +#ifdef CONFIG_MEM_PURGEABLE + unsigned long nr_purg_sum = 0, nr_purg_pin = 0; + + mm_purg_pages_info(mm, &nr_purg_sum, &nr_purg_pin); +#endif anon = get_mm_counter(mm, MM_ANONPAGES); file = get_mm_counter(mm, MM_FILEPAGES); @@ -76,6 +84,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) seq_put_decimal_ull_width(m, " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); +#ifdef CONFIG_MEM_PURGEABLE + SEQ_PUT_DEC(" kB\nPurgSum:\t", nr_purg_sum); + SEQ_PUT_DEC(" kB\nPurgPin:\t", nr_purg_pin); +#endif seq_puts(m, " kB\n"); hugetlb_report_usage(m, mm); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 7179e3f6a030..83f791100b84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -320,16 +320,34 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_7 39 /* bit only usable on 64-bit architectures */ +#ifdef CONFIG_MEM_PURGEABLE +#define VM_HIGH_ARCH_BIT_8 40 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_9 41 /* bit only usable on 64-bit architectures */ +#endif /* CONFIG_MEM_PURGEABLE */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) +#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #define VM_HIGH_ARCH_7 BIT(VM_HIGH_ARCH_BIT_7) +#ifdef CONFIG_MEM_PURGEABLE +#define VM_HIGH_ARCH_8 BIT(VM_HIGH_ARCH_BIT_8) +#define VM_HIGH_ARCH_9 BIT(VM_HIGH_ARCH_BIT_9) +#endif /* CONFIG_MEM_PURGEABLE */ #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ +#ifdef CONFIG_MEM_PURGEABLE +#define VM_PURGEABLE VM_HIGH_ARCH_8 +#define VM_USEREXPTE VM_HIGH_ARCH_9 +#else /* CONFIG_MEM_PURGEABLE */ +#define VM_PURGEABLE 0 +#define VM_USEREXPTE 0 +#endif /* CONFIG_MEM_PURGEABLE */ + #ifdef CONFIG_SECURITY_XPM #define VM_XPM VM_HIGH_ARCH_7 #else /* CONFIG_SECURITY_XPM */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 96b1c157554c..027591c9decb 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -93,6 +93,10 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return LRU_UNEVICTABLE; lru = folio_is_file_lru(folio) ? 
LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; +#ifdef CONFIG_MEM_PURGEABLE + if (folio_test_purgeable(folio)) + lru = LRU_INACTIVE_PURGEABLE; +#endif if (folio_test_active(folio)) lru += LRU_ACTIVE; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index db7003d2886f..7264a43f8d18 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -703,6 +703,10 @@ struct mm_struct { #endif unsigned long task_size; /* size of task vm space */ pgd_t * pgd; +#ifdef CONFIG_MEM_PURGEABLE + void *uxpgd; + spinlock_t uxpgd_lock; +#endif #ifdef CONFIG_MEMBARRIER /** diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9cf03644fe90..84da48194dbc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -146,6 +146,10 @@ enum zone_stat_item { NR_ZONE_ACTIVE_ANON, NR_ZONE_INACTIVE_FILE, NR_ZONE_ACTIVE_FILE, +#ifdef CONFIG_MEM_PURGEABLE + NR_ZONE_INACTIVE_PURGEABLE, + NR_ZONE_ACTIVE_PURGEABLE, +#endif NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ @@ -166,6 +170,10 @@ enum node_stat_item { NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ +#ifdef CONFIG_MEM_PURGEABLE + NR_INACTIVE_PURGEABLE, + NR_ACTIVE_PURGEABLE, +#endif NR_UNEVICTABLE, /* " " " " " */ NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, @@ -268,12 +276,19 @@ static __always_inline bool vmstat_item_in_bytes(int idx) #define LRU_BASE 0 #define LRU_ACTIVE 1 #define LRU_FILE 2 +#ifdef CONFIG_MEM_PURGEABLE +#define LRU_PURGEABLE 4 +#endif enum lru_list { LRU_INACTIVE_ANON = LRU_BASE, LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, +#ifdef CONFIG_MEM_PURGEABLE + LRU_INACTIVE_PURGEABLE = LRU_BASE + LRU_PURGEABLE, + LRU_ACTIVE_PURGEABLE = LRU_BASE + LRU_PURGEABLE + LRU_ACTIVE, +#endif LRU_UNEVICTABLE, NR_LRU_LISTS }; @@ -288,7 +303,7 @@ enum vmscan_throttle_state { #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) -#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) +#define for_each_evictable_lru(lru) for (lru = 0; lru < LRU_UNEVICTABLE; lru++) static inline bool is_file_lru(enum lru_list lru) { @@ -297,6 +312,10 @@ static inline bool is_file_lru(enum lru_list lru) static inline bool is_active_lru(enum lru_list lru) { +#ifdef CONFIG_MEM_PURGEABLE + if (lru == LRU_ACTIVE_PURGEABLE) + return true; +#endif return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4d2e0c913baf..af9b7524c692 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -136,6 +136,9 @@ enum pageflags { PG_arch_2, PG_arch_3, #endif +#ifdef CONFIG_MEM_PURGEABLE + PG_purgeable, +#endif #ifdef CONFIG_SECURITY_XPM PG_xpm_readonly, PG_xpm_writetainted, @@ -615,6 +618,12 @@ PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif +#ifdef CONFIG_MEM_PURGEABLE +PAGEFLAG(Purgeable, purgeable, PF_ANY) +#else +PAGEFLAG_FALSE(Purgeable) +#endif + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 0a224af0e59a..5fee97d06e6e 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -65,10 +65,16 @@ __def_gfpflag_names 
__def_gfpflag_names_kasan \ ) : "none" +#ifdef CONFIG_MEM_PURGEABLE +#define IF_HAVE_PG_PURGEABLE(_name) ,{1UL << PG_##_name, __stringify(_name)} +#else +#define IF_HAVE_PG_PURGEABLE(_name) +#endif + #ifdef CONFIG_SECURITY_XPM -#define IF_HAVE_PG_XPM_INTEGRITY(flag,string) ,{1UL << flag, string} +#define IF_HAVE_PG_XPM_INTEGRITY(_name) ,{1UL << PG_##_name, __stringify(_name)} #else -#define IF_HAVE_PG_XPM_INTEGRITY(flag,string) +#define IF_HAVE_PG_XPM_INTEGRITY(_name) #endif #ifdef CONFIG_MMU @@ -125,6 +131,7 @@ DEF_PAGEFLAG_NAME(reclaim), \ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ +IF_HAVE_PG_PURGEABLE(purgeable) \ IF_HAVE_PG_MLOCK(mlocked) \ IF_HAVE_PG_UNCACHED(uncached) \ IF_HAVE_PG_HWPOISON(hwpoison) \ diff --git a/kernel/fork.c b/kernel/fork.c index 92611a26a392..515267609be9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -99,7 +99,9 @@ #include #include #include - +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include #include @@ -793,6 +795,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, static inline int mm_alloc_pgd(struct mm_struct *mm) { +#ifdef CONFIG_MEM_PURGEABLE + mm_init_uxpgd(mm); +#endif mm->pgd = pgd_alloc(mm); if (unlikely(!mm->pgd)) return -ENOMEM; @@ -802,6 +807,9 @@ static inline int mm_alloc_pgd(struct mm_struct *mm) static inline void mm_free_pgd(struct mm_struct *mm) { pgd_free(mm, mm->pgd); +#ifdef CONFIG_MEM_PURGEABLE + mm_clear_uxpgd(mm); +#endif } #else static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) diff --git a/mm/Kconfig b/mm/Kconfig index 544d113729eb..ee4c2cf539d7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1303,6 +1303,29 @@ config LOCK_MM_AND_FIND_VMA bool depends on !STACK_GROWSUP + +config MEM_PURGEABLE + bool "Purgeable memory feature" + default n + depends on 64BIT + select ARCH_USES_HIGH_VMA_FLAGS + help + Support purgeable pages for process + +config MEM_PURGEABLE_DEBUG + bool "Purgeable memory debug" + default n + depends on MEM_PURGEABLE + help + Debug info for purgeable memory + +config PURGEABLE_ASHMEM + bool "Purgeable memory feature for ashmem" + default n + depends on MEM_PURGEABLE + help + Support purgeable ashmem for process + source "mm/damon/Kconfig" endmenu diff --git a/mm/Makefile b/mm/Makefile index f9fb7e07cdd8..f84d4b0f521d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -141,4 +141,6 @@ obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o obj-$(CONFIG_HYPERHOLD_FILE_LRU) += memcg_reclaim.o obj-$(CONFIG_HYPERHOLD_MEMCG) += memcg_control.o obj-$(CONFIG_HYPERHOLD_ZSWAPD) += zswapd.o zswapd_control.o +obj-$(CONFIG_MEM_PURGEABLE) += purgeable.o +obj-$(CONFIG_PURGEABLE_ASHMEM) += purgeable_ashmem_trigger.o obj-$(CONFIG_MEMORY_MONITOR) += memory_monitor.o diff --git a/mm/memory.c b/mm/memory.c index 78e05d3e9e4a..8a64230a1fec 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -77,7 +77,9 @@ #include #include #include - +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include @@ -1426,6 +1428,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, unsigned int delay_rmap; page = vm_normal_page(vma, addr, ptent); +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_USEREXPTE) + page = NULL; +#endif if (unlikely(!should_zap_page(details, page))) continue; ptent = ptep_get_and_clear_full(mm, addr, pte, @@ -1438,7 +1444,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, ksm_might_unmap_zero_page(mm, ptent); continue; } - +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + uxpte_clear_present(vma, addr); +#endif delay_rmap = 0; if 
(!PageAnon(page)) { if (pte_dirty(ptent)) { @@ -3144,6 +3153,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) */ ptep_clear_flush(vma, vmf->address, vmf->pte); folio_add_new_anon_rmap(new_folio, vma, vmf->address); +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) { + pr_info("set wp new folio %lx purgeable\n", folio_pfn(new_folio)); + folio_set_purgeable(new_folio); + uxpte_set_present(vma, vmf->address); + } +#endif folio_add_lru_vma(new_folio, vma); /* * We call the notify macro here because, when using secondary @@ -4103,11 +4119,23 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (pte_alloc(vma->vm_mm, vmf->pmd)) return VM_FAULT_OOM; +#ifdef CONFIG_MEM_PURGEABLE + /* use extra page table for userexpte */ + if (vma->vm_flags & VM_USEREXPTE) { + if (do_uxpte_page_fault(vmf, &entry)) + goto oom; + else + goto got_page; + } +#endif /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); +#ifdef CONFIG_MEM_PURGEABLE +got_page: +#endif vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) @@ -4172,8 +4200,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) inc_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_add_new_anon_rmap(folio, vma, vmf->address); +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + folio_set_purgeable(folio); +#endif folio_add_lru_vma(folio, vma); setpte: +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + uxpte_set_present(vma, vmf->address); +#endif if (uffd_wp) entry = pte_mkuffd_wp(entry); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); diff --git a/mm/mmap.c b/mm/mmap.c index fb2022196845..406077493c66 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -59,6 +59,11 @@ #include "internal.h" +#ifdef CONFIG_MEM_PURGEABLE +#define MAP_PURGEABLE 0x04 /* purgeable memory */ +#define MAP_USEREXPTE 0x08 /* userspace extension page table */ +#endif + #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) #endif @@ -1353,6 +1358,16 @@ unsigned long do_mmap(struct file *file, unsigned long addr, */ pgoff = addr >> PAGE_SHIFT; break; +#ifdef CONFIG_MEM_PURGEABLE + case MAP_PURGEABLE: + vm_flags |= VM_PURGEABLE; + pr_info("vm_flags purgeable = %lx.\n", VM_PURGEABLE); + break; + case MAP_USEREXPTE: + vm_flags |= VM_USEREXPTE; + pr_info("vm_flags useredpte = %lx.\n", VM_USEREXPTE); + break; +#endif default: return -EINVAL; } diff --git a/mm/purgeable.c b/mm/purgeable.c new file mode 100755 index 000000000000..eb493df6dfe6 --- /dev/null +++ b/mm/purgeable.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Huawei Device Co., Ltd. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include /* find_lock_task_mm */
+
+#include
+
+struct uxpte_t {
+	atomic64_t val;
+};
+
+#define UXPTE_SIZE_SHIFT 3
+#define UXPTE_SIZE (1 << UXPTE_SIZE_SHIFT)
+
+#define UXPTE_PER_PAGE_SHIFT (PAGE_SHIFT - UXPTE_SIZE_SHIFT)
+#define UXPTE_PER_PAGE (1 << UXPTE_PER_PAGE_SHIFT)
+
+#define UXPTE_PRESENT_BIT 1
+#define UXPTE_PRESENT_MASK ((1 << UXPTE_PRESENT_BIT) - 1)
+#define UXPTE_REFCNT_ONE (1 << UXPTE_PRESENT_BIT)
+#define UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE)
+
+#define vpn(vaddr) ((vaddr) >> PAGE_SHIFT)
+#define uxpte_pn(vaddr) (vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT)
+#define uxpte_off(vaddr) (vpn(vaddr) & (UXPTE_PER_PAGE - 1))
+#define uxpn2addr(uxpn) ((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT))
+#define uxpte_refcnt(uxpte) ((uxpte) >> UXPTE_PRESENT_BIT)
+#define uxpte_present(uxpte) ((uxpte) & UXPTE_PRESENT_MASK)
+
+static inline long uxpte_read(struct uxpte_t *uxpte)
+{
+	return atomic64_read(&uxpte->val);
+}
+
+static inline void uxpte_set(struct uxpte_t *uxpte, long val)
+{
+	atomic64_set(&uxpte->val, val);
+}
+
+static inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new)
+{
+	return atomic64_cmpxchg(&uxpte->val, old, new) == old;
+}
+
+void mm_init_uxpgd(struct mm_struct *mm)
+{
+	mm->uxpgd = NULL;
+	spin_lock_init(&mm->uxpgd_lock);
+}
+
+void mm_clear_uxpgd(struct mm_struct *mm)
+{
+	struct page *page = NULL;
+	void **slot = NULL;
+	struct radix_tree_iter iter;
+
+	spin_lock(&mm->uxpgd_lock);
+	if (!mm->uxpgd)
+		goto out;
+	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
+		page = radix_tree_delete(mm->uxpgd, iter.index);
+		put_page(page);
+	}
+out:
+	kfree(mm->uxpgd);
+	mm->uxpgd = NULL;
+	spin_unlock(&mm->uxpgd_lock);
+}
+
+/* should hold uxpgd_lock before invoke */
+static struct page *lookup_uxpte_page(struct vm_area_struct *vma,
+	unsigned long addr, bool alloc)
+{
+	struct radix_tree_root *uxpgd = NULL;
+	struct page *page = NULL;
+	struct folio *new_folio = NULL;
+	struct page *new_page = NULL;
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long uxpn = uxpte_pn(addr);
+
+	if (mm->uxpgd)
+		goto lookup;
+	if (!alloc)
+		goto out;
+	spin_unlock(&mm->uxpgd_lock);
+	uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL);
+	if (!uxpgd) {
+		pr_err("uxpgd alloc failed.\n");
+		spin_lock(&mm->uxpgd_lock);
+		goto out;
+	}
+	INIT_RADIX_TREE(uxpgd, GFP_KERNEL);
+	spin_lock(&mm->uxpgd_lock);
+	if (mm->uxpgd)
+		kfree(uxpgd);
+	else
+		mm->uxpgd = uxpgd;
+lookup:
+	page = radix_tree_lookup(mm->uxpgd, uxpn);
+	if (page)
+		goto out;
+	if (!alloc)
+		goto out;
+	spin_unlock(&mm->uxpgd_lock);
+	new_folio = vma_alloc_zeroed_movable_folio(vma, addr);
+	if (!new_folio) {
+		pr_err("uxpte page alloc fail.\n");
+		spin_lock(&mm->uxpgd_lock);
+		goto out;
+	}
+	new_page = &new_folio->page;
+	if (radix_tree_preload(GFP_KERNEL)) {
+		put_page(new_page);
+		pr_err("radix preload fail.\n");
+		spin_lock(&mm->uxpgd_lock);
+		goto out;
+	}
+	spin_lock(&mm->uxpgd_lock);
+	page = radix_tree_lookup(mm->uxpgd, uxpn);
+	if (page) {
+		put_page(new_page);
+	} else {
+		page = new_page;
+		radix_tree_insert(mm->uxpgd, uxpn, page);
+	}
+	radix_tree_preload_end();
+out:
+	return page;
+}
+
+/* should hold uxpgd_lock before invoke */
+static struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma,
+	unsigned long addr, bool alloc)
+{
+	struct uxpte_t *uxpte = NULL;
+	struct page *page = NULL;
+
+	page = lookup_uxpte_page(vma, addr, alloc);
+	if (!page)
+		return NULL;
+	uxpte = page_to_virt(page);
+
+	return uxpte + uxpte_off(addr);
+}
+
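Each uxpte above is one 64-bit word: bit 0 is a present flag and the remaining bits hold a signed pin refcount, so with 4 KiB pages a single uxpte page tracks 512 user pages (2 MiB of address space). The following standalone sketch of that arithmetic is a userspace model, not part of the patch, and assumes PAGE_SHIFT == 12:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT		12	/* assumed 4 KiB pages */
#define UXPTE_PER_PAGE_SHIFT	(PAGE_SHIFT - 3)	/* 8-byte entries */
#define UXPTE_PER_PAGE		(1 << UXPTE_PER_PAGE_SHIFT)
#define UXPTE_PRESENT_BIT	1
#define UXPTE_REFCNT_ONE	(1L << UXPTE_PRESENT_BIT)
#define UXPTE_UNDER_RECLAIM	(-UXPTE_REFCNT_ONE)

int main(void)
{
	uint64_t vaddr = 0x7f1234567000ULL;
	uint64_t vpn = vaddr >> PAGE_SHIFT;

	/* which uxpte page tracks this address, and which slot inside it */
	printf("uxpte_pn=%llu uxpte_off=%llu\n",
	       (unsigned long long)(vpn >> UXPTE_PER_PAGE_SHIFT),
	       (unsigned long long)(vpn & (UXPTE_PER_PAGE - 1)));

	/* decode: absent, present+unpinned, present+pinned once, reclaim-locked */
	long vals[] = { 0, 1, 1 + UXPTE_REFCNT_ONE, UXPTE_UNDER_RECLAIM };
	for (int i = 0; i < 4; i++)
		printf("val=%ld present=%ld refcnt=%ld\n", vals[i],
		       vals[i] & 1, vals[i] >> UXPTE_PRESENT_BIT);
	return 0;
}

Note that UXPTE_UNDER_RECLAIM decodes as refcount -1 with the present bit clear, so the single test val >> 1 in lock_uxpte() below rejects pinned and reclaim-locked entries alike.
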
+bool lock_uxpte(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+	long val = 0;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, true);
+	if (!uxpte)
+		goto unlock;
+retry:
+	val = uxpte_read(uxpte);
+	if (val >> 1)
+		goto unlock;
+	if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM))
+		goto retry;
+	val = UXPTE_UNDER_RECLAIM;
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+
+	return val == UXPTE_UNDER_RECLAIM;
+}
+
+void unlock_uxpte(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, false);
+	if (!uxpte)
+		goto unlock;
+	uxpte_set(uxpte, 0);
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+}
+
+bool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+	long val = 0;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, true);
+	if (!uxpte)
+		goto unlock;
+retry:
+	val = uxpte_read(uxpte);
+	if (val & 1)
+		goto unlock;
+	if (!uxpte_cas(uxpte, val, val + 1))
+		goto retry;
+	val++;
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+
+	return val & 1;
+}
+
+void uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+	long val = 0;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, false);
+	if (!uxpte)
+		goto unlock;
+retry:
+	val = uxpte_read(uxpte);
+	if (!(val & 1))
+		goto unlock;
+	if (!uxpte_cas(uxpte, val, val - 1))
+		goto retry;
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+}
+
+vm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long vma_uxpn = vma->vm_pgoff;
+	unsigned long off_uxpn = vpn(vmf->address - vma->vm_start);
+	unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn);
+	struct page *page = NULL;
+
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	page = lookup_uxpte_page(vma, addr, true);
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+
+	if (!page)
+		return VM_FAULT_OOM;
+
+	*entry = mk_pte(page, vma->vm_page_prot);
+	*entry = pte_sw_mkyoung(*entry);
+	if (vma->vm_flags & VM_WRITE)
+		*entry = pte_mkwrite(pte_mkdirty(*entry), vma);
+	return 0;
+}
+
+static void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
+	unsigned long *pined_purg_pages)
+{
+	struct page *page = NULL;
+	void **slot = NULL;
+	struct radix_tree_iter iter;
+	struct uxpte_t *uxpte = NULL;
+	long pte_entry = 0;
+	int index = 0;
+	unsigned long nr_total = 0, nr_pined = 0;
+
+	spin_lock(&mm->uxpgd_lock);
+	if (!mm->uxpgd)
+		goto out;
+	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+		uxpte = page_to_virt(page);
+		for (index = 0; index < UXPTE_PER_PAGE; index++) {
+			pte_entry = uxpte_read(&(uxpte[index]));
+			if (uxpte_present(pte_entry) == 0) /* not present */
+				continue;
+			nr_total++;
+			if (uxpte_refcnt(pte_entry) > 0) /* pined by user */
+				nr_pined++;
+		}
+	}
+out:
+	spin_unlock(&mm->uxpgd_lock);
+
+	if (total_purg_pages)
+		*total_purg_pages = nr_total;
+
+	if (pined_purg_pages)
+		*pined_purg_pages = nr_pined;
+}
+
+void mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
+	unsigned long *pined_purg_pages)
+{
+	if (unlikely(!mm))
+		return;
+
+	if (!total_purg_pages && !pined_purg_pages)
+		return;
+
+	__mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages);
+}
+
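Together these helpers form a small lock-free protocol on that encoding: uxpte_set_present()/uxpte_clear_present() mirror mapping state, userspace pins a page by raising the refcount through the uxpte pages that do_uxpte_page_fault() maps into a VM_USEREXPTE region (presumably why a kernel-only lock would not suffice), and lock_uxpte() lets reclaim claim an entry only while its refcount is zero. A minimal userspace model with C11 atomics standing in for atomic64_*; the pin helper is hypothetical, shown for contrast with the reclaim side:

#include <stdatomic.h>
#include <stdbool.h>

#define PRESENT_BIT	1
#define REFCNT_ONE	(1L << PRESENT_BIT)
#define UNDER_RECLAIM	(-REFCNT_ONE)

/* mirrors lock_uxpte(): succeed only while the refcount is zero */
static bool try_lock_for_reclaim(_Atomic long *uxpte)
{
	long val = atomic_load(uxpte);

	while (!(val >> PRESENT_BIT)) {
		if (atomic_compare_exchange_weak(uxpte, &val, UNDER_RECLAIM))
			return true;	/* entry now reads as refcount -1 */
	}
	return false;		/* pinned, or another reclaimer owns it */
}

/* hypothetical user-side pin: back off while reclaim holds the entry */
static bool try_pin(_Atomic long *uxpte)
{
	long val = atomic_load(uxpte);

	while (val != UNDER_RECLAIM) {
		if (atomic_compare_exchange_weak(uxpte, &val, val + REFCNT_ONE))
			return true;
	}
	return false;
}

Either side loses a race only by retrying or backing off; neither blocks the other, which is what allows reclaim to run safely against live user pins.
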
+void purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages)
+{
+	struct task_struct *p = NULL;
+	struct task_struct *tsk = NULL;
+	unsigned long mm_nr_purge = 0, mm_nr_pined = 0;
+	unsigned long nr_total = 0, nr_pined = 0;
+
+	if (!total_purg_pages && !pined_purg_pages)
+		return;
+
+	if (total_purg_pages)
+		*total_purg_pages = 0;
+
+	if (pined_purg_pages)
+		*pined_purg_pages = 0;
+
+	rcu_read_lock();
+	for_each_process(p) {
+		tsk = find_lock_task_mm(p);
+		if (!tsk) {
+			/*
+			 * It is a kthread or all of p's threads have already
+			 * detached their mm's.
+			 */
+			continue;
+		}
+		__mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined);
+		nr_total += mm_nr_purge;
+		nr_pined += mm_nr_pined;
+		task_unlock(tsk);
+
+		if (mm_nr_purge > 0) {
+			pr_info("purgemm: tsk: %s %lu pined in %lu pages\n", tsk->comm ?: "NULL",
+				mm_nr_pined, mm_nr_purge);
+		}
+	}
+	rcu_read_unlock();
+	if (total_purg_pages)
+		*total_purg_pages = nr_total;
+
+	if (pined_purg_pages)
+		*pined_purg_pages = nr_pined;
+	pr_info("purgemm: Sum: %lu pined in %lu pages\n", nr_pined, nr_total);
+}
diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c
new file mode 100755
index 000000000000..e0124622b320
--- /dev/null
+++ b/mm/purgeable_ashmem_trigger.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Huawei Technologies Co., Ltd.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../drivers/staging/android/ashmem.h"
+
+#define PURGEABLE_ASHMEM_SHRINKALL_ARG 0
+
+struct purgeable_ashmem_trigger_args {
+	struct seq_file *seq;
+	struct task_struct *tsk;
+};
+
+static int purgeable_ashmem_trigger_cb(const void *data,
+	struct file *f, unsigned int fd)
+{
+	const struct purgeable_ashmem_trigger_args *args = data;
+	struct task_struct *tsk = args->tsk;
+	struct purgeable_ashmem_metadata pmdata;
+
+	if (!is_ashmem_file(f))
+		return 0;
+	if (!get_purgeable_ashmem_metadata(f, &pmdata))
+		return 0;
+	if (pmdata.is_purgeable) {
+		pmdata.name = pmdata.name == NULL ?
"" : pmdata.name; + seq_printf(args->seq, + "%s,%u,%u,%ld,%s,%zu,%u,%u,%d,%d\n", + tsk->comm, tsk->pid, fd, (long)tsk->signal->oom_score_adj, + pmdata.name, pmdata.size, pmdata.id, pmdata.create_time, + pmdata.refc, pmdata.purged); + } + return 0; +} + +static ssize_t purgeable_ashmem_trigger_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) +{ + char *buf; + unsigned int ashmem_id = 0; + unsigned int create_time = 0; + const unsigned int params_num = 2; + const struct cred *cred = current_cred(); + + if (!cred) + return 0; + + if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && + !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + pr_err("no permission to shrink purgeable ashmem!\n"); + return 0; + } + buf = memdup_user_nul(buffer, count); + buf = strstrip(buf); + if (sscanf(buf, "%u %u", &ashmem_id, &create_time) != params_num) + return -EINVAL; + if (ashmem_id == PURGEABLE_ASHMEM_SHRINKALL_ARG && + create_time == PURGEABLE_ASHMEM_SHRINKALL_ARG) + ashmem_shrinkall(); + else + ashmem_shrink_by_id(ashmem_id, create_time); + return count; +} + +static int purgeable_ashmem_trigger_show(struct seq_file *s, void *d) +{ + struct task_struct *tsk = NULL; + struct purgeable_ashmem_trigger_args cb_args; + const struct cred *cred = current_cred(); + + if (!cred) + return -EINVAL; + + if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && + !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + pr_err("no permission to shrink purgeable ashmem!\n"); + return -EINVAL; + } + seq_puts(s, "Process purgeable ashmem detail info:\n"); + seq_puts(s, "----------------------------------------------------\n"); + seq_printf(s, "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", + "process_name", "pid", "adj", "fd", + "ashmem_name", "size", "id", "time", "ref_count", "purged"); + + ashmem_mutex_lock(); + rcu_read_lock(); + for_each_process(tsk) { + if (tsk->flags & PF_KTHREAD) + continue; + cb_args.seq = s; + cb_args.tsk = tsk; + + task_lock(tsk); + iterate_fd(tsk->files, 0, + purgeable_ashmem_trigger_cb, (void *)&cb_args); + task_unlock(tsk); + } + rcu_read_unlock(); + ashmem_mutex_unlock(); + seq_puts(s, "----------------------------------------------------\n"); + return 0; +} + +static int purgeable_ashmem_trigger_open(struct inode *inode, + struct file *file) +{ + return single_open(file, purgeable_ashmem_trigger_show, + inode->i_private); +} + +static const struct proc_ops purgeable_ashmem_trigger_fops = { + .proc_open = purgeable_ashmem_trigger_open, + .proc_write = purgeable_ashmem_trigger_write, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +void init_purgeable_ashmem_trigger(void) +{ + struct proc_dir_entry *entry = NULL; + + entry = proc_create_data("purgeable_ashmem_trigger", 0666, + NULL, &purgeable_ashmem_trigger_fops, NULL); + if (!entry) + pr_err("Failed to create purgeable ashmem trigger\n"); +} diff --git a/mm/rmap.c b/mm/rmap.c index 9f795b93cf40..d61242e91b12 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -75,6 +75,7 @@ #include #include #include +#include #include @@ -811,6 +812,10 @@ static bool folio_referenced_one(struct folio *folio, while (page_vma_mapped_walk(&pvmw)) { address = pvmw.address; +#ifdef CONFIG_MEM_PURGEABLE + if (!(vma->vm_flags & VM_PURGEABLE)) + pra->vm_flags &= ~VM_PURGEABLE; +#endif if ((vma->vm_flags & VM_LOCKED) && (!folio_test_large(folio) || !pvmw.pte)) { /* Restore the mlock which got missed */ @@ -850,6 +855,9 @@ static bool folio_referenced_one(struct folio *folio, if (referenced) { pra->referenced++; pra->vm_flags |= vma->vm_flags & ~VM_LOCKED; 
+#ifdef CONFIG_MEM_PURGEABLE + pra->vm_flags |= vma->vm_flags & ~VM_PURGEABLE; +#endif } if (!pra->mapcount) @@ -901,6 +909,9 @@ int folio_referenced(struct folio *folio, int is_locked, struct folio_referenced_arg pra = { .mapcount = folio_mapcount(folio), .memcg = memcg, +#ifdef CONFIG_MEM_PURGEABLE + .vm_flags = VM_PURGEABLE, +#endif }; struct rmap_walk_control rwc = { .rmap_one = folio_referenced_one, @@ -1522,6 +1533,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); +#ifdef CONFIG_MEM_PURGEABLE + if ((vma->vm_flags & VM_PURGEABLE) && !lock_uxpte(vma, address)) { + ret = false; + page_vma_mapped_walk_done(&pvmw); + break; + } +#endif /* * If the folio is in an mlock()d vma, we must not swap it out. */ @@ -1639,7 +1657,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); } +#ifdef CONFIG_MEM_PURGEABLE + } else if ((vma->vm_flags & VM_PURGEABLE) || (pte_unused(pteval) && + !userfaultfd_armed(vma))) { +#else } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { +#endif +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + unlock_uxpte(vma, address); +#endif + /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan diff --git a/mm/vmscan.c b/mm/vmscan.c index 3f48a713f020..b21c7ac84281 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1445,6 +1445,7 @@ void folio_putback_lru(struct folio *folio) enum folio_references { FOLIOREF_RECLAIM, FOLIOREF_RECLAIM_CLEAN, + FOLIOREF_RECLAIM_PURGEABLE, FOLIOREF_KEEP, FOLIOREF_ACTIVATE, }; @@ -1466,10 +1467,16 @@ static enum folio_references folio_check_references(struct folio *folio, if (vm_flags & VM_LOCKED) return FOLIOREF_ACTIVATE; + /* rmap lock contention: rotate */ if (referenced_ptes == -1) return FOLIOREF_KEEP; +#ifdef CONFIG_MEM_PURGEABLE + if (vm_flags & VM_PURGEABLE) + return FOLIOREF_RECLAIM_PURGEABLE; +#endif + if (referenced_ptes) { /* * All mapped folios start out with page table @@ -1796,6 +1803,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, goto keep_locked; case FOLIOREF_RECLAIM: case FOLIOREF_RECLAIM_CLEAN: + case FOLIOREF_RECLAIM_PURGEABLE: ; /* try to reclaim the folio below */ } @@ -1816,7 +1824,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, * Lazyfree folio could be freed directly */ if (folio_test_anon(folio) && folio_test_swapbacked(folio)) { - if (!folio_test_swapcache(folio)) { + if (!folio_test_swapcache(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; if (folio_maybe_dma_pinned(folio)) @@ -1898,7 +1906,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, goto activate_locked; mapping = folio_mapping(folio); - if (folio_test_dirty(folio)) { + if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { /* * Only kswapd can writeback filesystem folios * to avoid risk of stack overflow. 
But avoid @@ -2013,10 +2021,17 @@ unsigned int shrink_folio_list(struct list_head *folio_list, } } - if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) { + if (folio_test_anon(folio) && (!folio_test_swapbacked(folio) || references == FOLIOREF_RECLAIM_PURGEABLE)) { /* follow __remove_mapping for reference */ if (!folio_ref_freeze(folio, 1)) goto keep_locked; +// missing +/* + if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { + folio__ref_unfreeze(folio, 1); + goto keep_locked; + } +*/ /* * The folio has only one reference left, which is * from the isolation. After the caller puts the @@ -7942,6 +7957,10 @@ void __meminit kswapd_stop(int nid) pgdat_kswapd_unlock(pgdat); } +#ifdef CONFIG_MEM_PURGEABLE_DEBUG +static void __init purgeable_debugfs_init(void); +#endif + static int __init kswapd_init(void) { int nid; @@ -7949,6 +7968,9 @@ static int __init kswapd_init(void) swap_setup(); for_each_node_state(nid, N_MEMORY) kswapd_run(nid); +#ifdef CONFIG_MEM_PURGEABLE_DEBUG + purgeable_debugfs_init(); +#endif return 0; } @@ -8174,3 +8196,75 @@ void check_move_unevictable_folios(struct folio_batch *fbatch) } } EXPORT_SYMBOL_GPL(check_move_unevictable_folios); + +#ifdef CONFIG_MEM_PURGEABLE_DEBUG +static unsigned long purgeable_node(pg_data_t *pgdata, struct scan_control *sc) +{ + struct mem_cgroup *memcg = NULL; + unsigned long nr = 0; +#ifdef CONFIG_MEMCG + while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))) +#endif + { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdata); + + shrink_list(LRU_ACTIVE_PURGEABLE, -1, lruvec, sc); + nr += shrink_list(LRU_INACTIVE_PURGEABLE, -1, lruvec, sc); + } + + pr_info("reclaim %lu purgeable pages.\n", nr); + + return nr; +} + +static int purgeable(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .order = 0, + .priority = DEF_PRIORITY, + .may_deactivate = DEACTIVATE_ANON, + .may_writepage = 1, + .may_unmap = 1, + .may_swap = 1, + .reclaim_idx = MAX_NR_ZONES - 1, + }; + int nid = 0; + const struct cred *cred = current_cred(); + if (!cred) + return 0; + + if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && + !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + pr_err("no permission to shrink purgeable heap!\n"); + return -EINVAL; + } + for_each_node_state(nid, N_MEMORY) + purgeable_node(NODE_DATA(nid), &sc); + return 0; +} + +static struct ctl_table ker_tab[] = { + { + .procname = "purgeable", + .mode = 0666, + .proc_handler = purgeable, + }, + {}, +}; + +static struct ctl_table_header *purgeable_header; + +static void __init purgeable_debugfs_init(void) +{ + purgeable_header = register_sysctl("kernel", ker_tab); + if (!purgeable_header) + pr_err("register purgeable sysctl table failed.\n"); +} + +static void __exit purgeable_debugfs_exit(void) +{ + unregister_sysctl_table(purgeable_header); +} +#endif /* CONFIG_MEM_PURGEABLE_DEBUG */ diff --git a/mm/vmstat.c b/mm/vmstat.c index dcbd443881f9..1195132d5ea1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1172,6 +1172,10 @@ const char * const vmstat_text[] = { "nr_zone_active_anon", "nr_zone_inactive_file", "nr_zone_active_file", +#ifdef CONFIG_MEM_PURGEABLE + "nr_zone_inactive_purgeable", + "nr_zone_active_purgeable", +#endif "nr_zone_unevictable", "nr_zone_write_pending", "nr_mlock", @@ -1199,6 +1203,10 @@ const char * const vmstat_text[] = { "nr_active_anon", "nr_inactive_file", "nr_active_file", +#ifdef CONFIG_MEM_PURGEABLE + "nr_inactive_purgeable", + "nr_active_purgeable", +#endif "nr_unevictable", 
"nr_slab_reclaimable", "nr_slab_unreclaimable", -- Gitee From 1922a636cd765f99b24dc9d627b5b98a0e86144f Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Thu, 17 Oct 2024 07:03:09 +0000 Subject: [PATCH 02/15] update mm/vmscan.c. Signed-off-by: ma_yulong --- mm/vmscan.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index b21c7ac84281..0203fd116907 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2025,13 +2025,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, /* follow __remove_mapping for reference */ if (!folio_ref_freeze(folio, 1)) goto keep_locked; -// missing -/* - if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { - folio__ref_unfreeze(folio, 1); - goto keep_locked; - } -*/ + /* * The folio has only one reference left, which is * from the isolation. After the caller puts the -- Gitee From e32299978c58732e55aca65a2b96fe0be18af022 Mon Sep 17 00:00:00 2001 From: "stone.shi" Date: Thu, 17 Oct 2024 09:07:04 +0000 Subject: [PATCH 03/15] update mm/purgeable_ashmem_trigger.c. Signed-off-by: stone.shi --- mm/purgeable_ashmem_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c index e0124622b320..264d978d6d10 100755 --- a/mm/purgeable_ashmem_trigger.c +++ b/mm/purgeable_ashmem_trigger.c @@ -127,7 +127,7 @@ void init_purgeable_ashmem_trigger(void) { struct proc_dir_entry *entry = NULL; - entry = proc_create_data("purgeable_ashmem_trigger", 0666, + entry = proc_create_data("purgeable_ashmem_trigger", 0660, NULL, &purgeable_ashmem_trigger_fops, NULL); if (!entry) pr_err("Failed to create purgeable ashmem trigger\n"); -- Gitee From 75aa48e97017e572306b8d977a54e19eb6c745ab Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Thu, 17 Oct 2024 09:10:53 +0000 Subject: [PATCH 04/15] update mm/mmap.c. Signed-off-by: ma_yulong --- mm/mmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index 406077493c66..fdd2291a987d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1361,11 +1361,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, #ifdef CONFIG_MEM_PURGEABLE case MAP_PURGEABLE: vm_flags |= VM_PURGEABLE; - pr_info("vm_flags purgeable = %lx.\n", VM_PURGEABLE); break; case MAP_USEREXPTE: vm_flags |= VM_USEREXPTE; - pr_info("vm_flags useredpte = %lx.\n", VM_USEREXPTE); break; #endif default: -- Gitee From 1362dd91f06049ff4778eab29d6c5bd71b63db23 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Thu, 17 Oct 2024 09:11:54 +0000 Subject: [PATCH 05/15] update mm/purgeable.c. Signed-off-by: ma_yulong --- mm/purgeable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable.c b/mm/purgeable.c index eb493df6dfe6..54bee931cb1b 100755 --- a/mm/purgeable.c +++ b/mm/purgeable.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2022 Huawei Device Co., Ltd. + * Copyright (c) 2024 Huawei Device Co., Ltd. */ #include -- Gitee From b6b8f5f584f9afd63f17ccc8f658d004b6b798c8 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Thu, 17 Oct 2024 09:12:32 +0000 Subject: [PATCH 06/15] update mm/purgeable_ashmem_trigger.c. 
Signed-off-by: ma_yulong --- mm/purgeable_ashmem_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c index 264d978d6d10..73759333d645 100755 --- a/mm/purgeable_ashmem_trigger.c +++ b/mm/purgeable_ashmem_trigger.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2023 Huawei Technologies Co., Ltd. + * Copyright (c) 2024 Huawei Technologies Co., Ltd. */ #include -- Gitee From a836bf00ee99cceaa3e6ab358050f4b85c7c9479 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 01:56:32 +0000 Subject: [PATCH 07/15] Revert "update mm/purgeable_ashmem_trigger.c." This reverts commit b6b8f5f584f9afd63f17ccc8f658d004b6b798c8. --- mm/purgeable_ashmem_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c index 73759333d645..264d978d6d10 100755 --- a/mm/purgeable_ashmem_trigger.c +++ b/mm/purgeable_ashmem_trigger.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * Copyright (c) 2023 Huawei Technologies Co., Ltd. */ #include -- Gitee From 92fb74a02e440dd992cf28dcf6fe4e95721e94da Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 01:56:50 +0000 Subject: [PATCH 08/15] Revert "Revert "update mm/purgeable_ashmem_trigger.c."" This reverts commit a836bf00ee99cceaa3e6ab358050f4b85c7c9479. --- mm/purgeable_ashmem_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c index 264d978d6d10..73759333d645 100755 --- a/mm/purgeable_ashmem_trigger.c +++ b/mm/purgeable_ashmem_trigger.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2023 Huawei Technologies Co., Ltd. + * Copyright (c) 2024 Huawei Technologies Co., Ltd. */ #include -- Gitee From 6865b57c7479f46747ddf18664fd5aaee4a46e46 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 01:57:04 +0000 Subject: [PATCH 09/15] Revert "Revert "Revert "update mm/purgeable_ashmem_trigger.c.""" This reverts commit 92fb74a02e440dd992cf28dcf6fe4e95721e94da. --- mm/purgeable_ashmem_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c index 73759333d645..264d978d6d10 100755 --- a/mm/purgeable_ashmem_trigger.c +++ b/mm/purgeable_ashmem_trigger.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2024 Huawei Technologies Co., Ltd. + * Copyright (c) 2023 Huawei Technologies Co., Ltd. */ #include -- Gitee From e6477d8e50ab85dc455ff90e20052aa3ef46303b Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 02:00:02 +0000 Subject: [PATCH 10/15] Revert "update mm/purgeable.c." This reverts commit 1362dd91f06049ff4778eab29d6c5bd71b63db23. --- mm/purgeable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable.c b/mm/purgeable.c index 54bee931cb1b..eb493df6dfe6 100755 --- a/mm/purgeable.c +++ b/mm/purgeable.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2024 Huawei Device Co., Ltd. + * Copyright (c) 2022 Huawei Device Co., Ltd. */ #include -- Gitee From d0eaa022e1ffb34c3abe02ea00a62c097b37af51 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 02:00:42 +0000 Subject: [PATCH 11/15] Revert "update mm/mmap.c." This reverts commit 75aa48e97017e572306b8d977a54e19eb6c745ab. 
--- mm/mmap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index fdd2291a987d..406077493c66 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1361,9 +1361,11 @@ unsigned long do_mmap(struct file *file, unsigned long addr, #ifdef CONFIG_MEM_PURGEABLE case MAP_PURGEABLE: vm_flags |= VM_PURGEABLE; + pr_info("vm_flags purgeable = %lx.\n", VM_PURGEABLE); break; case MAP_USEREXPTE: vm_flags |= VM_USEREXPTE; + pr_info("vm_flags useredpte = %lx.\n", VM_USEREXPTE); break; #endif default: -- Gitee From 2bcb9b2b02a74a85e214a72ffe05b305fa4ee933 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 02:01:07 +0000 Subject: [PATCH 12/15] Revert "update mm/purgeable_ashmem_trigger.c." This reverts commit e32299978c58732e55aca65a2b96fe0be18af022. --- mm/purgeable_ashmem_trigger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c index 264d978d6d10..e0124622b320 100755 --- a/mm/purgeable_ashmem_trigger.c +++ b/mm/purgeable_ashmem_trigger.c @@ -127,7 +127,7 @@ void init_purgeable_ashmem_trigger(void) { struct proc_dir_entry *entry = NULL; - entry = proc_create_data("purgeable_ashmem_trigger", 0660, + entry = proc_create_data("purgeable_ashmem_trigger", 0666, NULL, &purgeable_ashmem_trigger_fops, NULL); if (!entry) pr_err("Failed to create purgeable ashmem trigger\n"); -- Gitee From 912b417e38614921ad6a7017ad5e4a5493f3a90d Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 02:01:41 +0000 Subject: [PATCH 13/15] Revert "update mm/vmscan.c." This reverts commit 1922a636cd765f99b24dc9d627b5b98a0e86144f. --- mm/vmscan.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 0203fd116907..b21c7ac84281 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2025,7 +2025,13 @@ unsigned int shrink_folio_list(struct list_head *folio_list, /* follow __remove_mapping for reference */ if (!folio_ref_freeze(folio, 1)) goto keep_locked; - +// missing +/* + if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { + folio__ref_unfreeze(folio, 1); + goto keep_locked; + } +*/ /* * The folio has only one reference left, which is * from the isolation. After the caller puts the -- Gitee From 08f890c850c0a7764468772a9b2a14f7da749d6d Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 02:02:17 +0000 Subject: [PATCH 14/15] Revert "purgeable memory kernel support" This reverts commit 5742c4bea7a51b1ba1a3dc457738cfcf8dd5a2e0. 
--- fs/proc/meminfo.c | 31 +-- fs/proc/task_mmu.c | 12 -- include/linux/mm.h | 18 -- include/linux/mm_inline.h | 4 - include/linux/mm_types.h | 4 - include/linux/mmzone.h | 21 +- include/linux/page-flags.h | 9 - include/trace/events/mmflags.h | 11 +- kernel/fork.c | 10 +- mm/Kconfig | 23 --- mm/Makefile | 2 - mm/memory.c | 40 +--- mm/mmap.c | 15 -- mm/purgeable.c | 348 --------------------------------- mm/purgeable_ashmem_trigger.c | 134 ------------- mm/rmap.c | 28 --- mm/vmscan.c | 100 +--------- mm/vmstat.c | 8 - 18 files changed, 11 insertions(+), 807 deletions(-) delete mode 100755 mm/purgeable.c delete mode 100755 mm/purgeable_ashmem_trigger.c diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 875306ca2b39..45af9a989d40 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -17,9 +17,6 @@ #ifdef CONFIG_CMA #include #endif -#ifdef CONFIG_MEM_PURGEABLE -#include -#endif #include #include #include "internal.h" @@ -43,11 +40,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) unsigned long pages[NR_LRU_LISTS]; unsigned long sreclaimable, sunreclaim; int lru; - unsigned long nr_purg_active = 0; - unsigned long nr_purg_inactive = 0; -#ifdef CONFIG_MEM_PURGEABLE - unsigned long nr_purg_pined = 0; -#endif si_meminfo(&i); si_swapinfo(&i); @@ -61,13 +53,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) pages[lru] = global_node_page_state(NR_LRU_BASE + lru); -#ifdef CONFIG_MEM_PURGEABLE - nr_purg_active = pages[LRU_ACTIVE_PURGEABLE]; - nr_purg_inactive = pages[LRU_INACTIVE_PURGEABLE]; - purg_pages_info(NULL, &nr_purg_pined); - nr_purg_pined = min(nr_purg_pined, nr_purg_active + nr_purg_inactive); -#endif - available = si_mem_available(); sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B); sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B); @@ -79,25 +64,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Cached: ", cached); show_val_kb(m, "SwapCached: ", total_swapcache_pages()); show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] + -#ifdef CONFIG_MEM_PURGEABLE - pages[LRU_ACTIVE_FILE] + - nr_purg_active); -#else - pages[LRU_ACTIVE_FILE]); -#endif - + pages[LRU_ACTIVE_FILE]); show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] + - pages[LRU_INACTIVE_FILE] + - nr_purg_inactive); + pages[LRU_INACTIVE_FILE]); show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]); show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]); show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]); show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]); -#ifdef CONFIG_MEM_PURGEABLE - show_val_kb(m, "Active(purg): ", nr_purg_active); - show_val_kb(m, "Inactive(purg): ", nr_purg_inactive); - show_val_kb(m, "Pined(purg): ", nr_purg_pined); -#endif show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]); show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK)); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 415a5bef7424..bd8285811728 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -20,9 +20,6 @@ #include #include #include -#ifdef CONFIG_MEM_PURGEABLE -#include -#endif #include #include @@ -36,11 +33,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long text, lib, swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; -#ifdef CONFIG_MEM_PURGEABLE - unsigned long nr_purg_sum = 0, nr_purg_pin = 0; - - mm_purg_pages_info(mm, &nr_purg_sum, &nr_purg_pin); -#endif anon = get_mm_counter(mm, 
MM_ANONPAGES); file = get_mm_counter(mm, MM_FILEPAGES); @@ -84,10 +76,6 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) seq_put_decimal_ull_width(m, " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); -#ifdef CONFIG_MEM_PURGEABLE - SEQ_PUT_DEC(" kB\nPurgSum:\t", nr_purg_sum); - SEQ_PUT_DEC(" kB\nPurgPin:\t", nr_purg_pin); -#endif seq_puts(m, " kB\n"); hugetlb_report_usage(m, mm); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 83f791100b84..7179e3f6a030 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -320,34 +320,16 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_7 39 /* bit only usable on 64-bit architectures */ -#ifdef CONFIG_MEM_PURGEABLE -#define VM_HIGH_ARCH_BIT_8 40 /* bit only usable on 64-bit architectures */ -#define VM_HIGH_ARCH_BIT_9 41 /* bit only usable on 64-bit architectures */ -#endif /* CONFIG_MEM_PURGEABLE */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) -#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #define VM_HIGH_ARCH_7 BIT(VM_HIGH_ARCH_BIT_7) -#ifdef CONFIG_MEM_PURGEABLE -#define VM_HIGH_ARCH_8 BIT(VM_HIGH_ARCH_BIT_8) -#define VM_HIGH_ARCH_9 BIT(VM_HIGH_ARCH_BIT_9) -#endif /* CONFIG_MEM_PURGEABLE */ #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ -#ifdef CONFIG_MEM_PURGEABLE -#define VM_PURGEABLE VM_HIGH_ARCH_8 -#define VM_USEREXPTE VM_HIGH_ARCH_9 -#else /* CONFIG_MEM_PURGEABLE */ -#define VM_PURGEABLE 0 -#define VM_USEREXPTE 0 -#endif /* CONFIG_MEM_PURGEABLE */ - #ifdef CONFIG_SECURITY_XPM #define VM_XPM VM_HIGH_ARCH_7 #else /* CONFIG_SECURITY_XPM */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 027591c9decb..96b1c157554c 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -93,10 +93,6 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return LRU_UNEVICTABLE; lru = folio_is_file_lru(folio) ? 
LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; -#ifdef CONFIG_MEM_PURGEABLE - if (folio_test_purgeable(folio)) - lru = LRU_INACTIVE_PURGEABLE; -#endif if (folio_test_active(folio)) lru += LRU_ACTIVE; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7264a43f8d18..db7003d2886f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -703,10 +703,6 @@ struct mm_struct { #endif unsigned long task_size; /* size of task vm space */ pgd_t * pgd; -#ifdef CONFIG_MEM_PURGEABLE - void *uxpgd; - spinlock_t uxpgd_lock; -#endif #ifdef CONFIG_MEMBARRIER /** diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 84da48194dbc..9cf03644fe90 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -146,10 +146,6 @@ enum zone_stat_item { NR_ZONE_ACTIVE_ANON, NR_ZONE_INACTIVE_FILE, NR_ZONE_ACTIVE_FILE, -#ifdef CONFIG_MEM_PURGEABLE - NR_ZONE_INACTIVE_PURGEABLE, - NR_ZONE_ACTIVE_PURGEABLE, -#endif NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ @@ -170,10 +166,6 @@ enum node_stat_item { NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ -#ifdef CONFIG_MEM_PURGEABLE - NR_INACTIVE_PURGEABLE, - NR_ACTIVE_PURGEABLE, -#endif NR_UNEVICTABLE, /* " " " " " */ NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, @@ -276,19 +268,12 @@ static __always_inline bool vmstat_item_in_bytes(int idx) #define LRU_BASE 0 #define LRU_ACTIVE 1 #define LRU_FILE 2 -#ifdef CONFIG_MEM_PURGEABLE -#define LRU_PURGEABLE 4 -#endif enum lru_list { LRU_INACTIVE_ANON = LRU_BASE, LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, -#ifdef CONFIG_MEM_PURGEABLE - LRU_INACTIVE_PURGEABLE = LRU_BASE + LRU_PURGEABLE, - LRU_ACTIVE_PURGEABLE = LRU_BASE + LRU_PURGEABLE + LRU_ACTIVE, -#endif LRU_UNEVICTABLE, NR_LRU_LISTS }; @@ -303,7 +288,7 @@ enum vmscan_throttle_state { #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) -#define for_each_evictable_lru(lru) for (lru = 0; lru < LRU_UNEVICTABLE; lru++) +#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) static inline bool is_file_lru(enum lru_list lru) { @@ -312,10 +297,6 @@ static inline bool is_file_lru(enum lru_list lru) static inline bool is_active_lru(enum lru_list lru) { -#ifdef CONFIG_MEM_PURGEABLE - if (lru == LRU_ACTIVE_PURGEABLE) - return true; -#endif return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index af9b7524c692..4d2e0c913baf 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -136,9 +136,6 @@ enum pageflags { PG_arch_2, PG_arch_3, #endif -#ifdef CONFIG_MEM_PURGEABLE - PG_purgeable, -#endif #ifdef CONFIG_SECURITY_XPM PG_xpm_readonly, PG_xpm_writetainted, @@ -618,12 +615,6 @@ PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif -#ifdef CONFIG_MEM_PURGEABLE -PAGEFLAG(Purgeable, purgeable, PF_ANY) -#else -PAGEFLAG_FALSE(Purgeable) -#endif - /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 5fee97d06e6e..0a224af0e59a 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -65,16 +65,10 @@ __def_gfpflag_names 
__def_gfpflag_names_kasan \ ) : "none" -#ifdef CONFIG_MEM_PURGEABLE -#define IF_HAVE_PG_PURGEABLE(_name) ,{1UL << PG_##_name, __stringify(_name)} -#else -#define IF_HAVE_PG_PURGEABLE(_name) -#endif - #ifdef CONFIG_SECURITY_XPM -#define IF_HAVE_PG_XPM_INTEGRITY(_name) ,{1UL << PG_##_name, __stringify(_name)} +#define IF_HAVE_PG_XPM_INTEGRITY(flag,string) ,{1UL << flag, string} #else -#define IF_HAVE_PG_XPM_INTEGRITY(_name) +#define IF_HAVE_PG_XPM_INTEGRITY(flag,string) #endif #ifdef CONFIG_MMU @@ -131,7 +125,6 @@ DEF_PAGEFLAG_NAME(reclaim), \ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ -IF_HAVE_PG_PURGEABLE(purgeable) \ IF_HAVE_PG_MLOCK(mlocked) \ IF_HAVE_PG_UNCACHED(uncached) \ IF_HAVE_PG_HWPOISON(hwpoison) \ diff --git a/kernel/fork.c b/kernel/fork.c index 515267609be9..92611a26a392 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -99,9 +99,7 @@ #include #include #include -#ifdef CONFIG_MEM_PURGEABLE -#include -#endif + #include #include #include @@ -795,9 +793,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, static inline int mm_alloc_pgd(struct mm_struct *mm) { -#ifdef CONFIG_MEM_PURGEABLE - mm_init_uxpgd(mm); -#endif mm->pgd = pgd_alloc(mm); if (unlikely(!mm->pgd)) return -ENOMEM; @@ -807,9 +802,6 @@ static inline int mm_alloc_pgd(struct mm_struct *mm) static inline void mm_free_pgd(struct mm_struct *mm) { pgd_free(mm, mm->pgd); -#ifdef CONFIG_MEM_PURGEABLE - mm_clear_uxpgd(mm); -#endif } #else static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) diff --git a/mm/Kconfig b/mm/Kconfig index ee4c2cf539d7..544d113729eb 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1303,29 +1303,6 @@ config LOCK_MM_AND_FIND_VMA bool depends on !STACK_GROWSUP - -config MEM_PURGEABLE - bool "Purgeable memory feature" - default n - depends on 64BIT - select ARCH_USES_HIGH_VMA_FLAGS - help - Support purgeable pages for process - -config MEM_PURGEABLE_DEBUG - bool "Purgeable memory debug" - default n - depends on MEM_PURGEABLE - help - Debug info for purgeable memory - -config PURGEABLE_ASHMEM - bool "Purgeable memory feature for ashmem" - default n - depends on MEM_PURGEABLE - help - Support purgeable ashmem for process - source "mm/damon/Kconfig" endmenu diff --git a/mm/Makefile b/mm/Makefile index f84d4b0f521d..f9fb7e07cdd8 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -141,6 +141,4 @@ obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o obj-$(CONFIG_HYPERHOLD_FILE_LRU) += memcg_reclaim.o obj-$(CONFIG_HYPERHOLD_MEMCG) += memcg_control.o obj-$(CONFIG_HYPERHOLD_ZSWAPD) += zswapd.o zswapd_control.o -obj-$(CONFIG_MEM_PURGEABLE) += purgeable.o -obj-$(CONFIG_PURGEABLE_ASHMEM) += purgeable_ashmem_trigger.o obj-$(CONFIG_MEMORY_MONITOR) += memory_monitor.o diff --git a/mm/memory.c b/mm/memory.c index 8a64230a1fec..78e05d3e9e4a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -77,9 +77,7 @@ #include #include #include -#ifdef CONFIG_MEM_PURGEABLE -#include -#endif + #include #include @@ -1428,10 +1426,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, unsigned int delay_rmap; page = vm_normal_page(vma, addr, ptent); -#ifdef CONFIG_MEM_PURGEABLE - if (vma->vm_flags & VM_USEREXPTE) - page = NULL; -#endif if (unlikely(!should_zap_page(details, page))) continue; ptent = ptep_get_and_clear_full(mm, addr, pte, @@ -1444,10 +1438,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, ksm_might_unmap_zero_page(mm, ptent); continue; } -#ifdef CONFIG_MEM_PURGEABLE - if (vma->vm_flags & VM_PURGEABLE) - uxpte_clear_present(vma, addr); -#endif + delay_rmap = 0; if 
(!PageAnon(page)) { if (pte_dirty(ptent)) { @@ -3153,13 +3144,6 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) */ ptep_clear_flush(vma, vmf->address, vmf->pte); folio_add_new_anon_rmap(new_folio, vma, vmf->address); -#ifdef CONFIG_MEM_PURGEABLE - if (vma->vm_flags & VM_PURGEABLE) { - pr_info("set wp new folio %lx purgeable\n", folio_pfn(new_folio)); - folio_set_purgeable(new_folio); - uxpte_set_present(vma, vmf->address); - } -#endif folio_add_lru_vma(new_folio, vma); /* * We call the notify macro here because, when using secondary @@ -4119,23 +4103,11 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (pte_alloc(vma->vm_mm, vmf->pmd)) return VM_FAULT_OOM; -#ifdef CONFIG_MEM_PURGEABLE - /* use extra page table for userexpte */ - if (vma->vm_flags & VM_USEREXPTE) { - if (do_uxpte_page_fault(vmf, &entry)) - goto oom; - else - goto got_page; - } -#endif /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); -#ifdef CONFIG_MEM_PURGEABLE -got_page: -#endif vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) @@ -4200,16 +4172,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) inc_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_add_new_anon_rmap(folio, vma, vmf->address); -#ifdef CONFIG_MEM_PURGEABLE - if (vma->vm_flags & VM_PURGEABLE) - folio_set_purgeable(folio); -#endif folio_add_lru_vma(folio, vma); setpte: -#ifdef CONFIG_MEM_PURGEABLE - if (vma->vm_flags & VM_PURGEABLE) - uxpte_set_present(vma, vmf->address); -#endif if (uffd_wp) entry = pte_mkuffd_wp(entry); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); diff --git a/mm/mmap.c b/mm/mmap.c index 406077493c66..fb2022196845 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -59,11 +59,6 @@ #include "internal.h" -#ifdef CONFIG_MEM_PURGEABLE -#define MAP_PURGEABLE 0x04 /* purgeable memory */ -#define MAP_USEREXPTE 0x08 /* userspace extension page table */ -#endif - #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) #endif @@ -1358,16 +1353,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr, */ pgoff = addr >> PAGE_SHIFT; break; -#ifdef CONFIG_MEM_PURGEABLE - case MAP_PURGEABLE: - vm_flags |= VM_PURGEABLE; - pr_info("vm_flags purgeable = %lx.\n", VM_PURGEABLE); - break; - case MAP_USEREXPTE: - vm_flags |= VM_USEREXPTE; - pr_info("vm_flags useredpte = %lx.\n", VM_USEREXPTE); - break; -#endif default: return -EINVAL; } diff --git a/mm/purgeable.c b/mm/purgeable.c deleted file mode 100755 index eb493df6dfe6..000000000000 --- a/mm/purgeable.c +++ /dev/null @@ -1,348 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2022 Huawei Device Co., Ltd. 
- */ - -#include -#include -#include -#include -#include -#include -#include /* find_lock_task_mm */ - -#include - -struct uxpte_t { - atomic64_t val; -}; - -#define UXPTE_SIZE_SHIFT 3 -#define UXPTE_SIZE (1 << UXPTE_SIZE_SHIFT) - -#define UXPTE_PER_PAGE_SHIFT (PAGE_SHIFT - UXPTE_SIZE_SHIFT) -#define UXPTE_PER_PAGE (1 << UXPTE_PER_PAGE_SHIFT) - -#define UXPTE_PRESENT_BIT 1 -#define UXPTE_PRESENT_MASK ((1 << UXPTE_PRESENT_BIT) - 1) -#define UXPTE_REFCNT_ONE (1 << UXPTE_PRESENT_BIT) -#define UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE) - -#define vpn(vaddr) ((vaddr) >> PAGE_SHIFT) -#define uxpte_pn(vaddr) (vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT) -#define uxpte_off(vaddr) (vpn(vaddr) & (UXPTE_PER_PAGE - 1)) -#define uxpn2addr(uxpn) ((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT)) -#define uxpte_refcnt(uxpte) ((uxpte) >> UXPTE_PRESENT_BIT) -#define uxpte_present(uxpte) ((uxpte) & UXPTE_PRESENT_MASK) - -static inline long uxpte_read(struct uxpte_t *uxpte) -{ - return atomic64_read(&uxpte->val); -} - -static inline void uxpte_set(struct uxpte_t *uxpte, long val) -{ - atomic64_set(&uxpte->val, val); -} - -static inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new) -{ - return atomic64_cmpxchg(&uxpte->val, old, new) == old; -} - -void mm_init_uxpgd(struct mm_struct *mm) -{ - mm->uxpgd = NULL; - spin_lock_init(&mm->uxpgd_lock); -} - -void mm_clear_uxpgd(struct mm_struct *mm) -{ - struct page *page = NULL; - void **slot = NULL; - struct radix_tree_iter iter; - - spin_lock(&mm->uxpgd_lock); - if (!mm->uxpgd) - goto out; - radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) { - page = radix_tree_delete(mm->uxpgd, iter.index); - put_page(page); - } -out: - kfree(mm->uxpgd); - mm->uxpgd = NULL; - spin_unlock(&mm->uxpgd_lock); -} - -/* should hold uxpgd_lock before invoke */ -static struct page *lookup_uxpte_page(struct vm_area_struct *vma, - unsigned long addr, bool alloc) -{ - struct radix_tree_root *uxpgd = NULL; - struct page *page = NULL; - struct folio *new_folio = NULL; - struct page *new_page = NULL; - struct mm_struct *mm = vma->vm_mm; - unsigned long uxpn = uxpte_pn(addr); - - if (mm->uxpgd) - goto lookup; - if (!alloc) - goto out; - spin_unlock(&mm->uxpgd_lock); - uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL); - if (!uxpgd) { - pr_err("uxpgd alloc failed.\n"); - spin_lock(&mm->uxpgd_lock); - goto out; - } - INIT_RADIX_TREE(uxpgd, GFP_KERNEL); - spin_lock(&mm->uxpgd_lock); - if (mm->uxpgd) - kfree(uxpgd); - else - mm->uxpgd = uxpgd; -lookup: - page = radix_tree_lookup(mm->uxpgd, uxpn); - if (page) - goto out; - if (!alloc) - goto out; - spin_unlock(&mm->uxpgd_lock); - new_folio = vma_alloc_zeroed_movable_folio(vma, addr); - if (!new_folio) { - pr_err("uxpte page alloc fail.\n"); - spin_lock(&mm->uxpgd_lock); - goto out; - } - new_page = &new_folio->page; - if (radix_tree_preload(GFP_KERNEL)) { - put_page(new_page); - pr_err("radix preload fail.\n"); - spin_lock(&mm->uxpgd_lock); - goto out; - } - spin_lock(&mm->uxpgd_lock); - page = radix_tree_lookup(mm->uxpgd, uxpn); - if (page) { - put_page(new_page); - } else { - page = new_page; - radix_tree_insert(mm->uxpgd, uxpn, page); - } - radix_tree_preload_end(); -out: - return page; -} - -/* should hold uxpgd_lock before invoke */ -static struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma, - unsigned long addr, bool alloc) -{ - struct uxpte_t *uxpte = NULL; - struct page *page = NULL; - - page = lookup_uxpte_page(vma, addr, alloc); - if (!page) - return NULL; - uxpte = page_to_virt(page); - - return uxpte + uxpte_off(addr); -} - 
-bool lock_uxpte(struct vm_area_struct *vma, unsigned long addr) -{ - struct uxpte_t *uxpte = NULL; - long val = 0; - - spin_lock(&vma->vm_mm->uxpgd_lock); - uxpte = lookup_uxpte(vma, addr, true); - if (!uxpte) - goto unlock; -retry: - val = uxpte_read(uxpte); - if (val >> 1) - goto unlock; - if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM)) - goto retry; - val = UXPTE_UNDER_RECLAIM; -unlock: - spin_unlock(&vma->vm_mm->uxpgd_lock); - - return val == UXPTE_UNDER_RECLAIM; -} - -void unlock_uxpte(struct vm_area_struct *vma, unsigned long addr) -{ - struct uxpte_t *uxpte = NULL; - - spin_lock(&vma->vm_mm->uxpgd_lock); - uxpte = lookup_uxpte(vma, addr, false); - if (!uxpte) - goto unlock; - uxpte_set(uxpte, 0); -unlock: - spin_unlock(&vma->vm_mm->uxpgd_lock); -} - -bool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr) -{ - struct uxpte_t *uxpte = NULL; - long val = 0; - - spin_lock(&vma->vm_mm->uxpgd_lock); - uxpte = lookup_uxpte(vma, addr, true); - if (!uxpte) - goto unlock; -retry: - val = uxpte_read(uxpte); - if (val & 1) - goto unlock; - if (!uxpte_cas(uxpte, val, val + 1)) - goto retry; - val++; -unlock: - spin_unlock(&vma->vm_mm->uxpgd_lock); - - return val & 1; -} - -void uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr) -{ - struct uxpte_t *uxpte = NULL; - long val = 0; - - spin_lock(&vma->vm_mm->uxpgd_lock); - uxpte = lookup_uxpte(vma, addr, false); - if (!uxpte) - goto unlock; -retry: - val = uxpte_read(uxpte); - if (!(val & 1)) - goto unlock; - if (!uxpte_cas(uxpte, val, val - 1)) - goto retry; -unlock: - spin_unlock(&vma->vm_mm->uxpgd_lock); -} - -vm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry) -{ - struct vm_area_struct *vma = vmf->vma; - unsigned long vma_uxpn = vma->vm_pgoff; - unsigned long off_uxpn = vpn(vmf->address - vma->vm_start); - unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn); - struct page *page = NULL; - - if (unlikely(anon_vma_prepare(vma))) - return VM_FAULT_OOM; - - spin_lock(&vma->vm_mm->uxpgd_lock); - page = lookup_uxpte_page(vma, addr, true); - spin_unlock(&vma->vm_mm->uxpgd_lock); - - if (!page) - return VM_FAULT_OOM; - - *entry = mk_pte(page, vma->vm_page_prot); - *entry = pte_sw_mkyoung(*entry); - if (vma->vm_flags & VM_WRITE) - *entry = pte_mkwrite(pte_mkdirty(*entry), vma); - return 0; -} - -static void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages, - unsigned long *pined_purg_pages) -{ - struct page *page = NULL; - void **slot = NULL; - struct radix_tree_iter iter; - struct uxpte_t *uxpte = NULL; - long pte_entry = 0; - int index = 0; - unsigned long nr_total = 0, nr_pined = 0; - - spin_lock(&mm->uxpgd_lock); - if (!mm->uxpgd) - goto out; - radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) { - page = radix_tree_deref_slot(slot); - if (unlikely(!page)) - continue; - uxpte = page_to_virt(page); - for (index = 0; index < UXPTE_PER_PAGE; index++) { - pte_entry = uxpte_read(&(uxpte[index])); - if (uxpte_present(pte_entry) == 0) /* not present */ - continue; - nr_total++; - if (uxpte_refcnt(pte_entry) > 0) /* pined by user */ - nr_pined++; - } - } -out: - spin_unlock(&mm->uxpgd_lock); - - if (total_purg_pages) - *total_purg_pages = nr_total; - - if (pined_purg_pages) - *pined_purg_pages = nr_pined; -} - -void mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages, - unsigned long *pined_purg_pages) -{ - if (unlikely(!mm)) - return; - - if (!total_purg_pages && !pined_purg_pages) - return; - - __mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages); -} - 
-void purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages) -{ - struct task_struct *p = NULL; - struct task_struct *tsk = NULL; - unsigned long mm_nr_purge = 0, mm_nr_pined = 0; - unsigned long nr_total = 0, nr_pined = 0; - - if (!total_purg_pages && !pined_purg_pages) - return; - - if (total_purg_pages) - *total_purg_pages = 0; - - if (pined_purg_pages) - *pined_purg_pages = 0; - - rcu_read_lock(); - for_each_process(p) { - tsk = find_lock_task_mm(p); - if (!tsk) { - /* - * It is a kthread or all of p's threads have already - * detached their mm's. - */ - continue; - } - __mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined); - nr_total += mm_nr_purge; - nr_pined += mm_nr_pined; - task_unlock(tsk); - - if (mm_nr_purge > 0) { - pr_info("purgemm: tsk: %s %lu pined in %lu pages\n", tsk->comm ?: "NULL", - mm_nr_pined, mm_nr_purge); - } - } - rcu_read_unlock(); - if (total_purg_pages) - *total_purg_pages = nr_total; - - if (pined_purg_pages) - *pined_purg_pages = nr_pined; - pr_info("purgemm: Sum: %lu pined in %lu pages\n", nr_pined, nr_total); -} diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c deleted file mode 100755 index e0124622b320..000000000000 --- a/mm/purgeable_ashmem_trigger.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2023 Huawei Technologies Co., Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include "../drivers/staging/android/ashmem.h" - -#define PURGEABLE_ASHMEM_SHRINKALL_ARG 0 - -struct purgeable_ashmem_trigger_args { - struct seq_file *seq; - struct task_struct *tsk; -}; - -static int purgeable_ashmem_trigger_cb(const void *data, - struct file *f, unsigned int fd) -{ - const struct purgeable_ashmem_trigger_args *args = data; - struct task_struct *tsk = args->tsk; - struct purgeable_ashmem_metadata pmdata; - - if (!is_ashmem_file(f)) - return 0; - if (!get_purgeable_ashmem_metadata(f, &pmdata)) - return 0; - if (pmdata.is_purgeable) { - pmdata.name = pmdata.name == NULL ? 
"" : pmdata.name; - seq_printf(args->seq, - "%s,%u,%u,%ld,%s,%zu,%u,%u,%d,%d\n", - tsk->comm, tsk->pid, fd, (long)tsk->signal->oom_score_adj, - pmdata.name, pmdata.size, pmdata.id, pmdata.create_time, - pmdata.refc, pmdata.purged); - } - return 0; -} - -static ssize_t purgeable_ashmem_trigger_write(struct file *file, - const char __user *buffer, size_t count, loff_t *ppos) -{ - char *buf; - unsigned int ashmem_id = 0; - unsigned int create_time = 0; - const unsigned int params_num = 2; - const struct cred *cred = current_cred(); - - if (!cred) - return 0; - - if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && - !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { - pr_err("no permission to shrink purgeable ashmem!\n"); - return 0; - } - buf = memdup_user_nul(buffer, count); - buf = strstrip(buf); - if (sscanf(buf, "%u %u", &ashmem_id, &create_time) != params_num) - return -EINVAL; - if (ashmem_id == PURGEABLE_ASHMEM_SHRINKALL_ARG && - create_time == PURGEABLE_ASHMEM_SHRINKALL_ARG) - ashmem_shrinkall(); - else - ashmem_shrink_by_id(ashmem_id, create_time); - return count; -} - -static int purgeable_ashmem_trigger_show(struct seq_file *s, void *d) -{ - struct task_struct *tsk = NULL; - struct purgeable_ashmem_trigger_args cb_args; - const struct cred *cred = current_cred(); - - if (!cred) - return -EINVAL; - - if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && - !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { - pr_err("no permission to shrink purgeable ashmem!\n"); - return -EINVAL; - } - seq_puts(s, "Process purgeable ashmem detail info:\n"); - seq_puts(s, "----------------------------------------------------\n"); - seq_printf(s, "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", - "process_name", "pid", "adj", "fd", - "ashmem_name", "size", "id", "time", "ref_count", "purged"); - - ashmem_mutex_lock(); - rcu_read_lock(); - for_each_process(tsk) { - if (tsk->flags & PF_KTHREAD) - continue; - cb_args.seq = s; - cb_args.tsk = tsk; - - task_lock(tsk); - iterate_fd(tsk->files, 0, - purgeable_ashmem_trigger_cb, (void *)&cb_args); - task_unlock(tsk); - } - rcu_read_unlock(); - ashmem_mutex_unlock(); - seq_puts(s, "----------------------------------------------------\n"); - return 0; -} - -static int purgeable_ashmem_trigger_open(struct inode *inode, - struct file *file) -{ - return single_open(file, purgeable_ashmem_trigger_show, - inode->i_private); -} - -static const struct proc_ops purgeable_ashmem_trigger_fops = { - .proc_open = purgeable_ashmem_trigger_open, - .proc_write = purgeable_ashmem_trigger_write, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, -}; - -void init_purgeable_ashmem_trigger(void) -{ - struct proc_dir_entry *entry = NULL; - - entry = proc_create_data("purgeable_ashmem_trigger", 0666, - NULL, &purgeable_ashmem_trigger_fops, NULL); - if (!entry) - pr_err("Failed to create purgeable ashmem trigger\n"); -} diff --git a/mm/rmap.c b/mm/rmap.c index d61242e91b12..9f795b93cf40 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -75,7 +75,6 @@ #include #include #include -#include #include @@ -812,10 +811,6 @@ static bool folio_referenced_one(struct folio *folio, while (page_vma_mapped_walk(&pvmw)) { address = pvmw.address; -#ifdef CONFIG_MEM_PURGEABLE - if (!(vma->vm_flags & VM_PURGEABLE)) - pra->vm_flags &= ~VM_PURGEABLE; -#endif if ((vma->vm_flags & VM_LOCKED) && (!folio_test_large(folio) || !pvmw.pte)) { /* Restore the mlock which got missed */ @@ -855,9 +850,6 @@ static bool folio_referenced_one(struct folio *folio, if (referenced) { pra->referenced++; pra->vm_flags |= vma->vm_flags & ~VM_LOCKED; 
-#ifdef CONFIG_MEM_PURGEABLE - pra->vm_flags |= vma->vm_flags & ~VM_PURGEABLE; -#endif } if (!pra->mapcount) @@ -909,9 +901,6 @@ int folio_referenced(struct folio *folio, int is_locked, struct folio_referenced_arg pra = { .mapcount = folio_mapcount(folio), .memcg = memcg, -#ifdef CONFIG_MEM_PURGEABLE - .vm_flags = VM_PURGEABLE, -#endif }; struct rmap_walk_control rwc = { .rmap_one = folio_referenced_one, @@ -1533,13 +1522,6 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); -#ifdef CONFIG_MEM_PURGEABLE - if ((vma->vm_flags & VM_PURGEABLE) && !lock_uxpte(vma, address)) { - ret = false; - page_vma_mapped_walk_done(&pvmw); - break; - } -#endif /* * If the folio is in an mlock()d vma, we must not swap it out. */ @@ -1657,17 +1639,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); } -#ifdef CONFIG_MEM_PURGEABLE - } else if ((vma->vm_flags & VM_PURGEABLE) || (pte_unused(pteval) && - !userfaultfd_armed(vma))) { -#else } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { -#endif -#ifdef CONFIG_MEM_PURGEABLE - if (vma->vm_flags & VM_PURGEABLE) - unlock_uxpte(vma, address); -#endif - /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan diff --git a/mm/vmscan.c b/mm/vmscan.c index b21c7ac84281..3f48a713f020 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1445,7 +1445,6 @@ void folio_putback_lru(struct folio *folio) enum folio_references { FOLIOREF_RECLAIM, FOLIOREF_RECLAIM_CLEAN, - FOLIOREF_RECLAIM_PURGEABLE, FOLIOREF_KEEP, FOLIOREF_ACTIVATE, }; @@ -1467,16 +1466,10 @@ static enum folio_references folio_check_references(struct folio *folio, if (vm_flags & VM_LOCKED) return FOLIOREF_ACTIVATE; - /* rmap lock contention: rotate */ if (referenced_ptes == -1) return FOLIOREF_KEEP; -#ifdef CONFIG_MEM_PURGEABLE - if (vm_flags & VM_PURGEABLE) - return FOLIOREF_RECLAIM_PURGEABLE; -#endif - if (referenced_ptes) { /* * All mapped folios start out with page table @@ -1803,7 +1796,6 @@ unsigned int shrink_folio_list(struct list_head *folio_list, goto keep_locked; case FOLIOREF_RECLAIM: case FOLIOREF_RECLAIM_CLEAN: - case FOLIOREF_RECLAIM_PURGEABLE: ; /* try to reclaim the folio below */ } @@ -1824,7 +1816,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, * Lazyfree folio could be freed directly */ if (folio_test_anon(folio) && folio_test_swapbacked(folio)) { - if (!folio_test_swapcache(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { + if (!folio_test_swapcache(folio)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; if (folio_maybe_dma_pinned(folio)) @@ -1906,7 +1898,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, goto activate_locked; mapping = folio_mapping(folio); - if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { + if (folio_test_dirty(folio)) { /* * Only kswapd can writeback filesystem folios * to avoid risk of stack overflow. 
But avoid @@ -2021,17 +2013,10 @@ unsigned int shrink_folio_list(struct list_head *folio_list, } } - if (folio_test_anon(folio) && (!folio_test_swapbacked(folio) || references == FOLIOREF_RECLAIM_PURGEABLE)) { + if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) { /* follow __remove_mapping for reference */ if (!folio_ref_freeze(folio, 1)) goto keep_locked; -// missing -/* - if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { - folio__ref_unfreeze(folio, 1); - goto keep_locked; - } -*/ /* * The folio has only one reference left, which is * from the isolation. After the caller puts the @@ -7957,10 +7942,6 @@ void __meminit kswapd_stop(int nid) pgdat_kswapd_unlock(pgdat); } -#ifdef CONFIG_MEM_PURGEABLE_DEBUG -static void __init purgeable_debugfs_init(void); -#endif - static int __init kswapd_init(void) { int nid; @@ -7968,9 +7949,6 @@ static int __init kswapd_init(void) swap_setup(); for_each_node_state(nid, N_MEMORY) kswapd_run(nid); -#ifdef CONFIG_MEM_PURGEABLE_DEBUG - purgeable_debugfs_init(); -#endif return 0; } @@ -8196,75 +8174,3 @@ void check_move_unevictable_folios(struct folio_batch *fbatch) } } EXPORT_SYMBOL_GPL(check_move_unevictable_folios); - -#ifdef CONFIG_MEM_PURGEABLE_DEBUG -static unsigned long purgeable_node(pg_data_t *pgdata, struct scan_control *sc) -{ - struct mem_cgroup *memcg = NULL; - unsigned long nr = 0; -#ifdef CONFIG_MEMCG - while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))) -#endif - { - struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdata); - - shrink_list(LRU_ACTIVE_PURGEABLE, -1, lruvec, sc); - nr += shrink_list(LRU_INACTIVE_PURGEABLE, -1, lruvec, sc); - } - - pr_info("reclaim %lu purgeable pages.\n", nr); - - return nr; -} - -static int purgeable(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) -{ - struct scan_control sc = { - .gfp_mask = GFP_KERNEL, - .order = 0, - .priority = DEF_PRIORITY, - .may_deactivate = DEACTIVATE_ANON, - .may_writepage = 1, - .may_unmap = 1, - .may_swap = 1, - .reclaim_idx = MAX_NR_ZONES - 1, - }; - int nid = 0; - const struct cred *cred = current_cred(); - if (!cred) - return 0; - - if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && - !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { - pr_err("no permission to shrink purgeable heap!\n"); - return -EINVAL; - } - for_each_node_state(nid, N_MEMORY) - purgeable_node(NODE_DATA(nid), &sc); - return 0; -} - -static struct ctl_table ker_tab[] = { - { - .procname = "purgeable", - .mode = 0666, - .proc_handler = purgeable, - }, - {}, -}; - -static struct ctl_table_header *purgeable_header; - -static void __init purgeable_debugfs_init(void) -{ - purgeable_header = register_sysctl("kernel", ker_tab); - if (!purgeable_header) - pr_err("register purgeable sysctl table failed.\n"); -} - -static void __exit purgeable_debugfs_exit(void) -{ - unregister_sysctl_table(purgeable_header); -} -#endif /* CONFIG_MEM_PURGEABLE_DEBUG */ diff --git a/mm/vmstat.c b/mm/vmstat.c index 1195132d5ea1..dcbd443881f9 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1172,10 +1172,6 @@ const char * const vmstat_text[] = { "nr_zone_active_anon", "nr_zone_inactive_file", "nr_zone_active_file", -#ifdef CONFIG_MEM_PURGEABLE - "nr_zone_inactive_purgeable", - "nr_zone_active_purgeable", -#endif "nr_zone_unevictable", "nr_zone_write_pending", "nr_mlock", @@ -1203,10 +1199,6 @@ const char * const vmstat_text[] = { "nr_active_anon", "nr_inactive_file", "nr_active_file", -#ifdef CONFIG_MEM_PURGEABLE - "nr_inactive_purgeable", - "nr_active_purgeable", -#endif "nr_unevictable", 
"nr_slab_reclaimable", "nr_slab_unreclaimable", -- Gitee From 3079a6669885bf37c45d24076d71807111f2e854 Mon Sep 17 00:00:00 2001 From: ma_yulong Date: Fri, 18 Oct 2024 10:12:33 +0800 Subject: [PATCH 15/15] purgeable memory kernel support Signed-off-by: ma_yulong --- fs/proc/meminfo.c | 31 ++- fs/proc/task_mmu.c | 12 ++ include/linux/mm.h | 18 ++ include/linux/mm_inline.h | 4 + include/linux/mm_types.h | 4 + include/linux/mmzone.h | 21 +- include/linux/page-flags.h | 9 + include/trace/events/mmflags.h | 11 +- kernel/fork.c | 10 +- mm/Kconfig | 23 +++ mm/Makefile | 2 + mm/memory.c | 40 +++- mm/mmap.c | 13 ++ mm/purgeable.c | 348 +++++++++++++++++++++++++++++++++ mm/purgeable_ashmem_trigger.c | 134 +++++++++++++ mm/rmap.c | 28 +++ mm/vmscan.c | 94 ++++++++- mm/vmstat.c | 8 + 18 files changed, 799 insertions(+), 11 deletions(-) create mode 100644 mm/purgeable.c create mode 100644 mm/purgeable_ashmem_trigger.c diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 45af9a989d40..875306ca2b39 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -17,6 +17,9 @@ #ifdef CONFIG_CMA #include #endif +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include #include "internal.h" @@ -40,6 +43,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) unsigned long pages[NR_LRU_LISTS]; unsigned long sreclaimable, sunreclaim; int lru; + unsigned long nr_purg_active = 0; + unsigned long nr_purg_inactive = 0; +#ifdef CONFIG_MEM_PURGEABLE + unsigned long nr_purg_pined = 0; +#endif si_meminfo(&i); si_swapinfo(&i); @@ -53,6 +61,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) pages[lru] = global_node_page_state(NR_LRU_BASE + lru); +#ifdef CONFIG_MEM_PURGEABLE + nr_purg_active = pages[LRU_ACTIVE_PURGEABLE]; + nr_purg_inactive = pages[LRU_INACTIVE_PURGEABLE]; + purg_pages_info(NULL, &nr_purg_pined); + nr_purg_pined = min(nr_purg_pined, nr_purg_active + nr_purg_inactive); +#endif + available = si_mem_available(); sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B); sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B); @@ -64,13 +79,25 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Cached: ", cached); show_val_kb(m, "SwapCached: ", total_swapcache_pages()); show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] + - pages[LRU_ACTIVE_FILE]); +#ifdef CONFIG_MEM_PURGEABLE + pages[LRU_ACTIVE_FILE] + + nr_purg_active); +#else + pages[LRU_ACTIVE_FILE]); +#endif + show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] + - pages[LRU_INACTIVE_FILE]); + pages[LRU_INACTIVE_FILE] + + nr_purg_inactive); show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]); show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]); show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]); show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]); +#ifdef CONFIG_MEM_PURGEABLE + show_val_kb(m, "Active(purg): ", nr_purg_active); + show_val_kb(m, "Inactive(purg): ", nr_purg_inactive); + show_val_kb(m, "Pined(purg): ", nr_purg_pined); +#endif show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]); show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK)); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index bd8285811728..415a5bef7424 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -20,6 +20,9 @@ #include #include #include +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include @@ -33,6 +36,11 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { unsigned long text, lib, 
swap, anon, file, shmem; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; +#ifdef CONFIG_MEM_PURGEABLE + unsigned long nr_purg_sum = 0, nr_purg_pin = 0; + + mm_purg_pages_info(mm, &nr_purg_sum, &nr_purg_pin); +#endif anon = get_mm_counter(mm, MM_ANONPAGES); file = get_mm_counter(mm, MM_FILEPAGES); @@ -76,6 +84,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) seq_put_decimal_ull_width(m, " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); +#ifdef CONFIG_MEM_PURGEABLE + SEQ_PUT_DEC(" kB\nPurgSum:\t", nr_purg_sum); + SEQ_PUT_DEC(" kB\nPurgPin:\t", nr_purg_pin); +#endif seq_puts(m, " kB\n"); hugetlb_report_usage(m, mm); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 7179e3f6a030..83f791100b84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -320,16 +320,34 @@ extern unsigned int kobjsize(const void *objp); #define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */ #define VM_HIGH_ARCH_BIT_7 39 /* bit only usable on 64-bit architectures */ +#ifdef CONFIG_MEM_PURGEABLE +#define VM_HIGH_ARCH_BIT_8 40 /* bit only usable on 64-bit architectures */ +#define VM_HIGH_ARCH_BIT_9 41 /* bit only usable on 64-bit architectures */ +#endif /* CONFIG_MEM_PURGEABLE */ #define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0) #define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1) #define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2) #define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3) #define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4) #define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5) +#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6) #define VM_HIGH_ARCH_7 BIT(VM_HIGH_ARCH_BIT_7) +#ifdef CONFIG_MEM_PURGEABLE +#define VM_HIGH_ARCH_8 BIT(VM_HIGH_ARCH_BIT_8) +#define VM_HIGH_ARCH_9 BIT(VM_HIGH_ARCH_BIT_9) +#endif /* CONFIG_MEM_PURGEABLE */ #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */ +#ifdef CONFIG_MEM_PURGEABLE +#define VM_PURGEABLE VM_HIGH_ARCH_8 +#define VM_USEREXPTE VM_HIGH_ARCH_9 +#else /* CONFIG_MEM_PURGEABLE */ +#define VM_PURGEABLE 0 +#define VM_USEREXPTE 0 +#endif /* CONFIG_MEM_PURGEABLE */ + #ifdef CONFIG_SECURITY_XPM #define VM_XPM VM_HIGH_ARCH_7 #else /* CONFIG_SECURITY_XPM */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 96b1c157554c..027591c9decb 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -93,6 +93,10 @@ static __always_inline enum lru_list folio_lru_list(struct folio *folio) return LRU_UNEVICTABLE; lru = folio_is_file_lru(folio) ? 
LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; +#ifdef CONFIG_MEM_PURGEABLE + if (folio_test_purgeable(folio)) + lru = LRU_INACTIVE_PURGEABLE; +#endif if (folio_test_active(folio)) lru += LRU_ACTIVE; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index db7003d2886f..7264a43f8d18 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -703,6 +703,10 @@ struct mm_struct { #endif unsigned long task_size; /* size of task vm space */ pgd_t * pgd; +#ifdef CONFIG_MEM_PURGEABLE + void *uxpgd; + spinlock_t uxpgd_lock; +#endif #ifdef CONFIG_MEMBARRIER /** diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 9cf03644fe90..84da48194dbc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -146,6 +146,10 @@ enum zone_stat_item { NR_ZONE_ACTIVE_ANON, NR_ZONE_INACTIVE_FILE, NR_ZONE_ACTIVE_FILE, +#ifdef CONFIG_MEM_PURGEABLE + NR_ZONE_INACTIVE_PURGEABLE, + NR_ZONE_ACTIVE_PURGEABLE, +#endif NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ @@ -166,6 +170,10 @@ enum node_stat_item { NR_ACTIVE_ANON, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */ +#ifdef CONFIG_MEM_PURGEABLE + NR_INACTIVE_PURGEABLE, + NR_ACTIVE_PURGEABLE, +#endif NR_UNEVICTABLE, /* " " " " " */ NR_SLAB_RECLAIMABLE_B, NR_SLAB_UNRECLAIMABLE_B, @@ -268,12 +276,19 @@ static __always_inline bool vmstat_item_in_bytes(int idx) #define LRU_BASE 0 #define LRU_ACTIVE 1 #define LRU_FILE 2 +#ifdef CONFIG_MEM_PURGEABLE +#define LRU_PURGEABLE 4 +#endif enum lru_list { LRU_INACTIVE_ANON = LRU_BASE, LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE, LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE, LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE, +#ifdef CONFIG_MEM_PURGEABLE + LRU_INACTIVE_PURGEABLE = LRU_BASE + LRU_PURGEABLE, + LRU_ACTIVE_PURGEABLE = LRU_BASE + LRU_PURGEABLE + LRU_ACTIVE, +#endif LRU_UNEVICTABLE, NR_LRU_LISTS }; @@ -288,7 +303,7 @@ enum vmscan_throttle_state { #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++) -#define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++) +#define for_each_evictable_lru(lru) for (lru = 0; lru < LRU_UNEVICTABLE; lru++) static inline bool is_file_lru(enum lru_list lru) { @@ -297,6 +312,10 @@ static inline bool is_file_lru(enum lru_list lru) static inline bool is_active_lru(enum lru_list lru) { +#ifdef CONFIG_MEM_PURGEABLE + if (lru == LRU_ACTIVE_PURGEABLE) + return true; +#endif return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4d2e0c913baf..af9b7524c692 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -136,6 +136,9 @@ enum pageflags { PG_arch_2, PG_arch_3, #endif +#ifdef CONFIG_MEM_PURGEABLE + PG_purgeable, +#endif #ifdef CONFIG_SECURITY_XPM PG_xpm_readonly, PG_xpm_writetainted, @@ -615,6 +618,12 @@ PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY) PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted) #endif +#ifdef CONFIG_MEM_PURGEABLE +PAGEFLAG(Purgeable, purgeable, PF_ANY) +#else +PAGEFLAG_FALSE(Purgeable) +#endif + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 0a224af0e59a..5fee97d06e6e 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -65,10 +65,16 @@ __def_gfpflag_names 
__def_gfpflag_names_kasan \ ) : "none" +#ifdef CONFIG_MEM_PURGEABLE +#define IF_HAVE_PG_PURGEABLE(_name) ,{1UL << PG_##_name, __stringify(_name)} +#else +#define IF_HAVE_PG_PURGEABLE(_name) +#endif + #ifdef CONFIG_SECURITY_XPM -#define IF_HAVE_PG_XPM_INTEGRITY(flag,string) ,{1UL << flag, string} +#define IF_HAVE_PG_XPM_INTEGRITY(_name) ,{1UL << PG_##_name, __stringify(_name)} #else -#define IF_HAVE_PG_XPM_INTEGRITY(flag,string) +#define IF_HAVE_PG_XPM_INTEGRITY(_name) #endif #ifdef CONFIG_MMU @@ -125,6 +131,7 @@ DEF_PAGEFLAG_NAME(reclaim), \ DEF_PAGEFLAG_NAME(swapbacked), \ DEF_PAGEFLAG_NAME(unevictable) \ +IF_HAVE_PG_PURGEABLE(purgeable) \ IF_HAVE_PG_MLOCK(mlocked) \ IF_HAVE_PG_UNCACHED(uncached) \ IF_HAVE_PG_HWPOISON(hwpoison) \ diff --git a/kernel/fork.c b/kernel/fork.c index 92611a26a392..515267609be9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -99,7 +99,9 @@ #include #include #include - +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include #include @@ -793,6 +795,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, static inline int mm_alloc_pgd(struct mm_struct *mm) { +#ifdef CONFIG_MEM_PURGEABLE + mm_init_uxpgd(mm); +#endif mm->pgd = pgd_alloc(mm); if (unlikely(!mm->pgd)) return -ENOMEM; @@ -802,6 +807,9 @@ static inline int mm_alloc_pgd(struct mm_struct *mm) static inline void mm_free_pgd(struct mm_struct *mm) { pgd_free(mm, mm->pgd); +#ifdef CONFIG_MEM_PURGEABLE + mm_clear_uxpgd(mm); +#endif } #else static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) diff --git a/mm/Kconfig b/mm/Kconfig index 544d113729eb..ee4c2cf539d7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1303,6 +1303,29 @@ config LOCK_MM_AND_FIND_VMA bool depends on !STACK_GROWSUP + +config MEM_PURGEABLE + bool "Purgeable memory feature" + default n + depends on 64BIT + select ARCH_USES_HIGH_VMA_FLAGS + help + Support purgeable pages for process + +config MEM_PURGEABLE_DEBUG + bool "Purgeable memory debug" + default n + depends on MEM_PURGEABLE + help + Debug info for purgeable memory + +config PURGEABLE_ASHMEM + bool "Purgeable memory feature for ashmem" + default n + depends on MEM_PURGEABLE + help + Support purgeable ashmem for process + source "mm/damon/Kconfig" endmenu diff --git a/mm/Makefile b/mm/Makefile index f9fb7e07cdd8..f84d4b0f521d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -141,4 +141,6 @@ obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o obj-$(CONFIG_HYPERHOLD_FILE_LRU) += memcg_reclaim.o obj-$(CONFIG_HYPERHOLD_MEMCG) += memcg_control.o obj-$(CONFIG_HYPERHOLD_ZSWAPD) += zswapd.o zswapd_control.o +obj-$(CONFIG_MEM_PURGEABLE) += purgeable.o +obj-$(CONFIG_PURGEABLE_ASHMEM) += purgeable_ashmem_trigger.o obj-$(CONFIG_MEMORY_MONITOR) += memory_monitor.o diff --git a/mm/memory.c b/mm/memory.c index 78e05d3e9e4a..8a64230a1fec 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -77,7 +77,9 @@ #include #include #include - +#ifdef CONFIG_MEM_PURGEABLE +#include +#endif #include #include @@ -1426,6 +1428,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, unsigned int delay_rmap; page = vm_normal_page(vma, addr, ptent); +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_USEREXPTE) + page = NULL; +#endif if (unlikely(!should_zap_page(details, page))) continue; ptent = ptep_get_and_clear_full(mm, addr, pte, @@ -1438,7 +1444,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, ksm_might_unmap_zero_page(mm, ptent); continue; } - +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + uxpte_clear_present(vma, addr); +#endif delay_rmap = 0; if 
(!PageAnon(page)) { if (pte_dirty(ptent)) { @@ -3144,6 +3153,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) */ ptep_clear_flush(vma, vmf->address, vmf->pte); folio_add_new_anon_rmap(new_folio, vma, vmf->address); +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) { + pr_info("set wp new folio %lx purgeable\n", folio_pfn(new_folio)); + folio_set_purgeable(new_folio); + uxpte_set_present(vma, vmf->address); + } +#endif folio_add_lru_vma(new_folio, vma); /* * We call the notify macro here because, when using secondary @@ -4103,11 +4119,23 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (pte_alloc(vma->vm_mm, vmf->pmd)) return VM_FAULT_OOM; +#ifdef CONFIG_MEM_PURGEABLE + /* use extra page table for userexpte */ + if (vma->vm_flags & VM_USEREXPTE) { + if (do_uxpte_page_fault(vmf, &entry)) + goto oom; + else + goto got_page; + } +#endif /* Use the zero-page for reads */ if (!(vmf->flags & FAULT_FLAG_WRITE) && !mm_forbids_zeropage(vma->vm_mm)) { entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), vma->vm_page_prot)); +#ifdef CONFIG_MEM_PURGEABLE +got_page: +#endif vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (!vmf->pte) @@ -4172,8 +4200,16 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) inc_mm_counter(vma->vm_mm, MM_ANONPAGES); folio_add_new_anon_rmap(folio, vma, vmf->address); +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + folio_set_purgeable(folio); +#endif folio_add_lru_vma(folio, vma); setpte: +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + uxpte_set_present(vma, vmf->address); +#endif if (uffd_wp) entry = pte_mkuffd_wp(entry); set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); diff --git a/mm/mmap.c b/mm/mmap.c index fb2022196845..fdd2291a987d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -59,6 +59,11 @@ #include "internal.h" +#ifdef CONFIG_MEM_PURGEABLE +#define MAP_PURGEABLE 0x04 /* purgeable memory */ +#define MAP_USEREXPTE 0x08 /* userspace extension page table */ +#endif + #ifndef arch_mmap_check #define arch_mmap_check(addr, len, flags) (0) #endif @@ -1353,6 +1358,14 @@ unsigned long do_mmap(struct file *file, unsigned long addr, */ pgoff = addr >> PAGE_SHIFT; break; +#ifdef CONFIG_MEM_PURGEABLE + case MAP_PURGEABLE: + vm_flags |= VM_PURGEABLE; + break; + case MAP_USEREXPTE: + vm_flags |= VM_USEREXPTE; + break; +#endif default: return -EINVAL; } diff --git a/mm/purgeable.c b/mm/purgeable.c new file mode 100644 index 000000000000..54bee931cb1b --- /dev/null +++ b/mm/purgeable.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 Huawei Device Co., Ltd. 
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include /* find_lock_task_mm */
+
+#include
+
+struct uxpte_t {
+	atomic64_t val;
+};
+
+#define UXPTE_SIZE_SHIFT 3
+#define UXPTE_SIZE (1 << UXPTE_SIZE_SHIFT)
+
+#define UXPTE_PER_PAGE_SHIFT (PAGE_SHIFT - UXPTE_SIZE_SHIFT)
+#define UXPTE_PER_PAGE (1 << UXPTE_PER_PAGE_SHIFT)
+
+#define UXPTE_PRESENT_BIT 1
+#define UXPTE_PRESENT_MASK ((1 << UXPTE_PRESENT_BIT) - 1)
+#define UXPTE_REFCNT_ONE (1 << UXPTE_PRESENT_BIT)
+#define UXPTE_UNDER_RECLAIM (-UXPTE_REFCNT_ONE)
+
+#define vpn(vaddr) ((vaddr) >> PAGE_SHIFT)
+#define uxpte_pn(vaddr) (vpn(vaddr) >> UXPTE_PER_PAGE_SHIFT)
+#define uxpte_off(vaddr) (vpn(vaddr) & (UXPTE_PER_PAGE - 1))
+#define uxpn2addr(uxpn) ((uxpn) << (UXPTE_PER_PAGE_SHIFT + PAGE_SHIFT))
+#define uxpte_refcnt(uxpte) ((uxpte) >> UXPTE_PRESENT_BIT)
+#define uxpte_present(uxpte) ((uxpte) & UXPTE_PRESENT_MASK)
+
+static inline long uxpte_read(struct uxpte_t *uxpte)
+{
+	return atomic64_read(&uxpte->val);
+}
+
+static inline void uxpte_set(struct uxpte_t *uxpte, long val)
+{
+	atomic64_set(&uxpte->val, val);
+}
+
+static inline bool uxpte_cas(struct uxpte_t *uxpte, long old, long new)
+{
+	return atomic64_cmpxchg(&uxpte->val, old, new) == old;
+}
+
+void mm_init_uxpgd(struct mm_struct *mm)
+{
+	mm->uxpgd = NULL;
+	spin_lock_init(&mm->uxpgd_lock);
+}
+
+void mm_clear_uxpgd(struct mm_struct *mm)
+{
+	struct page *page = NULL;
+	void **slot = NULL;
+	struct radix_tree_iter iter;
+
+	spin_lock(&mm->uxpgd_lock);
+	if (!mm->uxpgd)
+		goto out;
+	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
+		page = radix_tree_delete(mm->uxpgd, iter.index);
+		put_page(page);
+	}
+out:
+	kfree(mm->uxpgd);
+	mm->uxpgd = NULL;
+	spin_unlock(&mm->uxpgd_lock);
+}
+
+/* should hold uxpgd_lock before invoke */
+static struct page *lookup_uxpte_page(struct vm_area_struct *vma,
+	unsigned long addr, bool alloc)
+{
+	struct radix_tree_root *uxpgd = NULL;
+	struct page *page = NULL;
+	struct folio *new_folio = NULL;
+	struct page *new_page = NULL;
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long uxpn = uxpte_pn(addr);
+
+	if (mm->uxpgd)
+		goto lookup;
+	if (!alloc)
+		goto out;
+	spin_unlock(&mm->uxpgd_lock);
+	uxpgd = kzalloc(sizeof(struct radix_tree_root), GFP_KERNEL);
+	if (!uxpgd) {
+		pr_err("uxpgd alloc failed.\n");
+		spin_lock(&mm->uxpgd_lock);
+		goto out;
+	}
+	INIT_RADIX_TREE(uxpgd, GFP_KERNEL);
+	spin_lock(&mm->uxpgd_lock);
+	if (mm->uxpgd)
+		kfree(uxpgd);
+	else
+		mm->uxpgd = uxpgd;
+lookup:
+	page = radix_tree_lookup(mm->uxpgd, uxpn);
+	if (page)
+		goto out;
+	if (!alloc)
+		goto out;
+	spin_unlock(&mm->uxpgd_lock);
+	new_folio = vma_alloc_zeroed_movable_folio(vma, addr);
+	if (!new_folio) {
+		pr_err("uxpte page alloc fail.\n");
+		spin_lock(&mm->uxpgd_lock);
+		goto out;
+	}
+	new_page = &new_folio->page;
+	if (radix_tree_preload(GFP_KERNEL)) {
+		put_page(new_page);
+		pr_err("radix preload fail.\n");
+		spin_lock(&mm->uxpgd_lock);
+		goto out;
+	}
+	spin_lock(&mm->uxpgd_lock);
+	page = radix_tree_lookup(mm->uxpgd, uxpn);
+	if (page) {
+		put_page(new_page);
+	} else {
+		page = new_page;
+		radix_tree_insert(mm->uxpgd, uxpn, page);
+	}
+	radix_tree_preload_end();
+out:
+	return page;
+}
+
+/* should hold uxpgd_lock before invoke */
+static struct uxpte_t *lookup_uxpte(struct vm_area_struct *vma,
+	unsigned long addr, bool alloc)
+{
+	struct uxpte_t *uxpte = NULL;
+	struct page *page = NULL;
+
+	page = lookup_uxpte_page(vma, addr, alloc);
+	if (!page)
+		return NULL;
+	uxpte = page_to_virt(page);
+
+	return uxpte + uxpte_off(addr);
+}
+
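A note on the layout the helpers above implement: every purgeable data page is shadowed by one 64-bit uxpte entry, and entries are packed UXPTE_PER_PAGE per table page (512, assuming PAGE_SHIFT == 12 and UXPTE_SIZE == 8). mm->uxpgd is a radix tree keyed by table-page number, so a user virtual address resolves in two steps. The function below is a minimal sketch under those assumptions; it is an editor's illustration, not part of the patch.

/*
 * Minimal sketch of the uxpte address math, assuming PAGE_SHIFT == 12,
 * so UXPTE_PER_PAGE == 512.  Hypothetical helper, not part of this series.
 * Caller must hold mm->uxpgd_lock, as lookup_uxpte() above does.
 */
static struct uxpte_t *uxpte_example_resolve(struct mm_struct *mm,
	unsigned long vaddr)
{
	unsigned long page_no = vaddr >> 12;	/* vpn(vaddr) */
	unsigned long table_pn = page_no >> 9;	/* uxpte_pn(): which table page */
	unsigned long slot = page_no & 511;	/* uxpte_off(): slot in that page */
	struct page *table = radix_tree_lookup(mm->uxpgd, table_pn);

	/* e.g. vaddr 0x203000 -> vpn 0x203 -> table page 1, slot 3 */
	return table ? (struct uxpte_t *)page_to_virt(table) + slot : NULL;
}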
+bool lock_uxpte(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+	long val = 0;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, true);
+	if (!uxpte)
+		goto unlock;
+retry:
+	val = uxpte_read(uxpte);
+	if (val >> 1)
+		goto unlock;
+	if (!uxpte_cas(uxpte, val, UXPTE_UNDER_RECLAIM))
+		goto retry;
+	val = UXPTE_UNDER_RECLAIM;
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+
+	return val == UXPTE_UNDER_RECLAIM;
+}
+
+void unlock_uxpte(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, false);
+	if (!uxpte)
+		goto unlock;
+	uxpte_set(uxpte, 0);
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+}
+
+bool uxpte_set_present(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+	long val = 0;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, true);
+	if (!uxpte)
+		goto unlock;
+retry:
+	val = uxpte_read(uxpte);
+	if (val & 1)
+		goto unlock;
+	if (!uxpte_cas(uxpte, val, val + 1))
+		goto retry;
+	val++;
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+
+	return val & 1;
+}
+
+void uxpte_clear_present(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct uxpte_t *uxpte = NULL;
+	long val = 0;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	uxpte = lookup_uxpte(vma, addr, false);
+	if (!uxpte)
+		goto unlock;
+retry:
+	val = uxpte_read(uxpte);
+	if (!(val & 1))
+		goto unlock;
+	if (!uxpte_cas(uxpte, val, val - 1))
+		goto retry;
+unlock:
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+}
+
+vm_fault_t do_uxpte_page_fault(struct vm_fault *vmf, pte_t *entry)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long vma_uxpn = vma->vm_pgoff;
+	unsigned long off_uxpn = vpn(vmf->address - vma->vm_start);
+	unsigned long addr = uxpn2addr(vma_uxpn + off_uxpn);
+	struct page *page = NULL;
+
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+
+	spin_lock(&vma->vm_mm->uxpgd_lock);
+	page = lookup_uxpte_page(vma, addr, true);
+	spin_unlock(&vma->vm_mm->uxpgd_lock);
+
+	if (!page)
+		return VM_FAULT_OOM;
+
+	*entry = mk_pte(page, vma->vm_page_prot);
+	*entry = pte_sw_mkyoung(*entry);
+	if (vma->vm_flags & VM_WRITE)
+		*entry = pte_mkwrite(pte_mkdirty(*entry), vma);
+	return 0;
+}
+
+static void __mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
+	unsigned long *pined_purg_pages)
+{
+	struct page *page = NULL;
+	void **slot = NULL;
+	struct radix_tree_iter iter;
+	struct uxpte_t *uxpte = NULL;
+	long pte_entry = 0;
+	int index = 0;
+	unsigned long nr_total = 0, nr_pined = 0;
+
+	spin_lock(&mm->uxpgd_lock);
+	if (!mm->uxpgd)
+		goto out;
+	radix_tree_for_each_slot(slot, mm->uxpgd, &iter, 0) {
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+		uxpte = page_to_virt(page);
+		for (index = 0; index < UXPTE_PER_PAGE; index++) {
+			pte_entry = uxpte_read(&(uxpte[index]));
+			if (uxpte_present(pte_entry) == 0) /* not present */
+				continue;
+			nr_total++;
+			if (uxpte_refcnt(pte_entry) > 0) /* pined by user */
+				nr_pined++;
+		}
+	}
+out:
+	spin_unlock(&mm->uxpgd_lock);
+
+	if (total_purg_pages)
+		*total_purg_pages = nr_total;
+
+	if (pined_purg_pages)
+		*pined_purg_pages = nr_pined;
+}
+
+void mm_purg_pages_info(struct mm_struct *mm, unsigned long *total_purg_pages,
+	unsigned long *pined_purg_pages)
+{
+	if (unlikely(!mm))
+		return;
+
+	if (!total_purg_pages && !pined_purg_pages)
+		return;
+
+	__mm_purg_pages_info(mm, total_purg_pages, pined_purg_pages);
+}
+
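To recap the protocol encoded in the helpers above: bit 0 of an entry is the present bit, the upper bits are a pin refcount, and UXPTE_UNDER_RECLAIM (-2) is a negative sentinel. lock_uxpte() succeeds only when the entry shows no pins and no reclaim in progress (val >> 1 == 0), parking the entry at the sentinel so concurrent pinners back off; unlock_uxpte() then resets it to 0 (not present, unpinned). The sketch below mirrors how try_to_unmap_one() in the mm/rmap.c hunk further down uses this pair; it is an editor's illustration, not part of the patch.

/*
 * Hypothetical illustration of the reclaim-side use of
 * lock_uxpte()/unlock_uxpte(); not part of this series.
 */
static bool example_discard_purgeable(struct vm_area_struct *vma,
	unsigned long addr)
{
	if (!lock_uxpte(vma, addr))
		return false;	/* pinned by userspace, or already under reclaim */
	/*
	 * Entry now holds UXPTE_UNDER_RECLAIM: vmscan can clear the pte
	 * and drop the folio without writing it to swap.
	 */
	unlock_uxpte(vma, addr);	/* entry -> 0: not present, unpinned */
	return true;
}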
+void purg_pages_info(unsigned long *total_purg_pages, unsigned long *pined_purg_pages)
+{
+	struct task_struct *p = NULL;
+	struct task_struct *tsk = NULL;
+	unsigned long mm_nr_purge = 0, mm_nr_pined = 0;
+	unsigned long nr_total = 0, nr_pined = 0;
+
+	if (!total_purg_pages && !pined_purg_pages)
+		return;
+
+	if (total_purg_pages)
+		*total_purg_pages = 0;
+
+	if (pined_purg_pages)
+		*pined_purg_pages = 0;
+
+	rcu_read_lock();
+	for_each_process(p) {
+		tsk = find_lock_task_mm(p);
+		if (!tsk) {
+			/*
+			 * It is a kthread or all of p's threads have already
+			 * detached their mm's.
+			 */
+			continue;
+		}
+		__mm_purg_pages_info(tsk->mm, &mm_nr_purge, &mm_nr_pined);
+		nr_total += mm_nr_purge;
+		nr_pined += mm_nr_pined;
+		task_unlock(tsk);
+
+		if (mm_nr_purge > 0) {
+			pr_info("purgemm: tsk: %s %lu pined in %lu pages\n", tsk->comm ?: "NULL",
+				mm_nr_pined, mm_nr_purge);
+		}
+	}
+	rcu_read_unlock();
+	if (total_purg_pages)
+		*total_purg_pages = nr_total;
+
+	if (pined_purg_pages)
+		*pined_purg_pages = nr_pined;
+	pr_info("purgemm: Sum: %lu pined in %lu pages\n", nr_pined, nr_total);
+}
diff --git a/mm/purgeable_ashmem_trigger.c b/mm/purgeable_ashmem_trigger.c
new file mode 100644
index 000000000000..73759333d645
--- /dev/null
+++ b/mm/purgeable_ashmem_trigger.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2024 Huawei Technologies Co., Ltd.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../drivers/staging/android/ashmem.h"
+
+#define PURGEABLE_ASHMEM_SHRINKALL_ARG 0
+
+struct purgeable_ashmem_trigger_args {
+	struct seq_file *seq;
+	struct task_struct *tsk;
+};
+
+static int purgeable_ashmem_trigger_cb(const void *data,
+	struct file *f, unsigned int fd)
+{
+	const struct purgeable_ashmem_trigger_args *args = data;
+	struct task_struct *tsk = args->tsk;
+	struct purgeable_ashmem_metadata pmdata;
+
+	if (!is_ashmem_file(f))
+		return 0;
+	if (!get_purgeable_ashmem_metadata(f, &pmdata))
+		return 0;
+	if (pmdata.is_purgeable) {
+		pmdata.name = pmdata.name == NULL ? "" : pmdata.name;
+		seq_printf(args->seq,
+			"%s,%u,%u,%ld,%s,%zu,%u,%u,%d,%d\n",
+			tsk->comm, tsk->pid, fd, (long)tsk->signal->oom_score_adj,
+			pmdata.name, pmdata.size, pmdata.id, pmdata.create_time,
+			pmdata.refc, pmdata.purged);
+	}
+	return 0;
+}
+
+static ssize_t purgeable_ashmem_trigger_write(struct file *file,
+	const char __user *buffer, size_t count, loff_t *ppos)
+{
+	char *buf;
+	unsigned int ashmem_id = 0;
+	unsigned int create_time = 0;
+	const unsigned int params_num = 2;
+	const struct cred *cred = current_cred();
+
+	if (!cred)
+		return 0;
+
+	if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) &&
+		!uid_eq(cred->euid, GLOBAL_ROOT_UID)) {
+		pr_err("no permission to shrink purgeable ashmem!\n");
+		return 0;
+	}
+	buf = memdup_user_nul(buffer, count);
+	buf = strstrip(buf);
+	if (sscanf(buf, "%u %u", &ashmem_id, &create_time) != params_num)
+		return -EINVAL;
+	if (ashmem_id == PURGEABLE_ASHMEM_SHRINKALL_ARG &&
+		create_time == PURGEABLE_ASHMEM_SHRINKALL_ARG)
+		ashmem_shrinkall();
+	else
+		ashmem_shrink_by_id(ashmem_id, create_time);
+	return count;
+}
+
+static int purgeable_ashmem_trigger_show(struct seq_file *s, void *d)
+{
+	struct task_struct *tsk = NULL;
+	struct purgeable_ashmem_trigger_args cb_args;
+	const struct cred *cred = current_cred();
+
+	if (!cred)
+		return -EINVAL;
+
+	if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) &&
+		!uid_eq(cred->euid, GLOBAL_ROOT_UID)) {
+		pr_err("no permission to shrink purgeable ashmem!\n");
+		return -EINVAL;
+	}
+	seq_puts(s, "Process purgeable ashmem detail info:\n");
+	seq_puts(s, "----------------------------------------------------\n");
+	seq_printf(s, "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",
+		"process_name", "pid", "adj", "fd",
+		"ashmem_name", "size", "id", "time", "ref_count", "purged");
+
+	ashmem_mutex_lock();
+	rcu_read_lock();
+	for_each_process(tsk) {
+		if (tsk->flags & PF_KTHREAD)
+			continue;
+		cb_args.seq = s;
+		cb_args.tsk = tsk;
+
+		task_lock(tsk);
+		iterate_fd(tsk->files, 0,
+			purgeable_ashmem_trigger_cb, (void *)&cb_args);
+		task_unlock(tsk);
+	}
+	rcu_read_unlock();
+	ashmem_mutex_unlock();
+	seq_puts(s, "----------------------------------------------------\n");
+	return 0;
+}
+
+static int purgeable_ashmem_trigger_open(struct inode *inode,
+	struct file *file)
+{
+	return single_open(file, purgeable_ashmem_trigger_show,
+		inode->i_private);
+}
+
+static const struct proc_ops purgeable_ashmem_trigger_fops = {
+	.proc_open = purgeable_ashmem_trigger_open,
+	.proc_write = purgeable_ashmem_trigger_write,
+	.proc_read = seq_read,
+	.proc_lseek = seq_lseek,
+	.proc_release = single_release,
+};
+
+void init_purgeable_ashmem_trigger(void)
+{
+	struct proc_dir_entry *entry = NULL;
+
+	entry = proc_create_data("purgeable_ashmem_trigger", 0660,
+		NULL, &purgeable_ashmem_trigger_fops, NULL);
+	if (!entry)
+		pr_err("Failed to create purgeable ashmem trigger\n");
+}
diff --git a/mm/rmap.c b/mm/rmap.c
index 9f795b93cf40..d61242e91b12 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -75,6 +75,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -811,6 +812,10 @@ static bool folio_referenced_one(struct folio *folio,
 	while (page_vma_mapped_walk(&pvmw)) {
 		address = pvmw.address;
+#ifdef CONFIG_MEM_PURGEABLE
+		if (!(vma->vm_flags & VM_PURGEABLE))
+			pra->vm_flags &= ~VM_PURGEABLE;
+#endif
 		if ((vma->vm_flags & VM_LOCKED) &&
 		    (!folio_test_large(folio) || !pvmw.pte)) {
 			/* Restore the mlock which got missed */
@@ -850,6 +855,9 @@
 	if (referenced) {
 		pra->referenced++;
 		pra->vm_flags |= vma->vm_flags & ~VM_LOCKED;
+#ifdef CONFIG_MEM_PURGEABLE + pra->vm_flags |= vma->vm_flags & ~VM_PURGEABLE; +#endif } if (!pra->mapcount) @@ -901,6 +909,9 @@ int folio_referenced(struct folio *folio, int is_locked, struct folio_referenced_arg pra = { .mapcount = folio_mapcount(folio), .memcg = memcg, +#ifdef CONFIG_MEM_PURGEABLE + .vm_flags = VM_PURGEABLE, +#endif }; struct rmap_walk_control rwc = { .rmap_one = folio_referenced_one, @@ -1522,6 +1533,13 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* Unexpected PMD-mapped THP? */ VM_BUG_ON_FOLIO(!pvmw.pte, folio); +#ifdef CONFIG_MEM_PURGEABLE + if ((vma->vm_flags & VM_PURGEABLE) && !lock_uxpte(vma, address)) { + ret = false; + page_vma_mapped_walk_done(&pvmw); + break; + } +#endif /* * If the folio is in an mlock()d vma, we must not swap it out. */ @@ -1639,7 +1657,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, set_pte_at(mm, address, pvmw.pte, pteval); } +#ifdef CONFIG_MEM_PURGEABLE + } else if ((vma->vm_flags & VM_PURGEABLE) || (pte_unused(pteval) && + !userfaultfd_armed(vma))) { +#else } else if (pte_unused(pteval) && !userfaultfd_armed(vma)) { +#endif +#ifdef CONFIG_MEM_PURGEABLE + if (vma->vm_flags & VM_PURGEABLE) + unlock_uxpte(vma, address); +#endif + /* * The guest indicated that the page content is of no * interest anymore. Simply discard the pte, vmscan diff --git a/mm/vmscan.c b/mm/vmscan.c index 3f48a713f020..0203fd116907 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1445,6 +1445,7 @@ void folio_putback_lru(struct folio *folio) enum folio_references { FOLIOREF_RECLAIM, FOLIOREF_RECLAIM_CLEAN, + FOLIOREF_RECLAIM_PURGEABLE, FOLIOREF_KEEP, FOLIOREF_ACTIVATE, }; @@ -1466,10 +1467,16 @@ static enum folio_references folio_check_references(struct folio *folio, if (vm_flags & VM_LOCKED) return FOLIOREF_ACTIVATE; + /* rmap lock contention: rotate */ if (referenced_ptes == -1) return FOLIOREF_KEEP; +#ifdef CONFIG_MEM_PURGEABLE + if (vm_flags & VM_PURGEABLE) + return FOLIOREF_RECLAIM_PURGEABLE; +#endif + if (referenced_ptes) { /* * All mapped folios start out with page table @@ -1796,6 +1803,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, goto keep_locked; case FOLIOREF_RECLAIM: case FOLIOREF_RECLAIM_CLEAN: + case FOLIOREF_RECLAIM_PURGEABLE: ; /* try to reclaim the folio below */ } @@ -1816,7 +1824,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, * Lazyfree folio could be freed directly */ if (folio_test_anon(folio) && folio_test_swapbacked(folio)) { - if (!folio_test_swapcache(folio)) { + if (!folio_test_swapcache(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; if (folio_maybe_dma_pinned(folio)) @@ -1898,7 +1906,7 @@ unsigned int shrink_folio_list(struct list_head *folio_list, goto activate_locked; mapping = folio_mapping(folio); - if (folio_test_dirty(folio)) { + if (folio_test_dirty(folio) && references != FOLIOREF_RECLAIM_PURGEABLE) { /* * Only kswapd can writeback filesystem folios * to avoid risk of stack overflow. But avoid @@ -2013,10 +2021,11 @@ unsigned int shrink_folio_list(struct list_head *folio_list, } } - if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) { + if (folio_test_anon(folio) && (!folio_test_swapbacked(folio) || references == FOLIOREF_RECLAIM_PURGEABLE)) { /* follow __remove_mapping for reference */ if (!folio_ref_freeze(folio, 1)) goto keep_locked; + /* * The folio has only one reference left, which is * from the isolation. 
After the caller puts the @@ -7942,6 +7951,10 @@ void __meminit kswapd_stop(int nid) pgdat_kswapd_unlock(pgdat); } +#ifdef CONFIG_MEM_PURGEABLE_DEBUG +static void __init purgeable_debugfs_init(void); +#endif + static int __init kswapd_init(void) { int nid; @@ -7949,6 +7962,9 @@ static int __init kswapd_init(void) swap_setup(); for_each_node_state(nid, N_MEMORY) kswapd_run(nid); +#ifdef CONFIG_MEM_PURGEABLE_DEBUG + purgeable_debugfs_init(); +#endif return 0; } @@ -8174,3 +8190,75 @@ void check_move_unevictable_folios(struct folio_batch *fbatch) } } EXPORT_SYMBOL_GPL(check_move_unevictable_folios); + +#ifdef CONFIG_MEM_PURGEABLE_DEBUG +static unsigned long purgeable_node(pg_data_t *pgdata, struct scan_control *sc) +{ + struct mem_cgroup *memcg = NULL; + unsigned long nr = 0; +#ifdef CONFIG_MEMCG + while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))) +#endif + { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdata); + + shrink_list(LRU_ACTIVE_PURGEABLE, -1, lruvec, sc); + nr += shrink_list(LRU_INACTIVE_PURGEABLE, -1, lruvec, sc); + } + + pr_info("reclaim %lu purgeable pages.\n", nr); + + return nr; +} + +static int purgeable(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct scan_control sc = { + .gfp_mask = GFP_KERNEL, + .order = 0, + .priority = DEF_PRIORITY, + .may_deactivate = DEACTIVATE_ANON, + .may_writepage = 1, + .may_unmap = 1, + .may_swap = 1, + .reclaim_idx = MAX_NR_ZONES - 1, + }; + int nid = 0; + const struct cred *cred = current_cred(); + if (!cred) + return 0; + + if (!uid_eq(cred->euid, GLOBAL_MEMMGR_UID) && + !uid_eq(cred->euid, GLOBAL_ROOT_UID)) { + pr_err("no permission to shrink purgeable heap!\n"); + return -EINVAL; + } + for_each_node_state(nid, N_MEMORY) + purgeable_node(NODE_DATA(nid), &sc); + return 0; +} + +static struct ctl_table ker_tab[] = { + { + .procname = "purgeable", + .mode = 0666, + .proc_handler = purgeable, + }, + {}, +}; + +static struct ctl_table_header *purgeable_header; + +static void __init purgeable_debugfs_init(void) +{ + purgeable_header = register_sysctl("kernel", ker_tab); + if (!purgeable_header) + pr_err("register purgeable sysctl table failed.\n"); +} + +static void __exit purgeable_debugfs_exit(void) +{ + unregister_sysctl_table(purgeable_header); +} +#endif /* CONFIG_MEM_PURGEABLE_DEBUG */ diff --git a/mm/vmstat.c b/mm/vmstat.c index dcbd443881f9..1195132d5ea1 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1172,6 +1172,10 @@ const char * const vmstat_text[] = { "nr_zone_active_anon", "nr_zone_inactive_file", "nr_zone_active_file", +#ifdef CONFIG_MEM_PURGEABLE + "nr_zone_inactive_purgeable", + "nr_zone_active_purgeable", +#endif "nr_zone_unevictable", "nr_zone_write_pending", "nr_mlock", @@ -1199,6 +1203,10 @@ const char * const vmstat_text[] = { "nr_active_anon", "nr_inactive_file", "nr_active_file", +#ifdef CONFIG_MEM_PURGEABLE + "nr_inactive_purgeable", + "nr_active_purgeable", +#endif "nr_unevictable", "nr_slab_reclaimable", "nr_slab_unreclaimable", -- Gitee
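For context on how the two map types in the mm/mmap.c hunk are meant to be exercised from userspace: MAP_PURGEABLE (0x04) and MAP_USEREXPTE (0x08) are handled in do_mmap()'s map-type switch, i.e. they are selected in place of MAP_SHARED/MAP_PRIVATE. The sketch below is a plausible caller under that assumption; it is not a documented ABI, and the flag value is taken from this series only.

/* Hypothetical userspace sketch; MAP_PURGEABLE value from mm/mmap.c above. */
#include <sys/mman.h>
#include <stddef.h>

#define MAP_PURGEABLE 0x04	/* map type added by this series */

static void *alloc_purgeable(size_t len)
{
	/*
	 * Faulted-in pages get PG_purgeable and land on the purgeable LRU,
	 * so reclaim may silently discard them unless the process pins
	 * them through its MAP_USEREXPTE table.
	 */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PURGEABLE | MAP_ANONYMOUS, -1, 0);

	return p == MAP_FAILED ? NULL : p;
}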