diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 377a7eb604d567761044d794e20ba325d4b71cbc..1b22926f00ebe1481ae6ab54d0fb44f039a85815 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1315,6 +1315,9 @@ PAGE_SIZE multiple when read back. kernel_stack Amount of memory allocated to kernel stacks. + pagetables + Amount of memory allocated for page tables. + percpu(npn) Amount of memory used for storing per-cpu kernel data structures. diff --git a/arch/nds32/mm/mm-nds32.c b/arch/nds32/mm/mm-nds32.c index 55bec50ccc03318ed472ebfa21033b46bac892f4..f2778f2b39f65ba8330ca2fab42b86232d15a95b 100644 --- a/arch/nds32/mm/mm-nds32.c +++ b/arch/nds32/mm/mm-nds32.c @@ -34,8 +34,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm) cpu_dcache_wb_range((unsigned long)new_pgd, (unsigned long)new_pgd + PTRS_PER_PGD * sizeof(pgd_t)); - inc_zone_page_state(virt_to_page((unsigned long *)new_pgd), - NR_PAGETABLE); + inc_lruvec_page_state(virt_to_page((unsigned long *)new_pgd), + NR_PAGETABLE); return new_pgd; } @@ -59,7 +59,7 @@ void pgd_free(struct mm_struct *mm, pgd_t * pgd) pte = pmd_page(*pmd); pmd_clear(pmd); - dec_zone_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE); + dec_lruvec_page_state(virt_to_page((unsigned long *)pgd), NR_PAGETABLE); pte_free(mm, pte); mm_dec_nr_ptes(mm); pmd_free(mm, pmd); diff --git a/drivers/base/node.c b/drivers/base/node.c index e5d68e88b787f5bfd2f49d27bc381f14dba7dbb7..bddb0ff9a286f4bf7404c839d4ae67308f257d5e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -454,7 +454,7 @@ static ssize_t node_read_meminfo(struct device *dev, #ifdef CONFIG_SHADOW_CALL_STACK nid, node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif - nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), + nid, K(node_page_state(pgdat, NR_PAGETABLE)), nid, 0UL, nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), @@ -465,16 
+465,11 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(sunreclaimable) #ifdef CONFIG_TRANSPARENT_HUGEPAGE , - nid, K(node_page_state(pgdat, NR_ANON_THPS) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_FILE_THPS) * - HPAGE_PMD_NR), - nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) * - HPAGE_PMD_NR) + nid, K(node_page_state(pgdat, NR_ANON_THPS)), + nid, K(node_page_state(pgdat, NR_SHMEM_THPS)), + nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), + nid, K(node_page_state(pgdat, NR_FILE_THPS)), + nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED)) #endif #ifdef CONFIG_DUPTEXT , @@ -531,10 +526,14 @@ static ssize_t node_read_vmstat(struct device *dev, sum_zone_numa_state(nid, i)); #endif - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) - len += sysfs_emit_at(buf, len, "%s %lu\n", - node_stat_name(i), - node_page_state_pages(pgdat, i)); + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + unsigned long pages = node_page_state_pages(pgdat, i); + + if (vmstat_item_print_in_thp(i)) + pages /= HPAGE_PMD_NR; + len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i), + pages); + } return len; } diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 97a65fff0b41f4c66951a3acc67cd0b8da195184..9c2ae395e08181b4f0d4443d575273bfa2564de7 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -145,7 +145,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) global_node_page_state(NR_KERNEL_SCS_KB)); #endif show_val_kb(m, "PageTables: ", - global_zone_page_state(NR_PAGETABLE)); + global_node_page_state(NR_PAGETABLE)); show_val_kb(m, "NFS_Unstable: ", 0); show_val_kb(m, "Bounce: ", @@ -165,13 +165,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE - show_val_kb(m, "AnonHugePages: ", ext.anon_thps * HPAGE_PMD_NR); - show_val_kb(m, "ShmemHugePages: ", 
ext.shmem_thps * HPAGE_PMD_NR); - show_val_kb(m, "ShmemPmdMapped: ", ext.shmem_pmd_mapped * HPAGE_PMD_NR); + show_val_kb(m, "AnonHugePages: ", ext.anon_thps); + show_val_kb(m, "ShmemHugePages: ", ext.shmem_thps); + show_val_kb(m, "ShmemPmdMapped: ", ext.shmem_pmd_mapped); show_val_kb(m, "FileHugePages: ", - global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR); + global_node_page_state(NR_FILE_THPS)); show_val_kb(m, "FilePmdMapped: ", - global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR); + global_node_page_state(NR_FILE_PMDMAPPED)); #endif #ifdef CONFIG_CMA diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 524255a9501f526a7ba84925fb3a362a8633510c..9417da60019d5e62b587aed7135097b5119a96c9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -145,6 +145,10 @@ struct lruvec_stat { long count[NR_VM_NODE_STAT_ITEMS]; }; +struct batched_lruvec_stat { + s32 count[NR_VM_NODE_STAT_ITEMS]; +}; + /* * Bitmap of shrinker::id corresponding to memcg-aware shrinkers, * which have elements charged to this memcg. @@ -160,11 +164,17 @@ struct memcg_shrinker_map { struct mem_cgroup_per_node { struct lruvec lruvec; - /* Legacy local VM stats */ + /* + * Legacy local VM stats. This should be struct lruvec_stat and + * cannot be optimized to struct batched_lruvec_stat: + * the threshold of the lruvec_stat_cpu can be as big as + * MEMCG_CHARGE_BATCH * PAGE_SIZE, which fits into s32, but this + * field has no upper limit.
+ */ struct lruvec_stat __percpu *lruvec_stat_local; /* Subtree VM stats (batched updates) */ - struct lruvec_stat __percpu *lruvec_stat_cpu; + struct batched_lruvec_stat __percpu *lruvec_stat_cpu; atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; @@ -1045,8 +1055,6 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val); -void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, - int val); void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val); void mod_memcg_obj_state(void *p, int idx, int val); @@ -1071,43 +1079,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec, local_irq_restore(flags); } -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_state(lruvec, idx, val); - local_irq_restore(flags); -} - -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - struct page *head = compound_head(page); /* rmap on tail pages */ - pg_data_t *pgdat = page_pgdat(page); - struct lruvec *lruvec; - - /* Untracked pages have no memcg, no lruvec. 
Update only the node */ - if (!head->mem_cgroup) { - __mod_node_page_state(pgdat, idx, val); - return; - } - - lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); - __mod_lruvec_state(lruvec, idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - unsigned long flags; - - local_irq_save(flags); - __mod_lruvec_page_state(page, idx, val); - local_irq_restore(flags); -} - unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned); @@ -1597,30 +1568,6 @@ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec, { } -static inline void __mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - -static inline void mod_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx, int val) -{ - mod_node_page_state(lruvec_pgdat(lruvec), idx, val); -} - -static inline void __mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - __mod_node_page_state(page_pgdat(page), idx, val); -} - -static inline void mod_lruvec_page_state(struct page *page, - enum node_stat_item idx, int val) -{ - mod_node_page_state(page_pgdat(page), idx, val); -} - static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) { @@ -1766,30 +1713,6 @@ static inline void __dec_memcg_page_state(struct page *page, __mod_memcg_page_state(page, idx, -1); } -static inline void __inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, 1); -} - -static inline void __dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - __mod_lruvec_state(lruvec, idx, -1); -} - -static inline void __inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - __mod_lruvec_page_state(page, idx, 1); -} - -static inline void __dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) 
-{ - __mod_lruvec_page_state(page, idx, -1); -} - static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx) { __mod_lruvec_slab_state(p, idx, 1); @@ -1828,30 +1751,6 @@ static inline void dec_memcg_page_state(struct page *page, mod_memcg_page_state(page, idx, -1); } -static inline void inc_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, 1); -} - -static inline void dec_lruvec_state(struct lruvec *lruvec, - enum node_stat_item idx) -{ - mod_lruvec_state(lruvec, idx, -1); -} - -static inline void inc_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, 1); -} - -static inline void dec_lruvec_page_state(struct page *page, - enum node_stat_item idx) -{ - mod_lruvec_page_state(page, idx, -1); -} - static inline struct lruvec *parent_lruvec(struct lruvec *lruvec) { struct mem_cgroup *memcg; diff --git a/include/linux/mm.h b/include/linux/mm.h index 971473a4b06614fc40ac89768f502010042bba3c..485aa0901089e5d09486824820ccdebcb5ad69ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2344,7 +2344,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page) if (!ptlock_init(page)) return false; __SetPageTable(page); - inc_zone_page_state(page, NR_PAGETABLE); + inc_lruvec_page_state(page, NR_PAGETABLE); return true; } @@ -2352,7 +2352,7 @@ static inline void pgtable_pte_page_dtor(struct page *page) { ptlock_free(page); __ClearPageTable(page); - dec_zone_page_state(page, NR_PAGETABLE); + dec_lruvec_page_state(page, NR_PAGETABLE); } #define pte_offset_map_lock(mm, pmd, address, ptlp) \ @@ -2439,7 +2439,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) if (!pmd_ptlock_init(page)) return false; __SetPageTable(page); - inc_zone_page_state(page, NR_PAGETABLE); + inc_lruvec_page_state(page, NR_PAGETABLE); return true; } @@ -2447,7 +2447,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) { pmd_ptlock_free(page); 
__ClearPageTable(page); - dec_zone_page_state(page, NR_PAGETABLE); + dec_lruvec_page_state(page, NR_PAGETABLE); } /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f702f24a83e9fc69e01b2fbb7c6f15e964280ba0..23dfd069dc6e23e44af4960e895e5e30912c264d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -156,7 +156,6 @@ enum zone_stat_item { NR_ZONE_UNEVICTABLE, NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ - NR_PAGETABLE, /* used for pagetables */ /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) @@ -217,9 +216,27 @@ enum node_stat_item { #ifdef CONFIG_DUPTEXT NR_DUPTEXT, #endif + NR_PAGETABLE, /* used for pagetables */ NR_VM_NODE_STAT_ITEMS }; +/* + * Returns true if the item should be printed in THPs (/proc/vmstat + * currently prints number of anon, file and shmem THPs. But the item + * is charged in pages). + */ +static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return false; + + return item == NR_ANON_THPS || + item == NR_FILE_THPS || + item == NR_SHMEM_THPS || + item == NR_SHMEM_PMDMAPPED || + item == NR_FILE_PMDMAPPED; +} + /* * Returns true if the value is measured in bytes (most vmstat values are * measured in pages). 
This defines the API part, the internal representation diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 9e2bbed53d41ad6d40e4c15c28552de69d31f721..690ed25ad3da2345b5d9d2961067a4f74eb0d8f2 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -467,4 +467,108 @@ static inline const char *vm_event_name(enum vm_event_item item) } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ +#ifdef CONFIG_MEMCG + +void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, + int val); + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_state(lruvec, idx, val); + local_irq_restore(flags); +} + +void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val); + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + unsigned long flags; + + local_irq_save(flags); + __mod_lruvec_page_state(page, idx, val); + local_irq_restore(flags); +} + +#else + +static inline void __mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void mod_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx, int val) +{ + mod_node_page_state(lruvec_pgdat(lruvec), idx, val); +} + +static inline void __mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + __mod_node_page_state(page_pgdat(page), idx, val); +} + +static inline void mod_lruvec_page_state(struct page *page, + enum node_stat_item idx, int val) +{ + mod_node_page_state(page_pgdat(page), idx, val); +} + +#endif /* CONFIG_MEMCG */ + +static inline void __inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, 1); +} + +static inline void __dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + __mod_lruvec_state(lruvec, idx, 
-1); +} + +static inline void __inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, 1); +} + +static inline void __dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + __mod_lruvec_page_state(page, idx, -1); +} + +static inline void inc_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, 1); +} + +static inline void dec_lruvec_state(struct lruvec *lruvec, + enum node_stat_item idx) +{ + mod_lruvec_state(lruvec, idx, -1); +} + +static inline void inc_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, 1); +} + +static inline void dec_lruvec_page_state(struct page *page, + enum node_stat_item idx) +{ + mod_lruvec_page_state(page, idx, -1); +} + #endif /* _LINUX_VMSTAT_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 2eb445b624c995323de9aaeac61fa403aac6bf96..1546af29db8a9c8f0d0c507fac4ec92be03f02cb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -210,9 +210,9 @@ static void unaccount_page_cache_page(struct address_space *mapping, if (PageSwapBacked(page)) { __mod_lruvec_page_state(page, NR_SHMEM, -nr); if (PageTransHuge(page)) - __dec_lruvec_page_state(page, NR_SHMEM_THPS); + __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr); } else if (PageTransHuge(page)) { - __dec_lruvec_page_state(page, NR_FILE_THPS); + __mod_lruvec_page_state(page, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 72844109e0a875ce7ebaaf2a5010b167fe969b62..1b4f1856189e2bbf4df418eaba916c495515e6cc 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2500,7 +2500,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, lock_page_memcg(page); if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { /* Last compound_mapcount is gone. 
*/ - __dec_lruvec_page_state(page, NR_ANON_THPS); + __mod_lruvec_page_state(page, NR_ANON_THPS, + -HPAGE_PMD_NR); if (TestClearPageDoubleMap(page)) { /* No need in mapcount reference anymore */ for (i = 0; i < HPAGE_PMD_NR; i++) @@ -3087,10 +3088,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) } spin_unlock(&ds_queue->split_queue_lock); if (mapping) { + int nr = thp_nr_pages(head); + if (PageSwapBacked(head)) { - __dec_lruvec_page_state(head, NR_SHMEM_THPS); + __mod_lruvec_page_state(head, NR_SHMEM_THPS, + -nr); } else { - __dec_lruvec_page_state(head, NR_FILE_THPS); + __mod_lruvec_page_state(head, NR_FILE_THPS, + -nr); filemap_nr_thps_dec(mapping); } } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a98bffe45151f2d761403db24765a1b7fe3a0246..44d7ed94efc9f691672ff3c0e8b5eeaa8eb331ac 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2064,6 +2064,7 @@ static void collapse_file(struct mm_struct *mm, XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); + int nr; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1)); @@ -2282,11 +2283,12 @@ static void collapse_file(struct mm_struct *mm, put_page(page); goto xa_unlocked; } + nr = thp_nr_pages(new_page); if (is_shmem) - __inc_lruvec_page_state(new_page, NR_SHMEM_THPS); + __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); else { - __inc_lruvec_page_state(new_page, NR_FILE_THPS); + __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr); filemap_nr_thps_inc(mapping); /* * Paired with smp_mb() in do_dentry_open() to ensure @@ -2297,7 +2299,7 @@ static void collapse_file(struct mm_struct *mm, smp_mb(); if (inode_is_open_for_write(mapping->host)) { result = SCAN_FAIL; - __dec_lruvec_page_state(new_page, NR_FILE_THPS); + __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); filemap_nr_thps_dec(mapping); goto xa_locked; } diff --git a/mm/memcontrol.c 
b/mm/memcontrol.c index dd3122bb3c3556c93a1e7be215e29f33c22b448f..0aaf98bc05e1e33864314b727591b7fb544449d3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -906,6 +906,24 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, __mod_memcg_lruvec_state(lruvec, idx, val); } +void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, + int val) +{ + struct page *head = compound_head(page); /* rmap on tail pages */ + pg_data_t *pgdat = page_pgdat(page); + struct lruvec *lruvec; + + /* Untracked pages have no memcg, no lruvec. Update only the node */ + if (!head->mem_cgroup) { + __mod_node_page_state(pgdat, idx, val); + return; + } + + lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); + __mod_lruvec_state(lruvec, idx, val); +} +EXPORT_SYMBOL(__mod_lruvec_page_state); + void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val) { pg_data_t *pgdat = page_pgdat(virt_to_page(p)); @@ -1771,71 +1789,70 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) struct memory_stat { const char *name; - unsigned int ratio; unsigned int idx; }; -static struct memory_stat memory_stats[] = { - { "anon", PAGE_SIZE, NR_ANON_MAPPED }, - { "file", PAGE_SIZE, NR_FILE_PAGES }, - { "kernel_stack", 1024, NR_KERNEL_STACK_KB }, - { "percpu", 1, MEMCG_PERCPU_B }, - { "sock", PAGE_SIZE, MEMCG_SOCK }, - { "shmem", PAGE_SIZE, NR_SHMEM }, - { "file_mapped", PAGE_SIZE, NR_FILE_MAPPED }, - { "file_dirty", PAGE_SIZE, NR_FILE_DIRTY }, - { "file_writeback", PAGE_SIZE, NR_WRITEBACK }, +static const struct memory_stat memory_stats[] = { + { "anon", NR_ANON_MAPPED }, + { "file", NR_FILE_PAGES }, + { "kernel_stack", NR_KERNEL_STACK_KB }, + { "pagetables", NR_PAGETABLE }, + { "percpu", MEMCG_PERCPU_B }, + { "sock", MEMCG_SOCK }, + { "shmem", NR_SHMEM }, + { "file_mapped", NR_FILE_MAPPED }, + { "file_dirty", NR_FILE_DIRTY }, + { "file_writeback", NR_WRITEBACK }, #ifdef CONFIG_TRANSPARENT_HUGEPAGE - /* - * The ratio will be initialized in 
memory_stats_init(). Because - * on some architectures, the macro of HPAGE_PMD_SIZE is not - * constant(e.g. powerpc). - */ - { "anon_thp", 0, NR_ANON_THPS }, - { "file_thp", 0, NR_FILE_THPS }, - { "shmem_thp", 0, NR_SHMEM_THPS }, + { "anon_thp", NR_ANON_THPS }, + { "file_thp", NR_FILE_THPS }, + { "shmem_thp", NR_SHMEM_THPS }, #endif - { "inactive_anon", PAGE_SIZE, NR_INACTIVE_ANON }, - { "active_anon", PAGE_SIZE, NR_ACTIVE_ANON }, - { "inactive_file", PAGE_SIZE, NR_INACTIVE_FILE }, - { "active_file", PAGE_SIZE, NR_ACTIVE_FILE }, - { "unevictable", PAGE_SIZE, NR_UNEVICTABLE }, - - /* - * Note: The slab_reclaimable and slab_unreclaimable must be - * together and slab_reclaimable must be in front. - */ - { "slab_reclaimable", 1, NR_SLAB_RECLAIMABLE_B }, - { "slab_unreclaimable", 1, NR_SLAB_UNRECLAIMABLE_B }, + { "inactive_anon", NR_INACTIVE_ANON }, + { "active_anon", NR_ACTIVE_ANON }, + { "inactive_file", NR_INACTIVE_FILE }, + { "active_file", NR_ACTIVE_FILE }, + { "unevictable", NR_UNEVICTABLE }, + { "slab_reclaimable", NR_SLAB_RECLAIMABLE_B }, + { "slab_unreclaimable", NR_SLAB_UNRECLAIMABLE_B }, /* The memory events */ - { "workingset_refault_anon", 1, WORKINGSET_REFAULT_ANON }, - { "workingset_refault_file", 1, WORKINGSET_REFAULT_FILE }, - { "workingset_activate_anon", 1, WORKINGSET_ACTIVATE_ANON }, - { "workingset_activate_file", 1, WORKINGSET_ACTIVATE_FILE }, - { "workingset_restore_anon", 1, WORKINGSET_RESTORE_ANON }, - { "workingset_restore_file", 1, WORKINGSET_RESTORE_FILE }, - { "workingset_nodereclaim", 1, WORKINGSET_NODERECLAIM }, + { "workingset_refault_anon", WORKINGSET_REFAULT_ANON }, + { "workingset_refault_file", WORKINGSET_REFAULT_FILE }, + { "workingset_activate_anon", WORKINGSET_ACTIVATE_ANON }, + { "workingset_activate_file", WORKINGSET_ACTIVATE_FILE }, + { "workingset_restore_anon", WORKINGSET_RESTORE_ANON }, + { "workingset_restore_file", WORKINGSET_RESTORE_FILE }, + { "workingset_nodereclaim", WORKINGSET_NODERECLAIM }, }; -static int __init 
memory_stats_init(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(memory_stats); i++) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (memory_stats[i].idx == NR_ANON_THPS || - memory_stats[i].idx == NR_FILE_THPS || - memory_stats[i].idx == NR_SHMEM_THPS) - memory_stats[i].ratio = HPAGE_PMD_SIZE; -#endif - VM_BUG_ON(!memory_stats[i].ratio); - VM_BUG_ON(memory_stats[i].idx >= MEMCG_NR_STAT); +/* Translate stat items to the correct unit for memory.stat output */ +static int memcg_page_state_unit(int item) +{ + switch (item) { + case MEMCG_PERCPU_B: + case NR_SLAB_RECLAIMABLE_B: + case NR_SLAB_UNRECLAIMABLE_B: + case WORKINGSET_REFAULT_ANON: + case WORKINGSET_REFAULT_FILE: + case WORKINGSET_ACTIVATE_ANON: + case WORKINGSET_ACTIVATE_FILE: + case WORKINGSET_RESTORE_ANON: + case WORKINGSET_RESTORE_FILE: + case WORKINGSET_NODERECLAIM: + return 1; + case NR_KERNEL_STACK_KB: + return SZ_1K; + default: + return PAGE_SIZE; } +} - return 0; +static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg, + int item) +{ + return memcg_page_state(memcg, item) * memcg_page_state_unit(item); } -pure_initcall(memory_stats_init); static char *memory_stat_format(struct mem_cgroup *memcg) { @@ -1860,13 +1877,12 @@ static char *memory_stat_format(struct mem_cgroup *memcg) for (i = 0; i < ARRAY_SIZE(memory_stats); i++) { u64 size; - size = memcg_page_state(memcg, memory_stats[i].idx); - size *= memory_stats[i].ratio; + size = memcg_page_state_output(memcg, memory_stats[i].idx); seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size); if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) { - size = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + - memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B); + size += memcg_page_state_output(memcg, + NR_SLAB_RECLAIMABLE_B); seq_buf_printf(&s, "slab %llu\n", size); } } @@ -4899,10 +4915,6 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; nr = 
memcg_page_state_local(memcg, memcg1_stats[i]); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (memcg1_stats[i] == NR_ANON_THPS) - nr *= HPAGE_PMD_NR; -#endif seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); } @@ -4933,10 +4945,6 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account()) continue; nr = memcg_page_state(memcg, memcg1_stats[i]); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (memcg1_stats[i] == NR_ANON_THPS) - nr *= HPAGE_PMD_NR; -#endif seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], (u64)nr * PAGE_SIZE); } @@ -7286,7 +7294,7 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) return 1; } - pn->lruvec_stat_cpu = alloc_percpu_gfp(struct lruvec_stat, + pn->lruvec_stat_cpu = alloc_percpu_gfp(struct batched_lruvec_stat, GFP_KERNEL_ACCOUNT); if (!pn->lruvec_stat_cpu) { free_percpu(pn->lruvec_stat_local); @@ -7845,10 +7853,11 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); if (PageTransHuge(page)) { - __dec_lruvec_state(from_vec, NR_ANON_THPS); - __inc_lruvec_state(to_vec, NR_ANON_THPS); + __mod_lruvec_state(from_vec, NR_ANON_THPS, + -nr_pages); + __mod_lruvec_state(to_vec, NR_ANON_THPS, + nr_pages); } - } } else { __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages); @@ -8690,6 +8699,12 @@ static int memory_stat_show(struct seq_file *m, void *v) } #ifdef CONFIG_NUMA +static inline unsigned long lruvec_page_state_output(struct lruvec *lruvec, + int item) +{ + return lruvec_page_state(lruvec, item) * memcg_page_state_unit(item); +} + static int memory_numa_stat_show(struct seq_file *m, void *v) { int i; @@ -8707,8 +8722,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v) struct lruvec *lruvec; lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid)); - size = lruvec_page_state(lruvec, memory_stats[i].idx); - size *= 
memory_stats[i].ratio; + size = lruvec_page_state_output(lruvec, + memory_stats[i].idx); seq_printf(m, " N%d=%llu", nid, size); } seq_putc(m, '\n'); @@ -9563,6 +9578,14 @@ static int __init mem_cgroup_init(void) return -ENOMEM; #endif + /* + * An s32 (see struct batched_lruvec_stat) is used for the + * per-memcg-per-cpu caching of per-node statistics. For that to + * work, the overfill threshold must not exceed + * S32_MAX / PAGE_SIZE. + */ + BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE); + cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL, memcg_hotplug_cpu_dead); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e6e79e9dde40e093cdeafec9352495aa44817acd..16fff0a7f5b2bfbf79e63543a69f371943900ea5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5761,7 +5761,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), - global_zone_page_state(NR_PAGETABLE), + global_node_page_state(NR_PAGETABLE), global_zone_page_state(NR_BOUNCE), global_zone_page_state(NR_FREE_PAGES), free_pcp, @@ -5793,6 +5793,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) #ifdef CONFIG_SHADOW_CALL_STACK " shadow_call_stack:%lukB" #endif + " pagetables:%lukB" " all_unreclaimable? 
%s" "\n", pgdat->node_id, @@ -5808,16 +5809,16 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(node_page_state(pgdat, NR_WRITEBACK)), K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE - K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR), - K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) - * HPAGE_PMD_NR), - K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), + K(node_page_state(pgdat, NR_SHMEM_THPS)), + K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), + K(node_page_state(pgdat, NR_ANON_THPS)), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif + K(node_page_state(pgdat, NR_PAGETABLE)), pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? "yes" : "no"); } @@ -5849,7 +5850,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " present:%lukB" " managed:%lukB" " mlocked:%lukB" - " pagetables:%lukB" " bounce:%lukB" " free_pcp:%lukB" " local_pcp:%ukB" @@ -5870,7 +5870,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), - K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), K(this_cpu_read(zone->pageset->pcp.count)), diff --git a/mm/rmap.c b/mm/rmap.c index a5903687399daa9724f80d2f1dedf2f969097f0e..31ae8a8c7456ea5909528e9ec1bf26d4f392d85e 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1156,7 +1156,7 @@ void do_page_add_anon_rmap(struct page *page, * disabled. 
*/ if (compound) - __inc_lruvec_page_state(page, NR_ANON_THPS); + __mod_lruvec_page_state(page, NR_ANON_THPS, nr); __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr); } @@ -1198,7 +1198,7 @@ void page_add_new_anon_rmap(struct page *page, if (hpage_pincount_available(page)) atomic_set(compound_pincount_ptr(page), 0); - __inc_lruvec_page_state(page, NR_ANON_THPS); + __mod_lruvec_page_state(page, NR_ANON_THPS, nr); } else { /* Anon THP always mapped first with PMD */ VM_BUG_ON_PAGE(PageTransCompound(page), page); @@ -1223,16 +1223,20 @@ void page_add_file_rmap(struct page *page, bool compound) VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page); lock_page_memcg(page); if (compound && PageTransHuge(page)) { - for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { + int nr_pages = thp_nr_pages(page); + + for (i = 0, nr = 0; i < nr_pages; i++) { if (atomic_inc_and_test(&page[i]._mapcount)) nr++; } if (!atomic_inc_and_test(compound_mapcount_ptr(page))) goto out; if (PageSwapBacked(page)) - __inc_node_page_state(page, NR_SHMEM_PMDMAPPED); + __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED, + nr_pages); else - __inc_node_page_state(page, NR_FILE_PMDMAPPED); + __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED, + nr_pages); } else { if (PageTransCompound(page) && page_mapping(page) && !page_dup_slave(page)) { @@ -1265,16 +1269,20 @@ static void page_remove_file_rmap(struct page *page, bool compound) /* page still mapped by someone else? 
*/ if (compound && PageTransHuge(page)) { - for (i = 0, nr = 0; i < thp_nr_pages(page); i++) { + int nr_pages = thp_nr_pages(page); + + for (i = 0, nr = 0; i < nr_pages; i++) { if (atomic_add_negative(-1, &page[i]._mapcount)) nr++; } if (!atomic_add_negative(-1, compound_mapcount_ptr(page))) return; if (PageSwapBacked(page)) - __dec_node_page_state(page, NR_SHMEM_PMDMAPPED); + __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED, + -nr_pages); else - __dec_node_page_state(page, NR_FILE_PMDMAPPED); + __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED, + -nr_pages); } else { if (!atomic_add_negative(-1, &page->_mapcount)) return; @@ -1306,7 +1314,7 @@ static void page_remove_anon_compound_rmap(struct page *page) if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return; - __dec_lruvec_page_state(page, NR_ANON_THPS); + __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page)); if (TestClearPageDoubleMap(page)) { /* diff --git a/mm/shmem.c b/mm/shmem.c index 4622177acaeb20281ecb77d05735aceb7f4f60e8..bdf8c607e84c99e6960af348eebcd075ca43076b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -719,7 +719,7 @@ static int shmem_add_to_page_cache(struct page *page, } if (PageTransHuge(page)) { count_vm_event(THP_FILE_ALLOC); - __inc_lruvec_page_state(page, NR_SHMEM_THPS); + __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr); } mapping->nrpages += nr; __mod_lruvec_page_state(page, NR_FILE_PAGES, nr); diff --git a/mm/vmstat.c b/mm/vmstat.c index 0b6d3aa29ddf95378310f127383948eb552921ba..602e329e7e59de3822f4eee3176a9e3fc9bb2c77 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1157,7 +1157,6 @@ const char * const vmstat_text[] = { "nr_zone_unevictable", "nr_zone_write_pending", "nr_mlock", - "nr_page_table_pages", "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", @@ -1221,6 +1220,7 @@ const char * const vmstat_text[] = { #ifdef CONFIG_DUPTEXT "nr_duptext", #endif + "nr_page_table_pages", /* enum writeback_stat_item counters */ "nr_dirty_threshold", @@ -1659,8 +1659,12 @@ static void 
zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, if (is_zone_first_populated(pgdat, zone)) { seq_printf(m, "\n per-node stats"); for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + unsigned long pages = node_page_state_pages(pgdat, i); + + if (vmstat_item_print_in_thp(i)) + pages /= HPAGE_PMD_NR; seq_printf(m, "\n %-12s %lu", node_stat_name(i), - node_page_state_pages(pgdat, i)); + pages); } } seq_printf(m, @@ -1788,8 +1792,11 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) v += NR_VM_NUMA_STAT_ITEMS; #endif - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { v[i] = global_node_page_state_pages(i); + if (vmstat_item_print_in_thp(i)) + v[i] /= HPAGE_PMD_NR; + } v += NR_VM_NODE_STAT_ITEMS; global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,