From ac589cc553455bb1dae85f3dab7c225236e5880c Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Wed, 24 Jan 2024 09:39:20 +0800 Subject: [PATCH 1/3] mm: memcg: add THP swap out info for anonymous reclaim mainline inclusion from mainline-v6.7-rc1 commit 811244a501b967b00fecb1ae906d5dc6329c91e0 category: other bugzilla: https://gitee.com/openeuler/kernel/issues/I8YBMI Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=811244a501b967b00fecb1ae906d5dc6329c91e0 -------------------------------- At present, we support per-memcg reclaim strategy, however we do not know the number of transparent huge pages being reclaimed, as we know the transparent huge pages need to be splited before reclaim them, and they will bring some performance bottleneck effect. for example, when two memcg (A & B) are doing reclaim for anonymous pages at same time, and 'A' memcg is reclaiming a large number of transparent huge pages, we can better analyze that the performance bottleneck will be caused by 'A' memcg. therefore, in order to better analyze such problems, there add THP swap out info for per-memcg. [akpm@linux-foundation.orgL fix swap_writepage_fs(), per Johannes] Link: https://lkml.kernel.org/r/20230913213343.GB48476@cmpxchg.org Link: https://lkml.kernel.org/r/20230913164938.16918-1-vernhao@tencent.com Signed-off-by: Xin Hao Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Cc: Michal Hocko Cc: Roman Gushchin Cc: Shakeel Butt Cc: Muchun Song Signed-off-by: Andrew Morton Signed-off-by: ZhangPeng --- Documentation/admin-guide/cgroup-v2.rst | 9 +++++++++ mm/memcontrol.c | 2 ++ mm/page_io.c | 8 ++++---- mm/vmscan.c | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 84cbbeaf0d78c..e998aa071cedf 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1534,6 +1534,15 @@ PAGE_SIZE multiple when read back. collapsing an existing range of pages. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE is not set. + thp_swpout (npn) + Number of transparent hugepages which are swapout in one piece + without splitting. + + thp_swpout_fallback (npn) + Number of transparent hugepages which were split before swapout. + Usually because failed to allocate some continuous swap space + for the huge page. + memory.numa_stat A read-only nested-keyed file which exists on non-root cgroups. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8327cef9d53a9..346be82922947 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -731,6 +731,8 @@ static const unsigned int memcg_vm_event_stat[] = { #ifdef CONFIG_TRANSPARENT_HUGEPAGE THP_FAULT_ALLOC, THP_COLLAPSE_ALLOC, + THP_SWPOUT, + THP_SWPOUT_FALLBACK, #endif }; diff --git a/mm/page_io.c b/mm/page_io.c index 8fa62c6ba517c..af65db0be7310 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -208,8 +208,10 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) static inline void count_swpout_vm_event(struct folio *folio) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (unlikely(folio_test_pmd_mappable(folio))) + if (unlikely(folio_test_pmd_mappable(folio))) { + count_memcg_folio_events(folio, THP_SWPOUT, 1); count_vm_event(THP_SWPOUT); + } #endif count_vm_events(PSWPOUT, folio_nr_pages(folio)); } @@ -278,9 +280,6 @@ static void sio_write_complete(struct kiocb *iocb, long ret) set_page_dirty(page); ClearPageReclaim(page); } - } else { - for (p = 0; p < sio->pages; p++) - count_swpout_vm_event(page_folio(sio->bvec[p].bv_page)); } for (p = 0; p < sio->pages; p++) @@ -296,6 +295,7 @@ static void swap_writepage_fs(struct page *page, struct writeback_control *wbc) struct file *swap_file = sis->swap_file; loff_t pos = page_file_offset(page); + count_swpout_vm_event(page_folio(page)); set_page_writeback(page); unlock_page(page); if (wbc->swap_plug) diff --git a/mm/vmscan.c b/mm/vmscan.c index b36d839e972ac..06d41011958c9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1919,6 +1919,7 @@ static unsigned int shrink_folio_list(struct list_head *folio_list, folio_list)) goto activate_locked; #ifdef CONFIG_TRANSPARENT_HUGEPAGE + count_memcg_folio_events(folio, THP_SWPOUT_FALLBACK, 1); count_vm_event(THP_SWPOUT_FALLBACK); #endif if (!add_to_swap(folio)) -- Gitee From b168928248d2d2d0bc0c5a5fd93a377600073d2d Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 24 Jan 2024 09:39:21 +0800 Subject: [PATCH 2/3] mm/vmstat: move pgdemote_* to per-node stats mainline inclusion from mainline-v6.8-rc1 commit 23e9f0138963ceef2a252d887534923a0502b2da category: other bugzilla: https://gitee.com/openeuler/kernel/issues/I8YBMI Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=23e9f0138963ceef2a252d887534923a0502b2da -------------------------------- Demotion will migrate pages across nodes. Previously, only the global demotion statistics were accounted for. Changed them to per-node statistics, making it easier to observe where demotion occurs on each node. This will help to identify which nodes are under pressure. This patch also make pgdemote_* behind CONFIG_NUMA_BALANCING, since demotion is not available for !CONFIG_NUMA_BALANCING With this patch, here is a sample where node0 node1 are DRAM, node3 is PMEM: Global stats: $ grep demote /proc/vmstat pgdemote_kswapd 254288 pgdemote_direct 113497 pgdemote_khugepaged 0 Per-node stats: $ grep demote /sys/devices/system/node/node0/vmstat # demotion source pgdemote_kswapd 68454 pgdemote_direct 83431 pgdemote_khugepaged 0 $ grep demote /sys/devices/system/node/node1/vmstat # demotion source pgdemote_kswapd 185834 pgdemote_direct 30066 pgdemote_khugepaged 0 $ grep demote /sys/devices/system/node/node3/vmstat # demotion target pgdemote_kswapd 0 pgdemote_direct 0 pgdemote_khugepaged 0 Link: https://lkml.kernel.org/r/20231103031450.1456523-1-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Acked-by: "Huang, Ying" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: ZhangPeng --- include/linux/mmzone.h | 4 ++++ include/linux/vm_event_item.h | 3 --- mm/vmscan.c | 12 ++++++++---- mm/vmstat.c | 6 +++--- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d055148f47add..cefb93d7d2283 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -206,6 +206,10 @@ enum node_stat_item { #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ + /* PGDEMOTE_*: pages demoted */ + PGDEMOTE_KSWAPD, + PGDEMOTE_DIRECT, + PGDEMOTE_KHUGEPAGED, #endif NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 8abfa12400400..d1b847502f09c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -41,9 +41,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSTEAL_KHUGEPAGED, - PGDEMOTE_KSWAPD, - PGDEMOTE_DIRECT, - PGDEMOTE_KHUGEPAGED, PGSCAN_KSWAPD, PGSCAN_DIRECT, PGSCAN_KHUGEPAGED, diff --git a/mm/vmscan.c b/mm/vmscan.c index 06d41011958c9..f85742cfff072 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1114,12 +1114,14 @@ void drop_slab(void) static int reclaimer_offset(void) { +#ifdef CONFIG_NUMA_BALANCING BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD); - BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != - PGSCAN_DIRECT - PGSCAN_KSWAPD); BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD); +#endif + BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != + PGSCAN_DIRECT - PGSCAN_KSWAPD); BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD); @@ -1681,8 +1683,10 @@ static unsigned int demote_folio_list(struct list_head *demote_folios, migrate_pages(demote_folios, alloc_demote_folio, NULL, (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION, &nr_succeeded); - - __count_vm_events(PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded); +#ifdef CONFIG_NUMA_BALANCING + mod_node_page_state(pgdat, PGDEMOTE_KSWAPD + reclaimer_offset(), + nr_succeeded); +#endif return nr_succeeded; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 2f716ad141682..0c56a3db0957d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1244,6 +1244,9 @@ const char * const vmstat_text[] = { #ifdef CONFIG_NUMA_BALANCING "pgpromote_success", "pgpromote_candidate", + "pgdemote_kswapd", + "pgdemote_direct", + "pgdemote_khugepaged", #endif /* enum writeback_stat_item counters */ @@ -1275,9 +1278,6 @@ const char * const vmstat_text[] = { "pgsteal_kswapd", "pgsteal_direct", "pgsteal_khugepaged", - "pgdemote_kswapd", - "pgdemote_direct", - "pgdemote_khugepaged", "pgscan_kswapd", "pgscan_direct", "pgscan_khugepaged", -- Gitee From f6032e524808ebe531f74ef5a9d80d820e54afcb Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 24 Jan 2024 09:39:22 +0800 Subject: [PATCH 3/3] mm/vmstat: move pgdemote_* out of CONFIG_NUMA_BALANCING mainline inclusion from mainline-v6.8-rc1 commit b805ab3c6935d14654ccc28f16ffce7a13c2c528 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8YBMI Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b805ab3c6935d14654ccc28f16ffce7a13c2c528 -------------------------------- Demotion can work well without CONFIG_NUMA_BALANCING. But the commit 23e9f0138963 ("mm/vmstat: move pgdemote_* to per-node stats") wrongly hid it behind CONFIG_NUMA_BALANCING. Fix it by moving them out of CONFIG_NUMA_BALANCING. Link: https://lkml.kernel.org/r/20231229022651.3229174-1-lizhijian@fujitsu.com Fixes: 23e9f0138963 ("mm/vmstat: move pgdemote_* to per-node stats") Signed-off-by: Li Zhijian Cc: "Huang, Ying" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: ZhangPeng --- include/linux/mmzone.h | 2 +- mm/vmscan.c | 5 +---- mm/vmstat.c | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index cefb93d7d2283..14e48764dcfb0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -206,11 +206,11 @@ enum node_stat_item { #ifdef CONFIG_NUMA_BALANCING PGPROMOTE_SUCCESS, /* promote successfully */ PGPROMOTE_CANDIDATE, /* candidate pages to promote */ +#endif /* PGDEMOTE_*: pages demoted */ PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, -#endif NR_VM_NODE_STAT_ITEMS }; diff --git a/mm/vmscan.c b/mm/vmscan.c index f85742cfff072..3b670b1d2b611 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1114,12 +1114,10 @@ void drop_slab(void) static int reclaimer_offset(void) { -#ifdef CONFIG_NUMA_BALANCING BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD); BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD); -#endif BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD != PGSCAN_DIRECT - PGSCAN_KSWAPD); BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD != @@ -1683,10 +1681,9 @@ static unsigned int demote_folio_list(struct list_head *demote_folios, migrate_pages(demote_folios, alloc_demote_folio, NULL, (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION, &nr_succeeded); -#ifdef CONFIG_NUMA_BALANCING + mod_node_page_state(pgdat, PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded); -#endif return nr_succeeded; } diff --git a/mm/vmstat.c b/mm/vmstat.c index 0c56a3db0957d..f1770efc8be05 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1244,10 +1244,10 @@ const char * const vmstat_text[] = { #ifdef CONFIG_NUMA_BALANCING "pgpromote_success", "pgpromote_candidate", +#endif "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", -#endif /* enum writeback_stat_item counters */ "nr_dirty_threshold", -- Gitee