diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 2bfb380e8380a8e4d3a3efd3e13a8f7c07c51cfb..1038ff8e184d0c4bc90293631e1011f94bea08d4 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -160,6 +160,15 @@ library) may want to know the size (in bytes) of a transparent hugepage::
 
 	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
 
+If CONFIG_THP_NUMA_CONTROL is enabled, users can control THP migration
+during NUMA balancing. The default value 0 keeps the existing behavior,
+writing 1 disables THP migration while tasks still have a chance to
+migrate, and writing 2 excludes THP from NUMA balancing entirely::
+
+	echo 0 > /sys/kernel/mm/transparent_hugepage/numa_control
+	echo 1 > /sys/kernel/mm/transparent_hugepage/numa_control
+	echo 2 > /sys/kernel/mm/transparent_hugepage/numa_control
+
 khugepaged will be automatically started when
 transparent_hugepage/enabled is set to "always" or "madvise, and
 it'll be automatically shutdown if it's set to "never".
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cae54a9bf65df9dcd099bfac63777e2b4b6eadc1..bde9ec4af773cb4c9be8211cb840ff4bec66af6d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -216,6 +216,7 @@ config ARM64
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select HAVE_LIVEPATCH_WO_FTRACE
+	select THP_NUMA_CONTROL if ARM64_64K_PAGES
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index efb370e79ac3a7a215d5722e93bbbc35ce08d7eb..c2bf15d2d9692b1eef7c66eb666878a9081af3a0 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -498,6 +498,30 @@ static inline unsigned long thp_size(struct page *page)
 	return PAGE_SIZE << thp_order(page);
 }
 
+#ifdef CONFIG_THP_NUMA_CONTROL
+#define THP_DISABLE_NUMA_MIGRATE 1
+#define THP_DISABLE_AUTONUMA 2
+extern unsigned long thp_numa_control;
+static inline bool thp_numa_migrate_disabled(void)
+{
+	return thp_numa_control == THP_DISABLE_NUMA_MIGRATE;
+}
+
+static inline bool thp_autonuma_disabled(void)
+{
+	return thp_numa_control == THP_DISABLE_AUTONUMA;
+}
+#else
+static inline bool thp_numa_migrate_disabled(void)
+{
+	return false;
+}
+
+static inline bool thp_autonuma_disabled(void)
+{
+	return false;
+}
+#endif
 /*
  * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
  * limitations in the implementation like arm64 MTE can override this to
diff --git a/mm/Kconfig b/mm/Kconfig
index ccbad233f2b1e73726b6571e6affdd85c0058a01..cc43f5124cb389e1687a4c2f4a2083617c2c1926 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1038,6 +1038,16 @@ config NUMABALANCING_MEM_SAMPLING
 
 	  if unsure, say N to disable the NUMABALANCING_MEM_SAMPLING.
 
+config THP_NUMA_CONTROL
+	bool "Control THP migration during NUMA balancing"
+	depends on NUMA_BALANCING && TRANSPARENT_HUGEPAGE
+	default n
+	help
+	  Sometimes migrating THP is not beneficial, for example, when the
+	  64K page size is used on ARM64, a THP is 512M and migrating it is
+	  expensive. This feature adds a switch to control the behavior of
+	  THP migration during NUMA balancing.
+
 source "mm/damon/Kconfig"
 
 endmenu
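Note: to exercise the new knob from userspace, a minimal test sketch could
look like the following. Only the sysfs path documented above is taken from
this patch; the helper name set_thp_numa_control() and the program around it
are illustrative, not part of the kernel change::

	/* Hypothetical test helper, not part of this patch. */
	#include <stdio.h>

	#define THP_NUMA_CONTROL_PATH \
		"/sys/kernel/mm/transparent_hugepage/numa_control"

	/* mode: 0 = default, 1 = no THP migration, 2 = no THP autonuma */
	static int set_thp_numa_control(unsigned long mode)
	{
		FILE *f = fopen(THP_NUMA_CONTROL_PATH, "w");

		if (!f)
			return -1; /* kernel without CONFIG_THP_NUMA_CONTROL? */
		if (fprintf(f, "%lu\n", mode) < 0) {
			fclose(f);
			return -1;
		}
		/* store rejects values > 2 with -EINVAL; with stdio
		 * buffering that error surfaces here at fclose(). */
		return fclose(f);
	}

	int main(void)
	{
		/* Skip THP entirely during NUMA balancing (mode 2). */
		return set_thp_numa_control(2) ? 1 : 0;
	}
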
+ source "mm/damon/Kconfig" endmenu diff --git a/mm/huge_memory.c b/mm/huge_memory.c index eb293d17a1049f37c7286e93e822aec5911a531a..332f712906e171c345fa8679761e4df368f39e82 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -316,6 +316,36 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj, static struct kobj_attribute hpage_pmd_size_attr = __ATTR_RO(hpage_pmd_size); +#ifdef CONFIG_THP_NUMA_CONTROL +unsigned long thp_numa_control; + +static ssize_t numa_control_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", thp_numa_control); +} + +static ssize_t numa_control_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned long value; + int ret; + + ret = kstrtoul(buf, 10, &value); + if (ret < 0) + return ret; + if (value > THP_DISABLE_AUTONUMA) + return -EINVAL; + + thp_numa_control = value; + + return count; +} + +static struct kobj_attribute numa_control_attr = + __ATTR(numa_control, 0644, numa_control_show, numa_control_store); +#endif + static struct attribute *hugepage_attr[] = { &enabled_attr.attr, &defrag_attr.attr, @@ -323,6 +353,9 @@ static struct attribute *hugepage_attr[] = { &hpage_pmd_size_attr.attr, #ifdef CONFIG_SHMEM &shmem_enabled_attr.attr, +#endif +#ifdef CONFIG_THP_NUMA_CONTROL + &numa_control_attr.attr, #endif NULL, }; @@ -1743,6 +1776,9 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (prot_numa && !thp_migration_supported()) return 1; + if (prot_numa && thp_autonuma_disabled()) + return 1; + ptl = __pmd_trans_huge_lock(pmd, vma); if (!ptl) return 0; diff --git a/mm/mem_sampling.c b/mm/mem_sampling.c index 1d8a831be531d473317db6fc1c05b494bdd541d6..ffc0e4964cadec0461d7bdf6fbb08f60198b2005 100644 --- a/mm/mem_sampling.c +++ b/mm/mem_sampling.c @@ -145,6 +145,9 @@ static inline void do_thp_numa_access(struct mm_struct *mm, pmd_t *pmd, pmde; spinlock_t *ptl; + if (thp_autonuma_disabled()) + return; + pgd = pgd_offset(mm, vaddr); if (!pgd_present(*pgd)) return; diff --git a/mm/migrate.c b/mm/migrate.c index 3f5b217d5af14b6f9af61af9b8608b2d0c11637e..faaa7b790da01c4249e4a26b176019751f47035c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2157,6 +2157,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, */ compound = PageTransHuge(page); + if (compound && thp_numa_migrate_disabled()) + return 0; + if (compound) new = alloc_misplaced_dst_page_thp; else