diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 936da10c526055c29ff7affd91ce5f83a8c28873..e52cd57bb5127dcc655251c85657c44c1990ace3 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -203,11 +203,22 @@ PMD-mappable transparent hugepage::
 	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
 
 The kernel tries to use huge, PMD-mappable page on read page fault for
-file exec mapping if CONFIG_READ_ONLY_THP_FOR_FS enabled. It's possible
-to enabled the feature by writing 1 or disablt by writing 0::
+file exec mappings if CONFIG_READ_ONLY_THP_FOR_FS is enabled, or a non-PMD
+sized page (e.g. 64K on arm64) otherwise. BIT0 controls PMD-sized THP and
+BIT1 controls mTHP; enable or disable each by setting the corresponding bit::
 
-	echo 0x0 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
 	echo 0x1 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
+	echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
+	echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
+
+The kernel can also align mappings to a larger size other than the THP
+size, e.g. 64K on arm64. BIT0 controls file mappings and BIT1 controls
+anonymous mappings. This is disabled by default and can be enabled by
+writing the corresponding bit to 1::
+
+	echo 0x1 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
+	echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
+	echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
 
 khugepaged will be automatically started when one or more hugepage
 sizes are enabled (either by directly setting "always" or "madvise",
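As a quick illustration of the two knobs documented above, here is a minimal userspace sketch (not part of the patch) that reads them and then sets both bits of each; it assumes a kernel with this series applied, the sysfs paths shown above, and root privileges:

/*
 * Illustrative only: poke the thp_exec_enabled and thp_mapping_align knobs.
 * Paths follow the documentation above; requires root and a patched kernel.
 */
#include <stdio.h>

static int write_knob(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	if (fputs(val, f) == EOF)
		perror(path);
	return fclose(f);
}

static void show_knob(const char *path)
{
	char buf[32] = "";
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s = %s", path, buf);
	fclose(f);
}

int main(void)
{
	const char *exec = "/sys/kernel/mm/transparent_hugepage/thp_exec_enabled";
	const char *align = "/sys/kernel/mm/transparent_hugepage/thp_mapping_align";

	show_knob(exec);
	show_knob(align);
	/* BIT0 | BIT1 on both knobs, as documented above */
	write_knob(exec, "0x3");
	write_knob(align, "0x3");
	show_knob(exec);
	show_knob(align);
	return 0;
}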
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 07948fe59b9d98fb7051818178d583c2d17d3fca..8d68d00de0a4dc903f74e10ee373a686951cd98c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1147,6 +1147,18 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
  */
 #define arch_wants_old_prefaulted_pte	cpu_has_hw_af
 
+/*
+ * Request exec memory is read into pagecache in at least 64K folios. The
+ * trade-off here is performance improvement due to storing translations more
+ * efficiently in the iTLB vs the potential for read amplification due to
+ * reading data from disk that won't be used. The latter is independent of base
+ * page size, so we set a page-size independent block size of 64K. This size can
+ * be contpte-mapped when 4K base pages are in use (16 pages into 1 iTLB entry),
+ * and HPA can coalesce it (4 pages into 1 TLB entry) when 16K base pages are in
+ * use.
+ */
+#define arch_wants_exec_folio_order() ilog2(SZ_64K >> PAGE_SHIFT)
+
 static inline bool pud_sect_supported(void)
 {
 	return PAGE_SIZE == SZ_4K;
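The macro above reduces to ilog2(SZ_64K >> PAGE_SHIFT). A standalone sketch (userspace, with ilog2() mocked) of what that evaluates to for the base page sizes mentioned in the comment:

/*
 * Standalone sketch, not kernel code: the folio order requested for exec
 * memory at different base page sizes. ilog2() is a local stand-in.
 */
#include <stdio.h>

#define SZ_64K (64 * 1024UL)

static int ilog2(unsigned long v)	/* stand-in for the kernel helper */
{
	int r = -1;

	while (v) {
		v >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long page_sizes[] = { 4096, 16384, 65536 };

	for (int i = 0; i < 3; i++) {
		unsigned long psz = page_sizes[i];
		int order = ilog2(SZ_64K / psz);

		/* order 4 -> 16 x 4K pages, order 2 -> 4 x 16K pages, order 0 -> 1 x 64K page */
		printf("PAGE_SIZE %3luK: order %d (%lu pages per 64K folio)\n",
		       psz / 1024, order, 1UL << order);
	}
	return 0;
}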
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 67b0e2212ca0bf736b8f7b65a7d4efef28e5a18e..27d4eff7994106e4a03dfc92e7dd4d908a47f9d9 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -314,7 +314,8 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
 	if (ret <= 0)
 		goto out;
 
-	if (ext4_test_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP))
+	if (ext4_test_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP) &&
+	    iov_iter_count(from) > PAGE_SIZE)
 		ret = ext4_iomap_buffered_write(iocb, from);
 	else
 		ret = generic_perform_write(iocb, from);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 93a9dd03cb5c9c9e22e0ea5de944656c85742ef6..2c0e61f531f1cadc2c64142b3d576670afc02696 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3955,6 +3955,128 @@ static int ext4_iomap_writepages(struct address_space *mapping,
 	return ret;
 }
 
+static int ext4_iomap_write_begin(struct file *file,
+				  struct address_space *mapping, loff_t pos,
+				  unsigned len, struct page **pagep,
+				  void **fsdata)
+{
+	struct inode *inode = mapping->host;
+	struct iomap_iter iter = {
+		.inode = inode,
+		.flags = IOMAP_WRITE,
+	};
+	int ret = 0, retries = 0;
+	struct folio *folio;
+	bool delalloc;
+
+	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
+		return -EIO;
+
+	trace_ext4_iomap_write_begin(inode, pos, len);
+
+	delalloc = test_opt(inode->i_sb, DELALLOC) &&
+		   !ext4_nonda_switch(inode->i_sb);
+	*fsdata = delalloc ? (void *)0 : (void *)FALL_BACK_TO_NONDELALLOC;
+
+retry:
+	iter.pos = pos;
+	iter.len = len;
+
+	folio = iomap_get_folio(&iter, pos, len);
+	if (IS_ERR(folio))
+		return PTR_ERR(folio);
+
+	WARN_ON_ONCE(pos + len > folio_pos(folio) + folio_size(folio));
+
+	if (folio_test_dirty(folio) && (i_blocks_per_folio(inode, folio) == 1))
+		goto out;
+
+	do {
+		int length;
+
+		ret = __ext4_iomap_buffered_io_begin(inode, iter.pos, iter.len,
+				iter.flags, &iter.iomap, NULL, delalloc);
+		if (ret)
+			goto out;
+
+		WARN_ON_ONCE(iter.iomap.offset > iter.pos);
+		WARN_ON_ONCE(iter.iomap.length == 0);
+		WARN_ON_ONCE(iter.iomap.offset + iter.iomap.length <= iter.pos);
+
+		length = iomap_length(&iter);
+		ret = __iomap_write_begin(&iter, iter.pos, length, folio);
+		if (ret)
+			goto out;
+
+		iter.pos += length;
+		iter.len -= length;
+	} while (iter.len);
+
+out:
+	if (ret < 0) {
+		folio_unlock(folio);
+		folio_put(folio);
+
+		/*
+		 * __ext4_iomap_buffered_io_begin() may have instantiated
+		 * a few blocks outside i_size. Trim these off again. Don't
+		 * need i_size_read because we hold inode lock.
+		 */
+		if (pos + len > inode->i_size)
+			ext4_truncate_failed_write(inode);
+
+		if (ret == -ENOSPC &&
+		    ext4_should_retry_alloc(inode->i_sb, &retries))
+			goto retry;
+
+		/* The folio was released above, don't touch it on failure. */
+		return ret;
+	}
+
+	*pagep = folio_file_page(folio, pos >> PAGE_SHIFT);
+	return ret;
+}
+
+static int ext4_iomap_write_end(struct file *file,
+				struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	int write_mode = (int)(unsigned long)fsdata;
+	struct folio *folio = page_folio(page);
+	loff_t old_size = inode->i_size;
+	size_t written;
+
+	trace_ext4_iomap_write_end(inode, pos, len, copied);
+
+	written = __iomap_write_end(inode, pos, len, copied, folio) ?
+		  copied : 0;
+
+	/*
+	 * Update the in-memory inode size after copying the data into
+	 * the page cache. It's important to update i_size while still
+	 * holding folio lock, because folio writeout could otherwise
+	 * come in and zero beyond i_size.
+	 */
+	if (pos + written > old_size)
+		i_size_write(inode, pos + written);
+
+	folio_unlock(folio);
+	folio_put(folio);
+
+	if (old_size < pos)
+		pagecache_isize_extended(inode, old_size, pos);
+
+	/*
+	 * For delalloc, if we have pre-allocated more blocks and copied
+	 * less, we will have delalloc extents allocated outside i_size,
+	 * so drop the unused pre-allocated blocks to prevent the
+	 * writeback path from allocating blocks for them.
+	 */
+	if (unlikely(!written) && write_mode != FALL_BACK_TO_NONDELALLOC)
+		ext4_truncate_failed_write(inode);
+
+	return written;
+}
+
 /*
  * For data=journal mode, folio should be marked dirty only when it was
  * writeably mapped. When that happens, it was already attached to the
@@ -4048,6 +4170,8 @@ static const struct address_space_operations ext4_iomap_aops = {
 	.read_folio = ext4_iomap_read_folio,
 	.readahead = ext4_iomap_readahead,
 	.writepages = ext4_iomap_writepages,
+	.write_begin = ext4_iomap_write_begin,
+	.write_end = ext4_iomap_write_end,
 	.dirty_folio = iomap_dirty_folio,
 	.bmap = ext4_bmap,
 	.invalidate_folio = iomap_invalidate_folio,
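A rough userspace exercise (not part of the patch) for the two buffered-write paths touched above: a write of at most PAGE_SIZE goes through generic_perform_write() and therefore the new ->write_begin/->write_end, while a larger write takes ext4_iomap_buffered_write(). The file path below is a placeholder and must sit on an ext4 mount with the buffered iomap path enabled:

/*
 * Exercise both buffered write paths and verify the data round-trips.
 * "/mnt/ext4/iomap-write-test" is a placeholder path.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/mnt/ext4/iomap-write-test";	/* placeholder */
	char small[1024], large[64 * 1024], check[sizeof(large)];
	int fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(small, 'a', sizeof(small));
	memset(large, 'b', sizeof(large));

	/* <= PAGE_SIZE: lands in ext4_iomap_write_begin/_end */
	if (pwrite(fd, small, sizeof(small), 0) != (ssize_t)sizeof(small))
		perror("small write");
	/* > PAGE_SIZE: takes the iomap buffered write path */
	if (pwrite(fd, large, sizeof(large), sizeof(small)) != (ssize_t)sizeof(large))
		perror("large write");

	fsync(fd);

	if (pread(fd, check, sizeof(large), sizeof(small)) != (ssize_t)sizeof(large) ||
	    memcmp(check, large, sizeof(large)))
		fprintf(stderr, "data mismatch\n");
	else
		printf("both write paths verified\n");

	close(fd);
	return 0;
}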
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 292143ce354c3ef10e7088d562cebac81a527eb5..0ef4b804e18eff9ef5677d4d9418884f07ff46a1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1738,7 +1738,8 @@ enum {
 	Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard,
 	Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb,
 	Opt_nojournal_checksum, Opt_nombcache,
-	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, Opt_buffered_iomap,
+	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
+	Opt_buffered_iomap, Opt_nobuffered_iomap,
 	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
 #ifdef CONFIG_EXT4_DEBUG
 	Opt_fc_debug_max_replay, Opt_fc_debug_force
@@ -1882,6 +1883,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
 					Opt_no_prefetch_block_bitmaps),
 	fsparam_s32	("mb_optimize_scan",	Opt_mb_optimize_scan),
 	fsparam_flag	("buffered_iomap",	Opt_buffered_iomap),
+	fsparam_flag	("nobuffered_iomap",	Opt_nobuffered_iomap),
 	fsparam_string	("check",		Opt_removed),	/* mount option from ext2/3 */
 	fsparam_flag	("nocheck",		Opt_removed),	/* mount option from ext2/3 */
 	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
@@ -1978,6 +1980,8 @@ static const struct mount_opts {
 	 MOPT_SET},
 	{Opt_buffered_iomap, EXT4_MOUNT2_BUFFERED_IOMAP,
 	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
+	{Opt_nobuffered_iomap, EXT4_MOUNT2_BUFFERED_IOMAP,
+	 MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
 #ifdef CONFIG_EXT4_DEBUG
 	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
 	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
@@ -2464,11 +2468,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			return -EINVAL;
 		}
 		return 0;
-	case Opt_buffered_iomap:
-		ext4_msg(NULL, KERN_WARNING,
-			 "iomap for buffered enabled. Warning: EXPERIMENTAL, use at your own risk");
-		ctx_set_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP);
-		return 0;
 	}
 
 	/*
@@ -2908,12 +2907,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
 		    !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
 			goto fail_dax_change_remount;
 		}
-
-		if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP) &&
-		    !test_opt2(sb, BUFFERED_IOMAP)) {
-			ext4_msg(NULL, KERN_ERR, "can't enable iomap for buffered IO on remount");
-			return -EINVAL;
-		}
 	}
 
 	return ext4_check_quota_consistency(fc, sb);
@@ -4481,6 +4474,10 @@ static void ext4_set_def_opts(struct super_block *sb,
 
 	if (sb->s_blocksize == PAGE_SIZE)
 		set_opt(sb, DIOREAD_NOLOCK);
+
+	/* Use iomap for the buffered IO path on 4k page size */
+	if (PAGE_SIZE == SZ_4K)
+		set_opt2(sb, BUFFERED_IOMAP);
 }
 
 static int ext4_handle_clustersize(struct super_block *sb)
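Since BUFFERED_IOMAP now defaults to on for 4K page size, the new "nobuffered_iomap" option is the way to opt out at mount time. A hedged sketch using mount(2); the device and mount point below are placeholders:

/*
 * Mount an ext4 filesystem with the iomap buffered IO path disabled.
 * "/dev/vdb1" and "/mnt/ext4" are placeholders.
 */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/vdb1", "/mnt/ext4", "ext4", 0, "nobuffered_iomap")) {
		perror("mount");
		return 1;
	}
	printf("mounted with nobuffered_iomap\n");
	return 0;
}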
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index fee0bb9b5d7583c6118986d732a2cac496db1f3a..dbd56c36ce2b9e2c57fecefa81212b89ea9195ca 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -665,7 +665,7 @@ static int iomap_read_folio_sync(loff_t block_start, struct folio *folio,
 	return submit_bio_wait(&bio);
 }
 
-static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 		size_t len, struct folio *folio)
 {
 	const struct iomap *srcmap = iomap_iter_srcmap(iter);
@@ -727,6 +727,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__iomap_write_begin);
 
 static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
 		size_t len)
@@ -825,7 +826,7 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 	return status;
 }
 
-static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
+bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 		size_t copied, struct folio *folio)
 {
 	flush_dcache_folio(folio);
@@ -848,6 +849,7 @@ static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
 	filemap_dirty_folio(inode->i_mapping, folio);
 	return true;
 }
+EXPORT_SYMBOL_GPL(__iomap_write_end);
 
 static void iomap_write_end_inline(const struct iomap_iter *iter,
 		struct folio *folio, loff_t pos, size_t copied)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index abf2340a2d18a646fa37c14c86d63a9fb74b1d71..8fdf17e80359cfa7328e6e214bf1b91f18b3c118 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -51,6 +51,9 @@ enum transparent_hugepage_flag {
 	TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
 	TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
 	TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG,
+	TRANSPARENT_HUGEPAGE_FILE_EXEC_MTHP_FLAG,
+	TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG,
+	TRANSPARENT_HUGEPAGE_ANON_MAPPING_ALIGN_FLAG,
 };
 
 struct kobject;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 6fc1c858013d1e4dda4ed38fa4083acf25d16d36..daf0a86ba37789d80cdd1d9d68a4553320fb81fb 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -258,6 +258,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+		size_t len, struct folio *folio);
+bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
+		size_t copied, struct folio *folio);
 int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 		struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
 		int (*punch)(struct inode *inode, loff_t pos, loff_t length));
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index ecc561d49d5b000219609454c1f969df9ceaec24..a0fafb8e7005a18b7dcec6d6a9b4d1e861d8ec0d 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -435,6 +435,18 @@ static inline bool arch_has_hw_pte_young(void)
 }
 #endif
 
+#ifndef arch_wants_exec_folio_order
+/*
+ * Returns preferred minimum folio order for executable file-backed memory. Must
+ * be in range [0, PMD_ORDER]. Negative value implies that the HW has no
+ * preference and mm will not special-case executable memory in the pagecache.
+ */
+static inline int arch_wants_exec_folio_order(void)
+{
+	return -1;
+}
+#endif
+
 #ifndef arch_check_zapped_pte
 static inline void arch_check_zapped_pte(struct vm_area_struct *vma,
 					 pte_t pte)
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 588991b57c127e804f1fecfba3ea861777eeebc2..d500568daeb1ecb9251d702cba22f284276f3a5e 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -389,6 +389,13 @@ DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin,
 	TP_ARGS(inode, pos, len)
 );
 
+DEFINE_EVENT(ext4__write_begin, ext4_iomap_write_begin,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len),
+
+	TP_ARGS(inode, pos, len)
+);
+
 DECLARE_EVENT_CLASS(ext4__write_end,
 	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
 			unsigned int copied),
@@ -441,6 +448,14 @@ DEFINE_EVENT(ext4__write_end, ext4_da_write_end,
 	TP_ARGS(inode, pos, len, copied)
 );
 
+DEFINE_EVENT(ext4__write_end, ext4_iomap_write_end,
+
+	TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,
+		 unsigned int copied),
+
+	TP_ARGS(inode, pos, len, copied)
+);
+
 TRACE_EVENT(ext4_writepages,
 	TP_PROTO(struct inode *inode, struct writeback_control *wbc),
diff --git a/mm/filemap.c b/mm/filemap.c
index a274d2c5e232f97671a78af6135737675546e33b..d3c813429bf212487d30f74f62729fe192784b6b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include "internal.h"
@@ -3141,6 +3142,10 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
 	(transparent_hugepage_flags & \
 	 (1<= 0) {
+		fpin = maybe_unlock_mmap_for_io(vmf, fpin);
+		ra->size = 1UL << order;
+		ra->async_size = 0;
+		ractl._index &= ~((unsigned long)ra->size - 1);
+		page_cache_ra_order(&ractl, ra, order);
+		return fpin;
+		}
+	}
 #endif
 
 	/* If we don't want any read-ahead, don't bother */
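Part of the mm/filemap.c hunk above was lost to formatting, but the surviving lines size the readahead window to 1 << order pages and round the start index down to that boundary. A standalone illustration of that arithmetic, with an arbitrary example index:

/*
 * Standalone illustration of the surviving readahead arithmetic above:
 * readahead size is 1 << order pages, start index rounded down to it.
 */
#include <stdio.h>

int main(void)
{
	int order = 4;				/* 64K worth of 4K pages */
	unsigned long ra_size = 1UL << order;
	unsigned long fault_index = 0x1234;	/* arbitrary faulting page index */
	unsigned long start = fault_index & ~(ra_size - 1);

	printf("fault index %#lx -> read %lu pages starting at index %#lx\n",
	       fault_index, ra_size, start);
	return 0;
}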
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0c61e7c7c2c13cc881455463dbdf35e0711f90e3..8cb3e014a881fde45f3424aca7f63ac257790f06 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -426,30 +426,106 @@ static struct kobj_attribute hpage_pmd_size_attr =
 	__ATTR_RO(hpage_pmd_size);
 
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
+#define FILE_EXEC_THP_ENABLE	BIT(0)
+#else
+#define FILE_EXEC_THP_ENABLE	0
+#endif
+
+#define FILE_EXEC_MTHP_ENABLE	BIT(1)
+#define FILE_EXEC_THP_ALL	(FILE_EXEC_THP_ENABLE | FILE_EXEC_MTHP_ENABLE)
+
+static void thp_flag_set(enum transparent_hugepage_flag flag, bool enable)
+{
+	if (enable)
+		set_bit(flag, &transparent_hugepage_flags);
+	else
+		clear_bit(flag, &transparent_hugepage_flags);
+}
+
 static ssize_t thp_exec_enabled_show(struct kobject *kobj,
 		struct kobj_attribute *attr, char *buf)
 {
-	return single_hugepage_flag_show(kobj, attr, buf,
-			TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG);
+	unsigned long val = 0;
+
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	if (test_bit(TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG,
+		     &transparent_hugepage_flags))
+		val |= FILE_EXEC_THP_ENABLE;
+#endif
+
+	if (test_bit(TRANSPARENT_HUGEPAGE_FILE_EXEC_MTHP_FLAG,
+		     &transparent_hugepage_flags))
+		val |= FILE_EXEC_MTHP_ENABLE;
+
+	return sysfs_emit(buf, "0x%lx\n", val);
 }
 static ssize_t thp_exec_enabled_store(struct kobject *kobj,
 		struct kobj_attribute *attr, const char *buf, size_t count)
 {
-	size_t ret = single_hugepage_flag_store(kobj, attr, buf, count,
-			TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG);
-	if (ret > 0) {
-		int err = start_stop_khugepaged();
+	unsigned long val;
+	int ret;
 
-		if (err)
-			ret = err;
-	}
+	ret = kstrtoul(buf, 16, &val);
+	if (ret < 0)
+		return ret;
+	if (val & ~FILE_EXEC_THP_ALL)
+		return -EINVAL;
 
-	return ret;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	thp_flag_set(TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG,
+		     val & FILE_EXEC_THP_ENABLE);
+	ret = start_stop_khugepaged();
+	if (ret)
+		return ret;
+#endif
+	thp_flag_set(TRANSPARENT_HUGEPAGE_FILE_EXEC_MTHP_FLAG,
+		     val & FILE_EXEC_MTHP_ENABLE);
+
+	return count;
 }
 static struct kobj_attribute thp_exec_enabled_attr =
 	__ATTR_RW(thp_exec_enabled);
-#endif
 
+#define FILE_MAPPING_ALIGN	BIT(0)
+#define ANON_MAPPING_ALIGN	BIT(1)
+#define THP_MAPPING_ALIGN_ALL	(FILE_MAPPING_ALIGN | ANON_MAPPING_ALIGN)
+
+static ssize_t thp_mapping_align_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	unsigned long val = 0;
+
+	if (test_bit(TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG,
+		     &transparent_hugepage_flags))
+		val |= FILE_MAPPING_ALIGN;
+
+	if (test_bit(TRANSPARENT_HUGEPAGE_ANON_MAPPING_ALIGN_FLAG,
+		     &transparent_hugepage_flags))
+		val |= ANON_MAPPING_ALIGN;
+
+	return sysfs_emit(buf, "0x%lx\n", val);
+}
+static ssize_t thp_mapping_align_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 16, &val);
+	if (ret < 0)
+		return ret;
+	if (val & ~THP_MAPPING_ALIGN_ALL)
+		return -EINVAL;
+
+	thp_flag_set(TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG,
+		     val & FILE_MAPPING_ALIGN);
+	thp_flag_set(TRANSPARENT_HUGEPAGE_ANON_MAPPING_ALIGN_FLAG,
+		     val & ANON_MAPPING_ALIGN);
+
+	return count;
+}
+static struct kobj_attribute thp_mapping_align_attr =
+	__ATTR_RW(thp_mapping_align);
 
 static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
@@ -459,9 +535,8 @@ static struct attribute *hugepage_attr[] = {
 #ifdef CONFIG_SHMEM
 	&shmem_enabled_attr.attr,
 #endif
-#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 	&thp_exec_enabled_attr.attr,
-#endif
+	&thp_mapping_align_attr.attr,
 	NULL,
 };
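Both store handlers above follow the same pattern: parse the value as hex and reject anything outside the known bits. A small userspace model of that parse/validate step, using the thp_mapping_align bits as the example:

/*
 * Userspace model of the hex parse + bitmask validation used by the two
 * sysfs store handlers above. The bit names mirror the patch.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define FILE_MAPPING_ALIGN	(1UL << 0)
#define ANON_MAPPING_ALIGN	(1UL << 1)
#define THP_MAPPING_ALIGN_ALL	(FILE_MAPPING_ALIGN | ANON_MAPPING_ALIGN)

static int parse_knob(const char *buf, unsigned long *val)
{
	char *end;

	errno = 0;
	*val = strtoul(buf, &end, 16);
	if (errno || end == buf)
		return -EINVAL;
	if (*val & ~THP_MAPPING_ALIGN_ALL)
		return -EINVAL;	/* unknown bit set */
	return 0;
}

int main(void)
{
	const char *inputs[] = { "0x1", "0x3", "0x4" };

	for (int i = 0; i < 3; i++) {
		unsigned long val;
		int ret = parse_knob(inputs[i], &val);

		printf("%s -> %s\n", inputs[i], ret ? "rejected" : "accepted");
	}
	return 0;
}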
@@ -853,6 +928,65 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
 	return ret;
 }
 
+#define transparent_hugepage_file_mapping_align_enabled() \
+	(transparent_hugepage_flags & \
+	 (1<f_mapping;
+	if (!mapping || !mapping_large_folio_support(mapping))
+		return false;
+
+	return true;
+}
+
+static bool anon_mapping_align_enabled(int order)
+{
+	unsigned long mask;
+
+	if (!transparent_hugepage_anon_mapping_align_enabled())
+		return 0;
+
+	mask = READ_ONCE(huge_anon_orders_always) |
+	       READ_ONCE(huge_anon_orders_madvise);
+
+	if (hugepage_global_enabled())
+		mask |= READ_ONCE(huge_anon_orders_inherit);
+
+	mask = BIT(order) & mask;
+	if (!mask)
+		return false;
+
+	return true;
+}
+
+static unsigned long folio_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	int order = arch_wants_exec_folio_order();
+
+	if (order < 0)
+		return 0;
+
+	if (file_mapping_align_enabled(filp) || anon_mapping_align_enabled(order))
+		return __thp_get_unmapped_area(filp, addr, len, pgoff, flags,
+					       PAGE_SIZE << order);
+	return 0;
+}
+
 unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags)
 {
@@ -863,6 +997,10 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 	if (ret)
 		return ret;
 
+	ret = folio_get_unmapped_area(filp, addr, len, off, flags);
+	if (ret)
+		return ret;
+
 	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
 }
 EXPORT_SYMBOL_GPL(thp_get_unmapped_area);
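A quick userspace check (not part of the patch) of the mapping-alignment effect: with BIT0 of thp_mapping_align set and an architecture that provides arch_wants_exec_folio_order() (64K on arm64), a sufficiently large file mapping is expected to come back 64K-aligned; without the feature the program simply reports whatever alignment it got. The file path is a placeholder:

/*
 * Map a file and report whether the address thp_get_unmapped_area() handed
 * back is 64K aligned. "/mnt/ext4/some-large-file" is a placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/mnt/ext4/some-large-file";	/* placeholder */
	int fd = open(path, O_RDONLY);
	void *addr;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	addr = mmap(NULL, 1024 * 1024, PROT_READ, MAP_PRIVATE, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	printf("mapping at %p is %s64K aligned\n", addr,
	       ((unsigned long)addr & (64 * 1024 - 1)) ? "NOT " : "");

	munmap(addr, 1024 * 1024);
	close(fd);
	return 0;
}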