diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3b59ac862ff8bc2423e4d3080c3d23e7a7609e97..52b8c9d7536efd2dcc66ba2638b08495a2cafe83 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4567,6 +4567,16 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 					   new_size, flags);
 		if (ret)
 			return ret;
+
+		ret = filemap_write_and_wait_range(file->f_mapping,
+				round_down(offset, 1 << blkbits), offset);
+		if (ret)
+			return ret;
+
+		ret = filemap_write_and_wait_range(file->f_mapping, offset + len,
+				round_up((offset + len), 1 << blkbits));
+		if (ret)
+			return ret;
 	}
 
 	ret = ext4_update_disksize_before_punch(inode, offset, len);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5082e11727f939b63ec4f1dc981759c7c10138f4..108e76e5becd9cc6e35922415d61b5cdcab663ab 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4022,6 +4022,7 @@ static const struct address_space_operations ext4_iomap_aops = {
 	.bmap			= ext4_bmap,
 	.invalidate_folio	= iomap_invalidate_folio,
 	.release_folio		= iomap_release_folio,
+	.direct_IO		= noop_direct_IO,
 	.migrate_folio		= filemap_migrate_folio,
 	.is_partially_uptodate	= iomap_is_partially_uptodate,
 	.error_remove_folio	= generic_error_remove_folio,
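Note (not part of the patch): the two flushes added to ext4_zero_range() cover only the partial head block [round_down(offset), offset) and partial tail block [offset + len, round_up(offset + len)) that straddle an unaligned zero range. A standalone sketch of the bounds arithmetic, with assumed values (4KiB blocks, offset = 5000, len = 10000):

    #include <stdio.h>

    /* power-of-two variants of the kernel's round_down()/round_up() */
    #define ROUND_DOWN(x, a)	((x) & ~((a) - 1))
    #define ROUND_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
    	long long offset = 5000, len = 10000;	/* assumed values */
    	long long bs = 1LL << 12;		/* blkbits = 12, i.e. 4096 */

    	/* head flush: [4096, 5000); tail flush: [15000, 16384) */
    	printf("head: [%lld, %lld)\n", ROUND_DOWN(offset, bs), offset);
    	printf("tail: [%lld, %lld)\n", offset + len,
    	       ROUND_UP(offset + len, bs));
    	return 0;
    }

Blocks fully inside the range need no flush; only the edge blocks share data with bytes outside the zeroed region.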
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index d49c0b88597f92f7a785a53be79d8195eb6326e8..26656d3d8e90942b685ddfdfb519002a7e43c7bd 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -545,6 +545,35 @@ mext_check_arguments(struct inode *orig_inode,
 	return 0;
 }
 
+/*
+ * Disable the buffered iomap path for an inode that requires moving
+ * extents, and fall back to the buffer_head path.
+ */
+static int ext4_disable_buffered_iomap_aops(struct inode *inode)
+{
+	int err;
+
+	/*
+	 * The buffer_head aops don't know how to handle folios
+	 * dirtied by iomap, so before falling back, flush all dirty
+	 * folios the inode has.
+	 */
+	filemap_invalidate_lock(inode->i_mapping);
+	err = filemap_write_and_wait(inode->i_mapping);
+	if (err < 0) {
+		filemap_invalidate_unlock(inode->i_mapping);
+		return err;
+	}
+	truncate_inode_pages(inode->i_mapping, 0);
+
+	ext4_clear_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP);
+	mapping_clear_large_folios(inode->i_mapping);
+	ext4_set_aops(inode);
+	filemap_invalidate_unlock(inode->i_mapping);
+
+	return 0;
+}
+
 /**
  * ext4_move_extents - Exchange the specified range of a file
  *
@@ -609,13 +638,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 		return -EOPNOTSUPP;
 	}
 
-	if (ext4_test_inode_state(orig_inode, EXT4_STATE_BUFFERED_IOMAP) ||
-	    ext4_test_inode_state(donor_inode, EXT4_STATE_BUFFERED_IOMAP)) {
-		ext4_msg(orig_inode->i_sb, KERN_ERR,
-			 "Online defrag not supported for inode with iomap buffered IO path");
-		return -EOPNOTSUPP;
-	}
-
 	/* Protect orig and donor inodes against a truncate */
 	lock_two_nondirectories(orig_inode, donor_inode);
 
@@ -623,6 +645,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 	inode_dio_wait(orig_inode);
 	inode_dio_wait(donor_inode);
 
+	/* Fall back to buffer_head aops for inodes with buffered iomap aops */
+	if (ext4_test_inode_state(orig_inode, EXT4_STATE_BUFFERED_IOMAP)) {
+		ret = ext4_disable_buffered_iomap_aops(orig_inode);
+		if (ret)
+			goto out_unlock;
+	}
+	if (ext4_test_inode_state(donor_inode, EXT4_STATE_BUFFERED_IOMAP)) {
+		ret = ext4_disable_buffered_iomap_aops(donor_inode);
+		if (ret)
+			goto out_unlock;
+	}
+
 	/* Protect extent tree against block allocations via delalloc */
 	ext4_double_down_write_data_sem(orig_inode, donor_inode);
 	/* Check the filesystem environment whether move_extent can be done */
@@ -706,6 +740,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
 	ext4_free_ext_path(path);
 	ext4_double_up_write_data_sem(orig_inode, donor_inode);
 
+out_unlock:
 	unlock_two_nondirectories(orig_inode, donor_inode);
 
 	return ret;
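Note (not part of the patch): a condensed view of the ordering that ext4_disable_buffered_iomap_aops() relies on, with error handling trimmed:

    filemap_invalidate_lock(inode->i_mapping);	/* block faults and reads */
    filemap_write_and_wait(inode->i_mapping);	/* flush iomap-dirtied folios */
    truncate_inode_pages(inode->i_mapping, 0);	/* empty the page cache */
    ext4_clear_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP);
    mapping_clear_large_folios(inode->i_mapping);	/* future folios are order-0 */
    ext4_set_aops(inode);			/* reinstall buffer_head aops */
    filemap_invalidate_unlock(inode->i_mapping);

Writeback and truncation must complete before the aops switch so the buffer_head path never sees a folio that iomap dirtied, and large folio support must be cleared before the unlock so no new large folio can be instantiated in between.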
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 00591af033facf14bd8398a205f5a458bb4f63a8..a58f21a9c877fe9cda142f62dbd2104201a6bc5f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1712,8 +1712,7 @@ enum {
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
 	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
-	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
-	Opt_buffered_iomap, Opt_nobuffered_iomap,
+	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, Opt_buffered_iomap,
 	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
 #ifdef CONFIG_EXT4_DEBUG
 	Opt_fc_debug_max_replay, Opt_fc_debug_force
@@ -1857,7 +1856,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
 					Opt_no_prefetch_block_bitmaps),
 	fsparam_s32	("mb_optimize_scan",	Opt_mb_optimize_scan),
 	fsparam_flag	("buffered_iomap",	Opt_buffered_iomap),
-	fsparam_flag	("nobuffered_iomap",	Opt_nobuffered_iomap),
 	fsparam_string	("check",		Opt_removed),	/* mount option from ext2/3 */
 	fsparam_flag	("nocheck",		Opt_removed),	/* mount option from ext2/3 */
 	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
@@ -1954,8 +1952,6 @@ static const struct mount_opts {
 							MOPT_SET},
 	{Opt_buffered_iomap, EXT4_MOUNT2_BUFFERED_IOMAP,
 	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
-	{Opt_nobuffered_iomap, EXT4_MOUNT2_BUFFERED_IOMAP,
-	 MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY},
 #ifdef CONFIG_EXT4_DEBUG
 	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
 	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
@@ -2442,6 +2438,11 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
 			return -EINVAL;
 		}
 		return 0;
+	case Opt_buffered_iomap:
+		ext4_msg(NULL, KERN_WARNING,
+			 "buffered iomap enabled. Warning: EXPERIMENTAL, use at your own risk");
+		ctx_set_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP);
+		return 0;
 	}
 
 	/*
@@ -2829,10 +2830,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
 		return -EINVAL;
 	}
 
-	if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP))
-		ext4_msg(NULL, KERN_WARNING,
-			 "Warning: mounting with an experimental option 'buffered_iomap'");
-
 	err = ext4_check_test_dummy_encryption(fc, sb);
 	if (err)
 		return err;
@@ -2876,6 +2873,12 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
 		      !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
 			goto fail_dax_change_remount;
 		}
+
+		if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP) &&
+		    !test_opt2(sb, BUFFERED_IOMAP)) {
+			ext4_msg(NULL, KERN_ERR, "can't enable buffered iomap while remounting");
+			return -EINVAL;
+		}
 	}
 
 	return ext4_check_quota_consistency(fc, sb);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 2f91ac685a4d28871103cb6798a795d3481083cb..f41266202776cd06d0c3256d2e6e5c82115fa1e4 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -135,11 +135,14 @@ static void ifs_clear_range_dirty(struct folio *folio,
 {
 	struct inode *inode = folio->mapping->host;
 	unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
-	unsigned int first_blk = (off >> inode->i_blkbits);
-	unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
-	unsigned int nr_blks = last_blk - first_blk + 1;
+	unsigned int first_blk = DIV_ROUND_UP(off, i_blocksize(inode));
+	unsigned int last_blk = (off + len) >> inode->i_blkbits;
+	unsigned int nr_blks = last_blk - first_blk;
 	unsigned long flags;
 
+	if (!nr_blks)
+		return;
+
 	spin_lock_irqsave(&ifs->state_lock, flags);
 	bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
 	spin_unlock_irqrestore(&ifs->state_lock, flags);
@@ -626,6 +629,8 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
 		WARN_ON_ONCE(folio_test_writeback(folio));
 		folio_cancel_dirty(folio);
 		ifs_free(folio);
+	} else {
+		iomap_clear_range_dirty(folio, offset, len);
 	}
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
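Note (not part of the patch): the new rounding in ifs_clear_range_dirty() clears dirty bits only for blocks that lie fully inside [off, off + len), whereas the old code also cleared the partially covered edge blocks. A standalone check of the arithmetic, with assumed values (1KiB blocks, range [1536, 3584)):

    #include <stdio.h>

    /* userspace stand-in for the kernel's DIV_ROUND_UP() */
    #define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

    int main(void)
    {
    	unsigned int blkbits = 10, blocksize = 1u << blkbits;
    	unsigned long off = 1536, len = 2048;	/* assumed values */

    	unsigned int first_blk = DIV_ROUND_UP(off, blocksize);	/* 2 */
    	unsigned int last_blk = (off + len) >> blkbits;		/* 3 */
    	unsigned int nr_blks = last_blk - first_blk;		/* 1 */

    	/* only block 2 ([2048, 3072)) lies fully inside the range; the
    	 * partially covered blocks 1 and 3 keep their dirty bits */
    	printf("clear %u block(s) starting at block %u\n",
    	       nr_blks, first_blk);
    	return 0;
    }

If the range covers no full block, nr_blks is 0 and the new early return skips the bitmap update entirely.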
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 623159bc6cc8d8411e83d5c915d71877d20d09bf..28155f6b75ee74a7116d4ea739bad0a13b4e1940 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -391,6 +391,20 @@ static inline void mapping_set_large_folios(struct address_space *mapping)
 	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
+/**
+ * mapping_clear_large_folios() - Disable large folio support for the file.
+ * @mapping: The file.
+ *
+ * The filesystem has to make sure the file is not being accessed concurrently
+ * and that all cached folios have been cleared under mapping->invalidate_lock
+ * before calling this function.
+ */
+static inline void mapping_clear_large_folios(struct address_space *mapping)
+{
+	WARN_ON_ONCE(!rwsem_is_locked(&mapping->invalidate_lock));
+	__clear_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+}
+
 /*
  * Large folio support currently depends on THP. These dependencies are
  * being worked on but are not yet fixed.
diff --git a/mm/readahead.c b/mm/readahead.c
index 222f71c4d40e9b2e63dcb03d7a4c8bb34244b35e..1a93d750f61081a3580d282cacea18afb3d10828 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -549,6 +549,13 @@ void page_cache_ra_order(struct readahead_control *ractl,
 	/* See comment in page_cache_ra_unbounded() */
 	nofs = memalloc_nofs_save();
 	filemap_invalidate_lock_shared(mapping);
+
+	if (unlikely(!mapping_large_folio_support(mapping))) {
+		filemap_invalidate_unlock_shared(mapping);
+		memalloc_nofs_restore(nofs);
+		goto fallback;
+	}
+
 	while (index <= limit) {
 		unsigned int order = new_order;
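Note (not part of the patch): the recheck in page_cache_ra_order() pairs with mapping_clear_large_folios(), which runs under the exclusive side of invalidate_lock. Re-testing the flag after taking the shared lock guarantees readahead can never instantiate a large folio once the flag is cleared. A sketch of the two sides, mirroring the hunks above:

    /* writer side (ext4_disable_buffered_iomap_aops) */
    filemap_invalidate_lock(mapping);		/* exclusive */
    truncate_inode_pages(mapping, 0);		/* drop existing folios */
    mapping_clear_large_folios(mapping);	/* clear AS_LARGE_FOLIO_SUPPORT */
    filemap_invalidate_unlock(mapping);

    /* reader side (page_cache_ra_order) */
    filemap_invalidate_lock_shared(mapping);
    if (!mapping_large_folio_support(mapping)) {	/* re-test under the lock */
    	filemap_invalidate_unlock_shared(mapping);
    	goto fallback;				/* do order-0 readahead */
    }
    /* safe: the flag cannot be cleared while the shared lock is held */

An unlocked check before filemap_invalidate_lock_shared() would race: the flag could be cleared and the cache truncated between the test and the lock, letting readahead repopulate the mapping with large folios the buffer_head aops cannot handle.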