From c804408112aef04354efc0bd6b3ab88d39449a0d Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 12 May 2025 15:33:22 +0800 Subject: [PATCH 1/7] anolis: ext4: add direct_IO for ext4_iomap_aops ANBZ: #20084 Add the missing .direct_IO for ext4_iomap_aops. Now just use the default noop_direct_IO like others. Fixes: e71c3e732afb ("ext4: add a new iomap aops for regular file's buffered IO path") Signed-off-by: Joseph Qi --- fs/ext4/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5082e11727f9..108e76e5becd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4022,6 +4022,7 @@ static const struct address_space_operations ext4_iomap_aops = { .bmap = ext4_bmap, .invalidate_folio = iomap_invalidate_folio, .release_folio = iomap_release_folio, + .direct_IO = noop_direct_IO, .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, -- Gitee From a23326db09bf8d2cca55131ab7cddb61d4f3df2d Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 12 May 2025 16:04:21 +0800 Subject: [PATCH 2/7] anolis: ext4: writeback partial blocks before zeroing out range ANBZ: #20084 If we zero partial blocks, iomap_zero_iter() will skip zeroing out the IOMAP_UNWRITTEN srcmap, it works fine in xfs because this type means the block is pure unwritten and doesn't contain any delayed data. But it doesn't work in ext4, because IOMAP_UNWRITTEN may contain delayed data in ext4. For now it's hard to unify the meaning of this flag, so just fix it by writeback partial blocks before zeroing out. 
This is cherry-picked from: https://lore.kernel.org/linux-ext4/20240127015825.1608160-23-yi.zhang@huaweicloud.com/ Signed-off-by: Zhang Yi Signed-off-by: Joseph Qi --- fs/ext4/extents.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3b59ac862ff8..52b8c9d7536e 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4567,6 +4567,16 @@ static long ext4_zero_range(struct file *file, loff_t offset, new_size, flags); if (ret) return ret; + + ret = filemap_write_and_wait_range(file->f_mapping, + round_down(offset, 1 << blkbits), offset); + if (ret) + return ret; + + ret = filemap_write_and_wait_range(file->f_mapping, offset + len, + round_up((offset + len), 1 << blkbits)); + if (ret) + return ret; } ret = ext4_update_disksize_before_punch(inode, offset, len); -- Gitee From 2a6d59d4e2d3b1b0a52349d6fb74e338e06f9d14 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 12 May 2025 16:12:31 +0800 Subject: [PATCH 3/7] anolis: ext4: fall back to buffer_head path for defrag ANBZ: #20084 Online defrag doesn't support iomap path yet, we have to fall back to buffer_head path for the inode which has been using iomap. Changing active inode is dangerous, before we start, we must hold the inode lock and the mapping->invalidate_lock, and writeback all dirty folios and drop the inode's pagecache. 
This is cherry-picked from: https://lore.kernel.org/linux-ext4/20240127015825.1608160-24-yi.zhang@huaweicloud.com/ Fixes: 52925b270812 ("ext4: disable online defrag when inode using iomap buffered I/O path") Signed-off-by: Zhang Yi Signed-off-by: Joseph Qi --- fs/ext4/move_extent.c | 48 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index d49c0b88597f..6a0d2824d4fb 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -545,6 +545,34 @@ mext_check_arguments(struct inode *orig_inode, return 0; } +/* + * Disable buffered iomap path for the inode that requiring move extents, + * fallback to buffer_head path. + */ +static int ext4_disable_buffered_iomap_aops(struct inode *inode) +{ + int err; + + /* + * The buffered_head aops don't know how to handle folios + * dirtied by iomap, so before falling back, flush all dirty + * folios the inode has. + */ + filemap_invalidate_lock(inode->i_mapping); + err = filemap_write_and_wait(inode->i_mapping); + if (err < 0) { + filemap_invalidate_unlock(inode->i_mapping); + return err; + } + truncate_inode_pages(inode->i_mapping, 0); + + ext4_clear_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP); + ext4_set_aops(inode); + filemap_invalidate_unlock(inode->i_mapping); + + return 0; +} + /** * ext4_move_extents - Exchange the specified range of a file * @@ -609,13 +637,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, return -EOPNOTSUPP; } - if (ext4_test_inode_state(orig_inode, EXT4_STATE_BUFFERED_IOMAP) || - ext4_test_inode_state(donor_inode, EXT4_STATE_BUFFERED_IOMAP)) { - ext4_msg(orig_inode->i_sb, KERN_ERR, - "Online defrag not supported for inode with iomap buffered IO path"); - return -EOPNOTSUPP; - } - /* Protect orig and donor inodes against a truncate */ lock_two_nondirectories(orig_inode, donor_inode); @@ -623,6 +644,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, 
__u64 orig_blk, inode_dio_wait(orig_inode); inode_dio_wait(donor_inode); + /* Fallback to buffer_head aops for inodes with buffered iomap aops */ + if (ext4_test_inode_state(orig_inode, EXT4_STATE_BUFFERED_IOMAP)) { + ret = ext4_disable_buffered_iomap_aops(orig_inode); + if (ret) + goto out_unlock; + } + if (ext4_test_inode_state(donor_inode, EXT4_STATE_BUFFERED_IOMAP)) { + ret = ext4_disable_buffered_iomap_aops(donor_inode); + if (ret) + goto out_unlock; + } + /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ @@ -706,6 +739,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk, ext4_free_ext_path(path); ext4_double_up_write_data_sem(orig_inode, donor_inode); +out_unlock: unlock_two_nondirectories(orig_inode, donor_inode); return ret; -- Gitee From 36fc6e017e688556d9211cc0aa688e01f2a65e40 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 12 May 2025 16:25:12 +0800 Subject: [PATCH 4/7] anolis: filemap: support disable large folios on active inode ANBZ: #20084 Since commit 730633f0b7f9 ("mm: Protect operations adding pages to page cache with invalidate_lock"), mapping->invalidate_lock can protect us from adding new folios into page cache. So it's possible to disable active inodes' large folios support, even through it might be dangerous. Filesystems can disable it under mapping->invalidate_lock and drop all page cache before dropping AS_LARGE_FOLIO_SUPPORT. 
This is cherry-picked from: https://lore.kernel.org/linux-ext4/20240127015825.1608160-26-yi.zhang@huaweicloud.com/ Signed-off-by: Zhang Yi Signed-off-by: Joseph Qi --- fs/ext4/move_extent.c | 1 + include/linux/pagemap.h | 14 ++++++++++++++ mm/readahead.c | 7 +++++++ 3 files changed, 22 insertions(+) diff --git a/fs/ext4/move_extent.c index 6a0d2824d4fb..26656d3d8e90 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -567,6 +567,7 @@ static int ext4_disable_buffered_iomap_aops(struct inode *inode) truncate_inode_pages(inode->i_mapping, 0); ext4_clear_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP); + mapping_clear_large_folios(inode->i_mapping); ext4_set_aops(inode); filemap_invalidate_unlock(inode->i_mapping); diff --git a/include/linux/pagemap.h index 623159bc6cc8..28155f6b75ee 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -391,6 +391,20 @@ static inline void mapping_set_large_folios(struct address_space *mapping) __set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } +/** + * mapping_clear_large_folios() - Disable large folio support for the file. + * @mapping: The file. + * + * The filesystem has to make sure the file is in atomic context and all + * cached folios have been cleared under mapping->invalidate_lock before + * calling this function. + */ +static inline void mapping_clear_large_folios(struct address_space *mapping) +{ + WARN_ON_ONCE(!rwsem_is_locked(&mapping->invalidate_lock)); + __clear_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); +} + /* * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. 
diff --git a/mm/readahead.c b/mm/readahead.c index 222f71c4d40e..1a93d750f610 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -549,6 +549,13 @@ void page_cache_ra_order(struct readahead_control *ractl, /* See comment in page_cache_ra_unbounded() */ nofs = memalloc_nofs_save(); filemap_invalidate_lock_shared(mapping); + + if (unlikely(!mapping_large_folio_support(mapping))) { + filemap_invalidate_unlock_shared(mapping); + memalloc_nofs_restore(nofs); + goto fallback; + } + while (index <= limit) { unsigned int order = new_order; -- Gitee From f498b06cfe6f3b5cde6fadfbbddf4cb72d04953f Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 14 May 2025 10:17:14 +0800 Subject: [PATCH 5/7] anolis: iomap: correct the range of a partial dirty clear ANBZ: #20084 The block range calculation in ifs_clear_range_dirty() is incorrect when partial clear a range in a folio. We can't clear the dirty bit of the first block or the last block if the start or end offset is blocksize unaligned, this has not yet caused any issue since we always clear a whole folio in iomap_writepage_map()->iomap_clear_range_dirty(). Fix this by round up the first block and round down the last block and correct the calculation of nr_blks. 
This is cherry-picked from: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I3UAW4Z5SJGRJFSK2MIOMPEGQG5P7SPP/ Signed-off-by: Zhang Yi Signed-off-by: Joseph Qi --- fs/iomap/buffered-io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 2f91ac685a4d..ee5bd42fb927 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -135,11 +135,14 @@ static void ifs_clear_range_dirty(struct folio *folio, { struct inode *inode = folio->mapping->host; unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); - unsigned int first_blk = (off >> inode->i_blkbits); - unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; - unsigned int nr_blks = last_blk - first_blk + 1; + unsigned int first_blk = DIV_ROUND_UP(off, i_blocksize(inode)); + unsigned int last_blk = (off + len) >> inode->i_blkbits; + unsigned int nr_blks = last_blk - first_blk; unsigned long flags; + if (!nr_blks) + return; + spin_lock_irqsave(&ifs->state_lock, flags); bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks); spin_unlock_irqrestore(&ifs->state_lock, flags); -- Gitee From 9e722828baeff92b6be53a8be965403faf1e20be Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 13 May 2025 19:32:12 +0800 Subject: [PATCH 6/7] anolis: iomap: support invalidating partial folios ANBZ: #20084 Current iomap_invalidate_folio() could only invalidate an entire folio, if we truncate a partial folio on a filesystem with blocksize < folio size, it will left over the dirty bits of truncated/punched blocks, and the writeback process will try to map the invalid hole range, but fortunately it hasn't trigger any real problems now since ->map() will fix the length. Fix this by supporting invalidating partial folios. 
This is cherry-picked from: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I3UAW4Z5SJGRJFSK2MIOMPEGQG5P7SPP/ Signed-off-by: Zhang Yi Signed-off-by: Joseph Qi --- fs/iomap/buffered-io.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index ee5bd42fb927..f41266202776 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -629,6 +629,8 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) WARN_ON_ONCE(folio_test_writeback(folio)); folio_cancel_dirty(folio); ifs_free(folio); + } else { + iomap_clear_range_dirty(folio, offset, len); } } EXPORT_SYMBOL_GPL(iomap_invalidate_folio); -- Gitee From 17411a9b8a164f4cd5a628fa28a14d28a34aca90 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 14 May 2025 16:16:26 +0800 Subject: [PATCH 7/7] anolis: ext4: don't allow remount with buffered iomap ANBZ: #20084 It's not safe to switch between buffer head and buffered iomap mode. So don't allow remount with buffered iomap now. Also remove nobuffered iomap option as of now since it has no usage. 
Fixes: 86f76a4b13aa ("ext4: introduce a mount option for iomap buffered I/O path") Signed-off-by: Joseph Qi --- fs/ext4/super.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 00591af033fa..a58f21a9c877 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1712,8 +1712,7 @@ enum { Opt_dioread_nolock, Opt_dioread_lock, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, - Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, - Opt_buffered_iomap, Opt_nobuffered_iomap, + Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan, Opt_buffered_iomap, Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type, #ifdef CONFIG_EXT4_DEBUG Opt_fc_debug_max_replay, Opt_fc_debug_force @@ -1857,7 +1856,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = { Opt_no_prefetch_block_bitmaps), fsparam_s32 ("mb_optimize_scan", Opt_mb_optimize_scan), fsparam_flag ("buffered_iomap", Opt_buffered_iomap), - fsparam_flag ("nobuffered_iomap", Opt_nobuffered_iomap), fsparam_string ("check", Opt_removed), /* mount option from ext2/3 */ fsparam_flag ("nocheck", Opt_removed), /* mount option from ext2/3 */ fsparam_flag ("reservation", Opt_removed), /* mount option from ext2/3 */ @@ -1954,8 +1952,6 @@ static const struct mount_opts { MOPT_SET}, {Opt_buffered_iomap, EXT4_MOUNT2_BUFFERED_IOMAP, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, - {Opt_nobuffered_iomap, EXT4_MOUNT2_BUFFERED_IOMAP, - MOPT_CLEAR | MOPT_2 | MOPT_EXT4_ONLY}, #ifdef CONFIG_EXT4_DEBUG {Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT, MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, @@ -2442,6 +2438,11 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) return -EINVAL; } return 0; + case Opt_buffered_iomap: + ext4_msg(NULL, KERN_WARNING, + "buffered iomap enabled. 
Warning: EXPERIMENTAL, use at your own risk"); + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP); + return 0; } /* @@ -2829,10 +2830,6 @@ static int ext4_check_opt_consistency(struct fs_context *fc, return -EINVAL; } - if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP)) - ext4_msg(NULL, KERN_WARNING, - "Warning: mounting with an experimental option 'buffered_iomap'"); - err = ext4_check_test_dummy_encryption(fc, sb); if (err) return err; @@ -2876,6 +2873,12 @@ static int ext4_check_opt_consistency(struct fs_context *fc, !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) { goto fail_dax_change_remount; } + + if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_BUFFERED_IOMAP) && + !test_opt2(sb, BUFFERED_IOMAP)) { + ext4_msg(NULL, KERN_ERR, "can't enable buffered iomap while remounting"); + return -EINVAL; + } } return ext4_check_quota_consistency(fc, sb); -- Gitee