diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bceb4c64754a3fe17b4f7db8c49ef3b1646049a1..59f46934928d9e9a17fa755da68e78fc2d110c95 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4230,7 +4230,9 @@ void ext4_set_aops(struct inode *inode)
 }
 
 static int __ext4_block_zero_page_range(handle_t *handle,
-		struct address_space *mapping, loff_t from, loff_t length)
+		struct address_space *mapping,
+		loff_t from, loff_t length,
+		bool *did_zero)
 {
 	ext4_fsblk_t index = from >> PAGE_SHIFT;
 	unsigned offset = from & (PAGE_SIZE-1);
@@ -4240,12 +4242,22 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 	struct buffer_head *bh;
 	struct folio *folio;
 	int err = 0;
+	bool orig_handle_valid = true;
+
+	if (ext4_should_journal_data(inode) && handle == NULL) {
+		handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+		orig_handle_valid = false;
+	}
 
 	folio = __filemap_get_folio(mapping, from >> PAGE_SHIFT,
 				    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
 				    mapping_gfp_constraint(mapping, ~__GFP_FS));
-	if (IS_ERR(folio))
-		return PTR_ERR(folio);
+	if (IS_ERR(folio)) {
+		err = PTR_ERR(folio);
+		goto out;
+	}
 
 	blocksize = inode->i_sb->s_blocksize;
 
@@ -4298,36 +4310,43 @@ static int __ext4_block_zero_page_range(handle_t *handle,
 			}
 		}
 	}
+
 	if (ext4_should_journal_data(inode)) {
 		BUFFER_TRACE(bh, "get write access");
 		err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
 						    EXT4_JTR_NONE);
 		if (err)
 			goto unlock;
-	}
-	folio_zero_range(folio, offset, length);
-	BUFFER_TRACE(bh, "zeroed end of block");
+		folio_zero_range(folio, offset, length);
+		BUFFER_TRACE(bh, "zeroed end of block");
 
-	if (ext4_should_journal_data(inode)) {
 		err = ext4_dirty_journalled_data(handle, bh);
+		if (err)
+			goto unlock;
 	} else {
 		err = 0;
+		folio_zero_range(folio, offset, length);
+		BUFFER_TRACE(bh, "zeroed end of block");
+
 		mark_buffer_dirty(bh);
-		if (ext4_should_order_data(inode))
-			err = ext4_jbd2_inode_add_write(handle, inode, from,
-					length);
 	}
 
+	if (did_zero)
+		*did_zero = true;
+
 unlock:
 	folio_unlock(folio);
 	folio_put(folio);
+out:
+	if (ext4_should_journal_data(inode) && orig_handle_valid == false)
+		ext4_journal_stop(handle);
 	return err;
 }
 
-static int ext4_iomap_zero_range(struct inode *inode,
-		loff_t from, loff_t length)
+static int ext4_iomap_zero_range(struct inode *inode, loff_t from,
+				 loff_t length, bool *did_zero)
 {
-	return iomap_zero_range(inode, from, length, NULL,
+	return iomap_zero_range(inode, from, length, did_zero,
 			&ext4_iomap_buffered_read_ops);
 }
 
@@ -4339,7 +4358,9 @@ static int ext4_iomap_zero_range(struct inode *inode,
  * that corresponds to 'from'
  */
 static int ext4_block_zero_page_range(handle_t *handle,
-		struct address_space *mapping, loff_t from, loff_t length)
+		struct address_space *mapping,
+		loff_t from, loff_t length,
+		bool *did_zero)
 {
 	struct inode *inode = mapping->host;
 	unsigned offset = from & (PAGE_SIZE-1);
@@ -4357,9 +4378,10 @@ static int ext4_block_zero_page_range(handle_t *handle,
 		return dax_zero_range(inode, from, length, NULL,
 				      &ext4_iomap_ops);
 	} else if (ext4_test_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP)) {
-		return ext4_iomap_zero_range(inode, from, length);
+		return ext4_iomap_zero_range(inode, from, length, did_zero);
 	}
-	return __ext4_block_zero_page_range(handle, mapping, from, length);
+	return __ext4_block_zero_page_range(handle, mapping, from, length,
+					    did_zero);
 }
 
 /*
@@ -4368,13 +4390,15 @@ static int ext4_block_zero_page_range(handle_t *handle,
  * This required during truncate. We need to physically zero the tail end
  * of that block so it doesn't yield old data if the file is later grown.
  */
-static int ext4_block_truncate_page(handle_t *handle,
-		struct address_space *mapping, loff_t from)
+static loff_t ext4_block_truncate_page(struct address_space *mapping,
+				       loff_t from)
 {
 	unsigned offset = from & (PAGE_SIZE-1);
 	unsigned length;
 	unsigned blocksize;
 	struct inode *inode = mapping->host;
+	bool did_zero = false;
+	int err;
 
 	/* If we are processing an encrypted inode during orphan list handling */
 	if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode))
@@ -4383,7 +4407,28 @@ static int ext4_block_truncate_page(handle_t *handle,
 	blocksize = inode->i_sb->s_blocksize;
 	length = blocksize - (offset & (blocksize - 1));
 
-	return ext4_block_zero_page_range(handle, mapping, from, length);
+	err = ext4_block_zero_page_range(NULL, mapping, from, length,
+					 &did_zero);
+	if (err)
+		return err;
+
+	/*
+	 * inode with an iomap buffered I/O path does not order data,
+	 * so it is necessary to write out zeroed data before the
+	 * updating i_disksize transaction is committed. Otherwise,
+	 * stale data may remain in the last block, which could be
+	 * exposed during the next expand truncate operation.
+	 */
+	if (length && ext4_test_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP)) {
+		loff_t zero_end = inode->i_size + length;
+
+		err = filemap_write_and_wait_range(mapping,
+				inode->i_size, zero_end - 1);
+		if (err)
+			return err;
+	}
+
+	return length;
 }
 
 int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
@@ -4406,13 +4451,14 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 	if (start == end &&
 	    (partial_start || (partial_end != sb->s_blocksize - 1))) {
 		err = ext4_block_zero_page_range(handle, mapping,
-						 lstart, length);
+						 lstart, length, NULL);
 		return err;
 	}
 	/* Handle partial zero out on the start of the range */
 	if (partial_start) {
 		err = ext4_block_zero_page_range(handle, mapping,
-						 lstart, sb->s_blocksize);
+						 lstart, sb->s_blocksize,
+						 NULL);
 		if (err)
 			return err;
 	}
@@ -4420,7 +4466,7 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 	if (partial_end != sb->s_blocksize - 1)
 		err = ext4_block_zero_page_range(handle, mapping,
 						 byte_end - partial_end,
-						 partial_end + 1);
+						 partial_end + 1, NULL);
 
 	return err;
 }
@@ -4715,6 +4761,7 @@ int ext4_truncate(struct inode *inode)
 	int err = 0, err2;
 	handle_t *handle;
 	struct address_space *mapping = inode->i_mapping;
+	loff_t zero_len = 0;
 
 	/*
 	 * There is a possibility that we're either freeing the inode
@@ -4744,6 +4791,12 @@ int ext4_truncate(struct inode *inode)
 		err = ext4_inode_attach_jinode(inode);
 		if (err)
 			goto out_trace;
+
+		zero_len = ext4_block_truncate_page(mapping, inode->i_size);
+		if (zero_len < 0) {
+			err = zero_len;
+			goto out_trace;
+		}
 	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4757,8 +4810,12 @@ int ext4_truncate(struct inode *inode)
 		goto out_trace;
 	}
 
-	if (inode->i_size & (inode->i_sb->s_blocksize - 1))
-		ext4_block_truncate_page(handle, mapping, inode->i_size);
+	if (zero_len && ext4_should_order_data(inode)) {
+		err = ext4_jbd2_inode_add_write(handle, inode, inode->i_size,
+						zero_len);
+		if (err)
+			goto out_stop;
+	}
 
 	/*
 	 * We add the inode to the orphan list, so that if this
@@ -6059,7 +6116,19 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 		    oldsize & (inode->i_sb->s_blocksize - 1)) {
 			error = ext4_inode_attach_jinode(inode);
 			if (error)
-				goto err_out;
+				goto out_mmap_sem;
+		}
+
+		/* Tail zero the EOF folio on truncate up. */
+		if (!shrink && oldsize & (inode->i_sb->s_blocksize - 1)) {
+			loff_t zero_len;
+
+			zero_len = ext4_block_truncate_page(
+						inode->i_mapping, oldsize);
+			if (zero_len < 0) {
+				error = zero_len;
+				goto out_mmap_sem;
+			}
 		}
 
 		handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
@@ -6072,17 +6141,12 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 			orphan = 1;
 		}
 		/*
-		 * Update c/mtime and tail zero the EOF folio on
-		 * truncate up. ext4_truncate() handles the shrink case
-		 * below.
+		 * Update c/mtime on truncate up, ext4_truncate() will
+		 * update c/mtime in shrink case below
 		 */
-		if (!shrink) {
+		if (!shrink)
 			inode_set_mtime_to_ts(inode,
 					      inode_set_ctime_current(inode));
-			if (oldsize & (inode->i_sb->s_blocksize - 1))
-				ext4_block_truncate_page(handle,
-						inode->i_mapping, oldsize);
-		}
 
 		if (shrink)
 			ext4_fc_track_range(handle, inode,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 0c06e97d08de7d6eac79393092a0d661e69e39e4..c994b2f058c34b26256f0d9cd8733a397dc81cc9 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -844,7 +844,12 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 	if (srcmap != &iter->iomap)
 		BUG_ON(pos + len > srcmap->offset + srcmap->length);
 
-	if (fatal_signal_pending(current))
+	/*
+	 * Zero range operations typically involve small amounts of data
+	 * and are frequently used to prevent the exposure of stale data.
+	 * Therefore, do not interrupt it here.
+	 */
+	if (iter->flags != IOMAP_ZERO && fatal_signal_pending(current))
 		return -EINTR;
 
 	if (!mapping_large_folio_support(iter->inode->i_mapping))
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 98014cdcb714dec5166360ef9d229c67a240a068..68d3141a2f369b065fad497497ff96d10a6decba 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2000,17 +2000,15 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 			return err;
 		}
 
-		if (block_start == ~0ULL) {
-			block_start = phys_block;
-			block_stop = block_start - 1;
-		}
+		if (block_start == ~0ULL)
+			block_stop = block_start = phys_block;
 
 		/*
 		 * last block not contiguous with current block,
 		 * process last contiguous region and return to this block on
 		 * next loop
 		 */
-		if (phys_block != block_stop + 1) {
+		if (phys_block != block_stop) {
 			block--;
 		} else {
 			block_stop++;
@@ -2029,11 +2027,10 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 		 */
 		byte_start = block_start * journal->j_blocksize;
 		byte_stop = block_stop * journal->j_blocksize;
-		byte_count = (block_stop - block_start + 1) *
-				journal->j_blocksize;
+		byte_count = (block_stop - block_start) * journal->j_blocksize;
 
 		truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
-				byte_start, byte_stop);
+				byte_start, byte_stop - 1);
 
 		if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
 			err = blkdev_issue_discard(journal->j_dev,
@@ -2048,7 +2045,7 @@ static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
 		}
 
 		if (unlikely(err != 0)) {
-			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
+			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks [%llu, %llu)",
 			       err, block_start, block_stop);
 			return err;
 		}
diff --git a/mm/truncate.c b/mm/truncate.c
index fa9728073eeb51bafa6662c01bc4ffdfc86bf9e6..1557a0503f8eabad78f293f5bb369e505e06f613 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -765,15 +765,15 @@ EXPORT_SYMBOL(truncate_setsize);
  * @from:	original inode size
  * @to:		new inode size
  *
- * Handle extension of inode size either caused by extending truncate or by
- * write starting after current i_size. We mark the page straddling current
- * i_size RO so that page_mkwrite() is called on the nearest write access to
- * the page. This way filesystem can be sure that page_mkwrite() is called on
- * the page before user writes to the page via mmap after the i_size has been
- * changed.
+ * Handle extension of inode size either caused by extending truncate or
+ * by write starting after current i_size. We mark the page straddling
+ * current i_size RO so that page_mkwrite() is called on the first
+ * write access to the page. The filesystem will update its per-block
+ * information before user writes to the page via mmap after the i_size
+ * has been changed.
  *
  * The function must be called after i_size is updated so that page fault
- * coming after we unlock the page will already see the new i_size.
+ * coming after we unlock the folio will already see the new i_size.
  * The function must be called while we still hold i_rwsem - this not only
  * makes sure i_size is stable but also that userspace cannot observe new
  * i_size value before we are prepared to store mmap writes at new inode size.
@@ -782,31 +782,44 @@ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
 {
 	int bsize = i_blocksize(inode);
 	loff_t rounded_from;
-	struct page *page;
-	pgoff_t index;
+	struct folio *folio;
 
 	WARN_ON(to > inode->i_size);
 
-	if (from >= to || bsize == PAGE_SIZE)
+	if (from >= to || bsize >= PAGE_SIZE)
 		return;
 	/* Page straddling @from will not have any hole block created? */
 	rounded_from = round_up(from, bsize);
 	if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1)))
 		return;
 
-	index = from >> PAGE_SHIFT;
-	page = find_lock_page(inode->i_mapping, index);
-	/* Page not cached? Nothing to do */
-	if (!page)
+	folio = filemap_lock_folio(inode->i_mapping, from / PAGE_SIZE);
+	/* Folio not cached? Nothing to do */
+	if (IS_ERR(folio))
 		return;
 	/*
-	 * See clear_page_dirty_for_io() for details why set_page_dirty()
+	 * See folio_clear_dirty_for_io() for details why folio_mark_dirty()
 	 * is needed.
 	 */
-	if (page_mkclean(page))
-		set_page_dirty(page);
-	unlock_page(page);
-	put_page(page);
+	if (folio_mkclean(folio))
+		folio_mark_dirty(folio);
+
+	/*
+	 * The post-eof range of the folio must be zeroed before it is exposed
+	 * to the file. Writeback normally does this, but since i_size has been
+	 * increased we handle it here.
+	 */
+	if (folio_test_dirty(folio)) {
+		unsigned int offset, end;
+
+		offset = from - folio_pos(folio);
+		end = min_t(unsigned int, to - folio_pos(folio),
+			    folio_size(folio));
+		folio_zero_segment(folio, offset, end);
+	}
+
+	folio_unlock(folio);
+	folio_put(folio);
 }
 
 EXPORT_SYMBOL(pagecache_isize_extended);
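
Not part of the patch: a minimal user-space sketch of the calling convention the patch moves ext4_block_truncate_page() to, where the helper returns the number of tail bytes it zeroed (0 when the size is already block aligned) or a negative errno, and the caller only adds the range to the ordered-data list when something was actually zeroed. The helper names below (demo_truncate_page, demo_add_ordered_write) are hypothetical stand-ins, not kernel APIs.

#include <stdio.h>

/*
 * Stand-in that mirrors the new ext4_block_truncate_page() contract:
 * return the zeroed tail length in bytes, 0 if the size is already
 * block aligned, or a negative errno on failure.
 */
static long demo_truncate_page(long long size, unsigned int blocksize)
{
	unsigned int offset = size & (blocksize - 1);

	if (offset == 0)
		return 0;			/* nothing to zero */
	/* ... zero [size, size + blocksize - offset) in the page cache ... */
	return blocksize - offset;		/* bytes zeroed */
}

/* Stand-in for ext4_jbd2_inode_add_write() in ordered mode. */
static int demo_add_ordered_write(long long start, long len)
{
	printf("order data: [%lld, %lld)\n", start, start + len);
	return 0;
}

int main(void)
{
	long long i_size = 10000;		/* hypothetical file size */
	unsigned int blocksize = 4096;
	long zero_len;

	/* Zero the tail before the transaction, as ext4_truncate() now does. */
	zero_len = demo_truncate_page(i_size, blocksize);
	if (zero_len < 0)
		return (int)-zero_len;

	/* Only order the data when something was actually zeroed. */
	if (zero_len)
		demo_add_ordered_write(i_size, zero_len);
	return 0;
}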