diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 45471ee7e91900ba2e551f5a25eb56ef8de04885..0bb3257cba42998eab6b8ad76b9bd1d3cc7340c0 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -17,6 +17,7 @@
 #include <linux/bio.h>
 #include <linux/sched/signal.h>
 #include <linux/migrate.h>
+#include <linux/math64.h>
 #include "trace.h"
 
 #include "../internal.h"
@@ -1044,11 +1045,10 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 EXPORT_SYMBOL_GPL(iomap_zero_range);
 
 int
-iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-		const struct iomap_ops *ops)
+iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
+		bool *did_zero, const struct iomap_ops *ops)
 {
-	unsigned int blocksize = i_blocksize(inode);
-	unsigned int off = pos & (blocksize - 1);
+	unsigned int off = rem_u64(pos, blocksize);
 
 	/* Block boundary? Nothing to do */
 	if (!off)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 95bfe89651add0e260964d756fb295f9a9872d04..23c0e666d2f49389266fb9c0adf4c5a9466b7cb2 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -408,20 +408,18 @@ xfs_alloc_compute_diff(
  * Fix up the length, based on mod and prod.
  * len should be k * prod + mod for some k.
  * If len is too small it is returned unchanged.
- * If len hits maxlen it is left alone.
  */
-STATIC void
+static void
 xfs_alloc_fix_len(
-	xfs_alloc_arg_t	*args)	/* allocation argument structure */
+	struct xfs_alloc_arg	*args)
 {
-	xfs_extlen_t	k;
-	xfs_extlen_t	rlen;
+	xfs_extlen_t	k;
+	xfs_extlen_t	rlen = args->len;
 
 	ASSERT(args->mod < args->prod);
-	rlen = args->len;
 	ASSERT(rlen >= args->minlen);
 	ASSERT(rlen <= args->maxlen);
-	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
+	if (args->prod <= 1 || rlen < args->mod ||
 	    (args->mod == 0 && rlen < args->prod))
 		return;
 	k = rlen % args->prod;
@@ -2385,14 +2383,23 @@ xfs_alloc_space_available(
 	if (available < (int)max(args->total, alloc_len))
 		return false;
 
+	if (flags & XFS_ALLOC_FLAG_CHECK)
+		return true;
+
 	/*
-	 * Clamp maxlen to the amount of free space available for the actual
-	 * extent allocation.
+	 * If we can't do a maxlen allocation, then we must reduce the size of
+	 * the allocation to match the available free space. We know how big
+	 * the largest contiguous free space we can allocate is, so that's our
+	 * upper bound. However, we don't exactly know what alignment/size
+	 * constraints have been placed on the allocation, so we can't
+	 * arbitrarily select some new max size. Hence make this a minlen
+	 * allocation as we know that will definitely succeed and match the
+	 * caller's alignment constraints.
 	 */
-	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
-		args->maxlen = available;
+	alloc_len = args->maxlen + (args->alignment - 1) + args->minalignslop;
+	if (longest < alloc_len) {
+		args->maxlen = args->minlen;
 		ASSERT(args->maxlen > 0);
-		ASSERT(args->maxlen >= args->minlen);
 	}
 
 	return true;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7682dfe2f7010b73bf3e6fa4d3f65b9892b2c339..1323259192d65825596c21faf6358e5a8d78fba0 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3253,32 +3253,51 @@ xfs_bmap_longest_free_extent(
 	return error;
 }
 
-static void
+static int
 xfs_bmap_select_minlen(
 	struct xfs_bmalloca	*ap,
 	struct xfs_alloc_arg	*args,
 	xfs_extlen_t		*blen,
 	int			notinit)
 {
+	xfs_extlen_t		nlen = 0;
+
+	/* Adjust best length for extent start alignment. */
+	if (*blen > args->alignment)
+		*blen -= args->alignment;
+
 	if (notinit || *blen < ap->minlen) {
 		/*
 		 * Since we did a BUF_TRYLOCK above, it is possible that
 		 * there is space for this request.
 		 */
-		args->minlen = ap->minlen;
+		nlen = ap->minlen;
 	} else if (*blen < args->maxlen) {
 		/*
 		 * If the best seen length is less than the request length,
 		 * use the best as the minimum.
 		 */
-		args->minlen = *blen;
+
+		nlen = *blen;
 	} else {
 		/*
 		 * Otherwise we've seen an extent as big as maxlen, use that
 		 * as the minimum.
 		 */
-		args->minlen = args->maxlen;
+		nlen = args->maxlen;
 	}
+
+	if (args->alignment > 1) {
+		nlen = rounddown(nlen, args->alignment);
+		if (nlen < ap->minlen) {
+			if (xfs_inode_forcealign(ap->ip) &&
+			    (ap->datatype & XFS_ALLOC_USERDATA))
+				return -ENOSPC;
+			nlen = ap->minlen;
+		}
+	}
+	args->minlen = nlen;
+	return 0;
 }
 
 STATIC int
@@ -3311,8 +3330,8 @@ xfs_bmap_btalloc_nullfb(
 		break;
 	}
 
-	xfs_bmap_select_minlen(ap, args, blen, notinit);
-	return 0;
+	error = xfs_bmap_select_minlen(ap, args, blen, notinit);
+	return error;
 }
 
 STATIC int
@@ -3349,7 +3368,9 @@ xfs_bmap_btalloc_filestreams(
 
 	}
 
-	xfs_bmap_select_minlen(ap, args, blen, notinit);
+	error = xfs_bmap_select_minlen(ap, args, blen, notinit);
+	if (error)
+		return error;
 
 	/*
 	 * Set the failure fallback case to look in the selected AG as stream
@@ -3419,9 +3440,8 @@ xfs_bmap_btalloc(
 	xfs_fileoff_t	orig_offset;
 	xfs_extlen_t	orig_length;
 	xfs_extlen_t	blen;
-	xfs_extlen_t	nextminlen = 0;
+	xfs_extlen_t	alignment;
 	int		nullfb;		/* true if ap->firstblock isn't set */
-	int		isaligned;
 	int		tryagain;
 	int		error;
 	int		stripe_align;
@@ -3480,7 +3500,7 @@ xfs_bmap_btalloc(
 	/*
 	 * Normal allocation, done through xfs_alloc_vextent.
 	 */
-	tryagain = isaligned = 0;
+	tryagain = 0;
 	memset(&args, 0, sizeof(args));
 	args.tp = ap->tp;
 	args.mp = mp;
@@ -3491,13 +3511,12 @@ xfs_bmap_btalloc(
 	 * xfs_get_cowextsz_hint() returns extsz_hint for when forcealign is
 	 * set as forcealign and cowextsz_hint are mutually exclusive
 	 */
-	if (xfs_inode_forcealign(ap->ip) && align) {
+	if (xfs_inode_forcealign(ap->ip))
 		args.alignment = align;
-		if (stripe_align == 0 || stripe_align % align)
-			stripe_align = align;
-	} else {
+	else if (stripe_align)
+		args.alignment = stripe_align;
+	else
 		args.alignment = 1;
-	}
 
 	/* Trim the allocation back to the maximum an AG can fit. */
 	args.maxlen = min(ap->length, mp->m_ag_max_usable);
@@ -3548,47 +3567,27 @@ xfs_bmap_btalloc(
 	 * is only set if the allocation length is >= the stripe unit and the
 	 * allocation offset is at the end of file.
 	 */
-	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
-		if (!ap->offset) {
-			args.alignment = stripe_align;
-			atype = args.type;
-			isaligned = 1;
-			/*
-			 * Adjust minlen to try and preserve alignment if we
-			 * can't guarantee an aligned maxlen extent.
-			 */
-			if (blen > args.alignment &&
-			    blen <= args.maxlen + args.alignment)
-				args.minlen = blen - args.alignment;
-			args.minalignslop = 0;
-		} else {
-			/*
-			 * First try an exact bno allocation.
-			 * If it fails then do a near or start bno
-			 * allocation with alignment turned on.
-			 */
-			atype = args.type;
-			tryagain = 1;
-			args.type = XFS_ALLOCTYPE_THIS_BNO;
-			/*
-			 * Compute the minlen+alignment for the
-			 * next case.  Set slop so that the value
-			 * of minlen+alignment+slop doesn't go up
-			 * between the calls.
- */ - if (blen > stripe_align && blen <= args.maxlen) - nextminlen = blen - stripe_align; - else - nextminlen = args.minlen; - if (nextminlen + stripe_align > args.minlen + 1) - args.minalignslop = - nextminlen + stripe_align - - args.minlen - 1; - else - args.minalignslop = 0; + args.minalignslop = 0; + if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { + if (args.alignment > 1 && xfs_inode_forcealign(ap->ip)) { + args.fsbno = NULLFSBLOCK; + goto alloc_out; } - } else { - args.minalignslop = 0; + args.alignment = 1; + } else if (ap->aeof && ap->offset) { + /* + * First try an exact bno allocation. + * If it fails then do a near or start bno + * allocation with alignment turned on. + */ + alignment = args.alignment; + atype = args.type; + tryagain = 1; + args.type = XFS_ALLOCTYPE_THIS_BNO; + args.fsbno = ap->blkno; + + args.alignment = 1; + args.minalignslop = alignment - args.alignment; } args.postallocs = 1; args.minleft = ap->minleft; @@ -3607,21 +3606,26 @@ xfs_bmap_btalloc( */ args.type = atype; args.fsbno = ap->blkno; - args.alignment = stripe_align; - args.minlen = nextminlen; + args.alignment = alignment; args.minalignslop = 0; - isaligned = 1; if ((error = xfs_alloc_vextent(&args))) return error; } - if (isaligned && args.fsbno == NULLFSBLOCK && - (args.alignment <= 1 || !xfs_inode_forcealign(ap->ip))) { + if (args.fsbno == NULLFSBLOCK && args.alignment > 1 && + xfs_inode_forcealign(ap->ip)) { + /* + * Don't attempting non-aligned fallbacks alloc + * for forcealign + */ + goto alloc_out; + } + + if (args.alignment > 1 && args.fsbno == NULLFSBLOCK) { /* * allocation failed, so turn off alignment and * try again. */ - args.type = atype; args.fsbno = ap->blkno; args.alignment = 0; if ((error = xfs_alloc_vextent(&args))) @@ -3643,6 +3647,8 @@ xfs_bmap_btalloc( return error; ap->tp->t_flags |= XFS_TRANS_LOWMODE; } + +alloc_out: if (args.fsbno != NULLFSBLOCK) { /* * check the allocation happened at the same or higher AG than @@ -3669,10 +3675,12 @@ xfs_bmap_btalloc( * very fragmented so we're unlikely to be able to satisfy the * hints anyway. 
 		 */
-		if (ap->length <= orig_length)
-			ap->offset = orig_offset;
-		else if (ap->offset + ap->length < orig_offset + orig_length)
-			ap->offset = orig_offset + orig_length - ap->length;
+		if (!(xfs_inode_forcealign(ap->ip) && align)) {
+			if (ap->length <= orig_length)
+				ap->offset = orig_offset;
+			else if (ap->offset + ap->length < orig_offset + orig_length)
+				ap->offset = orig_offset + orig_length - ap->length;
+		}
 		xfs_bmap_btalloc_accounting(ap, &args);
 	} else {
 		ap->blkno = NULLFSBLOCK;
@@ -5289,7 +5297,7 @@ __xfs_bunmapi(
 	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
 	end = start + len;
 	if (xfs_inode_forcealign(ip) && ip->i_d.di_extsize > 1
-	    && S_ISREG(VFS_I(ip)->i_mode)) {
+	    && S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
 		start = roundup_64(start, ip->i_d.di_extsize);
 		end = rounddown_64(end, ip->i_d.di_extsize);
 		len = end - start;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a527a544a684170dd3ce57c1ddf09ddb0b0dfcc5..30dc960951cac30f0f20389621fa1d69c5317202 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -769,6 +769,8 @@ xfs_setattr_size(
 	int			error;
 	uint			lock_flags = 0;
 	bool			did_zeroing = false;
+	bool			write_back = false;
+	unsigned int		blocksize = 0;
 
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
@@ -776,6 +778,11 @@ xfs_setattr_size(
 	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
 		ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);
 
+	if (xfs_inode_forcealign(ip) && ip->i_d.di_extsize > 1)
+		blocksize = ip->i_d.di_extsize << inode->i_blkbits;
+	else
+		blocksize = i_blocksize(inode);
+
 	oldsize = inode->i_size;
 	newsize = iattr->ia_size;
 
@@ -805,21 +812,8 @@ xfs_setattr_size(
 	 */
 	inode_dio_wait(inode);
 
-	/*
-	 * File data changes must be complete before we start the transaction to
-	 * modify the inode. This needs to be done before joining the inode to
-	 * the transaction because the inode cannot be unlocked once it is a
-	 * part of the transaction.
-	 *
-	 * Start with zeroing any data beyond EOF that we may expose on file
-	 * extension, or zeroing out the rest of the block on a downward
-	 * truncate.
-	 */
-	if (newsize > oldsize) {
-		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
-		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
-				&did_zeroing, &xfs_buffered_write_iomap_ops);
-	} else {
+	write_back = newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size;
+	if (newsize < oldsize) {
 		/*
 		 * iomap won't detect a dirty page over an unwritten block (or a
 		 * cow block over a hole) and subsequently skips zeroing the
@@ -827,53 +821,69 @@ xfs_setattr_size(
 		 * convert the block before the pagecache truncate.
 		 */
 		error = filemap_write_and_wait_range(inode->i_mapping, newsize,
-				newsize);
+				roundup_64(newsize, blocksize) - 1);
 		if (error)
 			return error;
-		error = iomap_truncate_page(inode, newsize, &did_zeroing,
-				&xfs_buffered_write_iomap_ops);
-	}
-	if (error)
-		return error;
-
-	/*
-	 * We've already locked out new page faults, so now we can safely remove
-	 * pages from the page cache knowing they won't get refaulted until we
-	 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
-	 * complete. The truncate_setsize() call also cleans partial EOF page
-	 * PTEs on extending truncates and hence ensures sub-page block size
-	 * filesystems are correctly handled, too.
-	 *
-	 * We have to do all the page cache truncate work outside the
-	 * transaction context as the "lock" order is page lock->log space
-	 * reservation as defined by extent allocation in the writeback path.
-	 * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
-	 * having already truncated the in-memory version of the file (i.e. made
-	 * user visible changes). There's not much we can do about this, except
-	 * to hope that the caller sees ENOMEM and retries the truncate
-	 * operation.
-	 *
-	 * And we update in-core i_size and truncate page cache beyond newsize
-	 * before writeback the [di_size, newsize] range, so we're guaranteed
-	 * not to write stale data past the new EOF on truncate down.
-	 */
-	truncate_setsize(inode, newsize);
-
-	/*
-	 * We are going to log the inode size change in this transaction so
-	 * any previous writes that are beyond the on disk EOF and the new
-	 * EOF that have not been written out need to be written here. If we
-	 * do not write the data out, we expose ourselves to the null files
-	 * problem. Note that this includes any block zeroing we did above;
-	 * otherwise those blocks may not be zeroed after a crash.
-	 */
-	if (did_zeroing ||
-	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
-		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-				ip->i_d.di_size, newsize - 1);
+		error = iomap_truncate_page(inode, newsize, blocksize,
+				&did_zeroing, &xfs_buffered_write_iomap_ops);
 		if (error)
 			return error;
+		/*
+		 * We are going to log the inode size change in this transaction
+		 * so any previous writes that are beyond the on disk EOF and
+		 * the new EOF that have not been written out need to be written
+		 * here. If we do not write the data out, we expose ourselves
+		 * to the null files problem. Note that this includes any block
+		 * zeroing we did above; otherwise those blocks may not be
+		 * zeroed after a crash.
+		 */
+		if (did_zeroing || write_back) {
+			error = filemap_write_and_wait_range(inode->i_mapping,
+					min_t(loff_t, ip->i_d.di_size, newsize),
+					roundup_64(newsize, blocksize) - 1);
+			if (error)
+				return error;
+		}
+
+		/*
+		 * Update i_size only after writing back, to make sure the
+		 * zeroed blocks have been written out, then drop all the page
+		 * cache beyond the blocksize-aligned new EOF block.
+		 *
+		 * We've already locked out new page faults, so now we can
+		 * safely remove pages from the page cache knowing they won't
+		 * get refaulted until we drop the XFS_MMAP_EXCL lock after the
+		 * extent manipulations are complete.
+		 */
+		i_size_write(inode, newsize);
+		truncate_pagecache(inode, roundup_64(newsize, blocksize));
+	} else {
+		/*
+		 * Start with zeroing any data beyond EOF that we may expose on
+		 * file extension.
+		 */
+		if (newsize > oldsize) {
+			trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
+			error = iomap_zero_range(inode, oldsize, newsize - oldsize,
+					&did_zeroing, &xfs_buffered_write_iomap_ops);
+			if (error)
+				return error;
+		}
+
+		/*
+		 * The truncate_setsize() call also cleans partial EOF page
+		 * PTEs on extending truncates and hence ensures sub-page block
+		 * size filesystems are correctly handled, too.
+ */ + truncate_setsize(inode, newsize); + + if (did_zeroing || write_back) { + error = filemap_write_and_wait_range(inode->i_mapping, + ip->i_d.di_size, newsize - 1); + if (error) + return error; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d43f76a4b99a433f17697cf44ed2507cef076d35..f2ff547e760c0b6e1ec547236bf5786c67987f50 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1658,10 +1658,19 @@ xfs_fc_fill_super( } } - if (xfs_has_forcealign(mp)) + if (xfs_has_forcealign(mp)) { xfs_warn(mp, "EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!"); + if (xfs_has_realtime(mp)) { + xfs_alert(mp, + "forcealign not supported for realtime device!"); + error = -EINVAL; + goto out_filestream_unmount; + } + + } + if (xfs_has_atomicwrites(mp)) xfs_warn(mp, "EXPERIMENTAL atomicwrites feature in use. Use at your own risk!"); @@ -1674,6 +1683,14 @@ xfs_fc_fill_super( goto out_filestream_unmount; } + if (xfs_has_forcealign(mp)) { + xfs_alert(mp, + "reflink not compatible with forcealign!"); + error = -EINVAL; + goto out_filestream_unmount; + } + + if (xfs_globals.always_cow) { xfs_info(mp, "using DEBUG-only always_cow mode."); mp->m_always_cow = true; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index d14a729d40ce3de937e8b41e9a083416bb5391a3..1b6e22741d43002e8ecbad1bc980da64101eb858 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -207,8 +207,8 @@ int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops); -int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, - const struct iomap_ops *ops); +int iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize, + bool *did_zero, const struct iomap_ops *ops); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, diff --git a/include/linux/math64.h b/include/linux/math64.h index 66deb1fdc2ef641d1c4a917c212585be80d94751..b5c4d1df08e522248bca845f84f00c039f09ab4e 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -3,6 +3,7 @@ #define _LINUX_MATH64_H #include +#include #include #include @@ -11,6 +12,20 @@ #define div64_long(x, y) div64_s64((x), (y)) #define div64_ul(x, y) div64_u64((x), (y)) +/** + * rem_u64 - remainder of unsigned 64bit divide with 32bit divisor + * @dividend: unsigned 64bit dividend + * @divisor: unsigned 32bit divisor + * + * Return: dividend % divisor + */ +static inline u32 rem_u64(u64 dividend, u32 divisor) +{ + if (is_power_of_2(divisor)) + return dividend & (divisor - 1); + return dividend % divisor; +} + /** * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder * @dividend: unsigned 64bit dividend @@ -85,6 +100,15 @@ static inline s64 div64_s64(s64 dividend, s64 divisor) #define div64_long(x, y) div_s64((x), (y)) #define div64_ul(x, y) div_u64((x), (y)) +#ifndef rem_u64 +static inline u32 rem_u64(u64 dividend, u32 divisor) +{ + if (is_power_of_2(divisor)) + return dividend & (divisor - 1); + return do_div(dividend, divisor); +} +#endif + #ifndef div_u64_rem static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) {