diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 54da6d717a061c91f13036816b88dca3ef1ca3a6..b987dc2c685170ad1ce08768553c7a8cee02d7a6 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -72,10 +72,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags) /* * Zone interfaces */ - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - static inline struct page * kmem_to_page(void *addr) { diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 60aab422e818e40301c05ac6f9ccde16b2387f7d..b901eedf3bb81bf41a9b9f223c670f7e293e1ab2 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,7 +27,7 @@ #include "xfs_ag_resv.h" #include "xfs_bmap.h" -extern kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_extfree_item_cache; struct workqueue_struct *xfs_alloc_wq; @@ -37,8 +37,8 @@ struct workqueue_struct *xfs_alloc_wq; #define XFSA_FIXUP_CNT_OK 2 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); +STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *, uint32_t); +STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *, uint32_t); /* * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in @@ -81,6 +81,25 @@ xfs_prealloc_blocks( return XFS_IBT_BLOCK(mp) + 1; } +/* + * The same AG may need to be fixed up twice within a single transaction, and + * the AGFL blocks consumed by the first fixup may cause the second fixup to + * fail and shut the filesystem down. To avoid that, reserve enough blocks to + * satisfy the second fixup. + */ +xfs_extlen_t +xfs_ag_fixup_aside( + struct xfs_mount *mp) +{ + xfs_extlen_t ret; + + ret = 2 * mp->m_ag_maxlevels; + if (xfs_has_rmapbt(mp)) + ret += mp->m_rmap_maxlevels; + + return ret; +} + /* * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of * AGF buffer (PV 947395), we place constraints on the relationship among @@ -95,12 +114,15 @@ xfs_prealloc_blocks( * * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a * potential split of the file's bmap btree. + * + * See the comment above xfs_ag_fixup_aside() for why we reserve extra blocks. 
*/ unsigned int xfs_alloc_set_aside( struct xfs_mount *mp) { - return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); + return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + + 4 + xfs_ag_fixup_aside(mp)); } /* @@ -133,6 +155,8 @@ xfs_alloc_ag_max_usable( if (xfs_has_reflink(mp)) blocks++; /* refcount root block */ + blocks += xfs_ag_fixup_aside(mp); + return mp->m_sb.sb_agblocks - blocks; } @@ -1127,7 +1151,8 @@ xfs_alloc_ag_vextent_small( */ STATIC int /* error */ xfs_alloc_ag_vextent( - xfs_alloc_arg_t *args) /* argument structure for allocation */ + xfs_alloc_arg_t *args, /* argument structure for allocation */ + uint32_t alloc_flags) { int error=0; @@ -1143,10 +1168,10 @@ xfs_alloc_ag_vextent( args->wasfromfl = 0; switch (args->type) { case XFS_ALLOCTYPE_THIS_AG: - error = xfs_alloc_ag_vextent_size(args); + error = xfs_alloc_ag_vextent_size(args, alloc_flags); break; case XFS_ALLOCTYPE_NEAR_BNO: - error = xfs_alloc_ag_vextent_near(args); + error = xfs_alloc_ag_vextent_near(args, alloc_flags); break; case XFS_ALLOCTYPE_THIS_BNO: error = xfs_alloc_ag_vextent_exact(args); @@ -1554,7 +1579,8 @@ xfs_alloc_ag_vextent_lastblock( */ STATIC int xfs_alloc_ag_vextent_near( - struct xfs_alloc_arg *args) + struct xfs_alloc_arg *args, + uint32_t alloc_flags) { struct xfs_alloc_cur acur = {}; int error; /* error code */ @@ -1573,6 +1599,8 @@ xfs_alloc_ag_vextent_near( if (args->agbno > args->max_agbno) args->agbno = args->max_agbno; + /* Retry once quickly if we find busy extents before blocking. */ + alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; restart: len = 0; @@ -1628,9 +1656,20 @@ xfs_alloc_ag_vextent_near( */ if (!acur.len) { if (acur.busy) { + /* + * Our only valid extents must have been busy. Flush and + * retry the allocation again. If we get an -EAGAIN + * error, we're being told that a deadlock was avoided + * and the current transaction needs committing before + * the allocation can be retried. + */ trace_xfs_alloc_near_busy(args); - xfs_extent_busy_flush(args->mp, args->pag, - acur.busy_gen); + error = xfs_extent_busy_flush(args->tp, args->pag, + acur.busy_gen, alloc_flags); + if (error) + goto out; + + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; goto restart; } trace_xfs_alloc_size_neither(args); @@ -1653,22 +1692,25 @@ xfs_alloc_ag_vextent_near( * and of the form k * prod + mod unless there's nothing that large. * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. */ -STATIC int /* error */ +static int xfs_alloc_ag_vextent_size( - xfs_alloc_arg_t *args) /* allocation argument structure */ + struct xfs_alloc_arg *args, + uint32_t alloc_flags) { - struct xfs_agf *agf = args->agbp->b_addr; - xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ - xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ - int error; /* error result */ - xfs_agblock_t fbno; /* start of found freespace */ - xfs_extlen_t flen; /* length of found freespace */ - int i; /* temp status variable */ - xfs_agblock_t rbno; /* returned block number */ - xfs_extlen_t rlen; /* length of returned extent */ - bool busy; - unsigned busy_gen; + struct xfs_agf *agf = args->agbp->b_addr; + struct xfs_btree_cur *bno_cur; + struct xfs_btree_cur *cnt_cur; + xfs_agblock_t fbno; /* start of found freespace */ + xfs_extlen_t flen; /* length of found freespace */ + xfs_agblock_t rbno; /* returned block number */ + xfs_extlen_t rlen; /* length of returned extent */ + bool busy; + unsigned busy_gen; + int error; + int i; + /* Retry once quickly if we find busy extents before blocking. 
*/ + alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH; restart: /* * Allocate and initialize a cursor for the by-size btree. @@ -1676,7 +1718,6 @@ xfs_alloc_ag_vextent_size( cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_CNT); bno_cur = NULL; - busy = false; /* * Look for an entry >= maxlen+alignment-1 blocks. @@ -1727,19 +1768,25 @@ xfs_alloc_ag_vextent_size( error = xfs_btree_increment(cnt_cur, 0, &i); if (error) goto error0; - if (i == 0) { - /* - * Our only valid extents must have been busy. - * Make it unbusy by forcing the log out and - * retrying. - */ - xfs_btree_del_cursor(cnt_cur, - XFS_BTREE_NOERROR); - trace_xfs_alloc_size_busy(args); - xfs_extent_busy_flush(args->mp, - args->pag, busy_gen); - goto restart; - } + if (i) + continue; + + /* + * Our only valid extents must have been busy. Flush and + * retry the allocation again. If we get an -EAGAIN + * error, we're being told that a deadlock was avoided + * and the current transaction needs committing before + * the allocation can be retried. + */ + trace_xfs_alloc_size_busy(args); + error = xfs_extent_busy_flush(args->tp, args->pag, + busy_gen, alloc_flags); + if (error) + goto error0; + + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + goto restart; } } @@ -1819,9 +1866,21 @@ xfs_alloc_ag_vextent_size( args->len = rlen; if (rlen < args->minlen) { if (busy) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + /* + * Our only valid extents must have been busy. Flush and + * retry the allocation again. If we get an -EAGAIN + * error, we're being told that a deadlock was avoided + * and the current transaction needs committing before + * the allocation can be retried. + */ trace_xfs_alloc_size_busy(args); - xfs_extent_busy_flush(args->mp, args->pag, busy_gen); + error = xfs_extent_busy_flush(args->tp, args->pag, + busy_gen, alloc_flags); + if (error) + goto error0; + + alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH; + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); goto restart; } goto out_nominleft; @@ -2280,6 +2339,7 @@ xfs_alloc_min_freelist( static bool xfs_alloc_space_available( struct xfs_alloc_arg *args, + xfs_extlen_t need, xfs_extlen_t min_free, int flags) { @@ -2296,7 +2356,7 @@ xfs_alloc_space_available( /* do we have enough contiguous free space for the allocation? */ alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; - longest = xfs_alloc_longest_free_extent(pag, min_free, reservation); + longest = xfs_alloc_longest_free_extent(pag, need, reservation); if (longest < alloc_len) return false; @@ -2305,7 +2365,7 @@ xfs_alloc_space_available( * account extra agfl blocks because we are about to defer free them, * making them unavailable until the current transaction commits. */ - agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free); + agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, need); available = (int)(pag->pagf_freeblks + agflcount - reservation - min_free - args->minleft); if (available < (int)max(args->total, alloc_len)) @@ -2439,7 +2499,7 @@ xfs_agfl_reset( /* * Defer an AGFL block free. This is effectively equivalent to - * xfs_bmap_add_free() with some special handling particular to AGFL blocks. + * xfs_free_extent_later() with some special handling particular to AGFL blocks. * * Deferring AGFL frees helps prevent log reservation overruns due to too many * allocation operations in a transaction. AGFL frees are prone to this problem @@ -2448,29 +2508,95 @@ xfs_agfl_reset( * the real allocation can proceed. 
Deferring the free disconnects freeing up * the AGFL slot from freeing the block. */ -STATIC void +static int xfs_defer_agfl_block( struct xfs_trans *tp, xfs_agnumber_t agno, - xfs_fsblock_t agbno, + xfs_agblock_t agbno, struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *new; /* new element */ + xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); - ASSERT(xfs_bmap_free_item_zone != NULL); + ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); - new = kmem_cache_alloc(xfs_bmap_free_item_zone, + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) + return -EFSCORRUPTED; + + new = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); + new->xefi_startblock = fsbno; new->xefi_blockcount = 1; - new->xefi_oinfo = *oinfo; - new->xefi_skip_discard = false; + new->xefi_owner = oinfo->oi_owner; + new->xefi_agresv = XFS_AG_RESV_AGFL; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); + return 0; +} + +/* + * Add the extent to the list of extents to be free at transaction end. + * The list is maintained sorted (by block number). + */ +int +__xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, + bool skip_discard) +{ + struct xfs_extent_free_item *new; /* new element */ + struct xfs_mount *mp = tp->t_mountp; +#ifdef DEBUG + xfs_agnumber_t agno; + xfs_agblock_t agbno; + + ASSERT(bno != NULLFSBLOCK); + ASSERT(len > 0); + ASSERT(len <= MAXEXTLEN); + ASSERT(!isnullstartblock(bno)); + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(agbno < mp->m_sb.sb_agblocks); + ASSERT(len < mp->m_sb.sb_agblocks); + ASSERT(agbno + len <= mp->m_sb.sb_agblocks); +#endif + ASSERT(xfs_extfree_item_cache != NULL); + ASSERT(type != XFS_AG_RESV_AGFL); + + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) + return -EFSCORRUPTED; + + new = kmem_cache_zalloc(xfs_extfree_item_cache, + GFP_KERNEL | __GFP_NOFAIL); + new->xefi_startblock = bno; + new->xefi_blockcount = (xfs_extlen_t)len; + new->xefi_agresv = type; + if (skip_discard) + new->xefi_flags |= XFS_EFI_SKIP_DISCARD; + if (oinfo) { + ASSERT(oinfo->oi_offset == 0); + + if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) + new->xefi_flags |= XFS_EFI_ATTR_FORK; + if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) + new->xefi_flags |= XFS_EFI_BMBT_BLOCK; + new->xefi_owner = oinfo->oi_owner; + } else { + new->xefi_owner = XFS_RMAP_OWN_NULL; + } + trace_xfs_bmap_free_defer(tp->t_mountp, + XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, + XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); + return 0; } /* @@ -2480,7 +2606,7 @@ xfs_defer_agfl_block( int /* error */ xfs_alloc_fix_freelist( struct xfs_alloc_arg *args, /* allocation argument structure */ - int flags) /* XFS_ALLOC_FLAG_... 
*/ + uint32_t alloc_flags) { struct xfs_mount *mp = args->mp; struct xfs_perag *pag = args->pag; @@ -2490,13 +2616,14 @@ xfs_alloc_fix_freelist( struct xfs_alloc_arg targs; /* local allocation arguments */ xfs_agblock_t bno; /* freelist block */ xfs_extlen_t need; /* total blocks needed in freelist */ + xfs_extlen_t minfree; int error = 0; /* deferred ops (AGFL block frees) require permanent transactions */ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); if (!pag->pagf_init) { - error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + error = xfs_alloc_read_agf(mp, tp, args->agno, alloc_flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) @@ -2511,8 +2638,8 @@ xfs_alloc_fix_freelist( * point */ if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) && - (flags & XFS_ALLOC_FLAG_TRYLOCK)) { - ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); + (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) { + ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING)); goto out_agbp_relse; } @@ -2521,8 +2648,11 @@ xfs_alloc_fix_freelist( * blocks to perform multiple allocations from a single AG and * transaction if needed. */ - need = xfs_alloc_min_freelist(mp, pag) * (1 + args->postallocs); - if (!xfs_alloc_space_available(args, need, flags | + minfree = need = xfs_alloc_min_freelist(mp, pag); + if (args->postallocs) + minfree += xfs_ag_fixup_aside(mp); + + if (!xfs_alloc_space_available(args, need, minfree, alloc_flags | XFS_ALLOC_FLAG_CHECK)) goto out_agbp_relse; @@ -2531,7 +2661,7 @@ xfs_alloc_fix_freelist( * Can fail if we're not blocking on locks, and it's held. */ if (!agbp) { - error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + error = xfs_alloc_read_agf(mp, tp, args->agno, alloc_flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) @@ -2545,8 +2675,11 @@ xfs_alloc_fix_freelist( xfs_agfl_reset(tp, agbp, pag); /* If there isn't enough total space or single-extent, reject it. */ - need = xfs_alloc_min_freelist(mp, pag) * (1 + args->postallocs); - if (!xfs_alloc_space_available(args, need, flags)) + minfree = need = xfs_alloc_min_freelist(mp, pag); + if (args->postallocs) + minfree += xfs_ag_fixup_aside(mp); + + if (!xfs_alloc_space_available(args, need, minfree, alloc_flags)) goto out_agbp_relse; /* @@ -2575,17 +2708,20 @@ xfs_alloc_fix_freelist( */ memset(&targs, 0, sizeof(targs)); /* struct copy below */ - if (flags & XFS_ALLOC_FLAG_NORMAP) + if (alloc_flags & XFS_ALLOC_FLAG_NORMAP) targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; else targs.oinfo = XFS_RMAP_OINFO_AG; - while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { + while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) && + pag->pagf_flcount > need) { error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); if (error) goto out_agbp_relse; /* defer agfl frees */ - xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + if (error) + goto out_agbp_relse; } targs.tp = tp; @@ -2606,7 +2742,7 @@ xfs_alloc_fix_freelist( targs.resv = XFS_AG_RESV_AGFL; /* Allocate as many blocks as possible at once. */ - error = xfs_alloc_ag_vextent(&targs); + error = xfs_alloc_ag_vextent(&targs, alloc_flags); if (error) goto out_agflbp_relse; @@ -2616,7 +2752,7 @@ xfs_alloc_fix_freelist( * on a completely full ag. 
*/ if (targs.agbno == NULLAGBLOCK) { - if (flags & XFS_ALLOC_FLAG_FREEING) + if (alloc_flags & XFS_ALLOC_FLAG_FREEING) break; goto out_agflbp_relse; } @@ -2685,6 +2821,10 @@ xfs_alloc_get_freelist( */ agfl_bno = xfs_buf_to_agfl_bno(agflbp); bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); + if (XFS_IS_CORRUPT(tp->t_mountp, + !xfs_verify_agbno(mp, be32_to_cpu(agf->agf_seqno), bno))) + return -EFSCORRUPTED; + be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) @@ -2831,6 +2971,7 @@ xfs_agf_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_agf *agf = bp->b_addr; + uint32_t agf_length = be32_to_cpu(agf->agf_length); if (xfs_has_crc(mp)) { if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) @@ -2842,18 +2983,50 @@ xfs_agf_verify( if (!xfs_verify_magic(bp, agf->agf_magicnum)) return __this_address; - if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && - be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && - be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && - be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) + if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum))) + return __this_address; + + /* + * Both agf_seqno and agf_length need to be validated before anything else + * block number related in the AGF or AGFL can be checked. + * + * During growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) return __this_address; - if (be32_to_cpu(agf->agf_length) > mp->m_sb.sb_dblocks) + /* + * Only the last AGF in the filesystem is allowed to be shorter + * than the AG size recorded in the superblock. + */ + if (agf_length != mp->m_sb.sb_agblocks && + !(bp->b_flags & _XBF_LOGRECOVERY)) { + /* + * During growfs, the new last AGF can get here before we + * have updated the superblock. Give it a pass on the seqno + * check. 
+ */ + if (bp->b_pag && + be32_to_cpu(agf->agf_seqno) != mp->m_sb.sb_agcount - 1) + return __this_address; + if (agf_length < XFS_MIN_AG_BLOCKS) + return __this_address; + if (agf_length > mp->m_sb.sb_agblocks) + return __this_address; + } + + if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp)) + return __this_address; + if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp)) + return __this_address; + if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp)) return __this_address; if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) || - be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length)) + be32_to_cpu(agf->agf_freeblks) > agf_length) return __this_address; if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || @@ -2862,37 +3035,28 @@ xfs_agf_verify( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) return __this_address; - if (xfs_has_rmapbt(mp) && - (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) - return __this_address; - - if (xfs_has_rmapbt(mp) && - be32_to_cpu(agf->agf_rmap_blocks) > be32_to_cpu(agf->agf_length)) + if (xfs_has_lazysbcount(mp) && + be32_to_cpu(agf->agf_btreeblks) > agf_length) return __this_address; - /* - * during growfs operations, the perag is not fully initialised, - * so we can't use it for any useful checking. growfs ensures we can't - * use it by using uncached buffers that don't have the perag attached - * so we can detect and avoid this problem. - */ - if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) - return __this_address; + if (xfs_has_rmapbt(mp)) { + if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length) + return __this_address; - if (xfs_has_lazysbcount(mp) && - be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) - return __this_address; + if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > + mp->m_rmap_maxlevels) + return __this_address; + } - if (xfs_has_reflink(mp) && - be32_to_cpu(agf->agf_refcount_blocks) > - be32_to_cpu(agf->agf_length)) - return __this_address; + if (xfs_has_reflink(mp)) { + if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length) + return __this_address; - if (xfs_has_reflink(mp) && - (be32_to_cpu(agf->agf_refcount_level) < 1 || - be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) - return __this_address; + if (be32_to_cpu(agf->agf_refcount_level) < 1 || + be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels) + return __this_address; + } return NULL; @@ -3059,7 +3223,7 @@ xfs_alloc_vextent( { xfs_agblock_t agsize; /* allocation group size */ int error; - int flags; /* XFS_ALLOC_FLAG_... locking flags */ + uint32_t alloc_flags; /* XFS_ALLOC_FLAG_... locking flags */ struct xfs_mount *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ @@ -3112,7 +3276,7 @@ xfs_alloc_vextent( break; } args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); - if ((error = xfs_alloc_ag_vextent(args))) + if ((error = xfs_alloc_ag_vextent(args, 0))) goto error0; break; case XFS_ALLOCTYPE_START_BNO: @@ -3141,13 +3305,13 @@ xfs_alloc_vextent( args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->type = XFS_ALLOCTYPE_THIS_AG; sagno = 0; - flags = 0; + alloc_flags = 0; } else { /* * Start with the given allocation group. 
*/ args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); - flags = XFS_ALLOC_FLAG_TRYLOCK; + alloc_flags = XFS_ALLOC_FLAG_TRYLOCK; } /* * Loop over allocation groups twice; first time with @@ -3155,7 +3319,7 @@ xfs_alloc_vextent( */ for (;;) { args->pag = xfs_perag_get(mp, args->agno); - error = xfs_alloc_fix_freelist(args, flags); + error = xfs_alloc_fix_freelist(args, alloc_flags); if (error) { trace_xfs_alloc_vextent_nofix(args); goto error0; @@ -3164,7 +3328,8 @@ xfs_alloc_vextent( * If we get a buffer back then the allocation will fly. */ if (args->agbp) { - if ((error = xfs_alloc_ag_vextent(args))) + if ((error = xfs_alloc_ag_vextent(args, + alloc_flags))) goto error0; break; } @@ -3195,13 +3360,13 @@ xfs_alloc_vextent( * or switch to non-trylock mode. */ if (args->agno == sagno) { - if (flags == 0) { + if (alloc_flags == 0) { args->agbno = NULLAGBLOCK; trace_xfs_alloc_vextent_allfailed(args); break; } - flags = 0; + alloc_flags = 0; if (type == XFS_ALLOCTYPE_START_BNO) { args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); @@ -3449,3 +3614,20 @@ xfs_agfl_walk( return 0; } + +int __init +xfs_extfree_intent_init_cache(void) +{ + xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent", + sizeof(struct xfs_extent_free_item), + 0, 0, NULL); + + return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_extfree_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_extfree_item_cache); + xfs_extfree_item_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 9cf9b4b593ca164ec0375225043f18f0ac0a5493..6ae1ade607cd64d85a532eae296d2f71027314f2 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -38,11 +38,12 @@ typedef unsigned int xfs_alloctype_t; /* * Flags for xfs_alloc_fix_freelist. */ -#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ -#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ -#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ -#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ -#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ +#define XFS_ALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */ +#define XFS_ALLOC_FLAG_FREEING (1U << 1) /* indicate caller is freeing extents*/ +#define XFS_ALLOC_FLAG_NORMAP (1U << 2) /* don't modify the rmapbt */ +#define XFS_ALLOC_FLAG_NOSHRINK (1U << 3) /* don't shrink the freelist */ +#define XFS_ALLOC_FLAG_CHECK (1U << 4) /* test only, don't modify args */ +#define XFS_ALLOC_FLAG_TRYFLUSH (1U << 5) /* don't wait in busy extent flush */ /* * Argument structure for xfs_alloc routines. @@ -211,7 +212,7 @@ int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, struct xfs_buf *, struct xfs_owner_info *); -int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); +int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags); int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **agbp); @@ -246,4 +247,42 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } +int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, + xfs_filblks_t len, const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, bool skip_discard); + +/* + * List of extents to be free "later". + * The list is kept sorted on xbf_startblock. 
+ */ +struct xfs_extent_free_item { + struct list_head xefi_list; + uint64_t xefi_owner; + xfs_fsblock_t xefi_startblock;/* starting fs block number */ + xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ + unsigned int xefi_flags; + enum xfs_ag_resv_type xefi_agresv; +}; + +#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ +#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ +#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ + +static inline int +xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) +{ + return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); +} + + +extern struct kmem_cache *xfs_extfree_item_cache; + +int __init xfs_extfree_intent_init_cache(void); +void xfs_extfree_intent_destroy_cache(void); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 4452b42559176d0e40ecf664df77bc3b65223f70..613a1743fdd596b152579d9eaaadfe1e7791dcfe 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -480,7 +480,7 @@ xfs_allocbt_init_common( ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); + cur = kmem_cache_zalloc(xfs_btree_cur_cache, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 852019a7feda78634d5a6a2083ffe2385ec455b3..70e30fdbedfdb824712372fb366c0372958101b0 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -2251,8 +2251,6 @@ xfs_attr3_leaf_unbalance( trace_xfs_attr_leaf_unbalance(state->args); - drop_leaf = drop_blk->bp->b_addr; - save_leaf = save_blk->bp->b_addr; xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf); xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf); entry = xfs_attr3_leaf_entryp(drop_leaf); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 34fe4aed0ba8692f50a83e164f174df1bd12900a..cecfa5280b51a417c16191e7b2d9b5026860353b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -36,8 +36,7 @@ #include "xfs_icache.h" #include "xfs_iomap.h" - -kmem_zone_t *xfs_bmap_free_item_zone; +struct kmem_cache *xfs_bmap_intent_cache; /* * Miscellaneous helper functions @@ -518,56 +517,6 @@ xfs_bmap_validate_ret( #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap) do { } while (0) #endif /* DEBUG */ -/* - * bmap free list manipulation functions - */ - -/* - * Add the extent to the list of extents to be free at transaction end. - * The list is maintained sorted (by block number). 
- */ -void -__xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo, - bool skip_discard) -{ - struct xfs_extent_free_item *new; /* new element */ -#ifdef DEBUG - struct xfs_mount *mp = tp->t_mountp; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - - ASSERT(bno != NULLFSBLOCK); - ASSERT(len > 0); - ASSERT(len <= MAXEXTLEN); - ASSERT(!isnullstartblock(bno)); - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - ASSERT(agno < mp->m_sb.sb_agcount); - ASSERT(agbno < mp->m_sb.sb_agblocks); - ASSERT(len < mp->m_sb.sb_agblocks); - ASSERT(agbno + len <= mp->m_sb.sb_agblocks); -#endif - ASSERT(xfs_bmap_free_item_zone != NULL); - - new = kmem_cache_alloc(xfs_bmap_free_item_zone, - GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = bno; - new->xefi_blockcount = (xfs_extlen_t)len; - if (oinfo) - new->xefi_oinfo = *oinfo; - else - new->xefi_oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - new->xefi_skip_discard = skip_discard; - trace_xfs_bmap_free_defer(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, - XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); -} - /* * Inode fork format manipulation functions */ @@ -621,8 +570,13 @@ xfs_bmap_btree_to_extents( cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_bmap_add_free(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, + XFS_AG_RESV_NONE); + if (error) + return error; + ip->i_d.di_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); @@ -4245,7 +4199,7 @@ xfs_bmapi_convert_unwritten( return 0; } -static inline xfs_extlen_t +xfs_extlen_t xfs_bmapi_minleft( struct xfs_trans *tp, struct xfs_inode *ip, @@ -5239,10 +5193,13 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_bmap_add_free(tp, del->br_startblock, + error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, - (bflags & XFS_BMAPI_NODISCARD) || - del->br_state == XFS_EXT_UNWRITTEN); + XFS_AG_RESV_NONE, + ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN)); + if (error) + goto done; } } @@ -6137,7 +6094,7 @@ __xfs_bmap_add( bmap->br_blockcount, bmap->br_state); - bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS); + bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&bi->bi_list); bi->bi_type = type; bi->bi_owner = ip; @@ -6259,3 +6216,20 @@ xfs_bmap_validate_extent( return __this_address; return NULL; } + +int __init +xfs_bmap_intent_init_cache(void) +{ + xfs_bmap_intent_cache = kmem_cache_create("xfs_bmap_intent", + sizeof(struct xfs_bmap_intent), + 0, 0, NULL); + + return xfs_bmap_intent_cache != NULL ? 0 : -ENOMEM; +} + +void +xfs_bmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_bmap_intent_cache); + xfs_bmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 6747e97a7949014b789981b5ef4a93648183c50c..39848a18700ca70c7715328971967c05b78c6c34 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -13,8 +13,6 @@ struct xfs_inode; struct xfs_mount; struct xfs_trans; -extern kmem_zone_t *xfs_bmap_free_item_zone; - /* * Argument structure for xfs_bmap_alloc. 
*/ @@ -44,19 +42,6 @@ struct xfs_bmalloca { int flags; }; -/* - * List of extents to be free "later". - * The list is kept sorted on xbf_startblock. - */ -struct xfs_extent_free_item -{ - xfs_fsblock_t xefi_startblock;/* starting fs block number */ - xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ - bool xefi_skip_discard; - struct list_head xefi_list; - struct xfs_owner_info xefi_oinfo; /* extent owner */ -}; - #define XFS_BMAP_MAX_NMAP 4 /* @@ -189,9 +174,6 @@ int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); -void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, - xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); @@ -239,16 +221,8 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); - -static inline void -xfs_bmap_add_free( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo) -{ - __xfs_bmap_add_free(tp, bno, len, oinfo, false); -} +xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip, + int fork); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, @@ -258,8 +232,8 @@ enum xfs_bmap_intent_type { struct xfs_bmap_intent { struct list_head bi_list; enum xfs_bmap_intent_type bi_type; - struct xfs_inode *bi_owner; int bi_whichfork; + struct xfs_inode *bi_owner; struct xfs_bmbt_irec bi_bmap; }; @@ -291,4 +265,9 @@ int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, int flags); +extern struct kmem_cache *xfs_bmap_intent_cache; + +int __init xfs_bmap_intent_init_cache(void); +void xfs_bmap_intent_destroy_cache(void); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 64a3cbcb22a237c86d8482b4e4856245ec4c3e53..a6f7cd114222b3ca19882610cd04f23dc146fe26 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -211,18 +211,16 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); args.type = XFS_ALLOCTYPE_START_BNO; + /* - * Make sure there is sufficient room left in the AG to - * complete a full tree split for an extent insert. If - * we are converting the middle part of an extent then - * we may need space for two tree splits. - * - * We are relying on the caller to make the correct block - * reservation for this operation to succeed. If the - * reservation amount is insufficient then we may fail a - * block allocation here and corrupt the filesystem. + * If we are coming here from something like unwritten extent + * conversion, there has been no data extent allocation already + * done, so we have to ensure that we attempt to locate the + * entire set of bmbt allocations in the same AG, as + * xfs_bmapi_write() would have reserved. 
*/ - args.minleft = args.tp->t_blk_res; + args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip, + cur->bc_ino.whichfork); } else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) { args.type = XFS_ALLOCTYPE_START_BNO; } else { @@ -246,6 +244,7 @@ xfs_bmbt_alloc_block( * successful activate the lowspace algorithm. */ args.fsbno = 0; + args.minleft = 0; args.type = XFS_ALLOCTYPE_FIRST_AG; error = xfs_alloc_vextent(&args); if (error) @@ -284,9 +283,13 @@ xfs_bmbt_free_block( struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); struct xfs_owner_info oinfo; + int error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_bmap_add_free(cur->bc_tp, fsbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, + XFS_AG_RESV_NONE); + if (error) + return error; ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -552,7 +555,7 @@ xfs_bmbt_init_cursor( struct xfs_btree_cur *cur; ASSERT(whichfork != XFS_COW_FORK); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); + cur = kmem_cache_zalloc(xfs_btree_cur_cache, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 145ee148a6e0e8ff76974b483b1169e65d70b6b6..04ab18e209fb5f653a8ae79549d985153cfc0212 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -25,7 +25,7 @@ /* * Cursor allocation zone. */ -kmem_zone_t *xfs_btree_cur_zone; +struct kmem_cache *xfs_btree_cur_cache; /* * Btree magic numbers. @@ -454,7 +454,7 @@ xfs_btree_del_cursor( xfs_is_shutdown(cur->bc_mp) || error != 0); if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) kmem_free(cur->bc_ops); - kmem_cache_free(xfs_btree_cur_zone, cur); + kmem_cache_free(xfs_btree_cur_cache, cur); } /* diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index ba11d2a4b686e4ff30ae0b06acd28929dcc0a9de..d172046ae833fca8cb02c6163ff47d048e6c4eba 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -12,7 +12,7 @@ struct xfs_mount; struct xfs_trans; struct xfs_ifork; -extern kmem_zone_t *xfs_btree_cur_zone; +extern struct kmem_cache *xfs_btree_cur_cache; /* * Generic key, ptr and record wrapper structures. diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 10e93c9ce827f6c5be5bef7907cab8e19a2dcaee..f1258e294ead5fd270ab36380afa599dfac01ec3 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -72,7 +72,7 @@ STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *save_blk); -kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */ +struct kmem_cache *xfs_da_state_cache; /* anchor for dir/attr state */ /* * Allocate a dir-state structure. 
@@ -84,7 +84,7 @@ xfs_da_state_alloc( { struct xfs_da_state *state; - state = kmem_cache_zalloc(xfs_da_state_zone, GFP_NOFS | __GFP_NOFAIL); + state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL); state->args = args; state->mp = args->dp->i_mount; return state; @@ -113,7 +113,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef DEBUG memset((char *)state, 0, sizeof(*state)); #endif /* DEBUG */ - kmem_cache_free(xfs_da_state_zone, state); + kmem_cache_free(xfs_da_state_cache, state); } static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork) diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index adb3c4419051a3355de385fd222040a3892a87e7..6902fdda184e5174ca9216c5ff2f460ac93a1b0e 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -9,7 +9,6 @@ struct xfs_inode; struct xfs_trans; -struct zone; /* * Directory/attribute geometry information. There will be one of these for each @@ -238,6 +237,6 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp, struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from); -extern struct kmem_zone *xfs_da_state_zone; +extern struct kmem_cache *xfs_da_state_cache; #endif /* __XFS_DA_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 168b8798625f9736bcc5b9f820d1cad727958149..3da95a9515044e8fbc1ff50927e8a078073b7462 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -18,6 +18,12 @@ #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" +#include "xfs_rmap.h" +#include "xfs_refcount.h" +#include "xfs_bmap.h" +#include "xfs_alloc.h" + +static struct kmem_cache *xfs_defer_pending_cache; /* * Deferred Operations in XFS @@ -349,7 +355,7 @@ xfs_defer_cancel_list( ops->cancel_item(pwi); } ASSERT(dfp->dfp_count == 0); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); } } @@ -446,7 +452,7 @@ xfs_defer_finish_one( /* Done with the dfp, free it. */ list_del(&dfp->dfp_list); - kmem_free(dfp); + kmem_cache_free(xfs_defer_pending_cache, dfp); out: if (ops->finish_cleanup) ops->finish_cleanup(tp, state, error); @@ -580,8 +586,8 @@ xfs_defer_add( dfp = NULL; } if (!dfp) { - dfp = kmem_alloc(sizeof(struct xfs_defer_pending), - KM_NOFS); + dfp = kmem_cache_zalloc(xfs_defer_pending_cache, + GFP_NOFS | __GFP_NOFAIL); dfp->dfp_type = type; dfp->dfp_intent = NULL; dfp->dfp_done = NULL; @@ -750,3 +756,59 @@ xfs_defer_ops_continue( kmem_free(dfc); } + +static inline int __init +xfs_defer_init_cache(void) +{ + xfs_defer_pending_cache = kmem_cache_create("xfs_defer_pending", + sizeof(struct xfs_defer_pending), + 0, 0, NULL); + + return xfs_defer_pending_cache != NULL ? 0 : -ENOMEM; +} + +static inline void +xfs_defer_destroy_cache(void) +{ + kmem_cache_destroy(xfs_defer_pending_cache); + xfs_defer_pending_cache = NULL; +} + +/* Set up caches for deferred work items. */ +int __init +xfs_defer_init_item_caches(void) +{ + int error; + + error = xfs_defer_init_cache(); + if (error) + return error; + error = xfs_rmap_intent_init_cache(); + if (error) + goto err; + error = xfs_refcount_intent_init_cache(); + if (error) + goto err; + error = xfs_bmap_intent_init_cache(); + if (error) + goto err; + error = xfs_extfree_intent_init_cache(); + if (error) + goto err; + + return 0; +err: + xfs_defer_destroy_item_caches(); + return error; +} + +/* Destroy all the deferred work item caches, if they've been allocated. 
*/ +void +xfs_defer_destroy_item_caches(void) +{ + xfs_extfree_intent_destroy_cache(); + xfs_bmap_intent_destroy_cache(); + xfs_refcount_intent_destroy_cache(); + xfs_rmap_intent_destroy_cache(); + xfs_defer_destroy_cache(); +} diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index df0312679ca74bb50c43e9afe199d97bcffcf285..edc23cc324de143f1913ea114f4379169ca69de7 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -101,4 +101,7 @@ void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, struct xfs_inode **captured_ipp); void xfs_defer_ops_release(struct xfs_mount *mp, struct xfs_defer_capture *d); +int __init xfs_defer_init_item_caches(void); +void xfs_defer_destroy_item_caches(void); + #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index b8b7f4d143295f0be8b828749169ad180bd3b34b..a1319f9c8ed83563b7a7203ff3e7d7de3de9474a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -768,6 +768,8 @@ xfs_ialloc_ag_alloc( args.alignment = args.mp->m_sb.sb_spino_align; args.prod = 1; + /* Allow space for the inode btree to split */ + args.minleft = igeo->inobt_maxlevels; args.minlen = igeo->ialloc_min_blks; args.maxlen = args.minlen; @@ -1851,7 +1853,7 @@ xfs_dialloc( * might be sparse and only free the regions that are allocated as part of the * chunk. */ -STATIC void +static int xfs_difree_inode_chunk( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -1868,10 +1870,10 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); - return; + return xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, sagbno), + M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, + XFS_AG_RESV_NONE); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1888,6 +1890,8 @@ xfs_difree_inode_chunk( XFS_INOBT_HOLEMASK_BITS); nextbit = startidx + 1; while (startidx < XFS_INOBT_HOLEMASK_BITS) { + int error; + nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS, nextbit); /* @@ -1913,8 +1917,11 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + error = xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); + if (error) + return error; /* reset range to current bit and carry on... */ startidx = endidx = nextbit; @@ -1922,6 +1929,7 @@ xfs_difree_inode_chunk( next: nextbit++; } + return 0; } STATIC int @@ -1981,7 +1989,11 @@ xfs_difree_inobt( */ off = agino - rec.ir_startino; ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK); - ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off))); + + if (XFS_IS_CORRUPT(mp, rec.ir_free & XFS_INOBT_MASK(off))) { + error = -EFSCORRUPTED; + goto error0; + } /* * Mark the inode free & increment the count. 
*/ @@ -2021,7 +2033,9 @@ xfs_difree_inobt( goto error0; } - xfs_difree_inode_chunk(tp, agno, &rec); + error = xfs_difree_inode_chunk(tp, agno, &rec); + if (error) + goto error0; } else { xic->deleted = false; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index e554b58abc65b8b252b12b19b1c5a13ed4712841..5150010ca4c60077743da183bb9ebf6d605d543c 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -154,9 +154,11 @@ __xfs_inobt_free_block( struct xfs_buf *bp, enum xfs_ag_resv_type resv) { + xfs_fsblock_t fsbno; + xfs_inobt_mod_blockcount(cur, -1); - return xfs_free_extent(cur->bc_tp, - XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1, + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_INOBT, resv); } @@ -432,7 +434,7 @@ xfs_inobt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); + cur = kmem_cache_zalloc(xfs_btree_cur_cache, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = btnum; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 11000cb8129a13d3b0bc076f50b0f1e3d4a245e8..26a1696e1bcef031ddfba51cc50d2918680925a6 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -25,7 +25,7 @@ #include "xfs_attr_leaf.h" #include "xfs_types.h" -kmem_zone_t *xfs_ifork_zone; +struct kmem_cache *xfs_ifork_cache; void xfs_init_local_fork( @@ -688,7 +688,7 @@ xfs_ifork_init_cow( if (ip->i_cowfp) return; - ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_zone, + ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache, GFP_NOFS | __GFP_NOFAIL); ip->i_cowfp->if_flags = XFS_IFEXTENTS; ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 64465472f0f0425cb3dedf53d86dd3ec42251314..fe2324c53855fa6ab8f903e86a307719baf5ce61 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -211,7 +211,7 @@ static inline bool xfs_iext_peek_prev_extent(struct xfs_ifork *ifp, xfs_iext_get_extent((ifp), (ext), (got)); \ xfs_iext_next((ifp), (ext))) -extern struct kmem_zone *xfs_ifork_zone; +extern struct kmem_cache *xfs_ifork_cache; extern void xfs_ifork_init_cow(struct xfs_inode *ip); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 1ab15910e40217df56a9eea1a67dda8319405f63..4191548b4736239336a4e20795fae5c5f1816c0d 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -23,6 +23,8 @@ #include "xfs_refcount.h" #include "xfs_rmap.h" +struct kmem_cache *xfs_refcount_intent_cache; + /* Allowable refcount adjustment amounts. 
*/ enum xfs_refc_adjust_op { XFS_REFCOUNT_ADJUST_INCREASE = 1, @@ -915,8 +917,7 @@ xfs_refcount_adjust_extents( struct xfs_btree_cur *cur, xfs_agblock_t *agbno, xfs_extlen_t *aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { struct xfs_refcount_irec ext, tmp; int error; @@ -973,8 +974,11 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, tmp.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, - tmp.rc_blockcount, oinfo); + error = xfs_free_extent_later(cur->bc_tp, fsbno, + tmp.rc_blockcount, NULL, + XFS_AG_RESV_NONE); + if (error) + goto out_error; } (*agbno) += tmp.rc_blockcount; @@ -1018,8 +1022,11 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.agno, ext.rc_startblock); - xfs_bmap_add_free(cur->bc_tp, fsbno, ext.rc_blockcount, - oinfo); + error = xfs_free_extent_later(cur->bc_tp, fsbno, + ext.rc_blockcount, NULL, + XFS_AG_RESV_NONE); + if (error) + goto out_error; } skip: @@ -1047,8 +1054,7 @@ xfs_refcount_adjust( xfs_extlen_t aglen, xfs_agblock_t *new_agbno, xfs_extlen_t *new_aglen, - enum xfs_refc_adjust_op adj, - struct xfs_owner_info *oinfo) + enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; @@ -1091,8 +1097,7 @@ xfs_refcount_adjust( cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, - adj, oinfo); + error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); if (error) goto out_error; @@ -1191,12 +1196,12 @@ xfs_refcount_finish_one( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_INCREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_INCREASE); *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE, NULL); + new_len, XFS_REFCOUNT_ADJUST_DECREASE); *new_fsb = XFS_AGB_TO_FSB(mp, agno, new_agbno); break; case XFS_REFCOUNT_ALLOC_COW: @@ -1241,8 +1246,8 @@ __xfs_refcount_add( type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), blockcount); - ri = kmem_alloc(sizeof(struct xfs_refcount_intent), - KM_NOFS); + ri = kmem_cache_alloc(xfs_refcount_intent_cache, + GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; @@ -1747,7 +1752,11 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + error = xfs_free_extent_later(tp, fsb, + rr->rr_rrec.rc_blockcount, NULL, + XFS_AG_RESV_NONE); + if (error) + goto out_trans; error = xfs_trans_commit(tp); if (error) @@ -1787,3 +1796,20 @@ xfs_refcount_has_record( return xfs_btree_has_record(cur, &low, &high, exists); } + +int __init +xfs_refcount_intent_init_cache(void) +{ + xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent", + sizeof(struct xfs_refcount_intent), + 0, 0, NULL); + + return xfs_refcount_intent_cache != NULL ? 
0 : -ENOMEM; +} + +void +xfs_refcount_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_refcount_intent_cache); + xfs_refcount_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 209795539c8dd012462e850fe721f5bca4888fea..d86a1afca0b703a109c8b612fb92fb5d273b6386 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -25,8 +25,8 @@ enum xfs_refcount_intent_type { struct xfs_refcount_intent { struct list_head ri_list; enum xfs_refcount_intent_type ri_type; - xfs_fsblock_t ri_startblock; xfs_extlen_t ri_blockcount; + xfs_fsblock_t ri_startblock; }; void xfs_refcount_increase_extent(struct xfs_trans *tp, @@ -76,4 +76,9 @@ extern void xfs_refcount_btrec_to_irec(union xfs_btree_rec *rec, extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); +extern struct kmem_cache *xfs_refcount_intent_cache; + +int __init xfs_refcount_intent_init_cache(void); +void xfs_refcount_intent_destroy_cache(void); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 679f5d9763da5ef5e6ce6297bd939bff1788f363..3e0d82f1802bb44481da4e7a6a4e5611ff3f2528 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -103,18 +103,13 @@ xfs_refcountbt_free_block( struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); - int error; trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, - XFS_AG_RESV_METADATA); - if (error) - return error; - - return error; + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); } STATIC int @@ -323,7 +318,7 @@ xfs_refcountbt_init_common( ASSERT(agno != NULLAGNUMBER); ASSERT(agno < mp->m_sb.sb_agcount); - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); + cur = kmem_cache_zalloc(xfs_btree_cur_cache, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; cur->bc_btnum = XFS_BTNUM_REFC; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index e6caa25c9e18d4c350e6c60ebb4c7af3bbd23a1f..64bfb4f319be53a2eb1266f4c6f2ed98dbf7b139 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -22,6 +22,8 @@ #include "xfs_error.h" #include "xfs_inode.h" +struct kmem_cache *xfs_rmap_intent_cache; + /* * Lookup the first record less than or equal to [bno, len, owner, offset] * in the btree given by cur. @@ -2487,7 +2489,7 @@ __xfs_rmap_add( bmap->br_blockcount, bmap->br_state); - ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS); + ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; @@ -2781,3 +2783,20 @@ const struct xfs_owner_info XFS_RMAP_OINFO_REFC = { const struct xfs_owner_info XFS_RMAP_OINFO_COW = { .oi_owner = XFS_RMAP_OWN_COW, }; + +int __init +xfs_rmap_intent_init_cache(void) +{ + xfs_rmap_intent_cache = kmem_cache_create("xfs_rmap_intent", + sizeof(struct xfs_rmap_intent), + 0, 0, NULL); + + return xfs_rmap_intent_cache != NULL ? 
0 : -ENOMEM; +} + +void +xfs_rmap_intent_destroy_cache(void) +{ + kmem_cache_destroy(xfs_rmap_intent_cache); + xfs_rmap_intent_cache = NULL; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index abe633403fd152db0eecb4d31e25caaa5c5dd020..9befe4894359ad4755eaf130e9848d40c9d8bf45 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -156,8 +156,8 @@ enum xfs_rmap_intent_type { struct xfs_rmap_intent { struct list_head ri_list; enum xfs_rmap_intent_type ri_type; - uint64_t ri_owner; int ri_whichfork; + uint64_t ri_owner; struct xfs_bmbt_irec ri_bmap; }; @@ -212,4 +212,9 @@ extern const struct xfs_owner_info XFS_RMAP_OINFO_INODES; extern const struct xfs_owner_info XFS_RMAP_OINFO_REFC; extern const struct xfs_owner_info XFS_RMAP_OINFO_COW; +extern struct kmem_cache *xfs_rmap_intent_cache; + +int __init xfs_rmap_intent_init_cache(void); +void xfs_rmap_intent_destroy_cache(void); + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 19cb2c11a3a344a14e41362449161e77716be79d..97c519ede1d63b6b63e1e6807c243528ffe01743 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -454,7 +454,7 @@ xfs_rmapbt_init_common( { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); + cur = kmem_cache_zalloc(xfs_btree_cur_cache, GFP_NOFS | __GFP_NOFAIL); cur->bc_tp = tp; cur->bc_mp = mp; /* Overlapping btree; 2 keys per pointer. */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index af313e9bf55ec95add515610862589d77aead0fd..c099ccf2787dbdefeed8584389c51ac40ec146b3 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -419,7 +419,6 @@ xfs_validate_sb_common( sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || - sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || @@ -437,6 +436,61 @@ xfs_validate_sb_common( return -EFSCORRUPTED; } + /* + * Logs that are too large are not supported at all. Reject them + * outright. Logs that are too small are tolerated on v4 filesystems, + * but we can only check that when mounting the log. Hence we skip + * those checks here. + */ + if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { + xfs_notice(mp, + "Log size 0x%x blocks too large, maximum size is 0x%llx blocks", + sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); + return -EFSCORRUPTED; + } + + if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { + xfs_warn(mp, + "log size 0x%llx bytes too large, maximum size is 0x%llx bytes", + XFS_FSB_TO_B(mp, sbp->sb_logblocks), + XFS_MAX_LOG_BYTES); + return -EFSCORRUPTED; + } + + /* + * Do not allow filesystems with corrupted log sector or stripe units to + * be mounted. We cannot safely size the iclogs or write to the log if + * the log stripe unit is not valid. 
+ */ + if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { + if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { + xfs_notice(mp, + "log sector size in bytes/log2 (0x%x/0x%x) must match", + sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); + return -EFSCORRUPTED; + } + } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { + xfs_notice(mp, + "log sector size in bytes/log2 (0x%x/0x%x) are not zero", + sbp->sb_logsectsize, sbp->sb_logsectlog); + return -EFSCORRUPTED; + } + + if (sbp->sb_logsunit > 1) { + if (sbp->sb_logsunit % sbp->sb_blocksize) { + xfs_notice(mp, + "log stripe unit 0x%x bytes must be a multiple of block size", + sbp->sb_logsunit); + return -EFSCORRUPTED; + } + if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { + xfs_notice(mp, + "log stripe unit 0x%x bytes over maximum size (0x%x bytes)", + sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); + return -EFSCORRUPTED; + } + } + /* Validate the realtime geometry; stolen from xfs_repair */ if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 86cb4b764897fce5588c33e9e5f5f624dc2e10b6..19eabad06f92052b5f440594656eb09eed984bc2 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -61,6 +61,29 @@ xfs_verify_fsbno( return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } +/* + * Verify that a data device extent is fully contained inside the filesystem, + * does not cross an AG boundary, and does not point at static metadata. + */ +bool +xfs_verify_fsbext( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_fsblock_t len) +{ + if (fsbno + len <= fsbno) + return false; + + if (!xfs_verify_fsbno(mp, fsbno)) + return false; + + if (!xfs_verify_fsbno(mp, fsbno + len - 1)) + return false; + + return XFS_FSB_TO_AGNO(mp, fsbno) == + XFS_FSB_TO_AGNO(mp, fsbno + len - 1); +} + /* Calculate the first and last possible inode number in an AG. */ void xfs_agino_range( diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 1ce06173c2f55fb9c5dc933af5f4435aea1b66b4..f76fa8498749864030adf7f3e9d8e2d56347a50b 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -186,6 +186,8 @@ bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, + xfs_fsblock_t len); void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t *first, xfs_agino_t *last); bool xfs_verify_agino(struct xfs_mount *mp, xfs_agnumber_t agno, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index fe8c19814f1d7750ba798452dd4e7d7324ef9760..ef7db9defef138ecbc463b9272d33017de997653 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -150,9 +150,8 @@ xfs_end_ioend( if (unlikely(error)) { if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); - xfs_bmap_punch_delalloc_range(ip, - XFS_B_TO_FSBT(mp, offset), - XFS_B_TO_FSB(mp, size)); + xfs_bmap_punch_delalloc_range(ip, offset, + offset + size); } goto done; } @@ -501,27 +500,25 @@ xfs_prepare_ioend( } /* - * If the page has delalloc blocks on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip up a later direct I/O read operation on the same region. 
+ * If the folio has delalloc blocks on it, the caller is asking us to punch them + * out. If we don't, we can leave a stale delalloc mapping covered by a clean + * page that needs to be dirtied again before the delalloc mapping can be + * converted. This stale delalloc mapping can trip up a later direct I/O read + * operation on the same region. * * We prevent this by truncating away the delalloc regions on the page. Because * they are delalloc, we can do this without needing a transaction. Indeed - if * we get ENOSPC errors, we have to be able to do this truncation without a - * transaction as there is no space left for block reservation (typically why we - * see a ENOSPC in writeback). + * transaction as there is no space left for block reservation (typically why + * we see a ENOSPC in writeback). */ static void xfs_discard_page( struct page *page, loff_t fileoff) { - struct inode *inode = page->mapping->host; - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(page->mapping->host); struct xfs_mount *mp = ip->i_mount; - unsigned int pageoff = offset_in_page(fileoff); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, fileoff); - xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff); int error; if (xfs_is_shutdown(mp)) @@ -531,8 +528,13 @@ xfs_discard_page( "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", page, ip->i_ino, fileoff); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - i_blocks_per_page(inode, page) - pageoff_fsb); + /* + * The end of the punch range is always the offset of the the first + * byte of the next page. Hence the end offset is only dependent on the + * page itself and not the start offset that is passed in. + */ + error = xfs_bmap_punch_delalloc_range(ip, fileoff, + page_offset(page) + thp_size(page)); if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index fb26c6123a2147d7141746ab4bbbd4cfc840ca1e..a68ed8f36b479ccf89c5f5e0740467dd92c46a47 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -26,8 +26,8 @@ #include "xfs_log_recover.h" #include "xfs_quota.h" -kmem_zone_t *xfs_bui_zone; -kmem_zone_t *xfs_bud_zone; +struct kmem_cache *xfs_bui_cache; +struct kmem_cache *xfs_bud_cache; static const struct xfs_item_ops xfs_bui_item_ops; @@ -41,7 +41,7 @@ xfs_bui_item_free( struct xfs_bui_log_item *buip) { kmem_free(buip->bui_item.li_lv_shadow); - kmem_cache_free(xfs_bui_zone, buip); + kmem_cache_free(xfs_bui_cache, buip); } /* @@ -140,7 +140,7 @@ xfs_bui_init( { struct xfs_bui_log_item *buip; - buip = kmem_cache_zalloc(xfs_bui_zone, GFP_KERNEL | __GFP_NOFAIL); + buip = kmem_cache_zalloc(xfs_bui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops); buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS; @@ -201,7 +201,7 @@ xfs_bud_item_release( xfs_bui_release(budp->bud_buip); kmem_free(budp->bud_item.li_lv_shadow); - kmem_cache_free(xfs_bud_zone, budp); + kmem_cache_free(xfs_bud_cache, budp); } static const struct xfs_item_ops xfs_bud_item_ops = { @@ -218,7 +218,7 @@ xfs_trans_get_bud( { struct xfs_bud_log_item *budp; - budp = kmem_cache_zalloc(xfs_bud_zone, GFP_KERNEL | __GFP_NOFAIL); + budp = kmem_cache_zalloc(xfs_bud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); budp->bud_buip = buip; @@ -387,7 +387,7 @@ xfs_bmap_update_finish_item( bmap->bi_bmap.br_blockcount = count; return -EAGAIN; } 
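To make the end-offset computation in xfs_discard_page() above concrete, here is a small worked example with made-up values, assuming a plain 4kB page:

	/*
	 * Illustrative values only: a 4kB page at index 3 covers file bytes
	 * [12288, 16384). For any fileoff within that page:
	 *
	 *	page_offset(page)                  = 12288
	 *	page_offset(page) + thp_size(page) = 16384
	 *
	 * so the punch always ends at the first byte of the next page,
	 * regardless of the start offset passed in.
	 */
	error = xfs_bmap_punch_delalloc_range(ip, fileoff,
			page_offset(page) + thp_size(page));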
- kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); return error; } @@ -407,7 +407,7 @@ xfs_bmap_update_cancel_item( struct xfs_bmap_intent *bmap; bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_free(bmap); + kmem_cache_free(xfs_bmap_intent_cache, bmap); } const struct xfs_defer_op_type xfs_bmap_update_defer_type = { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index b9be62f8bd5215d26e846ee9ab5fca5f46222001..3fafd3881a0bb2f372694e4680c7e28a2dc3ca13 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -25,7 +25,7 @@ /* kernel only BUI/BUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -65,7 +65,7 @@ struct xfs_bud_log_item { struct xfs_bud_log_format bud_format; }; -extern struct kmem_zone *xfs_bui_zone; -extern struct kmem_zone *xfs_bud_zone; +extern struct kmem_cache *xfs_bui_cache; +extern struct kmem_cache *xfs_bud_cache; #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 85d3804ebbe7dc1cfa35b93759ef3ea981440e9f..edf62092125c62a932fe846f8e3492e464b86ccd 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -551,11 +551,13 @@ xfs_getbmap( int xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, - xfs_fileoff_t start_fsb, - xfs_fileoff_t length) + xfs_off_t start_byte, + xfs_off_t end_byte) { + struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = &ip->i_df; - xfs_fileoff_t end_fsb = start_fsb + length; + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; int error = 0; @@ -568,7 +570,7 @@ xfs_bmap_punch_delalloc_range( while (got.br_startoff + got.br_blockcount > start_fsb) { del = got; - xfs_trim_extent(&del, start_fsb, length); + xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb); /* * A delete can push the cursor forward. Step back to the @@ -798,9 +800,6 @@ xfs_alloc_file_space( rblocks = 0; } - /* - * Allocate and setup the transaction. - */ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks, rblocks, false, &tp); if (error) @@ -817,9 +816,9 @@ xfs_alloc_file_space( if (error) goto error; - /* - * Complete the transaction - */ + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 9f993168b55b8b2b8063e7211e13f4158df55fa0..ca9000d38b90ec34f59de199ca1ab7e2c346f785 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) #endif /* CONFIG_XFS_RT */ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, - xfs_fileoff_t start_fsb, xfs_fileoff_t length); + xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { __s64 bmv_offset; /* file offset of segment in blocks */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 0d3d057c4af44306917a4a483e5f5296fbf27563..af5dc20c5e27cd76c4f7f3ca941037c0761bd584 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -21,7 +21,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" -static kmem_zone_t *xfs_buf_zone; +static struct kmem_cache *xfs_buf_cache; #define xb_to_gfp(flags) \ ((((flags) & XBF_READ_AHEAD) ? 
__GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) @@ -224,7 +224,7 @@ _xfs_buf_alloc( int i; *bpp = NULL; - bp = kmem_cache_zalloc(xfs_buf_zone, GFP_NOFS | __GFP_NOFAIL); + bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL); /* * We don't want certain flags to appear in b_flags unless they are @@ -251,7 +251,7 @@ _xfs_buf_alloc( */ error = xfs_buf_get_maps(bp, nmaps); if (error) { - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); return error; } @@ -345,7 +345,7 @@ xfs_buf_free( kmem_free(bp->b_addr); _xfs_buf_free_pages(bp); xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); } /* @@ -1005,7 +1005,7 @@ xfs_buf_get_uncached( _xfs_buf_free_pages(bp); fail_free_buf: xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_zone, bp); + kmem_cache_free(xfs_buf_cache, bp); fail: return error; } @@ -2344,12 +2344,12 @@ xfs_buf_delwri_pushbuf( int __init xfs_buf_init(void) { - xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, + xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_buf_zone) + if (!xfs_buf_cache) goto out; return 0; @@ -2361,7 +2361,7 @@ xfs_buf_init(void) void xfs_buf_terminate(void) { - kmem_cache_destroy(xfs_buf_zone); + kmem_cache_destroy(xfs_buf_cache); } void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 3a9e006b7220560f1a64fcc4c4d505c7a4131730..7826028a5861cf2a919e366cf273cc111b3554ed 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -24,7 +24,7 @@ #include "xfs_log_priv.h" -kmem_zone_t *xfs_buf_item_zone; +struct kmem_cache *xfs_buf_item_cache; static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) { @@ -715,7 +715,7 @@ xfs_buf_item_committed( trace_xfs_buf_item_committed(bip); - if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0) + if ((bip->bli_flags & XFS_BLI_KEEP_LSN) && lip->li_lsn != 0) return lip->li_lsn; return lsn; } @@ -786,7 +786,7 @@ xfs_buf_item_init( return 0; } - bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL); + bip = kmem_cache_zalloc(xfs_buf_item_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); bip->bli_buf = bp; @@ -807,7 +807,7 @@ xfs_buf_item_init( map_size = DIV_ROUND_UP(chunks, NBWORD); if (map_size > XFS_BLF_DATAMAP_SIZE) { - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); xfs_err(mp, "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", map_size, @@ -984,7 +984,7 @@ xfs_buf_item_free( { xfs_buf_item_free_format(bip); kmem_free(bip->bli_item.li_lv_shadow); - kmem_cache_free(xfs_buf_item_zone, bip); + kmem_cache_free(xfs_buf_item_cache, bip); } /* diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 50aa0f5ef959030d02cbbf5d1258009ec052f3e7..30f55e133696144424ad988c48b25d9c6a913420 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -17,6 +17,12 @@ #define XFS_BLI_STALE_INODE 0x20 #define XFS_BLI_INODE_BUF 0x40 #define XFS_BLI_ORDERED 0x80 +#define XFS_BLI_GROW_SB_BUF 0x100 + +#define XFS_BLI_KEEP_LSN \ + (XFS_BLI_INODE_ALLOC_BUF | \ + XFS_BLI_GROW_SB_BUF) + #define XFS_BLI_FLAGS \ { XFS_BLI_HOLD, "HOLD" }, \ @@ -26,7 +32,8 @@ { XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \ { XFS_BLI_STALE_INODE, "STALE_INODE" }, \ { XFS_BLI_INODE_BUF, "INODE_BUF" }, \ - { XFS_BLI_ORDERED, "ORDERED" } + { 
XFS_BLI_ORDERED, "ORDERED" }, \ + { XFS_BLI_GROW_SB_BUF, "GROW_SB" } struct xfs_buf; @@ -71,6 +78,6 @@ static inline void xfs_buf_dquot_io_fail(struct xfs_buf *bp) void xfs_buf_iodone(struct xfs_buf *); bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); -extern kmem_zone_t *xfs_buf_item_zone; +extern struct kmem_cache *xfs_buf_item_cache; #endif /* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index 9cb79a494d06bf002745b35c32bb31feba67ef49..d214e0f9cc092e5ddd2ada58eec1e7db9686dde4 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -950,13 +950,10 @@ xlog_recover_buf_commit_pass2( /* * We're skipping replay of this buffer log item due to the log - * item LSN being behind the ondisk buffer. Verify the buffer - * contents since we aren't going to run the write verifier. + * item LSN being behind the ondisk buffer. clear XBF_DONE flag + * of the buffer to prevent buffer from being used without verify. */ - if (bp->b_ops) { - bp->b_ops->verify_read(bp); - error = bp->b_error; - } + bp->b_flags &= ~XBF_DONE; goto out_release; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c0c62f37c396879e647a8a5da63de5e0c77de8cd..da8ab127d16117f22d2b9eb22b8f6636f1ca34d6 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -38,8 +38,8 @@ * otherwise by the lowest id first, see xfs_dqlock2. */ -struct kmem_zone *xfs_qm_dqtrxzone; -static struct kmem_zone *xfs_qm_dqzone; +struct kmem_cache *xfs_dqtrx_cache; +static struct kmem_cache *xfs_dquot_cache; static struct lock_class_key xfs_dquot_group_class; static struct lock_class_key xfs_dquot_project_class; @@ -57,7 +57,7 @@ xfs_qm_dqdestroy( mutex_destroy(&dqp->q_qlock); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); - kmem_cache_free(xfs_qm_dqzone, dqp); + kmem_cache_free(xfs_dquot_cache, dqp); } /* @@ -458,7 +458,7 @@ xfs_dquot_alloc( { struct xfs_dquot *dqp; - dqp = kmem_cache_zalloc(xfs_qm_dqzone, GFP_KERNEL | __GFP_NOFAIL); + dqp = kmem_cache_zalloc(xfs_dquot_cache, GFP_KERNEL | __GFP_NOFAIL); dqp->q_type = type; dqp->q_id = id; @@ -1365,22 +1365,22 @@ xfs_dqlock2( int __init xfs_qm_init(void) { - xfs_qm_dqzone = kmem_cache_create("xfs_dquot", + xfs_dquot_cache = kmem_cache_create("xfs_dquot", sizeof(struct xfs_dquot), 0, 0, NULL); - if (!xfs_qm_dqzone) + if (!xfs_dquot_cache) goto out; - xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx", + xfs_dqtrx_cache = kmem_cache_create("xfs_dqtrx", sizeof(struct xfs_dquot_acct), 0, 0, NULL); - if (!xfs_qm_dqtrxzone) - goto out_free_dqzone; + if (!xfs_dqtrx_cache) + goto out_free_dquot_cache; return 0; -out_free_dqzone: - kmem_cache_destroy(xfs_qm_dqzone); +out_free_dquot_cache: + kmem_cache_destroy(xfs_dquot_cache); out: return -ENOMEM; } @@ -1388,8 +1388,8 @@ xfs_qm_init(void) void xfs_qm_exit(void) { - kmem_cache_destroy(xfs_qm_dqtrxzone); - kmem_cache_destroy(xfs_qm_dqzone); + kmem_cache_destroy(xfs_dqtrx_cache); + kmem_cache_destroy(xfs_dquot_cache); } /* diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 26680444969c7bd8923b586f7d41a634169c5ece..3b71e1714514b9a19641331f1bb289aca6b221e8 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -578,20 +578,45 @@ xfs_extent_busy_clear( /* * Flush out all busy extents for this AG. + * + * If the current transaction is holding busy extents, the caller may not want + * to wait for committed busy extents to resolve. 
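Going back to the xlog_recover_buf_commit_pass2() change above: clearing XBF_DONE is sufficient because the buffer cache treats a buffer without that flag as unread. The sketch below is a simplified paraphrase of the read path, not the actual xfs_buf_read_map() code:

	/* Simplified sketch - the real logic lives in xfs_buf_read_map(). */
	if (!(bp->b_flags & XBF_DONE)) {
		/*
		 * Buffer contents are not valid: re-read from disk and run
		 * the read verifier before anyone can use the data.
		 */
		bp->b_ops = ops;
		error = _xfs_buf_read(bp, flags);
	}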
If we are being told just to + * try a flush or progress has been made since we last skipped a busy extent, + * return immediately to allow the caller to try again. + * + * If we are freeing extents, we might actually be holding the only free extents + * in the transaction busy list and the log force won't resolve that situation. + * In this case, we must return -EAGAIN to avoid a deadlock by informing the + * caller it needs to commit the busy extents it holds before retrying the + * extent free operation. */ -void +int xfs_extent_busy_flush( - struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_perag *pag, - unsigned busy_gen) + unsigned busy_gen, + uint32_t alloc_flags) { DEFINE_WAIT (wait); int error; - error = xfs_log_force(mp, XFS_LOG_SYNC); + error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); if (error) - return; + return error; + + /* Avoid deadlocks on uncommitted busy extents. */ + if (!list_empty(&tp->t_busy)) { + if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH) + return 0; + + if (busy_gen != READ_ONCE(pag->pagb_gen)) + return 0; + + if (alloc_flags & XFS_ALLOC_FLAG_FREEING) + return -EAGAIN; + } + /* Wait for committed busy extents to resolve. */ do { prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); if (busy_gen != READ_ONCE(pag->pagb_gen)) @@ -600,6 +625,7 @@ xfs_extent_busy_flush( } while (1); finish_wait(&pag->pagb_wait, &wait); + return 0; } void diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 8aea07100092397f4e1f5754c9b806ce5138cb9c..f145d532051955a22c492c98d5fb2a3baf95c4fe 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -50,9 +50,9 @@ bool xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, xfs_extlen_t *len, unsigned *busy_gen); -void -xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, - unsigned busy_gen); +int +xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag, + unsigned busy_gen, uint32_t alloc_flags); void xfs_extent_busy_wait_all(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index b33276cb525f8d7bee3bac032d6f92f9c7b11a51..24ff79d1a1232d2d03b30978e1fa8bf3668927b9 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -25,8 +25,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_efi_zone; -kmem_zone_t *xfs_efd_zone; +struct kmem_cache *xfs_efi_cache; +struct kmem_cache *xfs_efd_cache; static const struct xfs_item_ops xfs_efi_item_ops; @@ -43,7 +43,7 @@ xfs_efi_item_free( if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) kmem_free(efip); else - kmem_cache_free(xfs_efi_zone, efip); + kmem_cache_free(xfs_efi_cache, efip); } /* @@ -161,7 +161,7 @@ xfs_efi_init( (nextents * sizeof(xfs_extent_t))); efip = kmem_zalloc(size, 0); } else { - efip = kmem_cache_zalloc(xfs_efi_zone, + efip = kmem_cache_zalloc(xfs_efi_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -246,7 +246,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp) if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) kmem_free(efdp); else - kmem_cache_free(xfs_efd_zone, efdp); + kmem_cache_free(xfs_efd_cache, efdp); } /* @@ -338,7 +338,7 @@ xfs_trans_get_efd( nextents * sizeof(struct xfs_extent), 0); } else { - efdp = kmem_cache_zalloc(xfs_efd_zone, + efdp = kmem_cache_zalloc(xfs_efd_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -352,6 +352,34 @@ xfs_trans_get_efd( return efdp; } +/* + * Fill the EFD with all extents from the EFI when we need to roll the + * transaction and continue with a new EFI. 
+ * + * This simply copies all the extents in the EFI to the EFD rather than making + * assumptions about which extents in the EFI have already been processed. We + * currently keep the xefi list in the same order as the EFI extent list, but + * that may not always be the case. Copying everything avoids leaving a landmine + * where we fail to cancel all the extents in an EFI if the xefi list is + * processed in a different order to the extents in the EFI. + */ +static void +xfs_efd_from_efi( + struct xfs_efd_log_item *efdp) +{ + struct xfs_efi_log_item *efip = efdp->efd_efip; + uint i; + + ASSERT(efip->efi_format.efi_nextents > 0); + ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents); + + for (i = 0; i < efip->efi_format.efi_nextents; i++) { + efdp->efd_format.efd_extents[i] = + efip->efi_format.efi_extents[i]; + } + efdp->efd_next_extent = efip->efi_format.efi_nextents; +} + + /* * Free an extent and log it to the EFD. Note that the transaction is marked * dirty regardless of whether the extent free succeeds or fails to support the @@ -361,23 +389,30 @@ static int xfs_trans_free_extent( struct xfs_trans *tp, struct xfs_efd_log_item *efdp, - xfs_fsblock_t start_block, - xfs_extlen_t ext_len, - const struct xfs_owner_info *oinfo, - bool skip_discard) + struct xfs_extent_free_item *free) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_extent *extp; uint next_extent; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, + free->xefi_startblock); xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, - start_block); + free->xefi_startblock); int error; - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); + oinfo.oi_owner = free->xefi_owner; + if (free->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; + if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; + + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, + free->xefi_blockcount); - error = __xfs_free_extent(tp, start_block, ext_len, - oinfo, XFS_AG_RESV_NONE, skip_discard); + error = __xfs_free_extent(tp, free->xefi_startblock, + free->xefi_blockcount, &oinfo, free->xefi_agresv, + free->xefi_flags & XFS_EFI_SKIP_DISCARD); /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: @@ -388,11 +423,22 @@ xfs_trans_free_extent( tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); + /* + * If we need a new transaction to make progress, the caller will log a + * new EFI with the current contents. It will also log an EFD to cancel + * the existing EFI, and so we need to copy all the unprocessed extents + * in this EFI to the EFD so this works correctly.
+ */ + if (error == -EAGAIN) { + xfs_efd_from_efi(efdp); + return error; + } + next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = start_block; - extp->ext_len = ext_len; + extp->ext_start = free->xefi_startblock; + extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; return error; @@ -483,11 +529,17 @@ xfs_extent_free_finish_item( int error; free = container_of(item, struct xfs_extent_free_item, xefi_list); - error = xfs_trans_free_extent(tp, EFD_ITEM(done), - free->xefi_startblock, - free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); - kmem_cache_free(xfs_bmap_free_item_zone, free); + + error = xfs_trans_free_extent(tp, EFD_ITEM(done), free); + + /* + * Don't free the XEFI if we need a new transaction to complete + * processing of it. + */ + if (error == -EAGAIN) + return error; + + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -507,7 +559,7 @@ xfs_extent_free_cancel_item( struct xfs_extent_free_item *free; free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_cache_free(xfs_bmap_free_item_zone, free); + kmem_cache_free(xfs_extfree_item_cache, free); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -530,6 +582,7 @@ xfs_agfl_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); struct xfs_extent_free_item *free; @@ -544,13 +597,13 @@ xfs_agfl_free_finish_item( ASSERT(free->xefi_blockcount == 1); agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + oinfo.oi_owner = free->xefi_owner; trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); if (!error) - error = xfs_free_agfl_block(tp, agno, agbno, agbp, - &free->xefi_oinfo); + error = xfs_free_agfl_block(tp, agno, agbno, agbp, &oinfo); /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: @@ -569,7 +622,7 @@ xfs_agfl_free_finish_item( extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; - kmem_cache_free(xfs_bmap_free_item_zone, free); + kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -600,6 +653,7 @@ xfs_efi_item_recover( xfs_fsblock_t startblock_fsb; int i; int error = 0; + bool requeue_only = false; /* * First check the validity of the extents described by the @@ -623,10 +677,35 @@ xfs_efi_item_recover( efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); for (i = 0; i < efip->efi_format.efi_nextents; i++) { + struct xfs_extent_free_item fake = { + .xefi_owner = XFS_RMAP_OWN_UNKNOWN, + .xefi_agresv = XFS_AG_RESV_NONE, + }; + extp = &efip->efi_format.efi_extents[i]; + + fake.xefi_startblock = extp->ext_start; + fake.xefi_blockcount = extp->ext_len; + + if (!requeue_only) + error = xfs_trans_free_extent(tp, efdp, &fake); + + /* + * If we can't free the extent without potentially deadlocking, + * requeue the rest of the extents to a new transaction so that + * they get run again later with a new transaction context.
+ */ + if (error == -EAGAIN || requeue_only) { + error = xfs_free_extent_later(tp, fake.xefi_startblock, + fake.xefi_blockcount, + &XFS_RMAP_OINFO_ANY_OWNER, + fake.xefi_agresv); + if (!error) { + requeue_only = true; + continue; + } + }; + if (error) goto abort_error; diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index cd2860c875bf50638c0346eac065f530dfa75424..186d0f2137f1f5c68068693dbfd4ccd0c2f70525 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -9,7 +9,7 @@ /* kernel only EFI/EFD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. @@ -69,7 +69,7 @@ struct xfs_efd_log_item { */ #define XFS_EFD_MAX_FAST_EXTENTS 16 -extern struct kmem_zone *xfs_efi_zone; -extern struct kmem_zone *xfs_efd_zone; +extern struct kmem_cache *xfs_efi_cache; +extern struct kmem_cache *xfs_efd_cache; #endif /* __XFS_EXTFREE_ITEM_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a6a59bd6c189ec79f6c23635612aca14a9045d1d..9f52365995c7bc5279e94fc5c8331ba95322e346 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -894,7 +894,6 @@ xfs_file_fallocate( struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); long error; - enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; loff_t new_size = 0; bool do_file_insert = false; @@ -992,8 +991,6 @@ xfs_file_fallocate( } do_file_insert = true; } else { - flags |= XFS_PREALLOC_SET; - if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; @@ -1044,11 +1041,6 @@ xfs_file_fallocate( if (error) goto out_unlock; } - - error = xfs_update_prealloc_flags(ip, XFS_PREALLOC_SET); - if (error) - goto out_unlock; - } /* Change file size if needed */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 6847bbb5a735fc0fc4b0a9d668b0620801a36604..bdf55ed88680165e2020e33d41c1d4a64de6fa71 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -87,10 +87,10 @@ xfs_inode_alloc( * XXX: If this didn't occur in transactions, we could drop GFP_NOFAIL * and return NULL here on ENOMEM. 
*/ - ip = kmem_cache_alloc(xfs_inode_zone, GFP_KERNEL | __GFP_NOFAIL); + ip = kmem_cache_alloc(xfs_inode_cache, GFP_KERNEL | __GFP_NOFAIL); if (inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); return NULL; } @@ -141,7 +141,7 @@ xfs_inode_free_callback( if (ip->i_cowfp) { xfs_idestroy_fork(ip->i_cowfp); - kmem_cache_free(xfs_ifork_zone, ip->i_cowfp); + kmem_cache_free(xfs_ifork_cache, ip->i_cowfp); } if (ip->i_itemp) { ASSERT(!test_bit(XFS_LI_IN_AIL, @@ -150,7 +150,7 @@ xfs_inode_free_callback( ip->i_itemp = NULL; } - kmem_cache_free(xfs_inode_zone, ip); + kmem_cache_free(xfs_inode_cache, ip); } static void diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index aa8c7c261d24559a6086413ed5966b973e596dc4..cd90b4f3149530e7797fff691418c73876c618ff 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -20,7 +20,7 @@ #include "xfs_ialloc.h" #include "xfs_trace.h" -kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +struct kmem_cache *xfs_icreate_cache; /* inode create item */ static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip) { @@ -64,7 +64,7 @@ xfs_icreate_item_release( struct xfs_log_item *lip) { kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow); - kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip)); + kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip)); } static const struct xfs_item_ops xfs_icreate_item_ops = { @@ -98,7 +98,7 @@ xfs_icreate_log( { struct xfs_icreate_item *icp; - icp = kmem_cache_zalloc(xfs_icreate_zone, GFP_KERNEL | __GFP_NOFAIL); + icp = kmem_cache_zalloc(xfs_icreate_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE, &xfs_icreate_item_ops); diff --git a/fs/xfs/xfs_icreate_item.h b/fs/xfs/xfs_icreate_item.h index a50d0b01e15a37ccf25032465125c789706597b1..64992823108ae28286d59515713cb23f3233382f 100644 --- a/fs/xfs/xfs_icreate_item.h +++ b/fs/xfs/xfs_icreate_item.h @@ -12,7 +12,7 @@ struct xfs_icreate_item { struct xfs_icreate_log ic_format; }; -extern kmem_zone_t *xfs_icreate_zone; /* inode create item zone */ +extern struct kmem_cache *xfs_icreate_cache; /* inode create item */ void xfs_icreate_log(struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, unsigned int count, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7b777540a44cd4070890b43e74c7338335bf92ad..268bbc2d978b8cb2d4d052905dfe0d63f7d9ace7 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -37,7 +37,7 @@ #include "xfs_reflink.h" #include "xfs_log_priv.h" -kmem_zone_t *xfs_inode_zone; +struct kmem_cache *xfs_inode_cache; /* * Used in xfs_itruncate_extents(). This is the maximum number of extents diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ad51b5707677590de853aa02e9917aae8a38825e..b552daae323fcdcac75abddcc069285ab46f5cbd 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -546,7 +546,7 @@ static inline void xfs_setup_existing_inode(struct xfs_inode *ip) void xfs_irele(struct xfs_inode *ip); -extern struct kmem_zone *xfs_inode_zone; +extern struct kmem_cache *xfs_inode_cache; /* The default CoW extent size hint. 
*/ #define XFS_DEFAULT_COWEXTSZ_HINT 32 diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fb1d482c720038189dac886417b0c626740395ee..9f496cdcde3bd891e38246bd00e16edbfec27105 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -22,7 +22,7 @@ #include -kmem_zone_t *xfs_ili_zone; /* inode log item zone */ +struct kmem_cache *xfs_ili_cache; /* inode log item */ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip) { @@ -661,7 +661,7 @@ xfs_inode_item_init( struct xfs_inode_log_item *iip; ASSERT(ip->i_itemp == NULL); - iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_zone, + iip = ip->i_itemp = kmem_cache_zalloc(xfs_ili_cache, GFP_KERNEL | __GFP_NOFAIL); iip->ili_inode = ip; @@ -683,7 +683,7 @@ xfs_inode_item_destroy( ip->i_itemp = NULL; kmem_free(iip->ili_item.li_lv_shadow); - kmem_cache_free(xfs_ili_zone, iip); + kmem_cache_free(xfs_ili_cache, iip); } diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 9c829cf5c839f2dea7193b133205fa9e588d2c7d..bbd836a44ff0480328704bf53907c0de4a8a6132 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -48,6 +48,6 @@ extern void xfs_iflush_shutdown_abort(struct xfs_inode *); extern int xfs_inode_item_format_convert(xfs_log_iovec_t *, struct xfs_inode_log_format *); -extern struct kmem_zone *xfs_ili_zone; +extern struct kmem_cache *xfs_ili_cache; #endif /* __XFS_INODE_ITEM_H__ */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 892a9ea714abb7661b679c6664348c731d654f13..ae96ac52c9746742ada218a0e48440729e539703 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1112,64 +1112,280 @@ xfs_buffered_write_iomap_begin( } static int -xfs_buffered_write_iomap_end( +xfs_buffered_write_delalloc_punch( struct inode *inode, loff_t offset, - loff_t length, - ssize_t written, - unsigned flags, - struct iomap *iomap) + loff_t length) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; - int error = 0; + return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, + offset + length); +} - if (iomap->type != IOMAP_DELALLOC) - return 0; +/* + * Scan the data range passed to us for dirty page cache folios. If we find a + * dirty folio, punch out the preceding range and update the offset from which + * the next punch will start. + * + * We can punch out storage reservations under clean pages because they either + * contain data that has been written back - in which case the delalloc punch + * over that range is a no-op - or they have been read faults in which case they + * contain zeroes and we can remove the delalloc backing range and any new + * writes to those pages will do the normal hole filling operation... + * + * This makes the logic simple: we only need to keep the delalloc extents + * over the dirty ranges of the page cache. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations.
+ */ +static int +xfs_iomap_write_delalloc_scan( + struct inode *inode, + loff_t *punch_start_byte, + loff_t start_byte, + loff_t end_byte, + int (*punch)(struct inode *inode, loff_t offset, loff_t length)) +{ + while (start_byte < end_byte) { + struct page *page; + + /* grab locked page */ + page = find_lock_page(inode->i_mapping, + start_byte >> PAGE_SHIFT); + + if (!page) { + start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + + PAGE_SIZE; + continue; + } + + /* if dirty, punch up to offset */ + if (PageDirty(page)) { + if (start_byte > *punch_start_byte) { + int error; + + error = punch(inode, *punch_start_byte, + start_byte - *punch_start_byte); + if (error) { + unlock_page(page); + put_page(page); + return error; + } + } + + /* + * Make sure the next punch start is correctly bound to + * the end of this data range, not the end of the folio. + */ + *punch_start_byte = min_t(loff_t, end_byte, + (page->index + 1) << PAGE_SHIFT); + } + + /* move offset to start of next folio in range */ + start_byte = (page->index + 1) << PAGE_SHIFT; + unlock_page(page); + put_page(page); + } + return 0; +} + +/* + * Punch out all the delalloc blocks in the range given except for those that + * have dirty data still pending in the page cache - those are going to be + * written and so must still retain the delalloc backing for writeback. + * + * As we are scanning the page cache for data, we don't need to reimplement the + * wheel - mapping_seek_hole_data() does exactly what we need to identify the + * start and end of data ranges correctly even for sub-folio block sizes. This + * byte range based iteration is especially convenient because it means we + * don't have to care about variable size folios, nor where the start or end of + * the data range lies within a folio, if they lie within the same folio or even + * if there are multiple discontiguous data ranges within the folio. + * + * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so + * can return data ranges that exist in the cache beyond EOF. e.g. a page fault + * spanning EOF will initialise the post-EOF data to zeroes and mark it up to + * date. A write page fault can then mark it dirty. If we then fail a write() + * beyond EOF into that up to date cached range, we allocate a delalloc block + * beyond EOF and then have to punch it out. Because the range is up to date, + * mapping_seek_hole_data() will return it, and we will skip the punch because + * the folio is dirty. This is incorrect - we always need to punch out delalloc + * beyond EOF in this case as writeback will never write back and convert that + * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, + * resulting in always punching out the range from the EOF to the end of the + * range the iomap spans. + * + * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it + * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA + * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) + * returns the end of the data range (data_end). Using closed intervals would + * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose + * the code to subtle off-by-one bugs....
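A concrete illustration of the [start_byte, end_byte) convention described above, using made-up offsets:

	/*
	 * Illustrative values only. With cached data at bytes [8192, 20480)
	 * inside a scan range of [0, 32768):
	 *
	 *	mapping_seek_hole_data(mapping, 0, 32768, SEEK_DATA)    ->  8192
	 *	mapping_seek_hole_data(mapping, 8192, 32768, SEEK_HOLE) -> 20480
	 *
	 * The data range is [8192, 20480) with no "+ 1"/"- 1" adjustments, and
	 * the next SEEK_DATA search simply starts at 20480.
	 */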
+ */ +static int +xfs_iomap_write_delalloc_release( + struct inode *inode, + loff_t start_byte, + loff_t end_byte, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + struct xfs_inode *ip = XFS_I(inode); + loff_t punch_start_byte = start_byte; + loff_t scan_end_byte = min(i_size_read(inode), end_byte); + int error = 0; /* - * Behave as if the write failed if drop writes is enabled. Set the NEW - * flag to force delalloc cleanup. + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite whilst we walk the + * cache and perform delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. */ - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { - iomap->flags |= IOMAP_F_NEW; - written = 0; + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + while (start_byte < scan_end_byte) { + loff_t data_end; + + start_byte = mapping_seek_hole_data(inode->i_mapping, + start_byte, scan_end_byte, SEEK_DATA); + /* + * If there is no more data to scan, all that is left is to + * punch out the remaining range. + */ + if (start_byte == -ENXIO || start_byte == scan_end_byte) + break; + if (start_byte < 0) { + error = start_byte; + goto out_unlock; + } + WARN_ON_ONCE(start_byte < punch_start_byte); + WARN_ON_ONCE(start_byte > scan_end_byte); + + /* + * We find the end of this contiguous cached data range by + * seeking from start_byte to the beginning of the next hole. + */ + data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, + scan_end_byte, SEEK_HOLE); + if (data_end < 0) { + error = data_end; + goto out_unlock; + } + WARN_ON_ONCE(data_end <= start_byte); + WARN_ON_ONCE(data_end > scan_end_byte); + + error = xfs_iomap_write_delalloc_scan(inode, &punch_start_byte, + start_byte, data_end, punch); + if (error) + goto out_unlock; + + /* The next data search starts at the end of this one. */ + start_byte = data_end; } + if (punch_start_byte < end_byte) + error = punch(inode, punch_start_byte, + end_byte - punch_start_byte); +out_unlock: + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + return error; +} + +/* + * When a short write occurs, the filesystem may need to remove reserved space + * that was allocated in ->iomap_begin from it's ->iomap_end method. For + * filesystems that use delayed allocation, we need to punch out delalloc + * extents from the range that are not dirty in the page cache. As the write can + * race with page faults, there can be dirty pages over the delalloc extent + * outside the range of a short write but still within the delalloc extent + * allocated for this iomap. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations. + * + * The punch() callback *must* only punch delalloc extents in the range passed + * to it. It must skip over all other types of extents in the range and leave + * them completely unchanged. It must do this punch atomically with respect to + * other extent modifications. + * + * The punch() callback may be called with a folio locked to prevent writeback + * extent allocation racing at the edge of the range we are currently punching. + * The locked folio may or may not cover the range being punched, so it is not + * safe for the punch() callback to lock folios itself. 
+ * + * Lock order is: + * + * inode->i_rwsem (shared or exclusive) + * inode->i_mapping->invalidate_lock (exclusive) + * folio_lock() + * ->punch + * internal filesystem allocation lock + */ +static int +xfs_iomap_file_buffered_write_punch_delalloc( + struct inode *inode, + struct iomap *iomap, + loff_t pos, + loff_t length, + ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t start_byte; + loff_t end_byte; + int blocksize = i_blocksize(inode); + + if (iomap->type != IOMAP_DELALLOC) + return 0; + + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + /* - * start_fsb refers to the first unused block after a short write. If + * start_byte refers to the first unused block after a short write. If * nothing was written, round offset down to point at the first block in * the range. */ if (unlikely(!written)) - start_fsb = XFS_B_TO_FSBT(mp, offset); + start_byte = round_down(pos, blocksize); else - start_fsb = XFS_B_TO_FSB(mp, offset + written); - end_fsb = XFS_B_TO_FSB(mp, offset + length); + start_byte = round_up(pos + written, blocksize); + end_byte = round_up(pos + length, blocksize); + + /* Nothing to do if we've written the entire delalloc extent */ + if (start_byte >= end_byte) + return 0; + + return xfs_iomap_write_delalloc_release(inode, start_byte, end_byte, + punch); +} + +static int +xfs_buffered_write_iomap_end( + struct inode *inode, + loff_t offset, + loff_t length, + ssize_t written, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error; /* - * Trim delalloc blocks if they were allocated by this write and we - * didn't manage to write the whole range. - * - * We don't need to care about racing delalloc as we hold i_mutex - * across the reserve/allocate/unreserve calls. If there are delalloc - * blocks in the range, they are ours. + * Behave as if the write failed if drop writes is enabled. Set the NEW + * flag to force delalloc cleanup. 
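Stepping back to the start_byte/end_byte calculation in xfs_iomap_file_buffered_write_punch_delalloc() above, a worked example with made-up numbers may help:

	/*
	 * Illustrative values only: blocksize = 4096, pos = 10000,
	 * length = 20000, written = 5000.
	 *
	 *	start_byte = round_up(10000 + 5000, 4096)  = 16384
	 *	end_byte   = round_up(10000 + 20000, 4096) = 32768
	 *
	 * so delalloc blocks backing bytes [16384, 32768) are candidates for
	 * punching. If written had been 0, start_byte would instead be
	 * round_down(10000, 4096) = 8192, covering the whole failed write.
	 */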
*/ - if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); - - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); - return error; - } + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { + iomap->flags |= IOMAP_F_NEW; + written = 0; } + error = xfs_iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, + length, written, &xfs_buffered_write_delalloc_punch); + if (error && !xfs_is_shutdown(mp)) { + xfs_alert(mp, "%s: unable to clean up ino 0x%llx", + __func__, XFS_I(inode)->i_ino); + return error; + } return 0; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index cc478df1499626a37bf9a246dd84ccd4e5a88a88..a527a544a684170dd3ce57c1ddf09ddb0b0dfcc5 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -573,37 +573,6 @@ xfs_vn_getattr( return 0; } -static void -xfs_setattr_mode( - struct xfs_inode *ip, - struct iattr *iattr) -{ - struct inode *inode = VFS_I(ip); - umode_t mode = iattr->ia_mode; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; -} - -void -xfs_setattr_time( - struct xfs_inode *ip, - struct iattr *iattr) -{ - struct inode *inode = VFS_I(ip); - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (iattr->ia_valid & ATTR_ATIME) - inode->i_atime = iattr->ia_atime; - if (iattr->ia_valid & ATTR_CTIME) - inode->i_ctime = iattr->ia_ctime; - if (iattr->ia_valid & ATTR_MTIME) - inode->i_mtime = iattr->ia_mtime; -} - static int xfs_vn_change_ok( struct dentry *dentry, @@ -701,16 +670,6 @@ xfs_setattr_nonsize( gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((inode->i_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - inode->i_mode &= ~(S_ISUID|S_ISGID); - /* * Change the ownerships and register quota modifications * in the transaction. 
@@ -722,7 +681,6 @@ xfs_setattr_nonsize( olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } - inode->i_uid = uid; } if (!gid_eq(igid, gid)) { if (XFS_IS_GQUOTA_ON(mp)) { @@ -733,15 +691,10 @@ xfs_setattr_nonsize( olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - inode->i_gid = gid; } } - if (mask & ATTR_MODE) - xfs_setattr_mode(ip, iattr); - if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_setattr_time(ip, iattr); - + setattr_copy(inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); @@ -981,11 +934,8 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - if (iattr->ia_valid & ATTR_MODE) - xfs_setattr_mode(ip, iattr); - if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_setattr_time(ip, iattr); - + ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); + setattr_copy(inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index fb3604b682080d940f0e4876f776e29ee4a7ff4c..42aab2a39a956504054fec1bffe58e1255cb6b6e 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -18,7 +18,6 @@ extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); */ #define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ -extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags); extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 7220945cf816ed696b8b9224b59aeff3a975243c..9a363f787ba423e0d647cb4adbae74e54bbe76a3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -21,7 +21,7 @@ #include "xfs_sb.h" #include "xfs_health.h" -kmem_zone_t *xfs_log_ticket_zone; +struct kmem_cache *xfs_log_ticket_cache; /* Local miscellaneous function prototypes */ STATIC struct xlog * @@ -613,7 +613,6 @@ xfs_log_mount( int num_bblks) { struct xlog *log; - bool fatal = xfs_has_crc(mp); int error = 0; int min_logfsbs; @@ -635,53 +634,37 @@ xfs_log_mount( mp->m_log = log; /* - * Validate the given log space and drop a critical message via syslog - * if the log size is too small that would lead to some unexpected - * situations in transaction log space reservation stage. + * Now that we have set up the log and it's internal geometry + * parameters, we can validate the given log space and drop a critical + * message via syslog if the log size is too small. A log that is too + * small can lead to unexpected situations in transaction log space + * reservation stage. The superblock verifier has already validated all + * the other log geometry constraints, so we don't have to check those + * here. * - * Note: we can't just reject the mount if the validation fails. This - * would mean that people would have to downgrade their kernel just to - * remedy the situation as there is no way to grow the log (short of - * black magic surgery with xfs_db). + * Note: For v4 filesystems, we can't just reject the mount if the + * validation fails. This would mean that people would have to + * downgrade their kernel just to remedy the situation as there is no + * way to grow the log (short of black magic surgery with xfs_db). * - * We can, however, reject mounts for CRC format filesystems, as the + * We can, however, reject mounts for V5 format filesystems, as the * mkfs binary being used to make the filesystem should never create a * filesystem with a log that is too small. 
*/ min_logfsbs = xfs_log_calc_minimum_size(mp); - if (mp->m_sb.sb_logblocks < min_logfsbs) { xfs_warn(mp, "Log size %d blocks too small, minimum size is %d blocks", mp->m_sb.sb_logblocks, min_logfsbs); - error = -EINVAL; - } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { - xfs_warn(mp, - "Log size %d blocks too large, maximum size is %lld blocks", - mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); - error = -EINVAL; - } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { - xfs_warn(mp, - "log size %lld bytes too large, maximum size is %lld bytes", - XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), - XFS_MAX_LOG_BYTES); - error = -EINVAL; - } else if (mp->m_sb.sb_logsunit > 1 && - mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) { - xfs_warn(mp, - "log stripe unit %u bytes must be a multiple of block size", - mp->m_sb.sb_logsunit); - error = -EINVAL; - fatal = true; - } - if (error) { + /* * Log check errors are always fatal on v5; or whenever bad * metadata leads to a crash. */ - if (fatal) { + if (xfs_has_crc(mp)) { xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); ASSERT(0); + error = -EINVAL; goto out_free_log; } xfs_crit(mp, "Log size out of supported range."); @@ -2701,19 +2684,22 @@ xlog_get_lowest_lsn( static void xlog_state_set_callback( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t header_lsn) + struct xlog_in_core *iclog) { + struct xfs_cil_ctx *ctx; + trace_xlog_iclog_callback(iclog, _RET_IP_); iclog->ic_state = XLOG_STATE_CALLBACK; - ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), - header_lsn) <= 0); - - if (list_empty_careful(&iclog->ic_callbacks)) + ctx = list_first_entry_or_null(&iclog->ic_callbacks, + struct xfs_cil_ctx, iclog_entry); + if (!ctx) return; - atomic64_set(&log->l_last_sync_lsn, header_lsn); + ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), + ctx->start_lsn) <= 0); + + atomic64_set(&log->l_last_sync_lsn, ctx->start_lsn); xlog_grant_push_ail(log, 0); } @@ -2748,7 +2734,7 @@ xlog_state_iodone_process_iclog( lowest_lsn = xlog_get_lowest_lsn(log); if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0) return false; - xlog_state_set_callback(log, iclog, header_lsn); + xlog_state_set_callback(log, iclog); return false; default: /* @@ -3397,7 +3383,7 @@ xfs_log_ticket_put( { ASSERT(atomic_read(&ticket->t_ref) > 0); if (atomic_dec_and_test(&ticket->t_ref)) - kmem_cache_free(xfs_log_ticket_zone, ticket); + kmem_cache_free(xfs_log_ticket_cache, ticket); } xlog_ticket_t * @@ -3521,7 +3507,7 @@ xlog_ticket_alloc( struct xlog_ticket *tic; int unit_res; - tic = kmem_cache_zalloc(xfs_log_ticket_zone, GFP_NOFS | __GFP_NOFAIL); + tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL); unit_res = xlog_calc_unit_res(log, unit_bytes); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index ecb9ec8a4d055f4468e64bad0cd51c53709c2000..ee004a779d3688f1c0292da80415ff0be1585a23 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -502,7 +502,7 @@ xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, char *dp, int size); -extern kmem_zone_t *xfs_log_ticket_zone; +extern struct kmem_cache *xfs_log_ticket_cache; struct xlog_ticket * xlog_ticket_alloc( struct xlog *log, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 1ce9700ea68c0e4e1d50b6486aa974eb5cc97f8d..331f568834a1b58245458c7b246110ea4602e24c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3193,8 +3193,12 @@ xlog_do_recovery_pass( * Submit 
buffers that have been added from the last record processed, * regardless of error status. */ - if (!list_empty(&buffer_list)) + if (!list_empty(&buffer_list)) { + if (error) + xfs_force_shutdown(log->l_mp, SHUTDOWN_META_IO_ERROR); + error2 = xfs_buf_delwri_submit(&buffer_list); + } if (error && first_bad) *first_bad = rhead_blk; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 8346441985b0e54ecf28baf20c605461e7e522ac..04b347fe1b59afdb57d1e124e6de15ad88e695cc 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -128,7 +128,6 @@ __xfs_free_perag( struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); - ASSERT(atomic_read(&pag->pag_ref) == 0); kmem_free(pag); } @@ -147,7 +146,7 @@ xfs_free_perag( pag = radix_tree_delete(&mp->m_perag_tree, agno); spin_unlock(&mp->m_perag_lock); ASSERT(pag); - ASSERT(atomic_read(&pag->pag_ref) == 0); + XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_iunlink_destroy(pag); xfs_buf_hash_destroy(pag); @@ -779,6 +778,15 @@ xfs_mountfs( xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); + /* + * We now need m_ag_maxlevels/m_rmap_maxlevels to initialize + * m_alloc_set_aside/m_ag_max_usable. And when we first do the + * init in xfs_sb_mount_common, m_alloc_set_aside/m_ag_max_usable + * still equals to 0. Redo it now. + */ + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); + /* * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks * is NOT aligned turn off m_dalign since allocator alignment is within diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 34c3b16f834f94584c27105bf4eb94b1782226dd..f85e3b07ab4408897ddbcbad3935e06a8b8f1b78 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -219,7 +219,7 @@ _xfs_mru_cache_list_insert( * When destroying or reaping, all the elements that were migrated to the reap * list need to be deleted. For each element this involves removing it from the * data store, removing it from the reap list, calling the client's free - * function and deleting the element from the element zone. + * function and deleting the element from the element cache. * * We get called holding the mru->lock, which we drop and then reacquire. * Sparse need special help with this to tell it we know what we are doing. diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 2876b1808e338d33a0b01cae052dde14d6a0b7ee..21839e35e0984c0a30d16d1aca4f41e5ec94f4e4 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -287,7 +287,8 @@ xfs_fs_commit_blocks( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_setattr_time(ip, iattr); + ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); + setattr_copy(inode, iattr); if (update_isize) { i_size_write(inode, iattr->ia_size); ip->i_d.di_size = iattr->ia_size; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 442a0f97a9d4390bb69a632bd585b950ec2b744e..5bb12717ea28c109498cb3466c65f26e1b62e8ce 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -11,7 +11,7 @@ struct xfs_inode; -extern struct kmem_zone *xfs_qm_dqtrxzone; +extern struct kmem_cache *xfs_dqtrx_cache; /* * Number of bmaps that we ask from bmapi when doing a quotacheck. 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 93535c110ca1e2c243da0088a1372a304cbf59af..1d3036fcfbf4f8a4d4cab7ead7bb7a43e72baea8 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_cui_zone; -kmem_zone_t *xfs_cud_zone; +struct kmem_cache *xfs_cui_cache; +struct kmem_cache *xfs_cud_cache; static const struct xfs_item_ops xfs_cui_item_ops; @@ -39,7 +39,7 @@ xfs_cui_item_free( if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) kmem_free(cuip); else - kmem_cache_free(xfs_cui_zone, cuip); + kmem_cache_free(xfs_cui_cache, cuip); } /* @@ -144,7 +144,7 @@ xfs_cui_init( cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents), 0); else - cuip = kmem_cache_zalloc(xfs_cui_zone, + cuip = kmem_cache_zalloc(xfs_cui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops); @@ -206,7 +206,7 @@ xfs_cud_item_release( xfs_cui_release(cudp->cud_cuip); kmem_free(cudp->cud_item.li_lv_shadow); - kmem_cache_free(xfs_cud_zone, cudp); + kmem_cache_free(xfs_cud_cache, cudp); } static const struct xfs_item_ops xfs_cud_item_ops = { @@ -223,7 +223,7 @@ xfs_trans_get_cud( { struct xfs_cud_log_item *cudp; - cudp = kmem_cache_zalloc(xfs_cud_zone, GFP_KERNEL | __GFP_NOFAIL); + cudp = kmem_cache_zalloc(xfs_cud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); cudp->cud_cuip = cuip; @@ -386,7 +386,7 @@ xfs_refcount_update_finish_item( refc->ri_blockcount = new_aglen; return -EAGAIN; } - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); return error; } @@ -406,7 +406,7 @@ xfs_refcount_update_cancel_item( struct xfs_refcount_intent *refc; refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_free(refc); + kmem_cache_free(xfs_refcount_intent_cache, refc); } const struct xfs_defer_op_type xfs_refcount_update_defer_type = { diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index f4f2e836540bb784ffd1d8296fff41a7e5a9a982..eb0ab13682d0b4702f5c7bd3696087709b79ed45 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -25,7 +25,7 @@ /* kernel only CUI/CUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. 
@@ -68,7 +68,7 @@ struct xfs_cud_log_item { struct xfs_cud_log_format cud_format; }; -extern struct kmem_zone *xfs_cui_zone; -extern struct kmem_zone *xfs_cud_zone; +extern struct kmem_cache *xfs_cui_cache; +extern struct kmem_cache *xfs_cud_cache; #endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 19b1285f6ae9cbe03cf5c79bc97b27b89e3df90d..551cc92b049dc2008966b5dc70ed17e4c1bcef68 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -484,8 +484,11 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_bmap_add_free(*tpp, del.br_startblock, - del.br_blockcount, NULL); + error = xfs_free_extent_later(*tpp, del.br_startblock, + del.br_blockcount, NULL, + XFS_AG_RESV_NONE); + if (error) + break; /* Roll the transaction */ error = xfs_defer_finish(tpp); diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 46ada19c3c260d675ebf10307cbea50193a12987..aafbb6589ebca8c7b51c4f7734514bfe91ab4390 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -21,8 +21,8 @@ #include "xfs_log_priv.h" #include "xfs_log_recover.h" -kmem_zone_t *xfs_rui_zone; -kmem_zone_t *xfs_rud_zone; +struct kmem_cache *xfs_rui_cache; +struct kmem_cache *xfs_rud_cache; static const struct xfs_item_ops xfs_rui_item_ops; @@ -39,7 +39,7 @@ xfs_rui_item_free( if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) kmem_free(ruip); else - kmem_cache_free(xfs_rui_zone, ruip); + kmem_cache_free(xfs_rui_cache, ruip); } /* @@ -142,7 +142,7 @@ xfs_rui_init( if (nextents > XFS_RUI_MAX_FAST_EXTENTS) ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0); else - ruip = kmem_cache_zalloc(xfs_rui_zone, + ruip = kmem_cache_zalloc(xfs_rui_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); @@ -204,7 +204,7 @@ xfs_rud_item_release( xfs_rui_release(rudp->rud_ruip); kmem_free(rudp->rud_item.li_lv_shadow); - kmem_cache_free(xfs_rud_zone, rudp); + kmem_cache_free(xfs_rud_cache, rudp); } static const struct xfs_item_ops xfs_rud_item_ops = { @@ -221,7 +221,7 @@ xfs_trans_get_rud( { struct xfs_rud_log_item *rudp; - rudp = kmem_cache_zalloc(xfs_rud_zone, GFP_KERNEL | __GFP_NOFAIL); + rudp = kmem_cache_zalloc(xfs_rud_cache, GFP_KERNEL | __GFP_NOFAIL); xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); rudp->rud_ruip = ruip; @@ -404,7 +404,7 @@ xfs_rmap_update_finish_item( rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock, rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state, state); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); return error; } @@ -424,7 +424,7 @@ xfs_rmap_update_cancel_item( struct xfs_rmap_intent *rmap; rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_free(rmap); + kmem_cache_free(xfs_rmap_intent_cache, rmap); } const struct xfs_defer_op_type xfs_rmap_update_defer_type = { diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index 31e6cdfff71f49effbc9cc0c8e69c986c9507e91..802e5119eacaab640dc4b4d25e955cc02aadd695 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h @@ -28,7 +28,7 @@ /* kernel only RUI/RUD definitions */ struct xfs_mount; -struct kmem_zone; +struct kmem_cache; /* * Max number of extents in fast allocation path. 
@@ -68,7 +68,7 @@ struct xfs_rud_log_item { struct xfs_rud_log_format rud_format; }; -extern struct kmem_zone *xfs_rui_zone; -extern struct kmem_zone *xfs_rud_zone; +extern struct kmem_cache *xfs_rui_cache; +extern struct kmem_cache *xfs_rud_cache; #endif /* __XFS_RMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e83af9027131a8e231b0311ef63a8cdfff731bbb..502fb08bfd3812fafea2775a24be40b24da83872 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -36,6 +36,7 @@ #include "xfs_bmap_item.h" #include "xfs_reflink.h" #include "xfs_pwork.h" +#include "xfs_defer.h" #include #include @@ -1989,196 +1990,196 @@ static struct file_system_type xfs_fs_type = { MODULE_ALIAS_FS("xfs"); STATIC int __init -xfs_init_zones(void) +xfs_init_caches(void) { - xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket", + int error; + + xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", sizeof(struct xlog_ticket), 0, 0, NULL); - if (!xfs_log_ticket_zone) + if (!xfs_log_ticket_cache) goto out; - xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item", - sizeof(struct xfs_extent_free_item), - 0, 0, NULL); - if (!xfs_bmap_free_item_zone) - goto out_destroy_log_ticket_zone; - - xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur", + xfs_btree_cur_cache = kmem_cache_create("xfs_btree_cur", sizeof(struct xfs_btree_cur), 0, 0, NULL); - if (!xfs_btree_cur_zone) - goto out_destroy_bmap_free_item_zone; + if (!xfs_btree_cur_cache) + goto out_destroy_log_ticket_cache; + + error = xfs_defer_init_item_caches(); + if (error) + goto out_destroy_btree_cur_cache; - xfs_da_state_zone = kmem_cache_create("xfs_da_state", + xfs_da_state_cache = kmem_cache_create("xfs_da_state", sizeof(struct xfs_da_state), 0, 0, NULL); - if (!xfs_da_state_zone) - goto out_destroy_btree_cur_zone; + if (!xfs_da_state_cache) + goto out_destroy_defer_item_cache; - xfs_ifork_zone = kmem_cache_create("xfs_ifork", + xfs_ifork_cache = kmem_cache_create("xfs_ifork", sizeof(struct xfs_ifork), 0, 0, NULL); - if (!xfs_ifork_zone) - goto out_destroy_da_state_zone; + if (!xfs_ifork_cache) + goto out_destroy_da_state_cache; - xfs_trans_zone = kmem_cache_create("xfs_trans", + xfs_trans_cache = kmem_cache_create("xfs_trans", sizeof(struct xfs_trans), 0, 0, NULL); - if (!xfs_trans_zone) - goto out_destroy_ifork_zone; + if (!xfs_trans_cache) + goto out_destroy_ifork_cache; /* - * The size of the zone allocated buf log item is the maximum + * The size of the cache-allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, * but it is much faster. 
*/ - xfs_buf_item_zone = kmem_cache_create("xfs_buf_item", + xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", sizeof(struct xfs_buf_log_item), 0, 0, NULL); - if (!xfs_buf_item_zone) - goto out_destroy_trans_zone; + if (!xfs_buf_item_cache) + goto out_destroy_trans_cache; - xfs_efd_zone = kmem_cache_create("xfs_efd_item", + xfs_efd_cache = kmem_cache_create("xfs_efd_item", (sizeof(struct xfs_efd_log_item) + XFS_EFD_MAX_FAST_EXTENTS * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efd_zone) - goto out_destroy_buf_item_zone; + if (!xfs_efd_cache) + goto out_destroy_buf_item_cache; - xfs_efi_zone = kmem_cache_create("xfs_efi_item", + xfs_efi_cache = kmem_cache_create("xfs_efi_item", (sizeof(struct xfs_efi_log_item) + XFS_EFI_MAX_FAST_EXTENTS * sizeof(struct xfs_extent)), 0, 0, NULL); - if (!xfs_efi_zone) - goto out_destroy_efd_zone; + if (!xfs_efi_cache) + goto out_destroy_efd_cache; - xfs_inode_zone = kmem_cache_create("xfs_inode", + xfs_inode_cache = kmem_cache_create("xfs_inode", sizeof(struct xfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), xfs_fs_inode_init_once); - if (!xfs_inode_zone) - goto out_destroy_efi_zone; + if (!xfs_inode_cache) + goto out_destroy_efi_cache; - xfs_ili_zone = kmem_cache_create("xfs_ili", + xfs_ili_cache = kmem_cache_create("xfs_ili", sizeof(struct xfs_inode_log_item), 0, SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); - if (!xfs_ili_zone) - goto out_destroy_inode_zone; + if (!xfs_ili_cache) + goto out_destroy_inode_cache; - xfs_icreate_zone = kmem_cache_create("xfs_icr", + xfs_icreate_cache = kmem_cache_create("xfs_icr", sizeof(struct xfs_icreate_item), 0, 0, NULL); - if (!xfs_icreate_zone) - goto out_destroy_ili_zone; + if (!xfs_icreate_cache) + goto out_destroy_ili_cache; - xfs_rud_zone = kmem_cache_create("xfs_rud_item", + xfs_rud_cache = kmem_cache_create("xfs_rud_item", sizeof(struct xfs_rud_log_item), 0, 0, NULL); - if (!xfs_rud_zone) - goto out_destroy_icreate_zone; + if (!xfs_rud_cache) + goto out_destroy_icreate_cache; - xfs_rui_zone = kmem_cache_create("xfs_rui_item", + xfs_rui_cache = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_rui_zone) - goto out_destroy_rud_zone; + if (!xfs_rui_cache) + goto out_destroy_rud_cache; - xfs_cud_zone = kmem_cache_create("xfs_cud_item", + xfs_cud_cache = kmem_cache_create("xfs_cud_item", sizeof(struct xfs_cud_log_item), 0, 0, NULL); - if (!xfs_cud_zone) - goto out_destroy_rui_zone; + if (!xfs_cud_cache) + goto out_destroy_rui_cache; - xfs_cui_zone = kmem_cache_create("xfs_cui_item", + xfs_cui_cache = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_cui_zone) - goto out_destroy_cud_zone; + if (!xfs_cui_cache) + goto out_destroy_cud_cache; - xfs_bud_zone = kmem_cache_create("xfs_bud_item", + xfs_bud_cache = kmem_cache_create("xfs_bud_item", sizeof(struct xfs_bud_log_item), 0, 0, NULL); - if (!xfs_bud_zone) - goto out_destroy_cui_zone; + if (!xfs_bud_cache) + goto out_destroy_cui_cache; - xfs_bui_zone = kmem_cache_create("xfs_bui_item", + xfs_bui_cache = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 0, 0, NULL); - if (!xfs_bui_zone) - goto out_destroy_bud_zone; + if (!xfs_bui_cache) + goto out_destroy_bud_cache; return 0; - out_destroy_bud_zone: - kmem_cache_destroy(xfs_bud_zone); - out_destroy_cui_zone: - kmem_cache_destroy(xfs_cui_zone); - out_destroy_cud_zone: - kmem_cache_destroy(xfs_cud_zone); - 
out_destroy_rui_zone: - kmem_cache_destroy(xfs_rui_zone); - out_destroy_rud_zone: - kmem_cache_destroy(xfs_rud_zone); - out_destroy_icreate_zone: - kmem_cache_destroy(xfs_icreate_zone); - out_destroy_ili_zone: - kmem_cache_destroy(xfs_ili_zone); - out_destroy_inode_zone: - kmem_cache_destroy(xfs_inode_zone); - out_destroy_efi_zone: - kmem_cache_destroy(xfs_efi_zone); - out_destroy_efd_zone: - kmem_cache_destroy(xfs_efd_zone); - out_destroy_buf_item_zone: - kmem_cache_destroy(xfs_buf_item_zone); - out_destroy_trans_zone: - kmem_cache_destroy(xfs_trans_zone); - out_destroy_ifork_zone: - kmem_cache_destroy(xfs_ifork_zone); - out_destroy_da_state_zone: - kmem_cache_destroy(xfs_da_state_zone); - out_destroy_btree_cur_zone: - kmem_cache_destroy(xfs_btree_cur_zone); - out_destroy_bmap_free_item_zone: - kmem_cache_destroy(xfs_bmap_free_item_zone); - out_destroy_log_ticket_zone: - kmem_cache_destroy(xfs_log_ticket_zone); + out_destroy_bud_cache: + kmem_cache_destroy(xfs_bud_cache); + out_destroy_cui_cache: + kmem_cache_destroy(xfs_cui_cache); + out_destroy_cud_cache: + kmem_cache_destroy(xfs_cud_cache); + out_destroy_rui_cache: + kmem_cache_destroy(xfs_rui_cache); + out_destroy_rud_cache: + kmem_cache_destroy(xfs_rud_cache); + out_destroy_icreate_cache: + kmem_cache_destroy(xfs_icreate_cache); + out_destroy_ili_cache: + kmem_cache_destroy(xfs_ili_cache); + out_destroy_inode_cache: + kmem_cache_destroy(xfs_inode_cache); + out_destroy_efi_cache: + kmem_cache_destroy(xfs_efi_cache); + out_destroy_efd_cache: + kmem_cache_destroy(xfs_efd_cache); + out_destroy_buf_item_cache: + kmem_cache_destroy(xfs_buf_item_cache); + out_destroy_trans_cache: + kmem_cache_destroy(xfs_trans_cache); + out_destroy_ifork_cache: + kmem_cache_destroy(xfs_ifork_cache); + out_destroy_da_state_cache: + kmem_cache_destroy(xfs_da_state_cache); + out_destroy_defer_item_cache: + xfs_defer_destroy_item_caches(); + out_destroy_btree_cur_cache: + kmem_cache_destroy(xfs_btree_cur_cache); + out_destroy_log_ticket_cache: + kmem_cache_destroy(xfs_log_ticket_cache); out: return -ENOMEM; } STATIC void -xfs_destroy_zones(void) +xfs_destroy_caches(void) { /* * Make sure all delayed rcu free are flushed before we * destroy caches. 
*/ rcu_barrier(); - kmem_cache_destroy(xfs_bui_zone); - kmem_cache_destroy(xfs_bud_zone); - kmem_cache_destroy(xfs_cui_zone); - kmem_cache_destroy(xfs_cud_zone); - kmem_cache_destroy(xfs_rui_zone); - kmem_cache_destroy(xfs_rud_zone); - kmem_cache_destroy(xfs_icreate_zone); - kmem_cache_destroy(xfs_ili_zone); - kmem_cache_destroy(xfs_inode_zone); - kmem_cache_destroy(xfs_efi_zone); - kmem_cache_destroy(xfs_efd_zone); - kmem_cache_destroy(xfs_buf_item_zone); - kmem_cache_destroy(xfs_trans_zone); - kmem_cache_destroy(xfs_ifork_zone); - kmem_cache_destroy(xfs_da_state_zone); - kmem_cache_destroy(xfs_btree_cur_zone); - kmem_cache_destroy(xfs_bmap_free_item_zone); - kmem_cache_destroy(xfs_log_ticket_zone); + kmem_cache_destroy(xfs_bui_cache); + kmem_cache_destroy(xfs_bud_cache); + kmem_cache_destroy(xfs_cui_cache); + kmem_cache_destroy(xfs_cud_cache); + kmem_cache_destroy(xfs_rui_cache); + kmem_cache_destroy(xfs_rud_cache); + kmem_cache_destroy(xfs_icreate_cache); + kmem_cache_destroy(xfs_ili_cache); + kmem_cache_destroy(xfs_inode_cache); + kmem_cache_destroy(xfs_efi_cache); + kmem_cache_destroy(xfs_efd_cache); + kmem_cache_destroy(xfs_buf_item_cache); + kmem_cache_destroy(xfs_trans_cache); + kmem_cache_destroy(xfs_ifork_cache); + kmem_cache_destroy(xfs_da_state_cache); + kmem_cache_destroy(xfs_btree_cur_cache); + xfs_defer_destroy_item_caches(); + kmem_cache_destroy(xfs_log_ticket_cache); } STATIC int __init @@ -2271,13 +2272,13 @@ init_xfs_fs(void) if (error) goto out; - error = xfs_init_zones(); + error = xfs_init_caches(); if (error) goto out_destroy_hp; error = xfs_init_workqueues(); if (error) - goto out_destroy_zones; + goto out_destroy_caches; error = xfs_mru_cache_init(); if (error) @@ -2352,8 +2353,8 @@ init_xfs_fs(void) xfs_mru_cache_uninit(); out_destroy_wq: xfs_destroy_workqueues(); - out_destroy_zones: - xfs_destroy_zones(); + out_destroy_caches: + xfs_destroy_caches(); out_destroy_hp: xfs_cpu_hotplug_destroy(); out: @@ -2376,7 +2377,7 @@ exit_xfs_fs(void) xfs_buf_terminate(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); - xfs_destroy_zones(); + xfs_destroy_caches(); xfs_uuid_table_free(); xfs_cpu_hotplug_destroy(); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 8b6617833c5887cb39a33d079f229e7cee9de357..609ddada6fd78c26fc04d625eb984be9173c6e8a 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -24,8 +24,9 @@ #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_icache.h" +#include "xfs_buf_item.h" -kmem_zone_t *xfs_trans_zone; +struct kmem_cache *xfs_trans_cache; #if defined(CONFIG_TRACEPOINTS) static void @@ -76,7 +77,7 @@ xfs_trans_free( if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); - kmem_cache_free(xfs_trans_zone, tp); + kmem_cache_free(xfs_trans_cache, tp); } /* @@ -95,7 +96,7 @@ xfs_trans_dup( trace_xfs_trans_dup(tp, _RET_IP_); - ntp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); /* * Initialize the new transaction structure. @@ -263,7 +264,7 @@ xfs_trans_alloc( * by doing GFP_KERNEL allocations inside sb_start_intwrite(). 
*/ retry: - tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); + tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); xfs_trans_set_context(tp); @@ -478,11 +479,14 @@ STATIC void xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { - xfs_dsb_t *sbp; - xfs_buf_t *bp; - int whole = 0; + xfs_dsb_t *sbp; + xfs_buf_t *bp; + struct xfs_buf_log_item *bip; + int whole = 0; + int grow = 0; bp = xfs_trans_getsb(tp); + bip = bp->b_log_item; sbp = bp->b_addr; /* @@ -507,10 +511,12 @@ xfs_trans_apply_sb_deltas( if (tp->t_dblocks_delta) { be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); whole = 1; + grow = 1; } if (tp->t_agcount_delta) { be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta); whole = 1; + grow = 1; } if (tp->t_imaxpct_delta) { sbp->sb_imax_pct += tp->t_imaxpct_delta; @@ -538,6 +544,8 @@ xfs_trans_apply_sb_deltas( } xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); + if (grow) + bip->bli_flags |= XFS_BLI_GROW_SB_BUF; if (whole) /* * Log the whole thing, the fields are noncontiguous. @@ -977,7 +985,7 @@ xfs_trans_cancel( * progress, so we only need to check against the mount shutdown state * here. */ - if (dirty && !xfs_is_shutdown(mp)) { + if (dirty && !(xfs_is_shutdown(mp) && xlog_is_shutdown(log))) { XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 2aad408b13134152f4345dfa63c82bcd68e26d57..731af3f04c83246982062f8ba32fb81b47cd0cc2 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -244,7 +244,7 @@ void xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); -extern kmem_zone_t *xfs_trans_zone; +extern struct kmem_cache *xfs_trans_cache; static inline struct xfs_log_item * xfs_trans_item_relog( diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d3a97a028560651828a5e73b75f8d3585a5a96eb..11a938652f2219652618a22e2b0cad113315d852 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -730,11 +730,10 @@ void xfs_ail_push_all_sync( struct xfs_ail *ailp) { - struct xfs_log_item *lip; DEFINE_WAIT(wait); spin_lock(&ailp->ail_lock); - while ((lip = xfs_ail_max(ailp)) != NULL) { + while (xfs_ail_max(ailp) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); @@ -822,7 +821,7 @@ xfs_trans_ail_update_bulk( trace_xfs_ail_insert(lip, 0, lsn); } lip->li_lsn = lsn; - list_add(&lip->li_ail, &tmp); + list_add_tail(&lip->li_ail, &tmp); } if (!list_empty(&tmp)) diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index d91a8ecf01ff5569d1d8949b2c74ae41ce0b2d35..e29b77a716641281709b7001a4c3259e94f96666 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -866,7 +866,7 @@ STATIC void xfs_trans_alloc_dqinfo( xfs_trans_t *tp) { - tp->t_dqinfo = kmem_cache_zalloc(xfs_qm_dqtrxzone, + tp->t_dqinfo = kmem_cache_zalloc(xfs_dqtrx_cache, GFP_KERNEL | __GFP_NOFAIL); } @@ -876,6 +876,6 @@ xfs_trans_free_dqinfo( { if (!tp->t_dqinfo) return; - kmem_cache_free(xfs_qm_dqtrxzone, tp->t_dqinfo); + kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo); tp->t_dqinfo = NULL; } diff --git a/mm/filemap.c b/mm/filemap.c index dbd5aa13fc2d01624dc7635e5304154cd0ce5910..fd4aae06ff150cbee001838854eb95e065695d94 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2855,6 +2855,7 @@ loff_t 
mapping_seek_hole_data(struct address_space *mapping, loff_t start, return end; return start; } +EXPORT_SYMBOL(mapping_seek_hole_data); #ifdef CONFIG_MMU #define MMAP_LOTSAMISS (100)
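
The cache conversions above all follow the same slab-cache lifecycle: a cache is created once at module init with kmem_cache_create(), objects are allocated from it with kmem_cache_zalloc(), released back to the same cache with kmem_cache_free() (never kmem_free(), which is only correct for kmem_alloc()/kmem_zalloc() memory, hence the fixes in the intent-item finish/cancel paths), and the cache itself is destroyed at module exit after an rcu_barrier(). Below is a minimal, self-contained sketch of that pattern; foo_item and foo_item_cache are hypothetical stand-ins for illustration only, not symbols from this patch.

/*
 * Hypothetical sketch of the cache lifecycle used throughout this series.
 * foo_item/foo_item_cache are stand-ins, not XFS symbols.
 */
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo_item {
	unsigned int		fi_len;
};

static struct kmem_cache	*foo_item_cache;

static int __init
foo_init(void)
{
	struct foo_item	*fi;

	/* Same creation signature as the xfs_*_cache setups above. */
	foo_item_cache = kmem_cache_create("foo_item",
			sizeof(struct foo_item), 0, 0, NULL);
	if (!foo_item_cache)
		return -ENOMEM;

	/* Allocate from the cache... */
	fi = kmem_cache_zalloc(foo_item_cache, GFP_KERNEL | __GFP_NOFAIL);
	fi->fi_len = 1;

	/* ...and free back to the *same* cache, never via kmem_free(). */
	kmem_cache_free(foo_item_cache, fi);
	return 0;
}

static void __exit
foo_exit(void)
{
	/* Flush delayed RCU frees before tearing the cache down. */
	rcu_barrier();
	kmem_cache_destroy(foo_item_cache);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");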
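
The one-line change in xfs_trans_ail_update_bulk() swaps list_add() for list_add_tail(): list_add() inserts immediately after the list head, so a forward walk of the temporary list visits items in the reverse of the order they were queued, while list_add_tail() inserts before the head and preserves queueing order. A small, hypothetical sketch of the difference (demo-only code, not from this patch):

/* Hypothetical demo of list_add() vs list_add_tail() ordering. */
#include <linux/list.h>
#include <linux/module.h>
#include <linux/printk.h>

struct demo_item {
	struct list_head	li_list;
	int			li_id;
};

static int __init
demo_init(void)
{
	struct demo_item a[3] = { { .li_id = 0 }, { .li_id = 1 }, { .li_id = 2 } };
	struct demo_item b[3] = { { .li_id = 0 }, { .li_id = 1 }, { .li_id = 2 } };
	LIST_HEAD(front);	/* built with list_add() */
	LIST_HEAD(back);	/* built with list_add_tail() */
	struct demo_item *ip;
	int i;

	for (i = 0; i < 3; i++) {
		list_add(&a[i].li_list, &front);	/* prepend */
		list_add_tail(&b[i].li_list, &back);	/* append  */
	}

	list_for_each_entry(ip, &front, li_list)
		pr_info("list_add order: %d\n", ip->li_id);	/* 2 1 0 */
	list_for_each_entry(ip, &back, li_list)
		pr_info("list_add_tail order: %d\n", ip->li_id);/* 0 1 2 */
	return 0;
}

static void __exit
demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");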