diff --git a/block/bio.c b/block/bio.c index 8c64c93e96c8afd00ff938ab35a86ce479291a60..b28b6a51fb2cb6692ff6b24a2fa835455a8386e7 100644 --- a/block/bio.c +++ b/block/bio.c @@ -915,9 +915,6 @@ void __bio_add_page(struct bio *bio, struct page *page, bio->bi_iter.bi_size += len; bio->bi_vcnt++; - - if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page))) - bio_set_flag(bio, BIO_WORKINGSET); } EXPORT_SYMBOL_GPL(__bio_add_page); @@ -1108,9 +1105,6 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) * fit into the bio, or are requested in @iter, whatever is smaller. If * MM encounters an error pinning the requested pages, it stops. Error * is returned only if 0 pages could be pinned. - * - * It's intended for direct IO, so doesn't do PSI tracking, the caller is - * responsible for setting BIO_WORKINGSET if necessary. */ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { @@ -1136,8 +1130,6 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) if (is_bvec) bio_set_flag(bio, BIO_NO_PAGE_REF); - /* don't account direct I/O as memory stall */ - bio_clear_flag(bio, BIO_WORKINGSET); return bio->bi_vcnt ? 0 : ret; } EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); diff --git a/block/blk-core.c b/block/blk-core.c index 4afdd568225bd1655dc6d7a2a94a5d1670ac462e..502284017da189d286c8aa0526c28a3a4268223e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -1108,24 +1107,6 @@ blk_qc_t submit_bio(struct bio *bio) } } - /* - * If we're reading data that is part of the userspace workingset, count - * submission time as memory stall. When the device is congested, or - * the submitting cgroup IO-throttled, submission can be a significant - * part of overall IO time. - */ - if (unlikely(bio_op(bio) == REQ_OP_READ && - bio_flagged(bio, BIO_WORKINGSET))) { - unsigned long pflags; - blk_qc_t ret; - - psi_memstall_enter(&pflags); - ret = submit_bio_noacct(bio); - psi_memstall_leave(&pflags); - - return ret; - } - return submit_bio_noacct(bio); } EXPORT_SYMBOL(submit_bio); diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 646416b5940e9a0fd8baf8000a81f17c993cb6df..a3ecff725688a9e7ebbb5ea6c06082978501c564 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -500,7 +501,8 @@ static u64 bio_end_offset(struct bio *bio) static noinline int add_ra_bio_pages(struct inode *inode, u64 compressed_end, - struct compressed_bio *cb) + struct compressed_bio *cb, + int *memstall, unsigned long *pflags) { unsigned long end_index; unsigned long pg_index; @@ -549,6 +551,11 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } + if (!*memstall && PageWorkingset(page)) { + psi_memstall_enter(pflags); + *memstall = 1; + } + end = last_offset + PAGE_SIZE - 1; /* * at this point, we have a locked page in the page cache @@ -630,6 +637,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9; u64 em_len; u64 em_start; + unsigned long pflags; + int memstall = 0; struct extent_map *em; blk_status_t ret = BLK_STS_RESOURCE; int faili = 0; @@ -688,7 +697,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, faili = nr_pages - 1; cb->nr_pages = nr_pages; - add_ra_bio_pages(inode, em_start + em_len, cb); + add_ra_bio_pages(inode, em_start + em_len, cb, &memstall, &pflags); /* include any pages we added in add_ra-bio_pages */ cb->len = bio->bi_iter.bi_size; @@ -767,6 +776,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, bio_endio(comp_bio); } + if (memstall) + psi_memstall_leave(&pflags); + return 0; fail2: diff --git a/fs/direct-io.c b/fs/direct-io.c index 9dafbb07dd6a6f387512eb78dcccdef4fa834789..c64d4eb38995a7639eec0756111071d8a861830f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -426,8 +426,6 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) unsigned long flags; bio->bi_private = dio; - /* don't account direct I/O as memory stall */ - bio_clear_flag(bio, BIO_WORKINGSET); spin_lock_irqsave(&dio->bio_lock, flags); dio->refcount++; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 8cb2cf612e49b816b10f9979d1952b5a868fd8e7..92c41cdf256e379283a1d73fd5ec4c344eb85d3c 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -7,6 +7,7 @@ #include "zdata.h" #include "compress.h" #include +#include #include @@ -1165,6 +1166,8 @@ static void z_erofs_submit_queue(struct super_block *sb, pgoff_t last_index; unsigned int nr_bios = 0; struct bio *bio = NULL; + unsigned long pflags; + int memstall = 0; bi_private = jobqueueset_init(sb, q, fgq, force_fg); qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head; @@ -1204,9 +1207,18 @@ static void z_erofs_submit_queue(struct super_block *sb, if (bio && cur != last_index + 1) { submit_bio_retry: submit_bio(bio); + if (memstall) { + psi_memstall_leave(&pflags); + memstall = 0; + } bio = NULL; } + if (unlikely(PageWorkingset(page)) && !memstall) { + psi_memstall_enter(&pflags); + memstall = 1; + } + if (!bio) { bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); @@ -1234,8 +1246,11 @@ static void z_erofs_submit_queue(struct super_block *sb, move_to_bypass_jobqueue(pcl, qtail, owned_head); } while (owned_head != Z_EROFS_PCLUSTER_TAIL); - if (bio) + if (bio) { submit_bio(bio); + if (memstall) + psi_memstall_leave(&pflags); + } /* * although background is preferred, no one is pending for submission. diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1853ec569b72d2d6dd52f4f8730f9ac2ebf77370..ca4243cb90741255d26716058d34056355331a35 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -291,7 +291,6 @@ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ - BIO_WORKINGSET, /* contains userspace workingset pages */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0bfa9cce65890d449443512509116852b826b0bf..728550720e12666f46351c8a11708957a48af6aa 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -806,6 +806,8 @@ struct readahead_control { pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; + bool _workingset; + unsigned long _pflags; }; #define DEFINE_READAHEAD(rac, f, m, i) \ diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 11a43dccb7fcbaad9279eb294376b8d1fa6d65a2..33bf2afce83d63cc693bc90ce31c8a68809bb64b 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -942,6 +942,7 @@ void psi_memstall_enter(unsigned long *flags) rq_unlock_irq(rq, &rf); } +EXPORT_SYMBOL_GPL(psi_memstall_enter); /** * psi_memstall_leave - mark the end of an memory stall section @@ -973,6 +974,7 @@ void psi_memstall_leave(unsigned long *flags) rq_unlock_irq(rq, &rf); } +EXPORT_SYMBOL_GPL(psi_memstall_leave); #ifdef CONFIG_CGROUPS int psi_cgroup_alloc(struct cgroup *cgroup) diff --git a/mm/filemap.c b/mm/filemap.c index fd4aae06ff150cbee001838854eb95e065695d94..8beb7ccae51f0cced7e6bbda40ad5f7f1ac16157 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2294,6 +2294,8 @@ generic_file_buffered_read_readpage(struct kiocb *iocb, struct page *page) { struct file_ra_state *ra = &filp->f_ra; + bool workingset = PageWorkingset(page); + unsigned long pflags; int error; if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) { @@ -2308,8 +2310,13 @@ generic_file_buffered_read_readpage(struct kiocb *iocb, * PG_error will be set again if readpage fails. */ ClearPageError(page); + /* Start the actual read. The read will unlock the page. */ + if (unlikely(workingset)) + psi_memstall_enter(&pflags); error = mapping->a_ops->readpage(filp, page); + if (unlikely(workingset)) + psi_memstall_leave(&pflags); if (unlikely(error)) { put_page(page); diff --git a/mm/readahead.c b/mm/readahead.c index ed23d5dec12387fd862caad619b9d072d44649fc..a9e6169cb3710460ff57070aa5b81f89ba79cf52 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -125,6 +126,8 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages, if (!readahead_count(rac)) goto out; + if (unlikely(rac->_workingset)) + psi_memstall_enter(&rac->_pflags); blk_start_plug(&plug); if (aops->readahead) { @@ -149,6 +152,9 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages, } blk_finish_plug(&plug); + if (unlikely(rac->_workingset)) + psi_memstall_leave(&rac->_pflags); + rac->_workingset = false; BUG_ON(!list_empty(pages)); BUG_ON(readahead_count(rac)); @@ -228,6 +234,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, } if (i == nr_to_read - lookahead_size) SetPageReadahead(page); + ractl->_workingset |= PageWorkingset(page); ractl->_nr_pages++; }