From 9726a03d14604175f3528214675409206157f9ee Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 4 Jun 2024 20:46:47 +0800 Subject: [PATCH] anolis: fuse: separate bg_queue for write and other requests ANBZ: #9340 Readahead may be starved by the writeback wave, since the writeback routine sends forced background requests which are enqueued into the bg_queue list without considering the max_background limit, while the background requests sent by the readahead routine are non-forced and thus throttled by the max_background limit. There can be hundreds of thousands of WRITE requests queued in the bg_queue list prior to READ requests, and thus the asynchronous readahead can be starved by the writeback wave. Fix this by introducing two bg_queue lists and separating WRITE requests from the others. Also make the readahead routine send forced background requests. Besides, also introduce the FUSE_SEPARATE_BACKGROUND init flag. When the FUSE_SEPARATE_BACKGROUND init flag is set, there are two separate background queues, one for WRITE requests and one for the others. The number of active background requests is also counted separately for these two sorts of requests in this case, and thus there are at most max_background in-flight background requests for each sort of request. 
Signed-off-by: Joseph Qi Signed-off-by: Jingbo Xu --- fs/fuse/dev.c | 71 ++++++++++++++++++++++++++++++++++----- fs/fuse/file.c | 5 +++ fs/fuse/fuse_i.h | 21 +++++++++--- fs/fuse/inode.c | 10 +++--- include/uapi/linux/fuse.h | 4 +++ 5 files changed, 93 insertions(+), 18 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index aa6d0beb461a..e9c4f3c7c148 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -29,6 +29,8 @@ MODULE_ALIAS("devname:fuse"); #define FUSE_INT_REQ_BIT (1ULL << 0) #define FUSE_REQ_ID_STEP (1ULL << 1) +#define DEFAULT_BG_QUEUE READ + static struct kmem_cache *fuse_req_cachep; static struct fuse_dev *fuse_get_dev(struct file *file) @@ -257,21 +259,72 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, } } -static void flush_bg_queue(struct fuse_conn *fc) +static void fuse_add_bg_queue(struct fuse_conn *fc, struct fuse_req *req) { - struct fuse_iqueue *fiq = &fc->iq; + if (fc->separate_background) { + if (req->args->opcode == FUSE_WRITE) + list_add_tail(&req->list, &fc->bg_queue[WRITE]); + else + list_add_tail(&req->list, &fc->bg_queue[READ]); + } else { + /* default to one single background queue */ + list_add_tail(&req->list, &fc->bg_queue[DEFAULT_BG_QUEUE]); + } - while (fc->active_background < fc->max_background && - !list_empty(&fc->bg_queue)) { - struct fuse_req *req; +} + +static void fuse_dec_active_bg(struct fuse_conn *fc, struct fuse_req *req) +{ + if (fc->separate_background) { + if (req->args->opcode == FUSE_WRITE) + fc->active_background[WRITE]--; + else + fc->active_background[READ]--; + } else { + /* default to one single count */ + fc->active_background[DEFAULT_BG_QUEUE]--; + } +} - req = list_first_entry(&fc->bg_queue, struct fuse_req, list); +/* bg_queue needs to be further flushed when true returned */ +static bool do_flush_bg_queue(struct fuse_conn *fc, unsigned int index, + unsigned int batch) +{ + struct fuse_iqueue *fiq = &fc->iq; + struct fuse_req *req; + unsigned int count = 0; + + while 
(fc->active_background[index] < fc->max_background && + !list_empty(&fc->bg_queue[index])) { + if (batch && count++ == batch) + return true; + req = list_first_entry(&fc->bg_queue[index], + struct fuse_req, list); list_del(&req->list); - fc->active_background++; + fc->active_background[index]++; spin_lock(&fiq->lock); req->in.h.unique = fuse_get_unique(fiq); queue_request_and_unlock(fiq, req); } + return false; +} + +static void flush_bg_queue(struct fuse_conn *fc) +{ + if (!fc->separate_background) { + do_flush_bg_queue(fc, DEFAULT_BG_QUEUE, 0); + } else { + bool proceed_write = true, proceed_other = true; + + do { + if (proceed_other) + proceed_other = do_flush_bg_queue(fc, READ, + FUSE_DEFAULT_MAX_BACKGROUND); + if (proceed_write) + proceed_write = do_flush_bg_queue(fc, WRITE, + FUSE_DEFAULT_MAX_BACKGROUND); + } while (proceed_other || proceed_write); + } } static void fuse_update_stats(struct fuse_conn *fc, struct fuse_req *req) @@ -343,7 +396,7 @@ void fuse_request_end(struct fuse_req *req) clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); } fc->num_background--; - fc->active_background--; + fuse_dec_active_bg(fc, req); flush_bg_queue(fc); spin_unlock(&fc->bg_lock); } else { @@ -574,7 +627,7 @@ static bool fuse_request_queue_background(struct fuse_req *req) set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC); set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC); } - list_add_tail(&req->list, &fc->bg_queue); + fuse_add_bg_queue(fc, req); flush_bg_queue(fc); queued = true; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 0d5fd6dfe213..c721bd4fa3dd 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -967,6 +967,11 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) if (fm->fc->async_read) { ia->ff = fuse_file_get(ff); ap->args.end = fuse_readpages_end; + /* force background request to avoid starvation from writeback */ + if (fm->fc->separate_background) { + ap->args.force = true; + ap->args.nocreds = true; + } err = fuse_simple_background(fm, 
&ap->args, GFP_KERNEL); if (!err) return; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 5de745f84744..fa58fc602b56 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -39,6 +39,9 @@ /** Maximum of max_pages received in init_out */ #define FUSE_MAX_MAX_PAGES 1024 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -637,11 +640,18 @@ struct fuse_conn { /** Number of requests currently in the background */ unsigned num_background; - /** Number of background requests currently queued for userspace */ - unsigned active_background; + /* + * Number of background requests currently queued for userspace. + * active_background[WRITE] for WRITE requests, and + * active_background[READ] for others. + */ + unsigned active_background[2]; - /** The list of background requests set aside for later queuing */ - struct list_head bg_queue; + /* + * The list of background requests set aside for later queuing. + * bg_queue[WRITE] for WRITE requests, bg_queue[READ] for others. 
+ */ + struct list_head bg_queue[2]; /** Protects: max_background, congestion_threshold, num_background, * active_background, bg_queue, blocked */ @@ -840,6 +850,9 @@ struct fuse_conn { /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ unsigned int direct_io_allow_mmap:1; + /* separate background queue for WRITE requests and the others */ + unsigned int separate_background:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e8f31ce4dffd..1d531e8a03a4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -54,9 +54,6 @@ MODULE_PARM_DESC(max_user_congthresh, #define FUSE_DEFAULT_BLKSIZE 512 -/** Maximum number of outstanding background requests */ -#define FUSE_DEFAULT_MAX_BACKGROUND 12 - /** Congestion starts at 75% of maximum */ #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) @@ -793,7 +790,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, atomic_set(&fc->dev_count, 1); init_waitqueue_head(&fc->blocked_waitq); fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); - INIT_LIST_HEAD(&fc->bg_queue); + INIT_LIST_HEAD(&fc->bg_queue[READ]); + INIT_LIST_HEAD(&fc->bg_queue[WRITE]); INIT_LIST_HEAD(&fc->entry); INIT_LIST_HEAD(&fc->devices); idr_init(&fc->passthrough_req); @@ -1201,6 +1199,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->delete_stale = 1; if (flags & FUSE_NO_EXPORT_SUPPORT) fm->sb->s_export_op = &fuse_export_fid_operations; + if (flags & FUSE_SEPARATE_BACKGROUND) + fc->separate_background = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1247,7 +1247,7 @@ static void fuse_prepare_send_init(struct fuse_mount *fm, FUSE_INVAL_CACHE_INFAIL | FUSE_CLOSE_TO_OPEN | FUSE_INVALDIR_ALLENTRY | FUSE_DELETE_STALE | FUSE_DIRECT_IO_ALLOW_MMAP | FUSE_NO_EXPORT_SUPPORT | - FUSE_HAS_RESEND; + FUSE_HAS_RESEND | FUSE_SEPARATE_BACKGROUND; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) 
flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index fd8ea0cca5bf..2bcce22ed6d4 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -349,6 +349,8 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests + * FUSE_SEPARATE_BACKGROUND: separate background queue for WRITE requests and + * the others */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -386,6 +388,8 @@ struct fuse_file_lock { #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) #define FUSE_HAS_RESEND (1ULL << 39) +#define FUSE_SEPARATE_BACKGROUND (1ULL << 56) +/* The 57th bit is left to FUSE_HAS_RECOVERY */ #define FUSE_DELETE_STALE (1ULL << 58) /* The 59th bit is left to FUSE_DIO_SHARED_MMAP */ #define FUSE_INVAL_CACHE_INFAIL (1ULL << 60) -- Gitee