From 57a46cf00b7e9a6a084f38d6dba8af4d6528040c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Aug 2024 21:10:35 +0800 Subject: [PATCH 1/3] blk-wbt: move private information from blk-wbt.h to blk-wbt.c mainline inclusion from mainline-v6.3-rc1 commit 0bc65bd41dfd2f75b9f38812326d767db5cd0663 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IA8D5J CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0bc65bd41dfd2f75b9f38812326d767db5cd0663 -------------------------------- A large part of blk-wbt.h is only used in blk-wbt.c, so move it there. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230203150400.3199230-11-hch@lst.de Signed-off-by: Jens Axboe Conflicts: block/blk-mq-sched.c block/blk-settings.c block/blk-sysfs.c block/blk-wbt.c block/elevator.c [1. different context for header files; 2. also add blk-rq-qos.h in blk-mq-sched.c and elevator.c to avoid kabi change.] Signed-off-by: Yu Kuai --- block/blk-mq-sched.c | 1 + block/blk-settings.c | 1 + block/blk-sysfs.c | 1 + block/blk-wbt.c | 77 +++++++++++++++++++++++++++++++++++++++ block/blk-wbt.h | 86 -------------------------------------------- block/elevator.c | 1 + 6 files changed, 81 insertions(+), 86 deletions(-) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c92d25b71a72..29f8a6df6b18 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -16,6 +16,7 @@ #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-mq-tag.h" +#include "blk-rq-qos.h" #include "blk-wbt.h" void blk_mq_sched_assign_ioc(struct request *rq) diff --git a/block/blk-settings.c b/block/blk-settings.c index d1a1f963c3eb..7cdf95b6a568 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -15,6 +15,7 @@ #include #include "blk.h" +#include "blk-rq-qos.h" #include "blk-wbt.h" unsigned long blk_max_low_pfn; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 078aace75204..293a4af1e0bc 
100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -16,6 +16,7 @@ #include "blk.h" #include "blk-mq.h" #include "blk-mq-debugfs.h" +#include "blk-rq-qos.h" #include "blk-wbt.h" struct queue_sysfs_entry { diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 6a90d33e6f6a..799caf8e4dcb 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -25,12 +25,79 @@ #include #include +#include "blk-stat.h" #include "blk-wbt.h" #include "blk-rq-qos.h" #define CREATE_TRACE_POINTS #include +enum wbt_flags { + WBT_TRACKED = 1, /* write, tracked for throttling */ + WBT_READ = 2, /* read */ + WBT_KSWAPD = 4, /* write, from kswapd */ + WBT_DISCARD = 8, /* discard */ + + WBT_NR_BITS = 4, /* number of bits */ +}; + +enum { + WBT_RWQ_BG = 0, + WBT_RWQ_KSWAPD, + WBT_RWQ_DISCARD, + WBT_NUM_RWQ, +}; + +/* + * If current state is WBT_STATE_ON/OFF_DEFAULT, it can be covered to any other + * state, if current state is WBT_STATE_ON/OFF_MANUAL, it can only be covered + * to WBT_STATE_OFF/ON_MANUAL. + */ +enum { + WBT_STATE_ON_DEFAULT = 1, /* on by default */ + WBT_STATE_ON_MANUAL = 2, /* on manually by sysfs */ + WBT_STATE_OFF_DEFAULT = 3, /* off by default */ + WBT_STATE_OFF_MANUAL = 4, /* off manually by sysfs */ +}; + +struct rq_wb { + /* + * Settings that govern how we throttle + */ + unsigned int wb_background; /* background writeback */ + unsigned int wb_normal; /* normal writeback */ + + short enable_state; /* WBT_STATE_* */ + + /* + * Number of consecutive periods where we don't have enough + * information to make a firm scale up/down decision. 
+ */ + unsigned int unknown_cnt; + + u64 win_nsec; /* default window size */ + u64 cur_win_nsec; /* current window size */ + + struct blk_stat_callback *cb; + + u64 sync_issue; + void *sync_cookie; + + unsigned int wc; + + unsigned long last_issue; /* last non-throttled issue */ + unsigned long last_comp; /* last non-throttled comp */ + unsigned long min_lat_nsec; + struct rq_qos rqos; + struct rq_wait rq_wait[WBT_NUM_RWQ]; + struct rq_depth rq_depth; +}; + +static inline struct rq_wb *RQWB(struct rq_qos *rqos) +{ + return container_of(rqos, struct rq_wb, rqos); +} + static inline void wbt_clear_state(struct request *rq) { rq->wbt_flags = 0; @@ -225,6 +292,16 @@ static u64 rwb_sync_issue_lat(struct rq_wb *rwb) return now - issue; } +static inline unsigned int wbt_inflight(struct rq_wb *rwb) +{ + unsigned int i, ret = 0; + + for (i = 0; i < WBT_NUM_RWQ; i++) + ret += atomic_read(&rwb->rq_wait[i].inflight); + + return ret; +} + enum { LAT_OK = 1, LAT_UNKNOWN, diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 824047c395ff..22c8025b9cbc 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -2,92 +2,6 @@ #ifndef WB_THROTTLE_H #define WB_THROTTLE_H -#include -#include -#include -#include -#include - -#include "blk-stat.h" -#include "blk-rq-qos.h" - -enum wbt_flags { - WBT_TRACKED = 1, /* write, tracked for throttling */ - WBT_READ = 2, /* read */ - WBT_KSWAPD = 4, /* write, from kswapd */ - WBT_DISCARD = 8, /* discard */ - - WBT_NR_BITS = 4, /* number of bits */ -}; - -enum { - WBT_RWQ_BG = 0, - WBT_RWQ_KSWAPD, - WBT_RWQ_DISCARD, - WBT_NUM_RWQ, -}; - -/* - * If current state is WBT_STATE_ON/OFF_DEFAULT, it can be covered to any other - * state, if current state is WBT_STATE_ON/OFF_MANUAL, it can only be covered - * to WBT_STATE_OFF/ON_MANUAL. 
- */ -enum { - WBT_STATE_ON_DEFAULT = 1, /* on by default */ - WBT_STATE_ON_MANUAL = 2, /* on manually by sysfs */ - WBT_STATE_OFF_DEFAULT = 3, /* off by default */ - WBT_STATE_OFF_MANUAL = 4, /* off manually by sysfs */ -}; - -struct rq_wb { - /* - * Settings that govern how we throttle - */ - unsigned int wb_background; /* background writeback */ - unsigned int wb_normal; /* normal writeback */ - - short enable_state; /* WBT_STATE_* */ - - /* - * Number of consecutive periods where we don't have enough - * information to make a firm scale up/down decision. - */ - unsigned int unknown_cnt; - - u64 win_nsec; /* default window size */ - u64 cur_win_nsec; /* current window size */ - - struct blk_stat_callback *cb; - - u64 sync_issue; - void *sync_cookie; - - unsigned int wc; - - unsigned long last_issue; /* last non-throttled issue */ - unsigned long last_comp; /* last non-throttled comp */ - unsigned long min_lat_nsec; - struct rq_qos rqos; - struct rq_wait rq_wait[WBT_NUM_RWQ]; - struct rq_depth rq_depth; -}; - -static inline struct rq_wb *RQWB(struct rq_qos *rqos) -{ - return container_of(rqos, struct rq_wb, rqos); -} - -static inline unsigned int wbt_inflight(struct rq_wb *rwb) -{ - unsigned int i, ret = 0; - - for (i = 0; i < WBT_NUM_RWQ; i++) - ret += atomic_read(&rwb->rq_wait[i].inflight); - - return ret; -} - - #ifdef CONFIG_BLK_WBT int wbt_init(struct request_queue *); diff --git a/block/elevator.c b/block/elevator.c index 6f7de2ffad0e..87199709e0b5 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -43,6 +43,7 @@ #include "blk.h" #include "blk-mq-sched.h" #include "blk-pm.h" +#include "blk-rq-qos.h" #include "blk-wbt.h" static DEFINE_SPINLOCK(elv_list_lock); -- Gitee From c6e6da4f8def8584db4a5fa3160c4abb3216dc51 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 5 Aug 2024 21:10:36 +0800 Subject: [PATCH 2/3] blk-wbt: remove wbt_track stub mainline inclusion from mainline-v5.18-rc1 commit 8d7829ebc1e48208b3c02c2a10c5f8856246033c category: bugfix 
bugzilla: https://gitee.com/openeuler/kernel/issues/IA8D5J CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8d7829ebc1e48208b3c02c2a10c5f8856246033c -------------------------------- cppcheck returns this warning [block/blk-wbt.h:104] -> [block/blk-wbt.c:592]: (warning) Function 'wbt_track' argument order different: declaration 'rq, flags, ' definition 'rqos, rq, bio' In commit c1c80384c8f4 ("block: remove external dependency on wbt_flags") wbt_track was removed for the real declaration, its stub should have been as well. Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20220331185458.3427454-1-trix@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Yu Kuai --- block/blk-wbt.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/block/blk-wbt.h b/block/blk-wbt.h index 22c8025b9cbc..02f294f14734 100644 --- a/block/blk-wbt.h +++ b/block/blk-wbt.h @@ -18,9 +18,6 @@ u64 wbt_default_latency_nsec(struct request_queue *); #else -static inline void wbt_track(struct request *rq, enum wbt_flags flags) -{ -} static inline int wbt_init(struct request_queue *q) { return -EINVAL; -- Gitee From 2edcac422e7e341148ae19dddb0d3ce385ec3b23 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 5 Aug 2024 21:10:37 +0800 Subject: [PATCH 3/3] blk-wbt: don't throttle swap writes in direct reclaim mainline inclusion from mainline-v6.11-rc1 commit 4e63aeb5d0101ddada36a2f64f048e2f9d2202fc category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IA8D5J CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4e63aeb5d0101ddada36a2f64f048e2f9d2202fc -------------------------------- Now we avoid throttling swap writes by determining whether the current process is kswapd (aka current_is_kswapd()), but swap writes can come from either kswapd or direct reclaim, so the swap writes from direct reclaim will still be throttled. 
When a process holds a lock to allocate a free page, and enters direct reclaim because there is no free memory, then it might trigger a hang due to the wbt throttling that causes other processes to fail to get the lock. Both kswapd and direct reclaim set the REQ_SWAP flag, so use REQ_SWAP instead of current_is_kswapd() to avoid throttling swap writes. Also renamed WBT_KSWAPD to WBT_SWAP and WBT_RWQ_KSWAPD to WBT_RWQ_SWAP. Signed-off-by: Baokun Li Reviewed-by: Yu Kuai Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240604030522.3686177-1-libaokun@huaweicloud.com Signed-off-by: Jens Axboe Conflicts: block/blk-wbt.c [commit 16458cf3bd15 ("block: Use the new blk_opf_t type") is not backported] Signed-off-by: Yu Kuai --- block/blk-wbt.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 799caf8e4dcb..183f26a83347 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -35,7 +35,7 @@ enum wbt_flags { WBT_TRACKED = 1, /* write, tracked for throttling */ WBT_READ = 2, /* read */ - WBT_KSWAPD = 4, /* write, from kswapd */ + WBT_SWAP = 4, /* write, from swap_writepage() */ WBT_DISCARD = 8, /* discard */ WBT_NR_BITS = 4, /* number of bits */ @@ -43,7 +43,7 @@ enum wbt_flags { enum { WBT_RWQ_BG = 0, - WBT_RWQ_KSWAPD, + WBT_RWQ_SWAP, WBT_RWQ_DISCARD, WBT_NUM_RWQ, }; @@ -172,8 +172,8 @@ static bool wb_recent_wait(struct rq_wb *rwb) static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb, enum wbt_flags wb_acct) { - if (wb_acct & WBT_KSWAPD) - return &rwb->rq_wait[WBT_RWQ_KSWAPD]; + if (wb_acct & WBT_SWAP) + return &rwb->rq_wait[WBT_RWQ_SWAP]; else if (wb_acct & WBT_DISCARD) return &rwb->rq_wait[WBT_RWQ_DISCARD]; @@ -536,7 +536,7 @@ static bool close_io(struct rq_wb *rwb) time_before(now, rwb->last_comp + HZ / 10); } -#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO) +#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP) static inline unsigned int get_limit(struct rq_wb *rwb, 
unsigned long rw) { @@ -554,13 +554,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) /* * At this point we know it's a buffered write. If this is - * kswapd trying to free memory, or REQ_SYNC is set, then + * swap trying to free memory, or REQ_SYNC is set, then * it's WB_SYNC_ALL writeback, and we'll use the max limit for * that. If the write is marked as a background write, then use * the idle limit, or go to normal if we haven't had competing * IO for a bit. */ - if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd()) + if ((rw & REQ_HIPRIO) || wb_recent_wait(rwb)) limit = rwb->rq_depth.max_depth; else if ((rw & REQ_BACKGROUND) || close_io(rwb)) { /* @@ -637,8 +637,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio) if (bio_op(bio) == REQ_OP_READ) { flags = WBT_READ; } else if (wbt_should_throttle(rwb, bio)) { - if (current_is_kswapd()) - flags |= WBT_KSWAPD; + if (bio->bi_opf & REQ_SWAP) + flags |= WBT_SWAP; if (bio_op(bio) == REQ_OP_DISCARD) flags |= WBT_DISCARD; flags |= WBT_TRACKED; -- Gitee