From 3dee4a6e5e495c43a81ca256b361975dc46010ce Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:14 +0800 Subject: [PATCH 01/23] mm: add bdi_set_strict_limit() function mainline inclusion from mainline-v6.2-rc1 commit 8e9d5ead865a1a7af74a444d2f00f1ef4539bfba category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8e9d5ead865a1a7af74a444d2f00f1ef4539bfba -------------------------------- Patch series "mm/block: add bdi sysfs knobs", v4. At Meta, network block devices (nbd) are used to implement remote block storage. In testing and during production it has been observed that these network block devices can consume a huge portion of the dirty writeback cache and writeback can take a considerable time. To be able to give stricter limits, I'm proposing the following changes: 1) introduce strictlimit knob Currently the max_ratio knob exists to limit the dirty_memory. However this knob only applies once (dirty_ratio + dirty_background_ratio) / 2 has been reached. With the BDI_CAP_STRICTLIMIT flag, the max_ratio can be applied without reaching that limit. This change exposes that knob. This knob can also be useful for NFS, fuse filesystems and USB devices. 2) Use part of 1000000 for internal calculations The max_ratio is based on percentage. With the current machine sizes percentage values can be very high (1% of a 256GB main memory is already 2.5GB). This change uses part of 1000000 instead of percentages for the internal calculations. 3) Introduce two new sysfs knobs: min_bytes and max_bytes. Currently all calculations are based on ratio, but for a user it is often more convenient to specify a limit in bytes. The new knobs will not store bytes values, instead they will translate the byte value to a corresponding ratio. As the internal values are now part of 1000000, the ratio is closer to the specified value. 
However the value should be seen more as an approximation as it can fluctuate over time. 4) Introduce two new sysfs knobs: min_ratio_fine and max_ratio_fine. The granularity for the existing sysfs bdi knobs min_ratio and max_ratio is based on percentage values. The new sysfs bdi knobs min_ratio_fine and max_ratio_fine allow specifying the ratio as part of 1 million. This patch (of 20): This adds the bdi_set_strict_limit function to be able to set/unset the BDI_CAP_STRICTLIMIT flag. Link: https://lkml.kernel.org/r/20221119005215.3052436-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20221119005215.3052436-2-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Jens Axboe Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a689a21abe10..c67129defa3a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,6 +106,7 @@ static inline unsigned long wb_stat_error(void) int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0d7cc65c6367..1f6104775a43 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -724,6 +724,21 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) +{ + if (strict_limit > 1) + return -EINVAL; + + spin_lock_bh(&bdi_lock); + if (strict_limit) + bdi->capabilities |= BDI_CAP_STRICTLIMIT; + else + bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; + spin_unlock_bh(&bdi_lock); + + return 0; +} + static 
unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { -- Gitee From 72e6de7a4c2b4281fb865c63453a60ad2115262e Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:15 +0800 Subject: [PATCH 02/23] mm: add knob /sys/class/bdi//strict_limit mainline inclusion from mainline-v6.2-rc1 commit 27bbe9d48d4e298864e18b39f091342c68b81637 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=27bbe9d48d4e298864e18b39f091342c68b81637 -------------------------------- Add a new knob to /sys/class/bdi//strict_limit. This new knob allows to set/unset the flag BDI_CAP_STRICTLIMIT in the bdi capabilities. Link: https://lkml.kernel.org/r/20221119005215.3052436-3-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5f4f16dbff9d..d2a7b3dbfcda 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -207,11 +207,40 @@ static ssize_t stable_pages_required_show(struct device *dev, } static DEVICE_ATTR_RO(stable_pages_required); +static ssize_t strict_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int strict_limit; + ssize_t ret; + + ret = kstrtouint(buf, 10, &strict_limit); + if (ret < 0) + return ret; + + ret = bdi_set_strict_limit(bdi, strict_limit); + if (!ret) + ret = count; + + return ret; +} + +static ssize_t strict_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); +} +static DEVICE_ATTR_RW(strict_limit); + static struct 
attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, &dev_attr_stable_pages_required.attr, + &dev_attr_strict_limit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); -- Gitee From 54b6d108b688538df73adf39fb5332a489927ec3 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:16 +0800 Subject: [PATCH 03/23] mm: document /sys/class/bdi//strict_limit knob mainline inclusion from mainline-v6.2-rc1 commit 16b837eb84e6948f92411eb32e97a05f89733ddc category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=16b837eb84e6948f92411eb32e97a05f89733ddc -------------------------------- This documents the new /sys/class/bdi//strict_limit knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-4-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflict: Documentation/ABI/testing/sysfs-class-bdi [Context differences] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5402bd74ba43..9982fd0439ae 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,6 +48,17 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. + (read-write) +What: /sys/class/bdi//strict_limit +Date: October 2022 +Contact: Stefan Roesch +Description: + Forces per-BDI checks for the share of given device in the write-back + cache even before the global background dirty limit is reached. This + is useful in situations where the global limit is much higher than + affordable for given relatively slow (or untrusted) device. 
Turning + strictlimit on has no visible effect if max_ratio is equal to 100%. + stable_pages_required (read-only) If set, the backing device requires that all pages comprising a write -- Gitee From 98d095169de9258a3ca3b926fdb60b6f2d16be1f Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 29 Aug 2024 10:31:17 +0800 Subject: [PATCH 04/23] mm: rework calculation of bdi_min_ratio in bdi_set_min_ratio mainline inclusion from mainline-v5.19-rc1 commit 21f0dd88f23dc9dc46b781f8ec9acf975dca4e6e category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=21f0dd88f23dc9dc46b781f8ec9acf975dca4e6e -------------------------------- In function bdi_set_min_ratio, min_ratio is unsigned int, it will result underflow when setting min_ratio below bdi->min_ratio, it is confusing. Rework it, no functional change. Link: https://lkml.kernel.org/r/20220422095159.2858305-1-chenwandun@huawei.com Signed-off-by: Chen Wandun Cc: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 1f6104775a43..761cc9e865ae 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -685,18 +685,25 @@ static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { + unsigned int delta; int ret = 0; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { - min_ratio -= bdi->min_ratio; - if (bdi_min_ratio + min_ratio < 100) { - bdi_min_ratio += min_ratio; - bdi->min_ratio += min_ratio; + if (min_ratio < bdi->min_ratio) { + delta = bdi->min_ratio - min_ratio; + bdi_min_ratio -= delta; + bdi->min_ratio = min_ratio; } else { - ret = -EINVAL; + delta = min_ratio - bdi->min_ratio; + if (bdi_min_ratio + delta < 100) { + bdi_min_ratio += delta; + 
bdi->min_ratio = min_ratio; + } else { + ret = -EINVAL; + } } } spin_unlock_bh(&bdi_lock); -- Gitee From e9c7ace471583e75a6425b5736fcec1c6dacd599 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:18 +0800 Subject: [PATCH 05/23] mm: use part per 1000000 for bdi ratios mainline inclusion from mainline-v6.2-rc1 commit ae82291e9ca47c3d6da6b77a00f427754aca413e category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ae82291e9ca47c3d6da6b77a00f427754aca413e -------------------------------- To get finer granularity for ratio calculations use parts per million instead of percentages. This is especially important if we want to automatically convert byte values to ratios. Otherwise the values that are actually used can be quite different. This is also important for machines with more main memory (1% of 256GB is already 2.5GB). Link: https://lkml.kernel.org/r/20221119005215.3052436-5-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 3 +++ mm/backing-dev.c | 6 +++--- mm/page-writeback.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c67129defa3a..75a25b2145c3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,6 +104,9 @@ static inline unsigned long wb_stat_error(void) #endif } +/* BDI ratio is expressed as part per 1000000 for finer granularity. 
*/ +#define BDI_RATIO_SCALE 10000 + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d2a7b3dbfcda..b0fff65d9962 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -176,7 +176,7 @@ static ssize_t min_ratio_store(struct device *dev, return ret; } -BDI_SHOW(min_ratio, bdi->min_ratio) +BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -195,7 +195,7 @@ static ssize_t max_ratio_store(struct device *dev, return ret; } -BDI_SHOW(max_ratio, bdi->max_ratio) +BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, @@ -809,7 +809,7 @@ static int bdi_init(struct backing_dev_info *bdi) kref_init(&bdi->refcnt); bdi->min_ratio = 0; - bdi->max_ratio = 100; + bdi->max_ratio = 100 * BDI_RATIO_SCALE; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 761cc9e865ae..203c7ebe84ea 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -198,7 +198,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, min *= this_bw; min = div64_ul(min, tot_bw); } - if (max < 100) { + if (max < 100 * BDI_RATIO_SCALE) { max *= this_bw; max = div64_ul(max, tot_bw); } @@ -688,6 +688,8 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) unsigned int delta; int ret = 0; + min_ratio *= BDI_RATIO_SCALE; + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; @@ -698,7 +700,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) bdi->min_ratio = min_ratio; } else { delta = 
min_ratio - bdi->min_ratio; - if (bdi_min_ratio + delta < 100) { + if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) { bdi_min_ratio += delta; bdi->min_ratio = min_ratio; } else { @@ -717,6 +719,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) if (max_ratio > 100) return -EINVAL; + max_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { @@ -808,15 +811,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) fprop_fraction_percpu(&dom->completions, dtc->wb_completions, &numerator, &denominator); - wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; + wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); wb_thresh *= numerator; wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); - wb_thresh += (thresh * wb_min_ratio) / 100; - if (wb_thresh > (thresh * wb_max_ratio) / 100) - wb_thresh = thresh * wb_max_ratio / 100; + wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE); + if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE)) + wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE); return wb_thresh; } -- Gitee From edf87b2509d841618635050e0b6b1601dbb97089 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:19 +0800 Subject: [PATCH 06/23] mm: add bdi_get_max_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 00df7d51263b46ed93f7572e2d09579746f7b1eb category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=00df7d51263b46ed93f7572e2d09579746f7b1eb -------------------------------- This adds a function to return the specified value for max_bytes. It converts the stored max_ratio of the bdi to the corresponding bytes value. It introduces the bdi_get_bytes helper function to do the conversion. 
This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The helper function will also be used by the min_bytes bdi knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-6-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 75a25b2145c3..91bc6e9adb14 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,6 +107,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. */ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 203c7ebe84ea..8d83bd9e1c25 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -683,6 +683,18 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; +static u64 bdi_get_bytes(unsigned int ratio) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + u64 bytes; + + global_dirty_limits(&background_thresh, &dirty_thresh); + bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100; + + return bytes; +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; @@ -734,6 +746,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_max_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->max_ratio); +} + int 
bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) -- Gitee From ae3545a0d5d7a988fd5bcf550f74b1ded632d1a8 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:20 +0800 Subject: [PATCH 07/23] mm: split off __bdi_set_max_ratio() function mainline inclusion from mainline-v6.2-rc1 commit efc3e6ad53ea14225b434fddca261c9a1c56c707 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=efc3e6ad53ea14225b434fddca261c9a1c56c707 -------------------------------- This splits off __bdi_set_max_ratio() from bdi_set_max_ratio(). __bdi_set_max_ratio() will also be called from bdi_set_max_bytes(), which will be introduced in the next patch. Link: https://lkml.kernel.org/r/20221119005215.3052436-7-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8d83bd9e1c25..e0de996c1c55 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -725,14 +725,10 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) return ret; } -int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { int ret = 0; - if (max_ratio > 100) - return -EINVAL; - max_ratio *= BDI_RATIO_SCALE; - spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -744,6 +740,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) return ret; } + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + if (max_ratio > 100) + return -EINVAL; + + return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); +} 
EXPORT_SYMBOL(bdi_set_max_ratio); u64 bdi_get_max_bytes(struct backing_dev_info *bdi) -- Gitee From ba4e5b468a52a20cd826a0e1747a4d94b9310d24 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:21 +0800 Subject: [PATCH 08/23] mm: add bdi_set_max_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 1bf27e98d26d1e62166a456ef17460be085cbe0b category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1bf27e98d26d1e62166a456ef17460be085cbe0b -------------------------------- This introduces the bdi_set_max_bytes() function. The max_bytes function does not store the max_bytes value. Instead it converts the max_bytes value into the corresponding ratio value. Link: https://lkml.kernel.org/r/20221119005215.3052436-8-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 91bc6e9adb14..62e15821ff1c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -110,6 +110,7 @@ static inline unsigned long wb_stat_error(void) u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e0de996c1c55..b13965fff44c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -683,6 +684,28 @@ void wb_domain_exit(struct 
wb_domain *dom) */ static unsigned int bdi_min_ratio; +static int bdi_check_pages_limit(unsigned long pages) +{ + unsigned long max_dirty_pages = global_dirtyable_memory(); + + if (pages > max_dirty_pages) + return -EINVAL; + + return 0; +} + +static unsigned long bdi_ratio_from_pages(unsigned long pages) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long ratio; + + global_dirty_limits(&background_thresh, &dirty_thresh); + ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); + + return ratio; +} + static u64 bdi_get_bytes(unsigned int ratio) { unsigned long background_thresh; @@ -755,6 +778,20 @@ u64 bdi_get_max_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->max_ratio); } +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes) +{ + int ret; + unsigned long pages = max_bytes >> PAGE_SHIFT; + unsigned long max_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + max_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) -- Gitee From 02c0983dfbf976133e9e1fff6d35fd871d266074 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:22 +0800 Subject: [PATCH 09/23] mm: add knob /sys/class/bdi//max_bytes mainline inclusion from mainline-v6.2-rc1 commit c56e049a5e401a177c7c9b39a3bcc973ff5cec0b category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c56e049a5e401a177c7c9b39a3bcc973ff5cec0b -------------------------------- This adds the new knob max_bytes to specify a dirty memory limit for the corresponding bdi. The specified bytes value is converted to a ratio. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-9-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b0fff65d9962..383bf0a8f90e 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -197,6 +197,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi)); +} + +static ssize_t max_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_max_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(max_bytes); + static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *page) @@ -239,6 +267,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, NULL, -- Gitee From 8f10167732af97fafc209ac24d5f476d044c6f21 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:23 +0800 Subject: [PATCH 10/23] mm: document /sys/class/bdi//max_bytes knob mainline inclusion from mainline-v6.2-rc1 commit c354d9268d7825eb8643f658c5091079d4f11a4a category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c354d9268d7825eb8643f658c5091079d4f11a4a -------------------------------- This documents the new /sys/class/bdi//max_bytes knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-10-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 9982fd0439ae..726c2e2c15cf 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -49,6 +49,20 @@ max_ratio (read-write) be trusted to play fair. (read-write) + +What: /sys/class/bdi//max_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given 'max_bytes' of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, a FUSE mount which cannot be + trusted to play fair, or a nbd device. + + (read-write) + What: /sys/class/bdi//strict_limit Date: October 2022 Contact: Stefan Roesch -- Gitee From 56d0df122448b8d500cd0dc221df2afa740f216b Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:24 +0800 Subject: [PATCH 11/23] mm: add bdi_get_min_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 712c00d66a342a3ed375df41c3df7d3d2abad2c0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=712c00d66a342a3ed375df41c3df7d3d2abad2c0 -------------------------------- This adds a function to return the specified value for min_bytes. 
It converts the stored min_ratio of the bdi to the corresponding bytes value. This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The returned value can be different than the value when the min_bytes value was set. Link: https://lkml.kernel.org/r/20221119005215.3052436-11-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 62e15821ff1c..547e35a7715a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,6 +107,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. */ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b13965fff44c..67ee9b9d6f59 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -773,6 +773,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_min_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); -- Gitee From d859c79dffa1869703e10f7ebfe711cc85cefb0d Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:25 +0800 Subject: [PATCH 12/23] mm: split off __bdi_set_min_ratio() function mainline inclusion from mainline-v6.2-rc1 commit 8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 category: bugfix bugzilla: 
https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 -------------------------------- This splits off the __bdi_set_min_ratio() function from the bdi_set_min_ratio() function. The __bdi_set_min_ratio() function will also be called from the bdi_set_min_bytes() function, which will be introduced in the next patch. Link: https://lkml.kernel.org/r/20221119005215.3052436-12-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 67ee9b9d6f59..a85d3716b652 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -718,7 +718,7 @@ static u64 bdi_get_bytes(unsigned int ratio) return bytes; } -int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; int ret = 0; @@ -764,6 +764,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); +} + int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { if (max_ratio > 100) -- Gitee From 87d6cc48f88bfdbe4fcf039da34c8946f5285ab7 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:26 +0800 Subject: [PATCH 13/23] mm: add bdi_set_min_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 803c98050569850be5fd51a2025c67622de887d9 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=803c98050569850be5fd51a2025c67622de887d9 -------------------------------- This introduces the bdi_set_min_bytes() function. The min_bytes function does not store the min_bytes value. Instead it converts the min_bytes value into the corresponding ratio value. Link: https://lkml.kernel.org/r/20221119005215.3052436-13-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 547e35a7715a..d8da5391f3d5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a85d3716b652..e1cd2930853b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -783,6 +783,20 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->min_ratio); } +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) +{ + int ret; + unsigned long pages = min_bytes >> PAGE_SHIFT; + unsigned long min_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + min_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_min_ratio(bdi, min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); -- Gitee 
From a4c00d049c80395ef44c0a7dc60980e573fdd409 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:27 +0800 Subject: [PATCH 14/23] mm: add /sys/class/bdi//min_bytes knob mainline inclusion from mainline-v6.2-rc1 commit 9c84819bd64ec15cb15d041c45ebe4725e9d4f3b category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9c84819bd64ec15cb15d041c45ebe4725e9d4f3b -------------------------------- bdi has two existing knobs to limit the amount of dirty memory: min_ratio and max_ratio. However the granularity of the knobs is limited and often it is more convenient to specify limits in terms of bytes. This change adds the min_bytes knob. It does not store the min_bytes value, instead it converts the min_bytes value to a ratio. The value is therefore more an approximation than an absolute value. It also maintains the sum over all the bdi min_ratio values stored in the variable bdi_min_ratio. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-14-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 383bf0a8f90e..b3595a3b06b2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -197,6 +197,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t min_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi)); +} + +static ssize_t min_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_min_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(min_bytes); + static ssize_t max_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -267,6 +295,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, -- Gitee From a2c4db2f26f51f3e64ea65249b689937f55d561d Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:28 +0800 Subject: [PATCH 15/23] mm: document /sys/class/bdi//min_bytes knob mainline inclusion from mainline-v6.2-rc1 commit 9c832a8d571784c998d0f9f5df480c62f7f3064c category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9c832a8d571784c998d0f9f5df480c62f7f3064c -------------------------------- This documents the new /sys/class/bdi//min_bytes knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-15-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 726c2e2c15cf..5b1341ffbd6d 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -50,6 +50,21 @@ max_ratio (read-write) (read-write) +What: /sys/class/bdi//min_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_bytes' parameter allows assigning a minimum + percentage of the write-back cache to a particular device + expressed in bytes. + For example, this is useful for providing a minimum QoS. + + (read-write) + What: /sys/class/bdi//max_bytes Date: October 2022 Contact: Stefan Roesch -- Gitee From 48afacdefe1e987cbd49e335b0f2f0034403d93d Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:29 +0800 Subject: [PATCH 16/23] mm: add bdi_set_max_ratio_no_scale() function mainline inclusion from mainline-v6.2-rc1 commit 4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 -------------------------------- This introduces bdi_set_max_ratio_no_scale(). 
It uses the max granularity for the ratio. This function is used by the new sysfs knob max_ratio_fine. Link: https://lkml.kernel.org/r/20221119005215.3052436-16-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d8da5391f3d5..e2a09835d736 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e1cd2930853b..42a9b2664c10 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -752,6 +752,9 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra { int ret = 0; + if (max_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -764,6 +767,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); @@ -771,9 +779,6 @@ int 
bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { - if (max_ratio > 100) - return -EINVAL; - return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); } EXPORT_SYMBOL(bdi_set_max_ratio); -- Gitee From c0edb801cac86cfd55c79e4ab83fe608898fb6e9 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:30 +0800 Subject: [PATCH 17/23] mm: add /sys/class/bdi//max_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit bca52dcbadc583f4db6435599c44a79f97293f06 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bca52dcbadc583f4db6435599c44a79f97293f06 -------------------------------- This adds the max_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. Link: https://lkml.kernel.org/r/20221119005215.3052436-17-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b3595a3b06b2..984df664b967 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -197,6 +197,25 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_max_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(max_ratio_fine, bdi->max_ratio) + static ssize_t min_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ 
-295,6 +314,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, -- Gitee From b393a6b5e792fa652e0ce30fa4f8013b118fbcf2 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:31 +0800 Subject: [PATCH 18/23] mm: document /sys/class/bdi//max_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit 54790f30fea74247e2f38b4a632ee3dc2fe42d86 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=54790f30fea74247e2f38b4a632ee3dc2fe42d86 -------------------------------- This documents the new /sys/class/bdi//max_ratio_fine knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-18-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5b1341ffbd6d..6e23417e0dd9 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -50,6 +50,19 @@ max_ratio (read-write) (read-write) +What: /sys/class/bdi//max_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given value of the write-back cache. The value is given as part + of 1 million. This is useful in situations where we want to avoid + one device taking all or most of the write-back cache. For example + in case of an NFS mount that is prone to get stuck, or a FUSE mount + which cannot be trusted to play fair. 
+ + (read-write) + What: /sys/class/bdi//min_bytes Date: October 2022 Contact: Stefan Roesch -- Gitee From 6d6bc5840ea26a77ec9077249e0f02181ee5e6ab Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:32 +0800 Subject: [PATCH 19/23] mm: add bdi_set_min_ratio_no_scale() function mainline inclusion from mainline-v6.2-rc1 commit 2c44af4f2aaa260199f218f11920c406e688693c category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2c44af4f2aaa260199f218f11920c406e688693c -------------------------------- This introduces bdi_set_min_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob min_ratio_fine. Link: https://lkml.kernel.org/r/20221119005215.3052436-19-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e2a09835d736..b10df87d6c34 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 42a9b2664c10..6adf00de9eba 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -723,6 +723,8 
@@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra unsigned int delta; int ret = 0; + if (min_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); @@ -767,6 +769,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio); +} + int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) { return __bdi_set_max_ratio(bdi, max_ratio); -- Gitee From 8d55c56bd6a1168aece1c5d399f294a6cd5ce508 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:33 +0800 Subject: [PATCH 20/23] mm: add /sys/class/bdi//min_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 -------------------------------- This adds the min_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-20-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 984df664b967..6e9162004a91 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -178,6 +178,25 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) +static ssize_t min_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_min_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(min_ratio_fine, bdi->min_ratio) + static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -313,6 +332,7 @@ static DEVICE_ATTR_RW(strict_limit); static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, + &dev_attr_min_ratio_fine.attr, &dev_attr_max_ratio.attr, &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, -- Gitee From 82fafa594e1c4acfefb232a342455b85e0fa2810 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 29 Aug 2024 10:31:34 +0800 Subject: [PATCH 21/23] mm: document /sys/class/bdi//min_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit eba39236f18da7a50b6c51df5d902ee72c43e760 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eba39236f18da7a50b6c51df5d902ee72c43e760 -------------------------------- This documents the new /sys/class/bdi//min_ratio_fine knob. 
[akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-21-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: Documentation/ABI/testing/sysfs-class-bdi [Context differences] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 6e23417e0dd9..f1119b36f341 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -39,6 +39,21 @@ min_ratio (read-write) percentage of the write-back cache to a particular device. For example, this is useful for providing a minimum QoS. +What: /sys/class/bdi//min_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio_fine' parameter allows assigning a minimum reserve + of the write-back cache to a particular device. The value is + expressed as part of 1 million. For example, this is useful for + providing a minimum QoS. 
+ + (read-write) + max_ratio (read-write) Allows limiting a particular device to use not more than the -- Gitee From e55ccb635016075e33c2d74de4fbb372528f8ce1 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Thu, 29 Aug 2024 10:31:35 +0800 Subject: [PATCH 22/23] mm: fix arithmetic for max_prop_frac when setting max_ratio mainline inclusion from mainline-v6.7 commit fa151a39a6879144b587f35c0dfcc15e1be9450f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fa151a39a6879144b587f35c0dfcc15e1be9450f -------------------------------- Since now bdi->max_ratio is part per million, fix the wrong arithmetic for max_prop_frac when setting max_ratio. Otherwise the miscalculated max_prop_frac will affect the incrementing of writeout completion count when max_ratio is not 100%. Link: https://lkml.kernel.org/r/20231219142508.86265-3-jefflexu@linux.alibaba.com Fixes: efc3e6ad53ea ("mm: split off __bdi_set_max_ratio() function") Signed-off-by: Jingbo Xu Cc: Joseph Qi Cc: Matthew Wilcox (Oracle) Cc: Stefan Roesch Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6adf00de9eba..0e72abc6cb60 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -762,7 +762,8 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra ret = -EINVAL; } else { bdi->max_ratio = max_ratio; - bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; + bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / + (100 * BDI_RATIO_SCALE); } spin_unlock_bh(&bdi_lock); -- Gitee From d2222e017b306f37852e1c35b4c325581828294a Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Thu, 29 Aug 2024 10:31:36 +0800 Subject: [PATCH 23/23] mm: fix arithmetic for bdi min_ratio mainline inclusion from mainline-v6.7 commit 
e0646b7590084a5bf3b056d3ad871d9379d2c25a category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e0646b7590084a5bf3b056d3ad871d9379d2c25a -------------------------------- Since now bdi->min_ratio is part per million, fix the wrong arithmetic. Otherwise it will fail with -EINVAL when setting a reasonable min_ratio, as it tries to set min_ratio to (min_ratio * BDI_RATIO_SCALE) in percentage unit, which exceeds 100% anyway. # cat /sys/class/bdi/253\:0/min_ratio 0 # cat /sys/class/bdi/253\:0/max_ratio 100 # echo 1 > /sys/class/bdi/253\:0/min_ratio -bash: echo: write error: Invalid argument Link: https://lkml.kernel.org/r/20231219142508.86265-2-jefflexu@linux.alibaba.com Fixes: 8021fb3232f2 ("mm: split off __bdi_set_min_ratio() function") Signed-off-by: Jingbo Xu Reported-by: Joseph Qi Cc: Matthew Wilcox (Oracle) Cc: Stefan Roesch Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0e72abc6cb60..0062f1146a30 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -725,7 +725,6 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra if (min_ratio > 100 * BDI_RATIO_SCALE) return -EINVAL; - min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { -- Gitee