From 54d85bc9752e6b387942ca32ee7d2f66ea3427d4 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:11 +0800 Subject: [PATCH 01/23] mm: add bdi_set_strict_limit() function mainline inclusion from mainline-v6.2-rc1 commit 8e9d5ead865a1a7af74a444d2f00f1ef4539bfba category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8e9d5ead865a1a7af74a444d2f00f1ef4539bfba -------------------------------- Patch series "mm/block: add bdi sysfs knobs", v4. At meta network block devices (nbd) are used to implement remote block storage. In testing and during production it has been observed that these network block devices can consume a huge portion of the dirty writeback cache and writeback can take a considerable time. To be able to give stricter limits, I'm proposing the following changes: 1) introduce strictlimit knob Currently the max_ratio knob exists to limit the dirty_memory. However this knob only applies once (dirty_ratio + dirty_background_ratio) / 2 has been reached. With the BDI_CAP_STRICTLIMIT flag, the max_ratio can be applied without reaching that limit. This change exposes that knob. This knob can also be useful for NFS, fuse filesystems and USB devices. 2) Use part of 1000000 internal calculation The max_ratio is based on percentage. With the current machine sizes percentage values can be very high (1% of a 256GB main memory is already 2.5GB). This change uses part of 1000000 instead of percentages for the internal calculations. 3) Introduce two new sysfs knobs: min_bytes and max_bytes. Currently all calculations are based on ratio, but for a user it is often more convenient to specify a limit in bytes. The new knobs will not store bytes values, instead they will translate the byte value to a corresponding ratio. As the internal values are now part of 1000000, the ratio is closer to the specified value. 
However the value should be seen more as an approximation as it can fluctuate over time. 4) Introduce two new sysfs knobs: min_ratio_fine and max_ratio_fine. The granularity for the existing sysfs bdi knobs min_ratio and max_ratio is based on percentage values. The new sysfs bdi knobs min_ratio_fine and max_ratio_fine allow the ratio to be specified as part of 1 million. This patch (of 20): This adds the bdi_set_strict_limit function to be able to set/unset the BDI_CAP_STRICTLIMIT flag. Link: https://lkml.kernel.org/r/20221119005215.3052436-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20221119005215.3052436-2-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Jens Axboe Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a689a21abe10..c67129defa3a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,6 +106,7 @@ static inline unsigned long wb_stat_error(void) int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 31bc5904bbf8..e3c90349b552 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -724,6 +724,21 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) +{ + if (strict_limit > 1) + return -EINVAL; + + spin_lock_bh(&bdi_lock); + if (strict_limit) + bdi->capabilities |= BDI_CAP_STRICTLIMIT; + else + bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; + spin_unlock_bh(&bdi_lock); + + return 0; +} + static 
unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { -- Gitee From 281db412ed53af5570d6d457270e9da0d8f96a0f Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:12 +0800 Subject: [PATCH 02/23] mm: add knob /sys/class/bdi//strict_limit mainline inclusion from mainline-v6.2-rc1 commit 27bbe9d48d4e298864e18b39f091342c68b81637 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=27bbe9d48d4e298864e18b39f091342c68b81637 -------------------------------- Add a new knob to /sys/class/bdi//strict_limit. This new knob allows to set/unset the flag BDI_CAP_STRICTLIMIT in the bdi capabilities. Link: https://lkml.kernel.org/r/20221119005215.3052436-3-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5f4f16dbff9d..d2a7b3dbfcda 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -207,11 +207,40 @@ static ssize_t stable_pages_required_show(struct device *dev, } static DEVICE_ATTR_RO(stable_pages_required); +static ssize_t strict_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int strict_limit; + ssize_t ret; + + ret = kstrtouint(buf, 10, &strict_limit); + if (ret < 0) + return ret; + + ret = bdi_set_strict_limit(bdi, strict_limit); + if (!ret) + ret = count; + + return ret; +} + +static ssize_t strict_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); +} +static DEVICE_ATTR_RW(strict_limit); + static struct 
attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, &dev_attr_stable_pages_required.attr, + &dev_attr_strict_limit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); -- Gitee From ad9946c61439f46667de83d9133969e27ada0f28 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:13 +0800 Subject: [PATCH 03/23] mm: document /sys/class/bdi//strict_limit knob mainline inclusion from mainline-v6.2-rc1 commit 16b837eb84e6948f92411eb32e97a05f89733ddc category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=16b837eb84e6948f92411eb32e97a05f89733ddc -------------------------------- This documents the new /sys/class/bdi//strict_limit knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-4-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: Documentation/ABI/testing/sysfs-class-bdi [Adapt to same document style.] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5402bd74ba43..7b153aae9b24 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,6 +48,14 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. +strict_limit (read-write) + + Forces per-BDI checks for the share of given device in the write-back + cache even before the global background dirty limit is reached. This + is useful in situations where the global limit is much higher than + affordable for given relatively slow (or untrusted) device. Turning + strictlimit on has no visible effect if max_ratio is equal to 100%. 
+ stable_pages_required (read-only) If set, the backing device requires that all pages comprising a write -- Gitee From f117754df19d8350ae63f8a27de98d3e41c299b4 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 5 Sep 2024 20:28:14 +0800 Subject: [PATCH 04/23] mm: rework calculation of bdi_min_ratio in bdi_set_min_ratio mainline inclusion from mainline-v5.19-rc1 commit 21f0dd88f23dc9dc46b781f8ec9acf975dca4e6e category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=21f0dd88f23dc9dc46b781f8ec9acf975dca4e6e -------------------------------- In function bdi_set_min_ratio, min_ratio is an unsigned int, so subtracting bdi->min_ratio from it underflows when the new min_ratio is below bdi->min_ratio, which is confusing. Rework it; no functional change. Link: https://lkml.kernel.org/r/20220422095159.2858305-1-chenwandun@huawei.com Signed-off-by: Chen Wandun Cc: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e3c90349b552..3f160f0c248b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -685,18 +685,25 @@ static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { + unsigned int delta; int ret = 0; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { - min_ratio -= bdi->min_ratio; - if (bdi_min_ratio + min_ratio < 100) { - bdi_min_ratio += min_ratio; - bdi->min_ratio += min_ratio; + if (min_ratio < bdi->min_ratio) { + delta = bdi->min_ratio - min_ratio; + bdi_min_ratio -= delta; + bdi->min_ratio = min_ratio; } else { - ret = -EINVAL; + delta = min_ratio - bdi->min_ratio; + if (bdi_min_ratio + delta < 100) { + bdi_min_ratio += delta; + bdi->min_ratio = min_ratio; + } else { + ret = -EINVAL; + } } } 
spin_unlock_bh(&bdi_lock); -- Gitee From 0503fb8a6d783f26552ba1ec0a10b48af747dca8 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:15 +0800 Subject: [PATCH 05/23] mm: use part per 1000000 for bdi ratios mainline inclusion from mainline-v6.2-rc1 commit ae82291e9ca47c3d6da6b77a00f427754aca413e category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ae82291e9ca47c3d6da6b77a00f427754aca413e -------------------------------- To get finer granularity for ratio calculations use part per million instead of percentiles. This is especially important if we want to automatically convert byte values to ratios. Otherwise the values that are actually used can be quite different. This is also important for machines with more main memory (1% of 256GB is already 2.5GB). Link: https://lkml.kernel.org/r/20221119005215.3052436-5-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 3 +++ mm/backing-dev.c | 6 +++--- mm/page-writeback.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c67129defa3a..75a25b2145c3 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,6 +104,9 @@ static inline unsigned long wb_stat_error(void) #endif } +/* BDI ratio is expressed as part per 1000000 for finer granularity. 
*/ +#define BDI_RATIO_SCALE 10000 + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d2a7b3dbfcda..b0fff65d9962 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -176,7 +176,7 @@ static ssize_t min_ratio_store(struct device *dev, return ret; } -BDI_SHOW(min_ratio, bdi->min_ratio) +BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -195,7 +195,7 @@ static ssize_t max_ratio_store(struct device *dev, return ret; } -BDI_SHOW(max_ratio, bdi->max_ratio) +BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, @@ -809,7 +809,7 @@ static int bdi_init(struct backing_dev_info *bdi) kref_init(&bdi->refcnt); bdi->min_ratio = 0; - bdi->max_ratio = 100; + bdi->max_ratio = 100 * BDI_RATIO_SCALE; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 3f160f0c248b..5fb41fc9117c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -198,7 +198,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, min *= this_bw; min = div64_ul(min, tot_bw); } - if (max < 100) { + if (max < 100 * BDI_RATIO_SCALE) { max *= this_bw; max = div64_ul(max, tot_bw); } @@ -688,6 +688,8 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) unsigned int delta; int ret = 0; + min_ratio *= BDI_RATIO_SCALE; + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; @@ -698,7 +700,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) bdi->min_ratio = min_ratio; } else { delta = 
min_ratio - bdi->min_ratio; - if (bdi_min_ratio + delta < 100) { + if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) { bdi_min_ratio += delta; bdi->min_ratio = min_ratio; } else { @@ -717,6 +719,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) if (max_ratio > 100) return -EINVAL; + max_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { @@ -808,15 +811,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) fprop_fraction_percpu(&dom->completions, dtc->wb_completions, &numerator, &denominator); - wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; + wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); wb_thresh *= numerator; wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); - wb_thresh += (thresh * wb_min_ratio) / 100; - if (wb_thresh > (thresh * wb_max_ratio) / 100) - wb_thresh = thresh * wb_max_ratio / 100; + wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE); + if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE)) + wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE); return wb_thresh; } -- Gitee From fe9fe7a2be77cad61949ee1f51037687f018fd12 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:16 +0800 Subject: [PATCH 06/23] mm: add bdi_get_max_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 00df7d51263b46ed93f7572e2d09579746f7b1eb category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=00df7d51263b46ed93f7572e2d09579746f7b1eb -------------------------------- This adds a function to return the specified value for max_bytes. It converts the stored max_ratio of the bdi to the corresponding bytes value. It introduces the bdi_get_bytes helper function to do the conversion. 
This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The helper function will also be used by the min_bytes bdi knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-6-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 75a25b2145c3..91bc6e9adb14 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,6 +107,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. */ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5fb41fc9117c..cf9ec9ec02a7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -683,6 +683,18 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; +static u64 bdi_get_bytes(unsigned int ratio) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + u64 bytes; + + global_dirty_limits(&background_thresh, &dirty_thresh); + bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100; + + return bytes; +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; @@ -734,6 +746,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_max_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->max_ratio); +} + int 
bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) -- Gitee From af1b58976b6833c1396ff7671b301d64bd0e6365 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:17 +0800 Subject: [PATCH 07/23] mm: split off __bdi_set_max_ratio() function mainline inclusion from mainline-v6.2-rc1 commit efc3e6ad53ea14225b434fddca261c9a1c56c707 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=efc3e6ad53ea14225b434fddca261c9a1c56c707 -------------------------------- This splits off __bdi_set_max_ratio() from bdi_set_max_ratio(). __bdi_set_max_ratio() will also be called from bdi_set_max_bytes(), which will be introduced in the next patch. Link: https://lkml.kernel.org/r/20221119005215.3052436-7-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cf9ec9ec02a7..cfdbf3dbf70e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -725,14 +725,10 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) return ret; } -int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { int ret = 0; - if (max_ratio > 100) - return -EINVAL; - max_ratio *= BDI_RATIO_SCALE; - spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -744,6 +740,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) return ret; } + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + if (max_ratio > 100) + return -EINVAL; + + return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); +} 
EXPORT_SYMBOL(bdi_set_max_ratio); u64 bdi_get_max_bytes(struct backing_dev_info *bdi) -- Gitee From ea43ff6f7dfd6089f78976443f277a996a261140 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:18 +0800 Subject: [PATCH 08/23] mm: add bdi_set_max_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 1bf27e98d26d1e62166a456ef17460be085cbe0b category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1bf27e98d26d1e62166a456ef17460be085cbe0b -------------------------------- This introduces the bdi_set_max_bytes() function. The max_bytes function does not store the max_bytes value. Instead it converts the max_bytes value into the corresponding ratio value. Link: https://lkml.kernel.org/r/20221119005215.3052436-8-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 91bc6e9adb14..62e15821ff1c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -110,6 +110,7 @@ static inline unsigned long wb_stat_error(void) u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cfdbf3dbf70e..8ef27016875d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -683,6 +684,28 @@ void wb_domain_exit(struct 
wb_domain *dom) */ static unsigned int bdi_min_ratio; +static int bdi_check_pages_limit(unsigned long pages) +{ + unsigned long max_dirty_pages = global_dirtyable_memory(); + + if (pages > max_dirty_pages) + return -EINVAL; + + return 0; +} + +static unsigned long bdi_ratio_from_pages(unsigned long pages) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long ratio; + + global_dirty_limits(&background_thresh, &dirty_thresh); + ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); + + return ratio; +} + static u64 bdi_get_bytes(unsigned int ratio) { unsigned long background_thresh; @@ -755,6 +778,20 @@ u64 bdi_get_max_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->max_ratio); } +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes) +{ + int ret; + unsigned long pages = max_bytes >> PAGE_SHIFT; + unsigned long max_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + max_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) -- Gitee From 5796e536bbd9187296ded6d4d1cb3152113c6e85 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:19 +0800 Subject: [PATCH 09/23] mm: add knob /sys/class/bdi//max_bytes mainline inclusion from mainline-v6.2-rc1 commit c56e049a5e401a177c7c9b39a3bcc973ff5cec0b category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c56e049a5e401a177c7c9b39a3bcc973ff5cec0b -------------------------------- This adds the new knob max_bytes to specify a dirty memory limit for the corresponding bdi. The specified bytes value is converted to a ratio. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-9-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: mm/backing-dev.c [Context differences.] Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b0fff65d9962..383bf0a8f90e 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -197,6 +197,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi)); +} + +static ssize_t max_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_max_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(max_bytes); + static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, char *page) @@ -239,6 +267,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, NULL, -- Gitee From 61914d5908767f05724fd278bb325c747f95b931 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:20 +0800 Subject: [PATCH 10/23] mm: document /sys/class/bdi//max_bytes knob mainline inclusion from mainline-v6.2-rc1 commit c354d9268d7825eb8643f658c5091079d4f11a4a category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c354d9268d7825eb8643f658c5091079d4f11a4a -------------------------------- This documents the new /sys/class/bdi//max_bytes knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-10-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: Documentation/ABI/testing/sysfs-class-bdi [Adapt to same document style.] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 7b153aae9b24..26f673f21016 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,6 +48,15 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. +max_bytes (read-write) + + Allows limiting a particular device to use not more than the + given 'max_bytes' of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, a FUSE mount which cannot be + trusted to play fair, or a nbd device. 
+ strict_limit (read-write) Forces per-BDI checks for the share of given device in the write-back -- Gitee From 68a0b1c694807a5afa9cac58cbe5113f8cd25185 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:21 +0800 Subject: [PATCH 11/23] mm: add bdi_get_min_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 712c00d66a342a3ed375df41c3df7d3d2abad2c0 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=712c00d66a342a3ed375df41c3df7d3d2abad2c0 -------------------------------- This adds a function to return the specified value for min_bytes. It converts the stored min_ratio of the bdi to the corresponding bytes value. This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The returned value can be different than the value when the min_bytes value was set. Link: https://lkml.kernel.org/r/20221119005215.3052436-11-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 62e15821ff1c..547e35a7715a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,6 +107,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. 
*/ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8ef27016875d..454686423abf 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -773,6 +773,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_min_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); -- Gitee From 633fbe32804a5d9b666fa9eb9a1f956a2d3643e0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:22 +0800 Subject: [PATCH 12/23] mm: split off __bdi_set_min_ratio() function mainline inclusion from mainline-v6.2-rc1 commit 8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 -------------------------------- This splits off the __bdi_set_min_ratio() function from the bdi_set_min_ratio() function. The __bdi_set_min_ratio() function will also be called from the bdi_set_min_bytes() function, which will be introduced in the next patch. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-12-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 454686423abf..5e22c679acb0 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -718,7 +718,7 @@ static u64 bdi_get_bytes(unsigned int ratio) return bytes; } -int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; int ret = 0; @@ -764,6 +764,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); +} + int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { if (max_ratio > 100) -- Gitee From a258234c4c2ad33b03472f46ac9ba15235912441 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:23 +0800 Subject: [PATCH 13/23] mm: add bdi_set_min_bytes() function mainline inclusion from mainline-v6.2-rc1 commit 803c98050569850be5fd51a2025c67622de887d9 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=803c98050569850be5fd51a2025c67622de887d9 -------------------------------- This introduces the bdi_set_min_bytes() function. The min_bytes function does not store the min_bytes value. Instead it converts the min_bytes value into the corresponding ratio value. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-13-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 547e35a7715a..d8da5391f3d5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 5e22c679acb0..23e71f043b6b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -783,6 +783,20 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->min_ratio); } +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) +{ + int ret; + unsigned long pages = min_bytes >> PAGE_SHIFT; + unsigned long min_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + min_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_min_ratio(bdi, min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); -- Gitee From 9d961dd8f6989bee222015da75553f9800b46714 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:24 +0800 Subject: [PATCH 14/23] mm: add /sys/class/bdi//min_bytes knob mainline inclusion from mainline-v6.2-rc1 commit 9c84819bd64ec15cb15d041c45ebe4725e9d4f3b category: feature bugzilla: 
https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9c84819bd64ec15cb15d041c45ebe4725e9d4f3b -------------------------------- bdi has two existing knobs to limit the amount of dirty memory: min_ratio and max_ratio. However the granularity of the knobs is limited and often it is more convenient to specify limits in terms of bytes. This change adds the min_bytes knob. It does not store the min_bytes value; instead, it converts the min_bytes value to a ratio. The value is therefore more of an approximation than an absolute value. It also maintains the sum over all the bdi min_ratio values stored in the variable bdi_min_ratio. Link: https://lkml.kernel.org/r/20221119005215.3052436-14-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 383bf0a8f90e..b3595a3b06b2 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -197,6 +197,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t min_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi)); +} + +static ssize_t min_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_min_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(min_bytes); + static ssize_t max_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -267,6 +295,7 @@ static struct
attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, -- Gitee From 7180738e1bba82a9acf38e56e3868373e37296eb Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:25 +0800 Subject: [PATCH 15/23] mm: document /sys/class/bdi//min_bytes knob mainline inclusion from mainline-v6.2-rc1 commit 9c832a8d571784c998d0f9f5df480c62f7f3064c category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9c832a8d571784c998d0f9f5df480c62f7f3064c -------------------------------- This documents the new /sys/class/bdi//min_bytes knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-15-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: Documentation/ABI/testing/sysfs-class-bdi [Adapt to same document style.] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 26f673f21016..0cf0c0ffff60 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,6 +48,17 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. +min_bytes (read-write) + + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_bytes' parameter allows assigning a minimum + percentage of the write-back cache to a particular device + expressed in bytes. 
+ For example, this is useful for providing a minimum QoS. + max_bytes (read-write) Allows limiting a particular device to use not more than the -- Gitee From c4c8ed23f50327bf454b570ec717a0d12e20d5a8 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:26 +0800 Subject: [PATCH 16/23] mm: add bdi_set_max_ratio_no_scale() function mainline inclusion from mainline-v6.2-rc1 commit 4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 -------------------------------- This introduces bdi_set_max_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob max_ratio_fine. Link: https://lkml.kernel.org/r/20221119005215.3052436-16-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d8da5391f3d5..e2a09835d736 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 23e71f043b6b..74224e66629c 100644
--- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -752,6 +752,9 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra { int ret = 0; + if (max_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -764,6 +767,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); @@ -771,9 +779,6 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { - if (max_ratio > 100) - return -EINVAL; - return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); } EXPORT_SYMBOL(bdi_set_max_ratio); -- Gitee From d72b73e0ef8449809b11397d88dfa82d864a80f7 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:27 +0800 Subject: [PATCH 17/23] mm: add /sys/class/bdi//max_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit bca52dcbadc583f4db6435599c44a79f97293f06 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bca52dcbadc583f4db6435599c44a79f97293f06 -------------------------------- This adds the max_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-17-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b3595a3b06b2..984df664b967 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -197,6 +197,25 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_max_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(max_ratio_fine, bdi->max_ratio) + static ssize_t min_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -295,6 +314,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, -- Gitee From db1ce4541682d56a9ba7041d8c0a8ed965706bc4 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:28 +0800 Subject: [PATCH 18/23] mm: document /sys/class/bdi//max_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit 54790f30fea74247e2f38b4a632ee3dc2fe42d86 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=54790f30fea74247e2f38b4a632ee3dc2fe42d86 -------------------------------- This documents the new /sys/class/bdi//max_ratio_fine knob. 
[akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-18-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: Documentation/ABI/testing/sysfs-class-bdi [Adapt to same document style.] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 0cf0c0ffff60..3b779e95f243 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,6 +48,15 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. +max_ratio_fine (read-write) + + Allows limiting a particular device to use not more than the + given value of the write-back cache. The value is given as part + of 1 million. This is useful in situations where we want to avoid + one device taking all or most of the write-back cache. For example + in case of an NFS mount that is prone to get stuck, or a FUSE mount + which cannot be trusted to play fair. + min_bytes (read-write) Under normal circumstances each device is given a part of the -- Gitee From aa12173343d7de4473021ff618c33cd2d9473ed0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:29 +0800 Subject: [PATCH 19/23] mm: add bdi_set_min_ratio_no_scale() function mainline inclusion from mainline-v6.2-rc1 commit 2c44af4f2aaa260199f218f11920c406e688693c category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2c44af4f2aaa260199f218f11920c406e688693c -------------------------------- This introduces bdi_set_min_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob min_ratio_fine.
Link: https://lkml.kernel.org/r/20221119005215.3052436-19-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e2a09835d736..b10df87d6c34 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 74224e66629c..14192abc4210 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -723,6 +723,8 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra unsigned int delta; int ret = 0; + if (min_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); @@ -767,6 +769,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio); +} + int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) { return __bdi_set_max_ratio(bdi, max_ratio); -- Gitee From 9450ff8d36377915582d0be2ae558828686797b0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:30 +0800 Subject: [PATCH 
20/23] mm: add /sys/class/bdi//min_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 -------------------------------- This adds the min_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. Link: https://lkml.kernel.org/r/20221119005215.3052436-20-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 984df664b967..6e9162004a91 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -178,6 +178,25 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) +static ssize_t min_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_min_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(min_ratio_fine, bdi->min_ratio) + static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -313,6 +332,7 @@ static DEVICE_ATTR_RW(strict_limit); static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, + &dev_attr_min_ratio_fine.attr, &dev_attr_max_ratio.attr, &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, -- Gitee From d46a857a366c7bdb9526ae236c51f2274a7d4b8b Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Thu, 5 Sep 2024 20:28:31 +0800 Subject: [PATCH 
21/23] mm: document /sys/class/bdi//min_ratio_fine knob mainline inclusion from mainline-v6.2-rc1 commit eba39236f18da7a50b6c51df5d902ee72c43e760 category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=eba39236f18da7a50b6c51df5d902ee72c43e760 -------------------------------- This documents the new /sys/class/bdi//min_ratio_fine knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-21-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Conflicts: Documentation/ABI/testing/sysfs-class-bdi [Adapt to same document style.] Signed-off-by: Yifan Qiao --- Documentation/ABI/testing/sysfs-class-bdi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 3b779e95f243..ffaf85789687 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -39,6 +39,17 @@ min_ratio (read-write) percentage of the write-back cache to a particular device. For example, this is useful for providing a minimum QoS. +min_ratio_fine (read-write) + + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio_fine' parameter allows assigning a minimum reserve + of the write-back cache to a particular device. The value is + expressed as part of 1 million. For example, this is useful for + providing a minimum QoS.
+ max_ratio (read-write) Allows limiting a particular device to use not more than the -- Gitee From 6df618214c54522c965813b92c69b8f9986983c5 Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Thu, 5 Sep 2024 20:28:32 +0800 Subject: [PATCH 22/23] mm: fix arithmetic for max_prop_frac when setting max_ratio mainline inclusion from mainline-v6.7 commit fa151a39a6879144b587f35c0dfcc15e1be9450f category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fa151a39a6879144b587f35c0dfcc15e1be9450f -------------------------------- Since now bdi->max_ratio is part per million, fix the wrong arithmetic for max_prop_frac when setting max_ratio. Otherwise the miscalculated max_prop_frac will affect the incrementing of writeout completion count when max_ratio is not 100%. Link: https://lkml.kernel.org/r/20231219142508.86265-3-jefflexu@linux.alibaba.com Fixes: efc3e6ad53ea ("mm: split off __bdi_set_max_ratio() function") Signed-off-by: Jingbo Xu Cc: Joseph Qi Cc: Matthew Wilcox (Oracle) Cc: Stefan Roesch Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 14192abc4210..b1c73a9fc3c7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -762,7 +762,8 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra ret = -EINVAL; } else { bdi->max_ratio = max_ratio; - bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; + bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / + (100 * BDI_RATIO_SCALE); } spin_unlock_bh(&bdi_lock); -- Gitee From 17ef6b2056b1bd82bb8323c8f22028881d28442b Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Thu, 5 Sep 2024 20:28:33 +0800 Subject: [PATCH 23/23] mm: fix arithmetic for bdi min_ratio mainline inclusion from mainline-v6.7 commit e0646b7590084a5bf3b056d3ad871d9379d2c25a 
category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAN96I Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e0646b7590084a5bf3b056d3ad871d9379d2c25a -------------------------------- Since now bdi->min_ratio is part per million, fix the wrong arithmetic. Otherwise it will fail with -EINVAL when setting a reasonable min_ratio, as it tries to set min_ratio to (min_ratio * BDI_RATIO_SCALE) in percentage unit, which exceeds 100% anyway. # cat /sys/class/bdi/253\:0/min_ratio 0 # cat /sys/class/bdi/253\:0/max_ratio 100 # echo 1 > /sys/class/bdi/253\:0/min_ratio -bash: echo: write error: Invalid argument Link: https://lkml.kernel.org/r/20231219142508.86265-2-jefflexu@linux.alibaba.com Fixes: 8021fb3232f2 ("mm: split off __bdi_set_min_ratio() function") Signed-off-by: Jingbo Xu Reported-by: Joseph Qi Cc: Matthew Wilcox (Oracle) Cc: Stefan Roesch Signed-off-by: Andrew Morton Signed-off-by: Yifan Qiao --- mm/page-writeback.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b1c73a9fc3c7..85031998bd58 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -725,7 +725,6 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra if (min_ratio > 100 * BDI_RATIO_SCALE) return -EINVAL; - min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { -- Gitee