From 24270a22336b69ac6e276a60d0f69d74716efb33 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 14 Dec 2020 19:14:50 -0800 Subject: [PATCH 01/24] mm:backing-dev: use sysfs_emit in macro defining functions ANBZ: #7747 commit 5e4c0d86cf4a7a22abb9468e84f4480dd6b67032 upstream. The cocci script used in commit bdacbb8d04f ("mm: Use sysfs_emit for struct kobject * uses") does not convert the name##_show macro because the macro uses concatenation via ##. Convert it by hand. Link: https://lkml.kernel.org/r/45ec6cfc177d743f9c0ebaf35e43969dce43af42.1605376435.git.joe@perches.com Signed-off-by: Joe Perches Cc: Christoph Lameter Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mike Kravetz Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Jingbo Xu --- mm/backing-dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5c63c31af70f..d3f561804138 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -166,11 +166,11 @@ static ssize_t read_ahead_kb_store(struct device *dev, #define BDI_SHOW(name, expr) \ static ssize_t name##_show(struct device *dev, \ - struct device_attribute *attr, char *page) \ + struct device_attribute *attr, char *buf) \ { \ struct backing_dev_info *bdi = dev_get_drvdata(dev); \ \ - return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \ + return sysfs_emit(buf, "%lld\n", (long long)expr); \ } \ static DEVICE_ATTR_RW(name); @@ -216,11 +216,11 @@ BDI_SHOW(max_ratio, bdi->max_ratio) static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, - char *page) + char *buf) { dev_warn_once(dev, "the stable_pages_required attribute has been removed. 
Use the stable_writes queue attribute instead.\n"); - return snprintf(page, PAGE_SIZE-1, "%d\n", 0); + return sysfs_emit(buf, "%d\n", 0); } static DEVICE_ATTR_RO(stable_pages_required); -- Gitee From 7226f0cfc6cbac67e3bb8218fa4a6f6acf7f5bdd Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Thu, 28 Apr 2022 23:15:57 -0700 Subject: [PATCH 02/24] mm: rework calculation of bdi_min_ratio in bdi_set_min_ratio ANBZ: #7747 commit 21f0dd88f23dc9dc46b781f8ec9acf975dca4e6e upstream. In function bdi_set_min_ratio, min_ratio is unsigned int, it will result underflow when setting min_ratio below bdi->min_ratio, it is confusing. Rework it, no functional change. Link: https://lkml.kernel.org/r/20220422095159.2858305-1-chenwandun@huawei.com Signed-off-by: Chen Wandun Cc: Peter Zijlstra Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/page-writeback.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index fdf35195a0a4..fbccf3b27f99 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -668,18 +668,25 @@ static unsigned int bdi_min_ratio; int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { + unsigned int delta; int ret = 0; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { - min_ratio -= bdi->min_ratio; - if (bdi_min_ratio + min_ratio < 100) { - bdi_min_ratio += min_ratio; - bdi->min_ratio += min_ratio; + if (min_ratio < bdi->min_ratio) { + delta = bdi->min_ratio - min_ratio; + bdi_min_ratio -= delta; + bdi->min_ratio = min_ratio; } else { - ret = -EINVAL; + delta = min_ratio - bdi->min_ratio; + if (bdi_min_ratio + delta < 100) { + bdi_min_ratio += delta; + bdi->min_ratio = min_ratio; + } else { + ret = -EINVAL; + } } } spin_unlock_bh(&bdi_lock); -- Gitee From 2051b799769ca227882c935a470b451519bb2a5a Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:56 -0800 Subject: [PATCH 03/24] mm: add 
bdi_set_strict_limit() function ANBZ: #7747 commit 8e9d5ead865a1a7af74a444d2f00f1ef4539bfba upstream. Patch series "mm/block: add bdi sysfs knobs", v4. At Meta, network block devices (nbd) are used to implement remote block storage. In testing and during production it has been observed that these network block devices can consume a huge portion of the dirty writeback cache and writeback can take a considerable time. To be able to give stricter limits, I'm proposing the following changes: 1) introduce strictlimit knob Currently the max_ratio knob exists to limit the dirty_memory. However this knob only applies once (dirty_ratio + dirty_background_ratio) / 2 has been reached. With the BDI_CAP_STRICTLIMIT flag, the max_ratio can be applied without reaching that limit. This change exposes that knob. This knob can also be useful for NFS, fuse filesystems and USB devices. 2) Use part of 1000000 for internal calculation The max_ratio is based on percentage. With the current machine sizes percentage values can be very high (1% of a 256GB main memory is already 2.5GB). This change uses part of 1000000 instead of percentages for the internal calculations. 3) Introduce two new sysfs knobs: min_bytes and max_bytes. Currently all calculations are based on ratio, but for a user it is often more convenient to specify a limit in bytes. The new knobs will not store bytes values, instead they will translate the byte value to a corresponding ratio. As the internal values are now part of 1000000, the ratio is closer to the specified value. However the value should be seen more as an approximation as it can fluctuate over time. 4) Introduce two new sysfs knobs: min_ratio_fine and max_ratio_fine. The granularity for the existing sysfs bdi knobs min_ratio and max_ratio is based on percentage values. The new sysfs bdi knobs min_ratio_fine and max_ratio_fine allow specifying the ratio as part of 1 million.
This patch (of 20): This adds the bdi_set_strict_limit function to be able to set/unset the BDI_CAP_STRICTLIMIT flag. Link: https://lkml.kernel.org/r/20221119005215.3052436-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20221119005215.3052436-2-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Jens Axboe Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fa7054409df0..324efea77d35 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -106,6 +106,7 @@ static inline unsigned long wb_stat_error(void) int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability diff --git a/mm/page-writeback.c b/mm/page-writeback.c index fbccf3b27f99..2cfc45299c2f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -714,6 +714,21 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) +{ + if (strict_limit > 1) + return -EINVAL; + + spin_lock_bh(&bdi_lock); + if (strict_limit) + bdi->capabilities |= BDI_CAP_STRICTLIMIT; + else + bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; + spin_unlock_bh(&bdi_lock); + + return 0; +} + static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { -- Gitee From e780d0ed7524bb3678af938dda6023d88af3c14f Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:57 -0800 Subject: [PATCH 04/24] mm: add knob /sys/class/bdi//strict_limit ANBZ: #7747 commit 27bbe9d48d4e298864e18b39f091342c68b81637 upstream. 
Add a new knob to /sys/class/bdi//strict_limit. This new knob allows to set/unset the flag BDI_CAP_STRICTLIMIT in the bdi capabilities. Link: https://lkml.kernel.org/r/20221119005215.3052436-3-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index d3f561804138..b6cb2bbfa13b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -224,11 +224,40 @@ static ssize_t stable_pages_required_show(struct device *dev, } static DEVICE_ATTR_RO(stable_pages_required); +static ssize_t strict_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int strict_limit; + ssize_t ret; + + ret = kstrtouint(buf, 10, &strict_limit); + if (ret < 0) + return ret; + + ret = bdi_set_strict_limit(bdi, strict_limit); + if (!ret) + ret = count; + + return ret; +} + +static ssize_t strict_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); +} +static DEVICE_ATTR_RW(strict_limit); + static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, &dev_attr_stable_pages_required.attr, + &dev_attr_strict_limit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); -- Gitee From 3962b105aa5481f08bf05214a69a4f83d4ba2ba9 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:58 -0800 Subject: [PATCH 05/24] mm: document /sys/class/bdi//strict_limit knob ANBZ: #7747 commit 16b837eb84e6948f92411eb32e97a05f89733ddc upstream. This documents the new /sys/class/bdi//strict_limit knob. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-4-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- Documentation/ABI/testing/sysfs-class-bdi | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5402bd74ba43..9982fd0439ae 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -48,6 +48,17 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. + (read-write) +What: /sys/class/bdi//strict_limit +Date: October 2022 +Contact: Stefan Roesch +Description: + Forces per-BDI checks for the share of given device in the write-back + cache even before the global background dirty limit is reached. This + is useful in situations where the global limit is much higher than + affordable for given relatively slow (or untrusted) device. Turning + strictlimit on has no visible effect if max_ratio is equal to 100%. + stable_pages_required (read-only) If set, the backing device requires that all pages comprising a write -- Gitee From 31ff44f4a92276922b563a0bac9fb444b2d6e31e Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:51:59 -0800 Subject: [PATCH 06/24] mm: use part per 1000000 for bdi ratios ANBZ: #7747 commit ae82291e9ca47c3d6da6b77a00f427754aca413e upstream. To get finer granularity for ratio calculations use part per million instead of percentiles. This is especially important if we want to automatically convert byte values to ratios. Otherwise the values that are actually used can be quite different. This is also important for machines with more main memory (1% of 256GB is already 2.5GB). 
Link: https://lkml.kernel.org/r/20221119005215.3052436-5-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 3 +++ mm/backing-dev.c | 6 +++--- mm/page-writeback.c | 15 +++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 324efea77d35..6c220b403981 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,6 +104,9 @@ static inline unsigned long wb_stat_error(void) #endif } +/* BDI ratio is expressed as part per 1000000 for finer granularity. */ +#define BDI_RATIO_SCALE 10000 + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b6cb2bbfa13b..9d08a0e4e3f9 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -193,7 +193,7 @@ static ssize_t min_ratio_store(struct device *dev, return ret; } -BDI_SHOW(min_ratio, bdi->min_ratio) +BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -212,7 +212,7 @@ static ssize_t max_ratio_store(struct device *dev, return ret; } -BDI_SHOW(max_ratio, bdi->max_ratio) +BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, @@ -1034,7 +1034,7 @@ static int bdi_init(struct backing_dev_info *bdi) kref_init(&bdi->refcnt); bdi->min_ratio = 0; - bdi->max_ratio = 100; + bdi->max_ratio = 100 * BDI_RATIO_SCALE; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 
2cfc45299c2f..8440bb82ab1f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -203,7 +203,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, min *= this_bw; min = div64_ul(min, tot_bw); } - if (max < 100) { + if (max < 100 * BDI_RATIO_SCALE) { max *= this_bw; max = div64_ul(max, tot_bw); } @@ -671,6 +671,8 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) unsigned int delta; int ret = 0; + min_ratio *= BDI_RATIO_SCALE; + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; @@ -681,7 +683,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) bdi->min_ratio = min_ratio; } else { delta = min_ratio - bdi->min_ratio; - if (bdi_min_ratio + delta < 100) { + if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) { bdi_min_ratio += delta; bdi->min_ratio = min_ratio; } else { @@ -700,6 +702,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) if (max_ratio > 100) return -EINVAL; + max_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { @@ -791,15 +794,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) fprop_fraction_percpu(&dom->completions, dtc->wb_completions, &numerator, &denominator); - wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; + wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); wb_thresh *= numerator; wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); - wb_thresh += (thresh * wb_min_ratio) / 100; - if (wb_thresh > (thresh * wb_max_ratio) / 100) - wb_thresh = thresh * wb_max_ratio / 100; + wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE); + if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE)) + wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE); return wb_thresh; } -- Gitee From 3e15f7de8b1a000599b758f14b686b72d6442fdd Mon Sep 17 00:00:00 2001 From: Stefan 
Roesch Date: Fri, 18 Nov 2022 16:52:00 -0800 Subject: [PATCH 07/24] mm: add bdi_get_max_bytes() function ANBZ: #7747 commit 00df7d51263b46ed93f7572e2d09579746f7b1eb upstream. This adds a function to return the specified value for max_bytes. It converts the stored max_ratio of the bdi to the corresponding bytes value. It introduces the bdi_get_bytes helper function to do the conversion. This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The helper function will also be used by the min_bytes bdi knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-6-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 6c220b403981..7b29c204cf16 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,6 +107,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. 
*/ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8440bb82ab1f..c8fb34c9e584 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -666,6 +666,18 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; +static u64 bdi_get_bytes(unsigned int ratio) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + u64 bytes; + + global_dirty_limits(&background_thresh, &dirty_thresh); + bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100; + + return bytes; +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; @@ -717,6 +729,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_max_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->max_ratio); +} + int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) -- Gitee From 2ccbce3d69b019ca1e8fd37dfa88788b2d7216d0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:01 -0800 Subject: [PATCH 08/24] mm: split off __bdi_set_max_ratio() function ANBZ: #7747 commit efc3e6ad53ea14225b434fddca261c9a1c56c707 upstream. This splits off __bdi_set_max_ratio() from bdi_set_max_ratio(). __bdi_set_max_ratio() will also be called from bdi_set_max_bytes(), which will be introduced in the next patch. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-7-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/page-writeback.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c8fb34c9e584..61e8302f5d6e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -708,14 +708,10 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) return ret; } -int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { int ret = 0; - if (max_ratio > 100) - return -EINVAL; - max_ratio *= BDI_RATIO_SCALE; - spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -727,6 +723,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) return ret; } + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + if (max_ratio > 100) + return -EINVAL; + + return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); +} EXPORT_SYMBOL(bdi_set_max_ratio); u64 bdi_get_max_bytes(struct backing_dev_info *bdi) -- Gitee From 3584b82ac6de8898fa94a468ef0eaf388515879d Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:02 -0800 Subject: [PATCH 09/24] mm: add bdi_set_max_bytes() function ANBZ: #7747 commit 1bf27e98d26d1e62166a456ef17460be085cbe0b upstream. This introduces the bdi_set_max_bytes() function. The max_bytes function does not store the max_bytes value. Instead it converts the max_bytes value into the corresponding ratio value. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-8-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 7b29c204cf16..a0a910bcbb5e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -110,6 +110,7 @@ static inline unsigned long wb_stat_error(void) u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 61e8302f5d6e..03f779aeef1d 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -666,6 +667,28 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; +static int bdi_check_pages_limit(unsigned long pages) +{ + unsigned long max_dirty_pages = global_dirtyable_memory(); + + if (pages > max_dirty_pages) + return -EINVAL; + + return 0; +} + +static unsigned long bdi_ratio_from_pages(unsigned long pages) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long ratio; + + global_dirty_limits(&background_thresh, &dirty_thresh); + ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); + + return ratio; +} + static u64 bdi_get_bytes(unsigned int ratio) { unsigned long background_thresh; @@ -738,6 +761,20 @@ u64 bdi_get_max_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->max_ratio); } +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 
max_bytes) +{ + int ret; + unsigned long pages = max_bytes >> PAGE_SHIFT; + unsigned long max_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + max_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) { if (strict_limit > 1) -- Gitee From 9fab6c5fa8e52792a1c1de64ec661fdb1716cb99 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:03 -0800 Subject: [PATCH 10/24] mm: add knob /sys/class/bdi//max_bytes ANBZ: #7747 commit c56e049a5e401a177c7c9b39a3bcc973ff5cec0b upstream. This adds the new knob max_bytes to specify a dirty memory limit for the corresponding bdi. The specified bytes value is converted to a ratio. Link: https://lkml.kernel.org/r/20221119005215.3052436-9-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 9d08a0e4e3f9..16bfcd1ec589 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -214,6 +214,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi)); +} + +static ssize_t max_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_max_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(max_bytes); + static ssize_t stable_pages_required_show(struct device *dev, struct 
device_attribute *attr, char *buf) @@ -256,6 +284,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, &dev_attr_strict_limit.attr, NULL, -- Gitee From 80ef4672740f09e608f224ef2b308843efcf4090 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:04 -0800 Subject: [PATCH 11/24] mm: document /sys/class/bdi//max_bytes knob ANBZ: #7747 commit c354d9268d7825eb8643f658c5091079d4f11a4a upstream. This documents the new /sys/class/bdi//max_bytes knob. Link: https://lkml.kernel.org/r/20221119005215.3052436-10-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- Documentation/ABI/testing/sysfs-class-bdi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 9982fd0439ae..726c2e2c15cf 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -49,6 +49,20 @@ max_ratio (read-write) be trusted to play fair. (read-write) + +What: /sys/class/bdi//max_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given 'max_bytes' of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, a FUSE mount which cannot be + trusted to play fair, or a nbd device. 
+ + (read-write) + What: /sys/class/bdi//strict_limit Date: October 2022 Contact: Stefan Roesch -- Gitee From f5bd7e984d83d29f8108f29e964eb11edea6bce6 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:05 -0800 Subject: [PATCH 12/24] mm: add bdi_get_min_bytes() function ANBZ: #7747 commit 712c00d66a342a3ed375df41c3df7d3d2abad2c0 upstream. This adds a function to return the specified value for min_bytes. It converts the stored min_ratio of the bdi to the corresponding bytes value. This is an approximation as it is based on the value that is returned by global_dirty_limits(), which can change. The returned value can be different than the value when the min_bytes value was set. Link: https://lkml.kernel.org/r/20221119005215.3052436-11-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a0a910bcbb5e..a2ae403c66b5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -107,6 +107,7 @@ static inline unsigned long wb_stat_error(void) /* BDI ratio is expressed as part per 1000000 for finer granularity. 
*/ #define BDI_RATIO_SCALE 10000 +u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 03f779aeef1d..34e01ba85a31 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -756,6 +756,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) } EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_min_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); -- Gitee From ed2521c6687dac6b09567135702da003798c6fc8 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:06 -0800 Subject: [PATCH 13/24] mm: split off __bdi_set_min_ratio() function ANBZ: #7747 commit 8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 upstream. This splits off the __bdi_set_min_ratio() function from the bdi_set_min_ratio() function. The __bdi_set_min_ratio() function will also be called from the bdi_set_min_bytes() function, which will be introduced in the next patch. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-12-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/page-writeback.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 34e01ba85a31..ca57aa1598f6 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -701,7 +701,7 @@ static u64 bdi_get_bytes(unsigned int ratio) return bytes; } -int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { unsigned int delta; int ret = 0; @@ -747,6 +747,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); +} + int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { if (max_ratio > 100) -- Gitee From ec78ef8dd91b2dcdee98f0050e6918e80ad34adc Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:07 -0800 Subject: [PATCH 14/24] mm: add bdi_set_min_bytes() function ANBZ: #7747 commit 803c98050569850be5fd51a2025c67622de887d9 upstream. This introduces the bdi_set_min_bytes() function. The min_bytes function does not store the min_bytes value. Instead it converts the min_bytes value into the corresponding ratio value. 
Link: https://lkml.kernel.org/r/20221119005215.3052436-13-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index a2ae403c66b5..277b097575d5 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index ca57aa1598f6..e7284ce79b22 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -766,6 +766,20 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi) return bdi_get_bytes(bdi->min_ratio); } +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) +{ + int ret; + unsigned long pages = min_bytes >> PAGE_SHIFT; + unsigned long min_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + min_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_min_ratio(bdi, min_ratio); +} + u64 bdi_get_max_bytes(struct backing_dev_info *bdi) { return bdi_get_bytes(bdi->max_ratio); -- Gitee From 8c7c48225ae4a4f3b2f041811a03453fc62e7a8a Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:08 -0800 Subject: [PATCH 15/24] mm: add /sys/class/bdi//min_bytes knob ANBZ: #7747 commit 9c84819bd64ec15cb15d041c45ebe4725e9d4f3b upstream. 
bdi has two existing knobs to limit the amount of dirty memory: min_ratio and max_ratio. However the granularity of the knobs is limited and often it is more convenient to specify limits in terms of bytes. This change adds the min_bytes knob. It does not store the min_bytes value, instead it converts the min_bytes value to a ratio. The value is therefore more of an approximation than an absolute value. It also maintains the sum over all the bdi min_ratio values stored in the variable bdi_min_ratio. Link: https://lkml.kernel.org/r/20221119005215.3052436-14-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/backing-dev.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 16bfcd1ec589..621c65b665a4 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -214,6 +214,34 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t min_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi)); +} + +static ssize_t min_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_min_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(min_bytes); + static ssize_t max_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -284,6 +312,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, 
&dev_attr_strict_limit.attr, -- Gitee From eb1ca96c0911fe08b44e87e658994249dc7bd000 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:09 -0800 Subject: [PATCH 16/24] mm: document /sys/class/bdi//min_bytes knob ANBZ: #7747 commit 9c832a8d571784c998d0f9f5df480c62f7f3064c upstream. This documents the new /sys/class/bdi//min_bytes knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-15-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 726c2e2c15cf..5b1341ffbd6d 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -50,6 +50,21 @@ max_ratio (read-write) (read-write) +What: /sys/class/bdi//min_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_bytes' parameter allows assigning a minimum + percentage of the write-back cache to a particular device + expressed in bytes. + For example, this is useful for providing a minimum QoS. + + (read-write) + What: /sys/class/bdi//max_bytes Date: October 2022 Contact: Stefan Roesch -- Gitee From fc00b2d5f3559eae1494cca2bececdcf01502ed3 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:10 -0800 Subject: [PATCH 17/24] mm: add bdi_set_max_ratio_no_scale() function ANBZ: #7747 commit 4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 upstream. This introduces bdi_set_max_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob max_ratio_fine.
Link: https://lkml.kernel.org/r/20221119005215.3052436-16-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 277b097575d5..4299f45a4265 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index e7284ce79b22..7cec25f032f3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -735,6 +735,9 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra { int ret = 0; + if (max_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; + spin_lock_bh(&bdi_lock); if (bdi->min_ratio > max_ratio) { ret = -EINVAL; @@ -747,6 +750,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + return __bdi_set_max_ratio(bdi, max_ratio); +} + int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); @@ -754,9 +762,6 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) int bdi_set_max_ratio(struct 
backing_dev_info *bdi, unsigned int max_ratio) { - if (max_ratio > 100) - return -EINVAL; - return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); } EXPORT_SYMBOL(bdi_set_max_ratio); -- Gitee From 0f4c4a31cd3c72a9da1773c6e43b5c7d5bd31699 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:11 -0800 Subject: [PATCH 18/24] mm: add /sys/class/bdi//max_ratio_fine knob ANBZ: #7747 commit bca52dcbadc583f4db6435599c44a79f97293f06 upstream. This adds the max_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. Link: https://lkml.kernel.org/r/20221119005215.3052436-17-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 621c65b665a4..ac5901934845 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -214,6 +214,25 @@ static ssize_t max_ratio_store(struct device *dev, } BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) +static ssize_t max_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_max_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(max_ratio_fine, bdi->max_ratio) + static ssize_t min_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -312,6 +331,7 @@ static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, -- Gitee From 3e464e7ee155d88d1913072a6c9024b5d60d22da Mon Sep 17 00:00:00 2001 From: Stefan Roesch 
Date: Fri, 18 Nov 2022 16:52:12 -0800 Subject: [PATCH 19/24] mm: document /sys/class/bdi//max_ratio_fine knob ANBZ: #7747 commit 54790f30fea74247e2f38b4a632ee3dc2fe42d86 upstream. This documents the new /sys/class/bdi//max_ratio_fine knob. [akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-18-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- Documentation/ABI/testing/sysfs-class-bdi | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5b1341ffbd6d..6e23417e0dd9 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -50,6 +50,19 @@ max_ratio (read-write) (read-write) +What: /sys/class/bdi//max_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given value of the write-back cache. The value is given as part + of 1 million. This is useful in situations where we want to avoid + one device taking all or most of the write-back cache. For example + in case of an NFS mount that is prone to get stuck, or a FUSE mount + which cannot be trusted to play fair. + + (read-write) + What: /sys/class/bdi//min_bytes Date: October 2022 Contact: Stefan Roesch -- Gitee From 18fe93d9c72d152dd1d5d804eb502ade6912da8a Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:13 -0800 Subject: [PATCH 20/24] mm: add bdi_set_min_ratio_no_scale() function ANBZ: #7747 commit 2c44af4f2aaa260199f218f11920c406e688693c upstream. This introduces bdi_set_min_ratio_no_scale(). It uses the max granularity for the ratio. This function is used by the new sysfs knob min_ratio_fine.
Link: https://lkml.kernel.org/r/20221119005215.3052436-19-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- include/linux/backing-dev.h | 1 + mm/page-writeback.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 4299f45a4265..7ddb289ae659 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -111,6 +111,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi); u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7cec25f032f3..4e6e293c3de7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -706,6 +706,8 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra unsigned int delta; int ret = 0; + if (min_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); @@ -750,6 +752,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra return ret; } +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio); +} + int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) { return __bdi_set_max_ratio(bdi, max_ratio); -- Gitee From ab7dcfae07b7a65f9ef2b3ede2a5ac221585e820 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:14 -0800 Subject: [PATCH 
21/24] mm: add /sys/class/bdi//min_ratio_fine knob ANBZ: #7747 commit ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 upstream. This adds the min_ratio_fine knob. The knob specifies the values not based on 1 of 100, but instead 1 per million. Link: https://lkml.kernel.org/r/20221119005215.3052436-20-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/backing-dev.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index ac5901934845..d136be108dde 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -195,6 +195,25 @@ static ssize_t min_ratio_store(struct device *dev, } BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) +static ssize_t min_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_min_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(min_ratio_fine, bdi->min_ratio) + static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -330,6 +349,7 @@ static DEVICE_ATTR_RW(strict_limit); static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, + &dev_attr_min_ratio_fine.attr, &dev_attr_max_ratio.attr, &dev_attr_max_ratio_fine.attr, &dev_attr_min_bytes.attr, -- Gitee From 50fb13fdbb82a05cf1c43365251bf6212b6f1de2 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 18 Nov 2022 16:52:15 -0800 Subject: [PATCH 22/24] mm: document /sys/class/bdi//min_ratio_fine knob ANBZ: #7747 commit eba39236f18da7a50b6c51df5d902ee72c43e760 upstream. This documents the new /sys/class/bdi//min_ratio_fine knob.
[akpm@linux-foundation.org: fix htmldocs warnings] Link: https://lkml.kernel.org/r/20221119005215.3052436-21-shr@devkernel.io Signed-off-by: Stefan Roesch Cc: Chris Mason Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 6e23417e0dd9..70b1f151bd20 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -39,6 +39,21 @@ min_ratio (read-write) percentage of the write-back cache to a particular device. For example, this is useful for providing a minimum QoS. + (read-write) + +What: /sys/class/bdi//min_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio_fine' parameter allows assigning a minimum reserve + of the write-back cache to a particular device. The value is + expressed as part of 1 million. For example, this is useful for + providing a minimum QoS. + max_ratio (read-write) Allows limiting a particular device to use not more than the -- Gitee From 491498676b607fb637db805025c24cf894e8c1aa Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 19 Dec 2023 22:25:07 +0800 Subject: [PATCH 23/24] mm: fix arithmetic for bdi min_ratio ANBZ: #7747 commit e0646b7590084a5bf3b056d3ad871d9379d2c25a upstream. Since now bdi->min_ratio is part per million, fix the wrong arithmetic. Otherwise it will fail with -EINVAL when setting a reasonable min_ratio, as it tries to set min_ratio to (min_ratio * BDI_RATIO_SCALE) in percentage unit, which exceeds 100% anyway. 
# cat /sys/class/bdi/253\:0/min_ratio 0 # cat /sys/class/bdi/253\:0/max_ratio 100 # echo 1 > /sys/class/bdi/253\:0/min_ratio -bash: echo: write error: Invalid argument Link: https://lkml.kernel.org/r/20231219142508.86265-2-jefflexu@linux.alibaba.com Fixes: 8021fb3232f2 ("mm: split off __bdi_set_min_ratio() function") Signed-off-by: Jingbo Xu Reported-by: Joseph Qi Cc: Matthew Wilcox (Oracle) Cc: Stefan Roesch Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/page-writeback.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 4e6e293c3de7..8757c56e76c2 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -708,7 +708,6 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra if (min_ratio > 100 * BDI_RATIO_SCALE) return -EINVAL; - min_ratio *= BDI_RATIO_SCALE; spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { -- Gitee From 6eafc465043041c703a1c6c7293f14680647e85d Mon Sep 17 00:00:00 2001 From: Jingbo Xu Date: Tue, 19 Dec 2023 22:25:08 +0800 Subject: [PATCH 24/24] mm: fix arithmetic for max_prop_frac when setting max_ratio ANBZ: #7747 commit fa151a39a6879144b587f35c0dfcc15e1be9450f upstream. Since now bdi->max_ratio is part per million, fix the wrong arithmetic for max_prop_frac when setting max_ratio. Otherwise the miscalculated max_prop_frac will affect the incrementing of writeout completion count when max_ratio is not 100%. 
Link: https://lkml.kernel.org/r/20231219142508.86265-3-jefflexu@linux.alibaba.com Fixes: efc3e6ad53ea ("mm: split off __bdi_set_max_ratio() function") Signed-off-by: Jingbo Xu Cc: Joseph Qi Cc: Matthew Wilcox (Oracle) Cc: Stefan Roesch Signed-off-by: Andrew Morton Signed-off-by: Jingbo Xu --- mm/page-writeback.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 8757c56e76c2..d6778f02d013 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -744,7 +744,8 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra ret = -EINVAL; } else { bdi->max_ratio = max_ratio; - bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; + bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / + (100 * BDI_RATIO_SCALE); } spin_unlock_bh(&bdi_lock); -- Gitee