diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi index 5402bd74ba43b4303f95d026bcde1df86bbd737a..70b1f151bd207d406f938dcfccac56eef6ddab81 100644 --- a/Documentation/ABI/testing/sysfs-class-bdi +++ b/Documentation/ABI/testing/sysfs-class-bdi @@ -39,6 +39,21 @@ min_ratio (read-write) percentage of the write-back cache to a particular device. For example, this is useful for providing a minimum QoS. + (read-write) + +What: /sys/class/bdi//min_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_ratio_fine' parameter allows assigning a minimum reserve + of the write-back cache to a particular device. The value is + expressed as part of 1 million. For example, this is useful for + providing a minimum QoS. + max_ratio (read-write) Allows limiting a particular device to use not more than the @@ -48,6 +63,59 @@ max_ratio (read-write) mount that is prone to get stuck, or a FUSE mount which cannot be trusted to play fair. + (read-write) + +What: /sys/class/bdi//max_ratio_fine +Date: November 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given value of the write-back cache. The value is given as part + of 1 million. This is useful in situations where we want to avoid + one device taking all or most of the write-back cache. For example + in case of an NFS mount that is prone to get stuck, or a FUSE mount + which cannot be trusted to play fair. + + (read-write) + +What: /sys/class/bdi//min_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Under normal circumstances each device is given a part of the + total write-back cache that relates to its current average + writeout speed in relation to the other devices. + + The 'min_bytes' parameter allows assigning a minimum + percentage of the write-back cache to a particular device + expressed in bytes. + For example, this is useful for providing a minimum QoS. + + (read-write) + +What: /sys/class/bdi//max_bytes +Date: October 2022 +Contact: Stefan Roesch +Description: + Allows limiting a particular device to use not more than the + given 'max_bytes' of the write-back cache. This is useful in + situations where we want to avoid one device taking all or + most of the write-back cache. For example in case of an NFS + mount that is prone to get stuck, a FUSE mount which cannot be + trusted to play fair, or a nbd device. + + (read-write) + +What: /sys/class/bdi//strict_limit +Date: October 2022 +Contact: Stefan Roesch +Description: + Forces per-BDI checks for the share of given device in the write-back + cache even before the global background dirty limit is reached. This + is useful in situations where the global limit is much higher than + affordable for given relatively slow (or untrusted) device. Turning + strictlimit on has no visible effect if max_ratio is equal to 100%. + stable_pages_required (read-only) If set, the backing device requires that all pages comprising a write diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index fa7054409df0f9d62ee53f29de9fe7ace378e687..7ddb289ae65940ee8ee473810e21b0fbe910aafd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -104,8 +104,18 @@ static inline unsigned long wb_stat_error(void) #endif } +/* BDI ratio is expressed as part per 1000000 for finer granularity. */ +#define BDI_RATIO_SCALE 10000 + +u64 bdi_get_min_bytes(struct backing_dev_info *bdi); +u64 bdi_get_max_bytes(struct backing_dev_info *bdi); int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio); int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio); +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio); +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes); +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes); +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit); /* * Flags in backing_dev_info::capability diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 5c63c31af70f9f3c5ba9c3746e70de354b246dbd..d136be108dde7883d2ffd39a22707e2ad3b4651f 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -166,11 +166,11 @@ static ssize_t read_ahead_kb_store(struct device *dev, #define BDI_SHOW(name, expr) \ static ssize_t name##_show(struct device *dev, \ - struct device_attribute *attr, char *page) \ + struct device_attribute *attr, char *buf) \ { \ struct backing_dev_info *bdi = dev_get_drvdata(dev); \ \ - return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \ + return sysfs_emit(buf, "%lld\n", (long long)expr); \ } \ static DEVICE_ATTR_RW(name); @@ -193,7 +193,26 @@ static ssize_t min_ratio_store(struct device *dev, return ret; } -BDI_SHOW(min_ratio, bdi->min_ratio) +BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE) + +static ssize_t min_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_min_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(min_ratio_fine, bdi->min_ratio) static ssize_t max_ratio_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -212,23 +231,131 @@ static ssize_t max_ratio_store(struct device *dev, return ret; } -BDI_SHOW(max_ratio, bdi->max_ratio) +BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE) + +static ssize_t max_ratio_fine_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int ratio; + ssize_t ret; + + ret = kstrtouint(buf, 10, &ratio); + if (ret < 0) + return ret; + + ret = bdi_set_max_ratio_no_scale(bdi, ratio); + if (!ret) + ret = count; + + return ret; +} +BDI_SHOW(max_ratio_fine, bdi->max_ratio) + +static ssize_t min_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi)); +} + +static ssize_t min_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_min_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(min_bytes); + +static ssize_t max_bytes_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi)); +} + +static ssize_t max_bytes_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + u64 bytes; + ssize_t ret; + + ret = kstrtoull(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = bdi_set_max_bytes(bdi, bytes); + if (!ret) + ret = count; + + return ret; +} +DEVICE_ATTR_RW(max_bytes); static ssize_t stable_pages_required_show(struct device *dev, struct device_attribute *attr, - char *page) + char *buf) { dev_warn_once(dev, "the stable_pages_required attribute has been removed. Use the stable_writes queue attribute instead.\n"); - return snprintf(page, PAGE_SIZE-1, "%d\n", 0); + return sysfs_emit(buf, "%d\n", 0); } static DEVICE_ATTR_RO(stable_pages_required); +static ssize_t strict_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + unsigned int strict_limit; + ssize_t ret; + + ret = kstrtouint(buf, 10, &strict_limit); + if (ret < 0) + return ret; + + ret = bdi_set_strict_limit(bdi, strict_limit); + if (!ret) + ret = count; + + return ret; +} + +static ssize_t strict_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + !!(bdi->capabilities & BDI_CAP_STRICTLIMIT)); +} +static DEVICE_ATTR_RW(strict_limit); + static struct attribute *bdi_dev_attrs[] = { &dev_attr_read_ahead_kb.attr, &dev_attr_min_ratio.attr, + &dev_attr_min_ratio_fine.attr, &dev_attr_max_ratio.attr, + &dev_attr_max_ratio_fine.attr, + &dev_attr_min_bytes.attr, + &dev_attr_max_bytes.attr, &dev_attr_stable_pages_required.attr, + &dev_attr_strict_limit.attr, NULL, }; ATTRIBUTE_GROUPS(bdi_dev); @@ -1005,7 +1132,7 @@ static int bdi_init(struct backing_dev_info *bdi) kref_init(&bdi->refcnt); bdi->min_ratio = 0; - bdi->max_ratio = 100; + bdi->max_ratio = 100 * BDI_RATIO_SCALE; bdi->max_prop_frac = FPROP_FRAC_BASE; INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index fdf35195a0a409867891a01902dfe053c130dcae..d6778f02d01394e572e53b034aeddc9d08dec5b4 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -203,7 +204,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, min *= this_bw; min = div64_ul(min, tot_bw); } - if (max < 100) { + if (max < 100 * BDI_RATIO_SCALE) { max *= this_bw; max = div64_ul(max, tot_bw); } @@ -666,20 +667,64 @@ void wb_domain_exit(struct wb_domain *dom) */ static unsigned int bdi_min_ratio; -int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +static int bdi_check_pages_limit(unsigned long pages) +{ + unsigned long max_dirty_pages = global_dirtyable_memory(); + + if (pages > max_dirty_pages) + return -EINVAL; + + return 0; +} + +static unsigned long bdi_ratio_from_pages(unsigned long pages) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + unsigned long ratio; + + global_dirty_limits(&background_thresh, &dirty_thresh); + ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); + + return ratio; +} + +static u64 bdi_get_bytes(unsigned int ratio) +{ + unsigned long background_thresh; + unsigned long dirty_thresh; + u64 bytes; + + global_dirty_limits(&background_thresh, &dirty_thresh); + bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100; + + return bytes; +} + +static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) { + unsigned int delta; int ret = 0; + if (min_ratio > 100 * BDI_RATIO_SCALE) + return -EINVAL; + spin_lock_bh(&bdi_lock); if (min_ratio > bdi->max_ratio) { ret = -EINVAL; } else { - min_ratio -= bdi->min_ratio; - if (bdi_min_ratio + min_ratio < 100) { - bdi_min_ratio += min_ratio; - bdi->min_ratio += min_ratio; + if (min_ratio < bdi->min_ratio) { + delta = bdi->min_ratio - min_ratio; + bdi_min_ratio -= delta; + bdi->min_ratio = min_ratio; } else { - ret = -EINVAL; + delta = min_ratio - bdi->min_ratio; + if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) { + bdi_min_ratio += delta; + bdi->min_ratio = min_ratio; + } else { + ret = -EINVAL; + } } } spin_unlock_bh(&bdi_lock); @@ -687,11 +732,11 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) return ret; } -int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) +static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) { int ret = 0; - if (max_ratio > 100) + if (max_ratio > 100 * BDI_RATIO_SCALE) return -EINVAL; spin_lock_bh(&bdi_lock); @@ -699,14 +744,88 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio) ret = -EINVAL; } else { bdi->max_ratio = max_ratio; - bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / 100; + bdi->max_prop_frac = (FPROP_FRAC_BASE * max_ratio) / + (100 * BDI_RATIO_SCALE); } spin_unlock_bh(&bdi_lock); return ret; } + +int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio); +} + +int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + return __bdi_set_max_ratio(bdi, max_ratio); +} + +int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio) +{ + return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE); +} + +int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio) +{ + return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE); +} EXPORT_SYMBOL(bdi_set_max_ratio); +u64 bdi_get_min_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->min_ratio); +} + +int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) +{ + int ret; + unsigned long pages = min_bytes >> PAGE_SHIFT; + unsigned long min_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + min_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_min_ratio(bdi, min_ratio); +} + +u64 bdi_get_max_bytes(struct backing_dev_info *bdi) +{ + return bdi_get_bytes(bdi->max_ratio); +} + +int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes) +{ + int ret; + unsigned long pages = max_bytes >> PAGE_SHIFT; + unsigned long max_ratio; + + ret = bdi_check_pages_limit(pages); + if (ret) + return ret; + + max_ratio = bdi_ratio_from_pages(pages); + return __bdi_set_max_ratio(bdi, max_ratio); +} + +int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit) +{ + if (strict_limit > 1) + return -EINVAL; + + spin_lock_bh(&bdi_lock); + if (strict_limit) + bdi->capabilities |= BDI_CAP_STRICTLIMIT; + else + bdi->capabilities &= ~BDI_CAP_STRICTLIMIT; + spin_unlock_bh(&bdi_lock); + + return 0; +} + static unsigned long dirty_freerun_ceiling(unsigned long thresh, unsigned long bg_thresh) { @@ -769,15 +888,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc) fprop_fraction_percpu(&dom->completions, dtc->wb_completions, &numerator, &denominator); - wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100; + wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE); wb_thresh *= numerator; wb_thresh = div64_ul(wb_thresh, denominator); wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio); - wb_thresh += (thresh * wb_min_ratio) / 100; - if (wb_thresh > (thresh * wb_max_ratio) / 100) - wb_thresh = thresh * wb_max_ratio / 100; + wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE); + if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE)) + wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE); return wb_thresh; }