From e915cf34836e36ba8bbe0155766e31499d9994d6 Mon Sep 17 00:00:00 2001 From: Tony Lu Date: Wed, 19 Jun 2024 10:16:50 +0800 Subject: [PATCH] anolis: net/smc: introduce two sysctls for SMC-R memory limitation ANBZ: #11181 This introduces two sysctl knobs for SMC-R, which aim to limit the memory usage and footprint of SMC-R as a whole and per net namespace. Each knob takes three arguments, min / pressure / max, for more flexible control. The first argument, min, is the safe watermark for SMC-R; the second argument, pressure, marks the point where the SMC-R stack uses enough memory that it needs to take action to reduce memory usage; and the third argument, max, is the hard limit: once it is reached, no more connections can run over SMC-R and they should fall back to TCP. In this patch set, reducing memory usage (e.g. recycling LGR buffers) and SMC-D limitation are not implemented. Only the third argument has any effect; the first two arguments are left here for further work. Signed-off-by: Tony Lu --- Documentation/networking/smc-sysctl.rst | 30 ++++++++++++++++ include/net/netns/smc.h | 6 ++-- net/smc/af_smc.c | 7 ++++ net/smc/smc_core.c | 37 ++++++++++++++----- net/smc/smc_core.h | 47 +++++++++++++++++++++++++ net/smc/smc_sysctl.c | 37 +++++++++++++++++-- net/smc/smc_sysctl.h | 23 ++++++++++++ 7 files changed, 173 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index ea814e7ce73c..74592185ab7d 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -95,3 +95,33 @@ autosplit_size - INTEGER Autosplit_size ranges from 32KiB to 512MiB. Default: 128KiB + +global_mem - vector of 3 INTEGERs: min, pressure, max + min: below this number of bytes the whole SMC-R and SMC-D stack is not + bothered about its memory appetite. + + pressure: when amount of memory allocated by the whole SMC-R and SMC-D + exceeds this number of bytes, this doesn't do anything in this version. 
+ SMC-R will recycle memory in pressure mode, which is exited when memory + consumption falls under "min". + + max: number of bytes allowed for the whole SMC-R and SMC-D, otherwise + fallback to TCP. Also it takes effect when per net namespace limit reached. + + Defaults are calculated at boot time from amount of available + memory. + +mem - vector of 3 INTEGERs: min, pressure, max + min: below this number of bytes current net namespace SMC-R and SMC-D + stack is not bothered about its memory appetite. + + pressure: when amount of memory allocated by current net namespace of + SMC-R and SMC-D exceeds this number of bytes, this doesn't do anything + in this version. SMC-R will recycle memory in pressure mode, which is + exited when memory consumption falls under "min". + + max: number of bytes allowed for current net namespace SMC-R and SMC-D, + otherwise fallback to TCP. + + Defaults are calculated at boot time from amount of available + memory. diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 766a3ac94ed1..95399d6c571d 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -31,11 +31,9 @@ struct netns_smc { int sysctl_max_links_per_lgr; int sysctl_max_conns_per_lgr; CK_KABI_USE_SPLIT(1, unsigned int sysctl_autosplit_size) + CK_KABI_USE(2, 3, 4, long sysctl_mem[3]) + CK_KABI_USE(5, atomic_long_t memory_allocated) - CK_KABI_RESERVE(2) - CK_KABI_RESERVE(3) - CK_KABI_RESERVE(4) - CK_KABI_RESERVE(5) CK_KABI_RESERVE(6) CK_KABI_RESERVE(7) CK_KABI_RESERVE(8) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 881cd5504868..0b072ede2f2a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -229,6 +229,9 @@ static bool smc_hs_congested(const struct sock *sk) if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq)) return true; + if (smc_net_mem_exceeded(smc)) + return true; + if (!smc_sock_should_select_smc(smc, NULL)) return true; @@ -1728,6 +1731,10 @@ static int __smc_connect(struct smc_sock *smc) return 
smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC, version); + if (smc_net_mem_exceeded(smc)) + return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, + version); + ini = kzalloc(sizeof(*ini), GFP_KERNEL); if (!ini) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 94be1ba17ee1..017c5f4a4114 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -42,6 +42,9 @@ #define SMC_RTOKEN_UNINITIALIZED -1 +long sysctl_global_mem[3] __read_mostly; +atomic_long_t smc_global_memory_allocated; /* global smcr memory allocated */ + struct smc_lgr_list smc_lgr_list = { /* established link groups */ .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), .list = LIST_HEAD_INIT(smc_lgr_list.list), @@ -1303,6 +1306,7 @@ static void smc_buf_unuse(struct smc_connection *conn, struct smc_link_group *lgr) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + struct net *net = sock_net(&smc->sk); bool is_smcd = lgr->is_smcd; int bufsize; @@ -1313,6 +1317,7 @@ static void smc_buf_unuse(struct smc_connection *conn, else WRITE_ONCE(conn->sndbuf_desc->used, 0); SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize); + smc_net_mem_allocated_sub(net, conn->sndbuf_desc->len); } if (conn->rmb_desc) { bufsize = conn->rmb_desc->len; @@ -1324,6 +1329,7 @@ static void smc_buf_unuse(struct smc_connection *conn, WRITE_ONCE(conn->rmb_desc->used, 0); } SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize); + smc_net_mem_allocated_sub(net, conn->rmb_desc->len); } } @@ -1486,6 +1492,7 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, __free_pages(buf_desc->pages, buf_desc->order); else if (buf_desc->is_vm && buf_desc->cpu_addr) vfree(buf_desc->cpu_addr); + smc_global_mem_allocated_sub(buf_desc->len); kfree(buf_desc); } @@ -1499,6 +1506,7 @@ static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, } else { kfree(buf_desc->cpu_addr); } + smc_global_mem_allocated_sub(buf_desc->len); 
kfree(buf_desc); } @@ -2524,15 +2532,9 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, return buf_desc; } -static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) +int smc_get_bufsize(struct smc_sock *smc, bool is_rmb) { - struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); - struct smc_connection *conn = &smc->conn; - struct smc_link_group *lgr = conn->lgr; - struct list_head *buf_list; - int bufsize, bufsize_comp; - struct rw_semaphore *lock; /* lock buffer list */ - bool is_dgraded = false; + int bufsize; if (smc_sock_is_inet_sock(&smc->sk)) { if (is_rmb) @@ -2551,6 +2553,22 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) /* use socket send buffer size (w/o overhead) as start value */ bufsize = smc->sk.sk_sndbuf; } + + return bufsize; +} + +static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) +{ + struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM); + struct smc_connection *conn = &smc->conn; + struct smc_link_group *lgr = conn->lgr; + struct net *net = sock_net(&smc->sk); + struct list_head *buf_list; + int bufsize, bufsize_comp; + struct rw_semaphore *lock; /* lock buffer list */ + bool is_dgraded = false; + + bufsize = smc_get_bufsize(smc, is_rmb); for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb); bufsize_comp >= 0; bufsize_comp--) { if (is_rmb) { @@ -2568,6 +2586,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) buf_desc->is_dma_need_sync = 0; SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize); SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb); + smc_net_mem_allocated_add(net, buf_desc->len); break; /* found reusable slot */ } @@ -2588,6 +2607,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb); SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize); + smc_net_mem_allocated_add(net, buf_desc->len); + 
smc_global_mem_allocated_add(buf_desc->len); buf_desc->used = 1; down_write(lock); smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c81eb59b0df1..df77329c7713 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -37,6 +37,10 @@ */ #define SMC_MAX_TOKEN_LOCAL 255 + +extern long sysctl_global_mem[3]; +extern atomic_long_t smc_global_memory_allocated; + struct smc_lgr_list { /* list of link group definition */ struct list_head list; spinlock_t lock; /* protects list of link groups */ @@ -628,6 +632,7 @@ void smc_smcd_terminate_all(struct smcd_dev *dev); void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smcd_buf_attach(struct smc_sock *smc); +int smc_get_bufsize(struct smc_sock *smc, bool is_rmb); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link, struct smc_clc_msg_accept_confirm *clc); @@ -693,4 +698,46 @@ static inline struct smc_link_group *smc_get_lgr(struct smc_link *link) { return link->lgr; } + +static inline long smc_net_mem_allocated(struct net *net) +{ + return atomic_long_read(&net->smc.memory_allocated); +} + +static inline long smc_global_mem_allocated(void) +{ + return atomic_long_read(&smc_global_memory_allocated); +} + +static inline void smc_net_mem_allocated_add(struct net *net, int val) +{ + atomic_long_add(val, &net->smc.memory_allocated); +} + +static inline void smc_net_mem_allocated_sub(struct net *net, int val) +{ + atomic_long_sub(val, &net->smc.memory_allocated); +} + +static inline void smc_global_mem_allocated_add(int val) +{ + atomic_long_add(val, &smc_global_memory_allocated); +} + +static inline void smc_global_mem_allocated_sub(int val) +{ + atomic_long_sub(val, &smc_global_memory_allocated); +} + +static inline bool smc_net_mem_exceeded(struct smc_sock *smc) +{ + int bufsize = smc_get_bufsize(smc, true) + 
smc_get_bufsize(smc, false); + long global_mem_allocated = smc_global_mem_allocated() + bufsize; + struct net *net = sock_net(&smc->sk); + long net_mem_allocated = smc_net_mem_allocated(net) + bufsize; + + return net_mem_allocated >= READ_ONCE(net->smc.sysctl_mem[2]) || + global_mem_allocated >= READ_ONCE(sysctl_global_mem[2]); +} + #endif diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 44784aea7acf..43718f766b53 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -33,6 +33,16 @@ static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static unsigned int autosplit_size_min = SZ_32K; static unsigned int autosplit_size_max = SZ_512M; /* max size of snd/recv buffer */ +static int proc_global_mem(struct ctl_table *ctl, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tbl = { .maxlen = sizeof(sysctl_global_mem) }; + + tbl.data = &sysctl_global_mem; + return proc_doulongvec_minmax(&tbl, write, buffer, lenp, ppos); +} + static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -125,6 +135,19 @@ static struct ctl_table smc_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "mem", + .data = &init_net.smc.sysctl_mem, + .maxlen = sizeof(init_net.smc.sysctl_mem), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "global_mem", + .maxlen = sizeof(sysctl_global_mem), + .mode = 0644, + .proc_handler = proc_global_mem, + }, { } }; @@ -140,8 +163,17 @@ int __net_init smc_sysctl_net_init(struct net *net) if (!table) goto err_alloc; - for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++) - table[i].data += (void *)net - (void *)&init_net; + for (i = 0; i < ARRAY_SIZE(smc_table) - 1; i++) { + if (table[i].data) { + /* Calcute current net data. */ + table[i].data += (void *)net - (void *)&init_net; + } else { + /* Enties without data are global and read-only, + * handle in their own proc handle. 
+ */ + table[i].mode &= ~0222; + } + } } net->smc.smc_hdr = register_net_sysctl(net, "net/smc", table); @@ -160,6 +192,7 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; net->smc.sysctl_autosplit_size = SZ_128K; + smc_mem_init(net); return 0; err_reg: diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index eb2465ae1e15..dbf896e7fb6d 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -13,6 +13,28 @@ #ifndef _SMC_SYSCTL_H #define _SMC_SYSCTL_H +#include + +static inline void smc_mem_init(struct net *net) +{ + /* Set memory limits to no more than 1/4 the whole memory */ + unsigned long mem_min = nr_free_buffer_pages() << (PAGE_SHIFT - 2); + + mem_min = max_t(unsigned long, mem_min, SMC_BUF_MIN_SIZE * 2); /* one conn at least */ + + if (net_eq(net, &init_net)) { + atomic_long_set(&smc_global_memory_allocated, 0); + sysctl_global_mem[0] = mem_min; /* 25% */ + sysctl_global_mem[1] = mem_min * 2; /* 50% */ + sysctl_global_mem[2] = mem_min * 3; /* 75% */ + } + + atomic_long_set(&net->smc.memory_allocated, 0); + net->smc.sysctl_mem[0] = mem_min; /* 25% */ + net->smc.sysctl_mem[1] = mem_min * 2; /* 50% */ + net->smc.sysctl_mem[2] = mem_min * 3; /* 75% */ +} + #ifdef CONFIG_SYSCTL int __net_init smc_sysctl_net_init(struct net *net); @@ -25,6 +47,7 @@ static inline int smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; + smc_mem_init(net); return 0; } -- Gitee