From 4ddd9970d2a4791cebee7932aa39a7dd51a70e7d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 31 Oct 2023 02:08:54 +0000 Subject: [PATCH 1/4] seq_buf: Add seq_buf_terminate() API mainline inclusion from mainline-v5.13-rc1 commit f2616c772c768485de18e7fcb2816bcdcd098339 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f2616c772c768485de18e7fcb2816bcdcd098339 ------------------------------------------------- In the case that the seq_buf buffer needs to be printed directly, add a way to make sure that the buffer is safe to read by forcing a nul terminating character at the end of the string, or the last byte of the buffer if the string has overflowed. Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Lu Jialin (cherry picked from commit 87f8b7b404acc3c2f8485cea0d1739632a4ca573) --- include/linux/seq_buf.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9d6c28cc4d8f..5b31c5147969 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -71,6 +71,31 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) return min(s->len, s->size); } +/** + * seq_buf_terminate - Make sure buffer is nul terminated + * @s: the seq_buf descriptor to terminate. + * + * This makes sure that the buffer in @s is nul terminated and + * safe to read as a string. + * + * Note, if this is called when the buffer has overflowed, then + * the last byte of the buffer is zeroed, and the len will still + * point passed it. + * + * After this function is called, s->buffer is safe to use + * in string operations. + */ +static inline void seq_buf_terminate(struct seq_buf *s) +{ + if (WARN_ON(s->size == 0)) + return; + + if (seq_buf_buffer_left(s)) + s->buffer[s->len] = 0; + else + s->buffer[s->size - 1] = 0; +} + /** * seq_buf_get_buf - get buffer to write arbitrary data to * @s: the seq_buf handle -- Gitee From f61aba717d43b068b6b4256f0048e9158b44102e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 31 Oct 2023 02:08:55 +0000 Subject: [PATCH 2/4] seq_buf: Add seq_buf_do_printk() helper mainline inclusion from mainline-v6.4-rc1 commit 96928d9032a7c34f12a88df879665562bcebf59a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=96928d9032a7c34f12a88df879665562bcebf59a ------------------------------------------------- Sometimes we use seq_buf to format a string buffer, which we then pass to printk(). However, in certain situations the seq_buf string buffer can get too big, exceeding the PRINTKRB_RECORD_MAX bytes limit, and causing printk() to truncate the string. Add a new seq_buf helper. This helper prints the seq_buf string buffer line by line, using \n as a delimiter, rather than passing the whole string buffer to printk() at once. Link: https://lkml.kernel.org/r/20230415100110.1419872-1-senozhatsky@chromium.org Cc: Andrew Morton Signed-off-by: Sergey Senozhatsky Reviewed-by: Petr Mladek Tested-by: Yosry Ahmed Signed-off-by: Steven Rostedt (Google) Signed-off-by: Lu Jialin (cherry picked from commit b5e03080bb1a698c45d432ab2404c893bc8e56a9) --- include/linux/seq_buf.h | 2 ++ lib/seq_buf.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 5b31c5147969..515d7fcb9634 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -159,4 +159,6 @@ extern int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); #endif +void seq_buf_do_printk(struct seq_buf *s, const char *lvl); + #endif /* _LINUX_SEQ_BUF_H */ diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 6dafde851333..5ffae44c1055 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -93,6 +93,38 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) } EXPORT_SYMBOL_GPL(seq_buf_printf); +/** + * seq_buf_do_printk - printk seq_buf line by line + * @s: seq_buf descriptor + * @lvl: printk level + * + * printk()-s a multi-line sequential buffer line by line. The function + * makes sure that the buffer in @s is nul terminated and safe to read + * as a string. + */ +void seq_buf_do_printk(struct seq_buf *s, const char *lvl) +{ + const char *start, *lf; + + if (s->size == 0 || s->len == 0) + return; + + seq_buf_terminate(s); + + start = s->buffer; + while ((lf = strchr(start, '\n'))) { + int len = lf - start + 1; + + printk("%s%.*s", lvl, len, start); + start = ++lf; + } + + /* No trailing LF */ + if (start < s->buffer + s->len) + printk("%s%s\n", lvl, start); +} +EXPORT_SYMBOL_GPL(seq_buf_do_printk); + #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments -- Gitee From 5a2ccc53b0ee71cb711c25bdf37acb6a649a94b7 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 31 Oct 2023 02:08:56 +0000 Subject: [PATCH 3/4] memcg: use seq_buf_do_printk() with mem_cgroup_print_oom_meminfo() mainline inclusion from mainline-v6.5-rc1 commit 5b42360c73b0679505dac6ec44d234cfec61120c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=5b42360c73b0679505dac6ec44d234cfec61120c ------------------------------------------------- Currently, we format all the memcg stats into a buffer in mem_cgroup_print_oom_meminfo() and use pr_info() to dump it to the logs. However, this buffer is large in size. Although it is currently working as intended, ther is a dependency between the memcg stats buffer and the printk record size limit. If we add more stats in the future and the buffer becomes larger than the printk record size limit, or if the prink record size limit is reduced, the logs may be truncated. It is safer to use seq_buf_do_printk(), which will automatically break up the buffer at line breaks and issue small printk() calls. Refactor the code to move the seq_buf from memory_stat_format() to its callers, and use seq_buf_do_printk() to print the seq_buf in mem_cgroup_print_oom_meminfo(). Link: https://lkml.kernel.org/r/20230428132406.2540811-2-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Michal Hocko Reviewed-by: Sergey Senozhatsky Acked-by: Shakeel Butt Reviewed-by: Muchun Song Cc: Johannes Weiner Cc: Michal Hocko Cc: Petr Mladek Cc: Roman Gushchin Cc: Steven Rostedt (Google) Signed-off-by: Andrew Morton Conflict: mm/memcontrol.c Signed-off-by: Lu Jialin (cherry picked from commit 9b6aa892b8eab301e0dbf0c9946be04b8fddac4b) --- mm/memcontrol.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d0e9806cd591..45632fbaa56b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1514,13 +1514,10 @@ static int __init memory_stats_init(void) } pure_initcall(memory_stats_init); -static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize) +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { - struct seq_buf s; int i; - seq_buf_init(&s, buf, bufsize); - /* * Provide statistics on the state of the memory subsystem as * well as cumulative event counters that show past behavior. @@ -1538,47 +1535,47 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize) size = memcg_page_state(memcg, memory_stats[i].idx); size *= memory_stats[i].ratio; - seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size); + seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size); if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) { size = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B); - seq_buf_printf(&s, "slab %llu\n", size); + seq_buf_printf(s, "slab %llu\n", size); } } /* Accumulated memory events */ - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGFAULT), memcg_events(memcg, PGFAULT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGMAJFAULT), memcg_events(memcg, PGMAJFAULT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGREFILL), memcg_events(memcg, PGREFILL)); - seq_buf_printf(&s, "pgscan %lu\n", + seq_buf_printf(s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT)); - seq_buf_printf(&s, "pgsteal %lu\n", + seq_buf_printf(s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGACTIVATE), memcg_events(memcg, PGACTIVATE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGDEACTIVATE), memcg_events(memcg, PGDEACTIVATE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGLAZYFREE), memcg_events(memcg, PGLAZYFREE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGLAZYFREED), memcg_events(memcg, PGLAZYFREED)); #ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), + seq_buf_printf(s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), memcg_events(memcg, THP_FAULT_ALLOC)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), + seq_buf_printf(s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), memcg_events(memcg, THP_COLLAPSE_ALLOC)); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ /* The above should easily fit into one page */ - WARN_ON_ONCE(seq_buf_has_overflowed(&s)); + WARN_ON_ONCE(seq_buf_has_overflowed(s)); } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -1617,6 +1614,7 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) /* Use static buffer, for the caller is holding oom_lock. */ static char buf[PAGE_SIZE]; static char pathbuf[PATH_MAX]; + struct seq_buf s; lockdep_assert_held(&oom_lock); @@ -1639,8 +1637,9 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) pr_info("Memory cgroup stats for "); pr_cont_cgroup_path(memcg->css.cgroup); pr_cont(":"); - memory_stat_format(memcg, buf, sizeof(buf)); - pr_info("%s", buf); + seq_buf_init(&s, buf, sizeof(buf)); + memory_stat_format(memcg, &s); + seq_buf_do_printk(&s, KERN_INFO); mem_cgroup_print_memfs_info(memcg, pathbuf, NULL); } @@ -7031,10 +7030,12 @@ static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + struct seq_buf s; if (!buf) return -ENOMEM; - memory_stat_format(memcg, buf, PAGE_SIZE); + seq_buf_init(&s, buf, PAGE_SIZE); + memory_stat_format(memcg, &s); seq_puts(m, buf); kfree(buf); return 0; -- Gitee From 392211789103226273b3c50e30cd7bd9590f46c6 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Tue, 31 Oct 2023 02:08:57 +0000 Subject: [PATCH 4/4] memcg: dump memory.stat during cgroup OOM for v1 mainline inclusion from mainline-v6.5-rc1 commit dddb44ffa0d59c8a3f2a5cb9690ccebe3150810c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dddb44ffa0d59c8a3f2a5cb9690ccebe3150810c ------------------------------------------------- Patch series "memcg: OOM log improvements", v2. This short patch series brings back some cgroup v1 stats in OOM logs that were unnecessarily changed before. It also makes memcg OOM logs less reliant on printk() internals. This patch (of 2): Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") made sure we dump all the stats in memory.stat during a cgroup OOM, but it also introduced a slight behavioral change. The code used to print the non-hierarchical v1 cgroup stats for the entire cgroup subtree, now it only prints the v2 cgroup stats for the cgroup under OOM. For cgroup v1 users, this introduces a few problems: (a) The non-hierarchical stats of the memcg under OOM are no longer shown. (b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer shown. (c) We show the list of cgroup v2 stats, even in cgroup v1. This list of stats is not tracked with v1 in mind. While most of the stats seem to be working on v1, there may be some stats that are not fully or correctly tracked. Although OOM log is not set in stone, we should not change it for no reason. When upgrading the kernel version to a version including commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM"), these behavioral changes are noticed in cgroup v1. The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") separated stats formatting from stats display for v2, to reuse the stats formatting in the OOM logs. Do the same for v1. Move the v2 specific formatting from memory_stat_format() to memcg_stat_format(), add memcg1_stat_format() for v1, and make memory_stat_format() select between them based on cgroup version. Since memory_stat_show() now works for both v1 & v2, drop memcg_stat_show(). Link: https://lkml.kernel.org/r/20230428132406.2540811-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20230428132406.2540811-3-yosryahmed@google.com Signed-off-by: Yosry Ahmed Acked-by: Shakeel Butt Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Muchun Song Cc: Petr Mladek Cc: Roman Gushchin Cc: Sergey Senozhatsky Cc: Steven Rostedt (Google) Cc: Michal Hocko Signed-off-by: Andrew Morton Conflict: mm/memcontrol.c Signed-off-by: Lu Jialin (cherry picked from commit 197ebe6a3f14bf77a897f390d0dc18ce5a12d3ad) --- mm/memcontrol.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 45632fbaa56b..561d4a157798 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1514,7 +1514,7 @@ static int __init memory_stats_init(void) } pure_initcall(memory_stats_init); -static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { int i; @@ -1578,6 +1578,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) WARN_ON_ONCE(seq_buf_has_overflowed(s)); } +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s); + +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) +{ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + memcg_stat_format(memcg, s); + else + memcg1_stat_format(memcg, s); + WARN_ON_ONCE(seq_buf_has_overflowed(s)); +} + #define K(x) ((x) << (PAGE_SHIFT-10)) /** * mem_cgroup_print_oom_context: Print OOM information relevant to @@ -4181,9 +4192,8 @@ static const unsigned int memcg1_events[] = { PGMAJFAULT, }; -static int memcg_stat_show(struct seq_file *m, void *v) +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { - struct mem_cgroup *memcg = mem_cgroup_from_seq(m); unsigned long memory, memsw; struct mem_cgroup *mi; unsigned int i; @@ -4202,15 +4212,15 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == NR_ANON_THPS) nr *= HPAGE_PMD_NR; #endif - seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); + seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]), + seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]), memcg_events_local(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "%s %lu\n", lru_list_name(i), + seq_buf_printf(s, "%s %lu\n", lru_list_name(i), memcg_page_state_local(memcg, NR_LRU_BASE + i) * PAGE_SIZE); @@ -4220,11 +4230,11 @@ static int memcg_stat_show(struct seq_file *m, void *v) memory = min(memory, READ_ONCE(mi->memory.max)); memsw = min(memsw, READ_ONCE(mi->memsw.max)); } - seq_printf(m, "hierarchical_memory_limit %llu\n", - (u64)memory * PAGE_SIZE); + seq_buf_printf(s, "hierarchical_memory_limit %llu\n", + (u64)memory * PAGE_SIZE); if (do_memsw_account()) - seq_printf(m, "hierarchical_memsw_limit %llu\n", - (u64)memsw * PAGE_SIZE); + seq_buf_printf(s, "hierarchical_memsw_limit %llu\n", + (u64)memsw * PAGE_SIZE); for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { unsigned long nr; @@ -4236,17 +4246,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == NR_ANON_THPS) nr *= HPAGE_PMD_NR; #endif - seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], + seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i], (u64)nr * PAGE_SIZE); } for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "total_%s %llu\n", + seq_buf_printf(s, "total_%s %llu\n", vm_event_name(memcg1_events[i]), (u64)memcg_events(memcg, memcg1_events[i])); for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "total_%s %llu\n", lru_list_name(i), + seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE); @@ -4263,12 +4273,10 @@ static int memcg_stat_show(struct seq_file *m, void *v) anon_cost += mz->lruvec.anon_cost; file_cost += mz->lruvec.file_cost; } - seq_printf(m, "anon_cost %lu\n", anon_cost); - seq_printf(m, "file_cost %lu\n", file_cost); + seq_buf_printf(s, "anon_cost %lu\n", anon_cost); + seq_buf_printf(s, "file_cost %lu\n", file_cost); } #endif - - return 0; } static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, @@ -5512,6 +5520,8 @@ static ssize_t wb_blkio_write(struct kernfs_open_file *of, char *buf, } #endif +static int memory_stat_show(struct seq_file *m, void *v); + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -5544,7 +5554,7 @@ static struct cftype mem_cgroup_legacy_files[] = { }, { .name = "stat", - .seq_show = memcg_stat_show, + .seq_show = memory_stat_show, }, { .name = "force_empty", -- Gitee