openEuler 22.03内核在打Kpatch的时候报错进程栈不可信

**【标题描述】在openEuler-22.03-LTS-SP1上如果有一个内核模块创建了一个线程，那么在打Kpatch的时候报错进程栈不可信**

**一、缺陷信息**

**内核信息：**
5.10.0-136.16.0.oe2203sp1.x86_64
**缺陷简述：**
在openEuler-22.03-LTS-SP1上如果有一个内核模块创建了一个线程，那么在打Kpatch的时候就会报错进程栈不可信

**【环境信息】**
硬件信息
- CPU：Intel(R) Xeon(R) CPU E5-2650 v4
- 内存
- 磁盘
      *-disk:0
        DELL PERC H330 Mini SSD 239GB
      *-disk:1
        DELL PERC H330 Mini SSD 239GB
软件信息
- OS版本：openEuler-22.03-LTS-SP1
- 内核信息：5.10.0-136.16.0.oe2203sp1.x86_64

**【问题复现步骤】**
1.编写内核模块如下

#include <linux/module.h>
    #include <linux/kernel.h>
    #include <linux/kthread.h>
    #include <linux/delay.h>

/* Handle of the kernel thread started in my_module_init() and stopped in my_module_exit(). */
static struct task_struct *thread;

/*
 * Body of the reproducer kernel thread.
 *
 * Logs the PID of the running task, then idles in ssleep(5) calls until
 * kthread_should_stop() reports that a stop was requested.
 *
 * @data: unused.
 *
 * Returns 0 once the loop has been left.
 */
static int thread_fn(void *data)
{
    printk(KERN_INFO "Kernel thread created with PID: %d\n", current->pid);

    for (;;) {
        /* Check for a stop request before every sleep. */
        if (kthread_should_stop())
            break;
        ssleep(5);
    }

    printk(KERN_INFO "Kernel thread stopping.\n");
    return 0;
}

/*
 * Module load entry point: start the "my_thread" kernel thread running
 * thread_fn() and remember its handle in the file-scope `thread` variable.
 *
 * Returns 0 on success, or the error code encoded in the pointer returned
 * by kthread_run() when the thread could not be created.
 */
static int __init my_module_init(void)
{
    printk(KERN_INFO "Loading my kernel module.\n");

    thread = kthread_run(thread_fn, NULL, "my_thread");
    if (!IS_ERR(thread))
        return 0;

    printk(KERN_ERR "Failed to create the thread.\n");
    return PTR_ERR(thread);
}

/*
 * Module unload entry point: stop the kernel thread recorded in `thread`,
 * if there is one, and log the result.
 */
static void __exit my_module_exit(void)
{
    printk(KERN_INFO "Unloading my kernel module.\n");

    /* Nothing to stop when no thread handle was recorded. */
    if (!thread)
        return;

    kthread_stop(thread);
    printk(KERN_INFO "Kernel thread stopped.\n");
}

/* Register the load and unload entry points of this module. */
module_init(my_module_init);
    module_exit(my_module_exit);

/* Module metadata: license, author, description and version strings. */
MODULE_LICENSE("GPL");
    MODULE_AUTHOR("Your Name");
    MODULE_DESCRIPTION("A simple Linux kernel module that prints its own PID.");
    MODULE_VERSION("1.0");

2.将这个模块编译后insmod进系统

obj-m += my_module.o

all:
            make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
            make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean

3.这个时候通过syscare去active一个patch就会报错(前提是你得有一个Kpatch)

命令举例：syscare active bdab329e-e46a-405a-b903-6328737f107f

**【实际结果】**
1.在机器上打Kpatch失败，报错内容如下：

[root ~]# syscare active bdab329e-e46a-405a-b903-6328737f107f
    Error: Operation failed

Caused by:
    0. Transaction "Active patch 'bdab329e-e46a-405a-b903-6328737f107f'" failed

Caused by:
        0: Driver: Failed to active patch "kernel-5.10.0-136.16.0.mt20230627.508.mt2203sp1/50801-1-1/vmlinux"
        1: Kpatch: Failed to write patch "kernel-5.10.0-136.16.0.mt20230627.508.mt2203sp1/50801-1-1/vmlinux" status
        2: Cannot write "/sys/kernel/livepatch/vmlinux_bdab329e_e46a_405a_b903_6328737f107f/enabled", invalid argument (os error 22)

**【其他相关附件信息】**
dmesg查看日志报错如下：

[1039663.250063] livepatch: my_thread:1103969 has an unreliable stack, ret=-22
    
不可信的栈内容如下：

[root]# cat /proc/1103969/stack
    [<0>] msleep+0x2a/0x40
    [<0>] thread_fn+0x33/0x48 [my_module]

我在系统中增加了一些打印后发现，arch_stack_walk_reliable函数返回了-22（即-EINVAL），并且是从下面展示的代码的第81行返回的。

*/
38  int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
39  			     void *cookie, struct task_struct *task)
40  {
41  	struct unwind_state state;
42  	struct pt_regs *regs;
43  	unsigned long addr;
44  
45  	for (unwind_start(&state, task, NULL, NULL);
46  	     !unwind_done(&state) && !unwind_error(&state);
47  	     unwind_next_frame(&state)) {
48  
49  		regs = unwind_get_entry_regs(&state, NULL);
50  		if (regs) {
51  			/* Success path for user tasks */
52  			if (user_mode(regs))
53  				return 0;
54  
55  			/*
56  			 * Kernel mode registers on the stack indicate an
57  			 * in-kernel interrupt or exception (e.g., preemption
58  			 * or a page fault), which can make frame pointers
59  			 * unreliable.
60  			 */
61  			if (IS_ENABLED(CONFIG_FRAME_POINTER))
62  				return -EINVAL;
63  		}
64  
65  		addr = unwind_get_return_address(&state);
66  
67  		/*
68  		 * A NULL or invalid return address probably means there's some
69  		 * generated code which __kernel_text_address() doesn't know
70  		 * about.
71  		 */
72  		if (!addr)
73  			return -EINVAL;
74  
75  		if (!consume_entry(cookie, addr))
76  			return -EINVAL;
77  	}
78  
79  	/* Check for stack corruption */
80  	if (unwind_error(&state))
81  		return -EINVAL;
82  
83  	return 0;
84  }

在这里返回的原因是：上面第47行的unwind_next_frame(&state)将state->error设置成了true，具体是在下面的第457行设置的（由于篇幅原因，下面函数的代码并未展示完全）。它似乎找不到这个栈帧对应的orc_entry（可能是因为这个模块的代码并不属于内核？）

422  bool unwind_next_frame(struct unwind_state *state)
423  {
424  	unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
425  	enum stack_type prev_type = state->stack_info.type;
426  	struct orc_entry *orc;
427  	bool indirect = false;
428  
429  	if (unwind_done(state))
430  		return false;
431  
432  	/* Don't let modules unload while we're reading their ORC data. */
433  	preempt_disable();
434  
435  	/* End-of-stack check for user tasks: */
436  	if (state->regs && user_mode(state->regs))
437  		goto the_end;
438  
439  	/*
440  	 * Find the orc_entry associated with the text address.
441  	 *
442  	 * For a call frame (as opposed to a signal frame), state->ip points to
443  	 * the instruction after the call.  That instruction's stack layout
444  	 * could be different from the call instruction's layout, for example
445  	 * if the call was to a noreturn function.  So get the ORC data for the
446  	 * call instruction itself.
447  	 */
448  	orc = orc_find(state->signal ? state->ip : state->ip - 1);
449  	if (!orc) {
450  		/*
451  		 * As a fallback, try to assume this code uses a frame pointer.
452  		 * This is useful for generated code, like BPF, which ORC
453  		 * doesn't know about.  This is just a guess, so the rest of
454  		 * the unwind is no longer considered reliable.
455  		 */
456  		orc = &orc_fp_entry;
457  		state->error = true;
458  	}

紧接着我又进一步追踪orc_find，一直跟到其中的orc_ftrace_find(ip)，想知道为什么找不到orc_entry。orc_find代码如下：（貌似下面第188行是去查找模块的orc_entry，感觉它原本应该在这里拿到结果，但实际上并没有正确返回）

149  static struct orc_entry *orc_find(unsigned long ip)
150  {
151  	static struct orc_entry *orc;
152  
153  	if (ip == 0)
154  		return &null_orc_entry;
155  
156  	/* For non-init vmlinux addresses, use the fast lookup table: */
157  	if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
158  		unsigned int idx, start, stop;
159  
160  		idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
161  
162  		if (unlikely((idx >= lookup_num_blocks-1))) {
163  			orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
164  				 idx, lookup_num_blocks, (void *)ip);
165  			return NULL;
166  		}
167  
168  		start = orc_lookup[idx];
169  		stop = orc_lookup[idx + 1] + 1;
170  
171  		if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
172  			     (__start_orc_unwind + stop > __stop_orc_unwind))) {
173  			orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
174  				 idx, lookup_num_blocks, start, stop, (void *)ip);
175  			return NULL;
176  		}
177  
178  		return __orc_find(__start_orc_unwind_ip + start,
179  				  __start_orc_unwind + start, stop - start, ip);
180  	}
181  
182  	/* vmlinux .init slow lookup: */
183  	if (init_kernel_text(ip))
184  		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
185  				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
186  
187  	/* Module lookup: */
188  	orc = orc_module_find(ip);
189  	if (orc)
190  		return orc;
191  
192  	return orc_ftrace_find(ip);
193  }

GVP openEuler/kernel

内容风险标识

评论 (10)

GVPopenEuler/kernel

内容风险标识