【标题描述】在openEuler-22.03-LTS-SP1上如果有一个内核模块创建了一个线程,那么在打Kpatch的时候报错进程栈不可信
一、缺陷信息
内核信息:
5.10.0-136.16.0.oe2203sp1.x86_64
缺陷简述:
在openEuler-22.03-LTS-SP1上如果有一个内核模块创建了一个线程,那么在打Kpatch的时候就会报错进程栈不可信
【环境信息】
硬件信息
【问题复现步骤】
1.编写内核模块如下
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/delay.h>
/* Handle of the kernel thread: assigned from kthread_run() in my_module_init() and handed to kthread_stop() in my_module_exit(). */
static struct task_struct *thread;
/*
 * Entry point of the "my_thread" kernel thread.
 *
 * Logs the PID of the running task once, then sleeps in 5-second steps
 * until kthread_should_stop() reports true, and logs again before exiting.
 *
 * @data: not used by this function.
 *
 * Return: always 0.
 */
static int thread_fn(void *data)
{
	printk(KERN_INFO "Kernel thread created with PID: %d\n", current->pid);

	for (;;) {
		/* Leave the loop as soon as a stop has been requested. */
		if (kthread_should_stop())
			break;

		ssleep(5);
	}

	printk(KERN_INFO "Kernel thread stopping.\n");
	return 0;
}
/*
 * Module load handler.
 *
 * Logs a load message and starts thread_fn() in a kernel thread named
 * "my_thread", keeping the returned handle in the file-scope `thread`.
 *
 * Return: 0 on success; otherwise the error code carried by the pointer
 * that kthread_run() returned (extracted with PTR_ERR()).
 */
static int __init my_module_init(void)
{
	printk(KERN_INFO "Loading my kernel module.\n");

	thread = kthread_run(thread_fn, NULL, "my_thread");
	if (!IS_ERR(thread))
		return 0;

	/* kthread_run() handed back an error pointer: report and propagate it. */
	printk(KERN_ERR "Failed to create the thread.\n");
	return PTR_ERR(thread);
}
/*
 * Module unload handler.
 *
 * Logs an unload message and, when a thread handle is recorded in the
 * file-scope `thread`, stops that thread via kthread_stop() and logs that
 * it has been stopped.
 */
static void __exit my_module_exit(void)
{
	printk(KERN_INFO "Unloading my kernel module.\n");

	/* Nothing to stop when no thread handle was recorded. */
	if (!thread)
		return;

	kthread_stop(thread);
	printk(KERN_INFO "Kernel thread stopped.\n");
}
/* Register my_module_init()/my_module_exit() as the module's init and exit handlers. */
module_init(my_module_init);
module_exit(my_module_exit);
/* Module metadata strings. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Your Name");
/* NOTE(review): the description says the module prints "its own PID"; the code logs the PID of the thread it creates (current->pid inside thread_fn()). */
MODULE_DESCRIPTION("A simple Linux kernel module that prints its own PID.");
MODULE_VERSION("1.0");
2.将这个模块编译后insmod进系统
# Out-of-tree build of my_module.ko against the build tree of the
# currently running kernel (/lib/modules/<release>/build).
obj-m += my_module.o

# "all" and "clean" are commands, not files.
.PHONY: all clean

# Recipe lines must start with a TAB; $(MAKE) is used for the recursive
# invocation so that flags and the jobserver are passed on to kbuild.
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules

clean:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
3.这个时候通过syscare去active一个patch就会报错(前提是你得有一个Kpatch)
命令举例:syscare active bdab329e-e46a-405a-b903-6328737f107f
【实际结果】
1.报错内容如下
导致在机器上打Kpatch失败,报错内容如下:
[root ~]# syscare active bdab329e-e46a-405a-b903-6328737f107f
Error: Operation failed
Caused by:
0. Transaction "Active patch 'bdab329e-e46a-405a-b903-6328737f107f'" failed
Caused by:
0: Driver: Failed to active patch "kernel-5.10.0-136.16.0.mt20230627.508.mt2203sp1/50801-1-1/vmlinux"
1: Kpatch: Failed to write patch "kernel-5.10.0-136.16.0.mt20230627.508.mt2203sp1/50801-1-1/vmlinux" status
2: Cannot write "/sys/kernel/livepatch/vmlinux_bdab329e_e46a_405a_b903_6328737f107f/enabled", invalid argument (os error 22)
【其他相关附件信息】
dmesg查看日志报错如下:
[1039663.250063] livepatch: my_thread:1103969 has an unreliable stack, ret=-22
不可信的栈内容如下:
[root]# cat /proc/1103969/stack
[<0>] msleep+0x2a/0x40
[<0>] thread_fn+0x33/0x48 [my_module]
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
能否在最后检查的函数里加个维测看看具体挂在哪个条件
用例无法复现,可能存在其他影响因素未发现?
插入创建内核线程的模块后,热补丁可以激活成功:
内核日志:
内核线程栈如下:
版本信息:(暂无sp1环境,使用sp3代替)
用sp1尝试未复现
我在系统中增加了一些打印后发现,它在arch_stack_walk_reliable函数中返回了-22(即-EINVAL),并且是从下面展示的代码中第81行返回的。
*/
38 int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
39 void *cookie, struct task_struct *task)
40 {
41 struct unwind_state state;
42 struct pt_regs *regs;
43 unsigned long addr;
44
45 for (unwind_start(&state, task, NULL, NULL);
46 !unwind_done(&state) && !unwind_error(&state);
47 unwind_next_frame(&state)) {
48
49 regs = unwind_get_entry_regs(&state, NULL);
50 if (regs) {
51 /* Success path for user tasks */
52 if (user_mode(regs))
53 return 0;
54
55 /*
56 * Kernel mode registers on the stack indicate an
57 * in-kernel interrupt or exception (e.g., preemption
58 * or a page fault), which can make frame pointers
59 * unreliable.
60 */
61 if (IS_ENABLED(CONFIG_FRAME_POINTER))
62 return -EINVAL;
63 }
64
65 addr = unwind_get_return_address(&state);
66
67 /*
68 * A NULL or invalid return address probably means there's some
69 * generated code which __kernel_text_address() doesn't know
70 * about.
71 */
72 if (!addr)
73 return -EINVAL;
74
75 if (!consume_entry(cookie, addr))
76 return -EINVAL;
77 }
78
79 /* Check for stack corruption */
80 if (unwind_error(&state))
81 return -EINVAL;
82
83 return 0;
84 }
在这里返回的原因是:上面第47行的unwind_next_frame(&state)将state->error设置成了true,具体是在下面的第457行设置的(由于篇幅原因,下面函数的代码并未展示完全)。它似乎找不到这个栈帧对应的orc_entry(可能是因为这个模块的代码并不属于内核?)
422 bool unwind_next_frame(struct unwind_state *state)
423 {
424 unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp;
425 enum stack_type prev_type = state->stack_info.type;
426 struct orc_entry *orc;
427 bool indirect = false;
428
429 if (unwind_done(state))
430 return false;
431
432 /* Don't let modules unload while we're reading their ORC data. */
433 preempt_disable();
434
435 /* End-of-stack check for user tasks: */
436 if (state->regs && user_mode(state->regs))
437 goto the_end;
438
439 /*
440 * Find the orc_entry associated with the text address.
441 *
442 * For a call frame (as opposed to a signal frame), state->ip points to
443 * the instruction after the call. That instruction's stack layout
444 * could be different from the call instruction's layout, for example
445 * if the call was to a noreturn function. So get the ORC data for the
446 * call instruction itself.
447 */
448 orc = orc_find(state->signal ? state->ip : state->ip - 1);
449 if (!orc) {
450 /*
451 * As a fallback, try to assume this code uses a frame pointer.
452 * This is useful for generated code, like BPF, which ORC
453 * doesn't know about. This is just a guess, so the rest of
454 * the unwind is no longer considered reliable.
455 */
456 orc = &orc_fp_entry;
457 state->error = true;
458 }
紧接着我又进一步追踪这个orc_find到其中的orc_ftrace_find(ip),想知道为什么找不到orc_entry,orc_find代码如下:(貌似下面188行是去寻找模块的orc_entry,感觉像是他原本应该在其中去得到,但事实是他并未正确返回)
149 static struct orc_entry *orc_find(unsigned long ip)
150 {
151 static struct orc_entry *orc;
152
153 if (ip == 0)
154 return &null_orc_entry;
155
156 /* For non-init vmlinux addresses, use the fast lookup table: */
157 if (ip >= LOOKUP_START_IP && ip < LOOKUP_STOP_IP) {
158 unsigned int idx, start, stop;
159
160 idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
161
162 if (unlikely((idx >= lookup_num_blocks-1))) {
163 orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
164 idx, lookup_num_blocks, (void *)ip);
165 return NULL;
166 }
167
168 start = orc_lookup[idx];
169 stop = orc_lookup[idx + 1] + 1;
170
171 if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
172 (__start_orc_unwind + stop > __stop_orc_unwind))) {
173 orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
174 idx, lookup_num_blocks, start, stop, (void *)ip);
175 return NULL;
176 }
177
178 return __orc_find(__start_orc_unwind_ip + start,
179 __start_orc_unwind + start, stop - start, ip);
180 }
181
182 /* vmlinux .init slow lookup: */
183 if (init_kernel_text(ip))
184 return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
185 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
186
187 /* Module lookup: */
188 orc = orc_module_find(ip);
189 if (orc)
190 return orc;
191
192 return orc_ftrace_find(ip);
193 }
我尝试了目前最新的5.10.0-136.85.0.166.oe2203sp1.x86_64,复现了这个问题。
这是在我们机器上的结果
感谢支持,目前问题已经解决。原因是编译模块的机器上没有安装elfutils-devel软件包(它并不是编译模块所必需的依赖):在没有安装这个软件包的机器上编译出来的内核模块不会带有orc信息,这样的模块被加载进内核后,内核在打kpatch检查栈的时候就会因为找不到orc信息而报错。
登录 后才可以发表评论