该 Issue 用于汇总所有出现 sync 卡死问题的系统状态,方便进一步排查问题。
当前问题已经在 Ubuntu, Deepin 等平台相继出现,在硬件真机和虚拟机上都有出现,另外,在 5.3, 5.4, 5.6 内核版本上也都有出现。
下述环境有必现的复现路径: 先启动 Linux Lab,再执行 sync 命令,该命令会一直不返回,此时浏览器也无法打开,但是系统负载并不高。如果不运行 Linux Lab,则没有问题。
Product: Gigabyte Technology Co., Ltd., B150N Phoenix-WIFI-CF, B150N Phoenix-WIFI
ARCH: x86_64
CPU: 4 x Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
RAM: 7915 MiB
System: Deepin 20 Beta, n/a
Kernel: Linux 5.3.0-3-amd64
Docker: Docker version 19.03.8, build afacb8b7f0
Shell: /bin/bash 5.0.3(1)-release
接下来需要继续排查别的问题:
相关参考资料:
Product: Gigabyte Technology Co., Ltd., B150N Phoenix-WIFI-CF, B150N Phoenix-WIFI
ARCH: x86_64
CPU: 4 x Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz
RAM: 7915 MiB
System: Deepin 20 Beta, n/a
Kernel: Linux 5.3.0-3-amd64
Docker: Docker version 19.03.8, build afacb8b7f0
Shell: /bin/bash 5.0.3(1)-release
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。
Product: ASUSTeK COMPUTER INC., X555LD, X555LD
ARCH: x86_64
CPU: 4 x Intel(R) Core(TM) i5-4210U CPU @ 1.70GHz
RAM: 7845 MiB
System: Ubuntu 18.04.4 LTS, bionic
Kernel: Linux 5.3.0-51-generic
Docker: Docker version 19.03.8, build afacb8b7f0
Shell: /bin/bash 4.4.20(1)-release
Product: Hasee, HM65, QTH6
ARCH: x86_64
CPU: 8 x Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz
RAM: 7908 MiB
System: Ubuntu 20.04 LTS, focal
Kernel: Linux 5.4.0-29-generic
Docker: Docker version 19.03.8, build afacb8b7f0
Shell: /bin/bash 5.0.16(1)-release
http://showterm.io/7683579af57bf70fcdcbb
[ 298.582552] Oops: 0002 [#1] SMP PTI
[ 298.582554] CPU: 3 PID: 6373 Comm: modprobe Not tainted 5.4.0-29-generic #33-Ubuntu
[ 298.582555] Hardware name: Hasee QTH6/HM65, BIOS SU272 12/16/2011
[ 298.582568] RIP: 0010:nfsd_fill_super+0x72/0x90 [nfsd]
[ 298.582570] Code: 89 c4 85 c0 74 0a 5b 44 89 e0 41 5c 41 5d 5d c3 48 8b 7b 68 31 f6 48 c7 c2 e3 75 0e c1 e8 96 fe ff ff 48 3d 00 f0 ff ff 77 0e <49> 89 45 58 44 89 e0 5b 41 5c 41 5d 5d c3 41 89 c4 eb c9 66 66 2e
[ 298.582571] RSP: 0018:ffffb13a4343ba90 EFLAGS: 00010287
[ 298.582572] RAX: ffff9a6e9c20b0c0 RBX: ffff9a6ec5543800 RCX: 0000000000000002
[ 298.582574] RDX: 0000000000000000 RSI: 0000000000000100 RDI: ffff9a6e9c295ac0
[ 298.582575] RBP: ffffb13a4343baa8 R08: ffff9a6e9c20b0e0 R09: 0000000000000000
[ 298.582576] R10: 0000000000000000 R11: fefefefefefefeff R12: 0000000000000000
[ 298.582577] R13: 0000000000000000 R14: ffffffffc10abf10 R15: 0000000000000000
[ 298.582578] FS: 00007f28c3160740(0000) GS:ffff9a6ef78c0000(0000) knlGS:0000000000000000
[ 298.582579] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 298.582581] CR2: 0000000000000058 CR3: 0000000201c1e004 CR4: 00000000000606e0
[ 298.582582] Call Trace:
[ 298.582587] vfs_get_super+0x7f/0x100
[ 298.582590] get_tree_keyed+0x1a/0x20
[ 298.582599] nfsd_fs_get_tree+0x23/0x30 [nfsd]
[ 298.582601] vfs_get_tree+0x2a/0xc0
[ 298.582603] fc_mount+0x13/0x50
[ 298.582605] vfs_kern_mount.part.0+0x78/0x90
[ 298.582607] vfs_kern_mount+0x13/0x20
[ 298.582615] nfsd_init_net+0x11f/0x150 [nfsd]
[ 298.582617] ops_init+0x42/0x100
[ 298.582619] register_pernet_operations+0xee/0x1e0
[ 298.582620] register_pernet_subsys+0x29/0x40
[ 298.582632] ? trace_event_define_fields_nfsd_file_fsnotify_handle_event+0xb4/0xb4 [nfsd]
[ 298.582642] init_nfsd+0x23/0x8ba [nfsd]
[ 298.582653] ? trace_event_define_fields_nfsd_file_fsnotify_handle_event+0xb4/0xb4 [nfsd]
[ 298.582655] do_one_initcall+0x4a/0x1fa
[ 298.582657] ? kfree+0x224/0x240
[ 298.582659] ? _cond_resched+0x19/0x30
[ 298.582661] ? kmem_cache_alloc_trace+0x163/0x230
[ 298.582663] do_init_module+0x62/0x250
[ 298.582665] load_module+0x10b8/0x1200
[ 298.582669] __do_sys_finit_module+0xbe/0x120
[ 298.582671] ? __do_sys_finit_module+0xbe/0x120
[ 298.582673] __x64_sys_finit_module+0x1a/0x20
[ 298.582675] do_syscall_64+0x57/0x190
[ 298.582677] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 298.582679] RIP: 0033:0x7f28c2c803c9
[ 298.582681] Code: 01 00 48 81 c4 80 00 00 00 e9 f1 fe ff ff 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8f 9a 2c 00 f7 d8 64 89 01 48
[ 298.582682] RSP: 002b:00007ffca1e956d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[ 298.582684] RAX: ffffffffffffffda RBX: 00005626e2264ee9 RCX: 00007f28c2c803c9
[ 298.582685] RDX: 0000000000000000 RSI: 00005626e2264ee9 RDI: 0000000000000003
[ 298.582686] RBP: 0000000000000000 R08: 0000000000000000 R09: 00005626e286f240
[ 298.582687] R10: 0000000000000003 R11: 0000000000000246 R12: 00005626e2867290
[ 298.582688] R13: 00005626e286ab60 R14: 00005626e286f390 R15: 0000000000040000
[ 298.582689] Modules linked in: nfsd(+) auth_rpcgss nfs_acl lockd grace sunrpc binfmt_misc 9pnet_virtio 9pnet minix xt_nat xt_tcpudp veth xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user xfrm_algo xt_addrtype iptable_filter iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c bpfilter br_netfilter bridge stp llc aufs ccm overlay nls_iso8859_1 intel_rapl_msr intel_rapl_common x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic uvcvideo ledtrig_audio kvm_intel videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 ath9k videobuf2_common kvm videodev mc ath9k_common crct10dif_pclmul ghash_clmulni_intel ath9k_hw snd_hda_intel ath aesni_intel snd_intel_dspcfg nouveau snd_hda_codec mac80211 crypto_simd mei_hdcp cryptd glue_helper snd_seq_midi snd_seq_midi_event snd_rawmidi cfg80211 libarc4 ttm i915 snd_seq snd_hda_core mei_me snd_hwdep snd_pcm mei intel_cstate intel_rapl_perf lg_laptop snd_seq_device
[ 298.582713] snd_timer drm_kms_helper snd i2c_algo_bit fb_sys_fops syscopyarea soundcore sysfillrect sysimgblt sparse_keymap mxm_wmi wmi_bmof input_leds mac_hid serio_raw sch_fq_codel parport_pc ppdev lp parport drm ip_tables x_tables autofs4 ums_realtek uas usb_storage hid_generic usbhid hid crc32_pclmul ahci psmouse lpc_ich libahci i2c_i801 atl1c wmi video
[ 298.582726] CR2: 0000000000000058
[ 298.582728] ---[ end trace 92a2bff8887bc92b ]---
[ 298.582737] RIP: 0010:nfsd_fill_super+0x72/0x90 [nfsd]
[ 298.582738] Code: 89 c4 85 c0 74 0a 5b 44 89 e0 41 5c 41 5d 5d c3 48 8b 7b 68 31 f6 48 c7 c2 e3 75 0e c1 e8 96 fe ff ff 48 3d 00 f0 ff ff 77 0e <49> 89 45 58 44 89 e0 5b 41 5c 41 5d 5d c3 41 89 c4 eb c9 66 66 2e
[ 298.582739] RSP: 0018:ffffb13a4343ba90 EFLAGS: 00010287
[ 298.582741] RAX: ffff9a6e9c20b0c0 RBX: ffff9a6ec5543800 RCX: 0000000000000002
[ 298.582742] RDX: 0000000000000000 RSI: 0000000000000100 RDI: ffff9a6e9c295ac0
[ 298.582743] RBP: ffffb13a4343baa8 R08: ffff9a6e9c20b0e0 R09: 0000000000000000
[ 298.582744] R10: 0000000000000000 R11: fefefefefefefeff R12: 0000000000000000
[ 298.582745] R13: 0000000000000000 R14: ffffffffc10abf10 R15: 0000000000000000
[ 298.582746] FS: 00007f28c3160740(0000) GS:ffff9a6ef78c0000(0000) knlGS:0000000000000000
[ 298.582747] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 298.582748] CR2: 0000000000000058 CR3: 0000000201c1e004 CR4: 00000000000606e0
docker/for-linux的问题跟踪链接,里面有详细的复现路径和问题描述。具体的分析过程我会专门写一篇文章,发表在“泰晓科技”公众号上。
https://github.com/docker/for-linux/issues/996
Product: OptiPlex 7020
Board: 0F5C5X
ARCH: x86_64
CPU: 4 x Intel(R) Core(TM) i5-4590 CPU @ 3.30GHz
RAM: 11890 MiB
System: Ubuntu 18.04.4 LTS, bionic
Linux: 5.3.0-51-generic
Docker: Docker version 19.03.8, build afacb8b7f0
Shell: /bin/bash 4.4.20(1)-release
@RXD workaround patch 地址如下:
https://gitee.com/tinylab/cloud-lab/commit/a7c5a09f22a684df9ecea47b5fee02b0c8725750
This is the patch:
https://patchwork.kernel.org/patch/11604115/
这个 bug 已经解决了,内核的 patch 如下:
commit 681370f4b00af0fcc65bbfb9f82de526ab7ceb0a
Author: J. Bruce Fields <bfields@redhat.com>
Date: Tue Jun 23 16:00:33 2020 -0400
nfsd4: fix nfsdfs reference count loop
We don't drop the reference on the nfsdfs filesystem with
mntput(nn->nfsd_mnt) until nfsd_exit_net(), but that won't be called
until the nfsd module's unloaded, and we can't unload the module as long
as there's a reference on nfsdfs. So this prevents module unloading.
Fixes: 2c830dd7209b ("nfsd: persist nfsd filesystem across mounts")
Reported-and-Tested-by: Luo Xiaogang <lxgrxd@163.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index bb3d2c3..cce2510 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -7912,9 +7912,14 @@ static int nfs4_state_create_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
- ret = nfs4_state_create_net(net);
+ ret = get_nfsdfs(net);
if (ret)
return ret;
+ ret = nfs4_state_create_net(net);
+ if (ret) {
+ mntput(nn->nfsd_mnt);
+ return ret;
+ }
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
if (nn->track_reclaim_completes && nn->reclaim_str_hashtbl_size == 0)
@@ -7984,6 +7989,7 @@ static int nfs4_state_create_net(struct net *net)
nfsd4_client_tracking_exit(net);
nfs4_state_destroy_net(net);
+ mntput(nn->nfsd_mnt);
}
void
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b68e966..cf98a81 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1424,6 +1424,18 @@ static void nfsd_umount(struct super_block *sb)
};
MODULE_ALIAS_FS("nfsd");
+int get_nfsdfs(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct vfsmount *mnt;
+
+ mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
+ if (IS_ERR(mnt))
+ return PTR_ERR(mnt);
+ nn->nfsd_mnt = mnt;
+ return 0;
+}
+
#ifdef CONFIG_PROC_FS
static int create_proc_exports_entry(void)
{
@@ -1451,7 +1463,6 @@ static int create_proc_exports_entry(void)
static __net_init int nfsd_init_net(struct net *net)
{
int retval;
- struct vfsmount *mnt;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
retval = nfsd_export_init(net);
@@ -1478,16 +1489,8 @@ static __net_init int nfsd_init_net(struct net *net)
init_waitqueue_head(&nn->ntf_wq);
seqlock_init(&nn->boot_lock);
- mnt = vfs_kern_mount(&nfsd_fs_type, SB_KERNMOUNT, "nfsd", NULL);
- if (IS_ERR(mnt)) {
- retval = PTR_ERR(mnt);
- goto out_mount_err;
- }
- nn->nfsd_mnt = mnt;
return 0;
-out_mount_err:
- nfsd_reply_cache_shutdown(nn);
out_drc_error:
nfsd_idmap_shutdown(net);
out_idmap_error:
@@ -1500,7 +1503,6 @@ static __net_exit void nfsd_exit_net(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- mntput(nn->nfsd_mnt);
nfsd_reply_cache_shutdown(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 36cdd81..57c832d 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -90,6 +90,8 @@ struct readdir_cd {
bool i_am_nfsd(void);
+int get_nfsdfs(struct net *);
+
struct nfsdfs_client {
struct kref cl_ref;
void (*cl_release)(struct kref *kref);
@@ -100,6 +102,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn,
struct nfsdfs_client *ncl, u32 id, const struct tree_descr *);
void nfsd_client_rmdir(struct dentry *dentry);
+
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
extern const struct svc_version nfsd_acl_version2;
Sign in to comment