From 159a82c553deee2005e29a87ac4e4b09e9b576d8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Apr 2024 12:31:19 +0800 Subject: [PATCH 1/5] nfs: only issue commit in DIO codepath if we have uncommitted data ANBZ: #8717 commit 69d966510d9f5de81588b37d23a9ee8ccc477b23 upstream. Currently, we try to determine whether to issue a commit based on nfs_write_need_commit which looks at the current verifier. In the case where we got a short write and then tried to follow it up with one that failed, the verifier can't be trusted. What we really want to know is whether the pgio request had any successful writes that came back as UNSTABLE. Add a new flag to the pgio request, and use that to indicate that we've had a successful unstable write. Only issue a commit if that flag is set. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Zhao Qiang --- fs/nfs/direct.c | 2 +- fs/nfs/write.c | 48 +++++++++++++++++++++++++---------------- include/linux/nfs_xdr.h | 1 + 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index c220810c61d1..d854c6ba8e4a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -716,7 +716,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) { + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; flags = dreq->flags; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index dc08a0c02f09..012972cb4c78 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1546,25 +1546,37 @@ static int nfs_writeback_done(struct rpc_task *task, nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); trace_nfs_writeback_done(task, hdr); - if (hdr->res.verf->committed < hdr->args.stable && - task->tk_status >= 0) { - /* We tried a write call, but the server did not - * commit data to stable storage even though we - * requested it. - * Note: There is a known bug in Tru64 < 5.0 in which - * the server reports NFS_DATA_SYNC, but performs - * NFS_FILE_SYNC. We therefore implement this checking - * as a dprintk() in order to avoid filling syslog. - */ - static unsigned long complain; + if (task->tk_status >= 0) { + enum nfs3_stable_how committed = hdr->res.verf->committed; + + if (committed == NFS_UNSTABLE) { + /* + * We have some uncommitted data on the server at + * this point, so ensure that we keep track of that + * fact irrespective of what later writes do. + */ + set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags); + } - /* Note this will print the MDS for a DS write */ - if (time_before(complain, jiffies)) { - dprintk("NFS: faulty NFS server %s:" - " (committed = %d) != (stable = %d)\n", - NFS_SERVER(inode)->nfs_client->cl_hostname, - hdr->res.verf->committed, hdr->args.stable); - complain = jiffies + 300 * HZ; + if (committed < hdr->args.stable) { + /* We tried a write call, but the server did not + * commit data to stable storage even though we + * requested it. + * Note: There is a known bug in Tru64 < 5.0 in which + * the server reports NFS_DATA_SYNC, but performs + * NFS_FILE_SYNC. We therefore implement this checking + * as a dprintk() in order to avoid filling syslog. + */ + static unsigned long complain; + + /* Note this will print the MDS for a DS write */ + if (time_before(complain, jiffies)) { + dprintk("NFS: faulty NFS server %s:" + " (committed = %d) != (stable = %d)\n", + NFS_SERVER(inode)->nfs_client->cl_hostname, + committed, hdr->args.stable); + complain = jiffies + 300 * HZ; + } } } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5491ad5f48a9..c980a3deee03 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1580,6 +1580,7 @@ enum { NFS_IOHDR_STAT, NFS_IOHDR_RESEND_PNFS, NFS_IOHDR_RESEND_MDS, + NFS_IOHDR_UNSTABLE_WRITES, }; struct nfs_io_completion; -- Gitee From 6806e85a134541f515267216a15c715b557228a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Apr 2024 19:54:48 +0800 Subject: [PATCH 2/5] NFS: Fix error handling for O_DIRECT write scheduling ANBZ: #8717 commit 954998b60caa8f2a3bf3abe490de6f08d283687a upstream. If we fail to schedule a request for transmission, there are 2 possibilities: 1) Either we hit a fatal error, and we just want to drop the remaining requests on the floor. 2) We were asked to try again, in which case we should allow the outstanding RPC calls to complete, so that we can recoalesce requests and try again. Fixes: d600ad1f2bdb ("NFS41: pop some layoutget errors to application") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Zhao Qiang --- fs/nfs/direct.c | 62 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d854c6ba8e4a..f2b044673292 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -541,10 +541,9 @@ nfs_direct_write_scan_commit_list(struct inode *inode, static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; - struct nfs_page *req, *tmp; + struct nfs_page *req; LIST_HEAD(reqs); struct nfs_commit_info cinfo; - LIST_HEAD(failed); nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); @@ -562,27 +561,36 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; - list_for_each_entry_safe(req, tmp, &reqs, wb_list) { + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); /* Bump the transmission count */ req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { - nfs_list_move_request(req, &failed); spin_lock(&cinfo.inode->i_lock); - dreq->flags = 0; - if (desc.pg_error < 0) + if (dreq->error < 0) { + desc.pg_error = dreq->error; + } else if (desc.pg_error != -EAGAIN) { + dreq->flags = 0; + if (!desc.pg_error) + desc.pg_error = -EIO; dreq->error = desc.pg_error; - else - dreq->error = -EIO; + } else + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; spin_unlock(&cinfo.inode->i_lock); + break; } nfs_release_request(req); } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) { - req = nfs_list_entry(failed.next); + while (!list_empty(&reqs)) { + req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); nfs_unlock_and_release_request(req); + if (desc.pg_error == -EAGAIN) + nfs_mark_request_commit(req, NULL, &cinfo, 0); + else + nfs_release_request(req); } if (put_dreq(dreq)) @@ -797,9 +805,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; + struct nfs_commit_info cinfo; ssize_t result = 0; size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); + bool defer = false; nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); @@ -839,19 +849,39 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, break; } + pgbase = 0; + bytes -= req_len; + requested_bytes += req_len; + pos += req_len; + dreq->bytes_left -= req_len; + + if (defer) { + nfs_mark_request_commit(req, NULL, &cinfo, 0); + continue; + } + nfs_lock_request(req); req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(&desc, req)) { + if (nfs_pageio_add_request(&desc, req)) + continue; + + /* Exit on hard errors */ + if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) { result = desc.pg_error; nfs_unlock_and_release_request(req); break; } - pgbase = 0; - bytes -= req_len; - requested_bytes += req_len; - pos += req_len; - dreq->bytes_left -= req_len; + + /* If the error is soft, defer remaining requests */ + nfs_init_cinfo_from_dreq(&cinfo, dreq); + spin_lock(&cinfo.inode->i_lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&cinfo.inode->i_lock); + nfs_unlock_request(req); + nfs_mark_request_commit(req, NULL, &cinfo, 0); + desc.pg_error = 0; + defer = true; } nfs_direct_release_pages(pagevec, npages); kvfree(pagevec); -- Gitee From 4529986f567d6af5fa5cea2c52bd7ef92c5b1931 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Apr 2024 20:01:54 +0800 Subject: [PATCH 3/5] NFS: Fix O_DIRECT locking issues ANBZ: #8717 commit 7c6339322ce0c6128acbe36aacc1eeb986dd7bf1 upstream. The dreq fields are protected by the dreq->lock. Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Zhao Qiang --- fs/nfs/direct.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f2b044673292..4910c03bb092 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -566,7 +566,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) /* Bump the transmission count */ req->wb_nio++; if (!nfs_pageio_add_request(&desc, req)) { - spin_lock(&cinfo.inode->i_lock); + spin_lock(&dreq->lock); if (dreq->error < 0) { desc.pg_error = dreq->error; } else if (desc.pg_error != -EAGAIN) { @@ -576,7 +576,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->error = desc.pg_error; } else dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - spin_unlock(&cinfo.inode->i_lock); + spin_unlock(&dreq->lock); break; } nfs_release_request(req); @@ -875,9 +875,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, /* If the error is soft, defer remaining requests */ nfs_init_cinfo_from_dreq(&cinfo, dreq); - spin_lock(&cinfo.inode->i_lock); + spin_lock(&dreq->lock); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - spin_unlock(&cinfo.inode->i_lock); + spin_unlock(&dreq->lock); nfs_unlock_request(req); nfs_mark_request_commit(req, NULL, &cinfo, 0); desc.pg_error = 0; -- Gitee From 196b8df9a8d6250934b1930fcaab21f2bc23fc8c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Apr 2024 20:18:24 +0800 Subject: [PATCH 4/5] pNFS: Fix the pnfs block driver's calculation of layoutget size ANBZ: #8717 commit 8a6291bf3b0eae1bf26621e6419a91682f2d6227 upstream. Instead of relying on the value of the 'bytes_left' field, we should calculate the layout size based on the offset of the request that is being written out. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling") Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker Signed-off-by: Zhao Qiang --- fs/nfs/blocklayout/blocklayout.c | 5 ++--- fs/nfs/direct.c | 5 +++-- fs/nfs/internal.h | 2 +- fs/nfs/pnfs.c | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 08108b6d2fa1..d98042c8ba4c 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -917,10 +917,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) } if (pgio->pg_dreq == NULL) - wb_size = pnfs_num_cont_bytes(pgio->pg_inode, - req->wb_index); + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); else - wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req)); pnfs_generic_pg_init_write(pgio, req, wb_size); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4910c03bb092..45a30d61fc73 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -229,9 +229,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } -ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset) { - return dreq->bytes_left; + loff_t start = offset - dreq->io_start; + return dreq->max_count - start; } EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a7e0970b5bfe..3590aa1335cf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -605,7 +605,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); -extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset); /* nfs4proc.c */ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 21436721745b..b9c024053391 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2715,7 +2715,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); else - rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq, + req_offset(req)); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), -- Gitee From 4bd2e42ec3d30df0b7a5b434789e9e854a08e4fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 15 Apr 2024 20:36:32 +0800 Subject: [PATCH 5/5] NFS: More O_DIRECT accounting fixes for error paths ANBZ: #8717 commit 8982f7aff39fb526aba4441fff2525fcedd5e1a3 upstream. If we hit a fatal error when retransmitting, we do need to record the removal of the request from the count of written bytes. Fixes: 031d73ed768a ("NFS: Fix O_DIRECT accounting of number of bytes read/written") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Zhao Qiang --- fs/nfs/direct.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 45a30d61fc73..a37ed0744570 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -124,12 +124,10 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq, dreq->max_count = dreq_len; if (dreq->count > dreq_len) dreq->count = dreq_len; - - if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) - dreq->error = hdr->error; - else /* Clear outstanding error if this is EOF */ - dreq->error = 0; } + + if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error) + dreq->error = hdr->error; } static void @@ -151,6 +149,18 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq, dreq->count = dreq_len; } +static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, + struct nfs_page *req) +{ + loff_t offs = req_offset(req); + size_t req_start = (size_t)(offs - dreq->io_start); + + if (req_start < dreq->max_count) + dreq->max_count = req_start; + if (req_start < dreq->count) + dreq->count = req_start; +} + /** * nfs_direct_IO - NFS address space operation for direct I/O * @iocb: target I/O control block @@ -551,10 +561,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_direct_join_group(&reqs, dreq->inode); - dreq->count = 0; - dreq->max_count = 0; - list_for_each_entry(req, &reqs, wb_list) - dreq->max_count += req->wb_bytes; nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); get_dreq(dreq); @@ -588,10 +594,14 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); nfs_unlock_and_release_request(req); - if (desc.pg_error == -EAGAIN) + if (desc.pg_error == -EAGAIN) { nfs_mark_request_commit(req, NULL, &cinfo, 0); - else + } else { + spin_lock(&dreq->lock); + nfs_direct_truncate_request(dreq, req); + spin_unlock(&dreq->lock); nfs_release_request(req); + } } if (put_dreq(dreq)) @@ -609,8 +619,6 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) if (status < 0) { /* Errors in commit are fatal */ dreq->error = status; - dreq->max_count = 0; - dreq->count = 0; dreq->flags = NFS_ODIRECT_DONE; } else if (dreq->flags == NFS_ODIRECT_DONE) status = dreq->error; @@ -620,7 +628,12 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - if (status >= 0 && !nfs_write_match_verf(verf, req)) { + if (status < 0) { + spin_lock(&dreq->lock); + nfs_direct_truncate_request(dreq, req); + spin_unlock(&dreq->lock); + nfs_release_request(req); + } else if (!nfs_write_match_verf(verf, req)) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* * Despite the reboot, the write was successful, @@ -628,7 +641,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) */ req->wb_nio = 0; nfs_mark_request_commit(req, NULL, &cinfo, 0); - } else /* Error or match */ + } else nfs_release_request(req); nfs_unlock_and_release_request(req); } @@ -679,6 +692,7 @@ static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) while (!list_empty(&reqs)) { req = nfs_list_entry(reqs.next); nfs_list_remove_request(req); + nfs_direct_truncate_request(dreq, req); nfs_release_request(req); nfs_unlock_and_release_request(req); } @@ -725,7 +739,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } nfs_direct_count_bytes(dreq, hdr); - if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags)) { + if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) && + !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (!dreq->flags) dreq->flags = NFS_ODIRECT_DO_COMMIT; flags = dreq->flags; -- Gitee