diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4fb26194ed66c23c1e2a75f78d75d1ba2e1e0b7e..7533a7484dbe5b8048f82803d514c532b99caf3b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -477,12 +477,19 @@ static void nvme_free_ns_head(struct kref *ref)
 {
 	struct nvme_ns_head *head =
 		container_of(ref, struct nvme_ns_head, ref);
+	struct nvme_ns_head_wrapper *head_wrapper =
+		container_of(head, struct nvme_ns_head_wrapper, head);
 
 	nvme_mpath_remove_disk(head);
 	ida_simple_remove(&head->subsys->ns_ida, head->instance);
 	cleanup_srcu_struct(&head->srcu);
 	nvme_put_subsystem(head->subsys);
-	kfree(head);
+	kfree(head_wrapper);
+}
+
+static bool nvme_tryget_ns_head(struct nvme_ns_head *head)
+{
+	return kref_get_unless_zero(&head->ref);
 }
 
 static void nvme_put_ns_head(struct nvme_ns_head *head)
@@ -2312,9 +2319,7 @@ static const struct block_device_operations nvme_fops = {
 #ifdef CONFIG_NVME_MULTIPATH
 static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
 {
-	struct nvme_ns_head *head = bdev->bd_disk->private_data;
-
-	if (!kref_get_unless_zero(&head->ref))
+	if (!nvme_tryget_ns_head(bdev->bd_disk->private_data))
 		return -ENXIO;
 	return 0;
 }
@@ -3707,7 +3712,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
 	lockdep_assert_held(&subsys->lock);
 
 	list_for_each_entry(h, &subsys->nsheads, entry) {
-		if (h->ns_id == nsid && kref_get_unless_zero(&h->ref))
+		if (h->ns_id != nsid)
+			continue;
+		if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
 			return h;
 	}
 
@@ -3732,17 +3739,19 @@ static int nvme_subsys_check_duplicate_ids(struct nvme_subsystem *subsys,
 static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 		unsigned nsid, struct nvme_ns_ids *ids)
 {
+	struct nvme_ns_head_wrapper *head_wrapper;
 	struct nvme_ns_head *head;
-	size_t size = sizeof(*head);
+	size_t size = sizeof(*head_wrapper);
 	int ret = -ENOMEM;
 
 #ifdef CONFIG_NVME_MULTIPATH
 	size += num_possible_nodes() * sizeof(struct nvme_ns *);
 #endif
 
-	head = kzalloc(size, GFP_KERNEL);
-	if (!head)
+	head_wrapper = kzalloc(size, GFP_KERNEL);
+	if (!head_wrapper)
 		goto out;
+	head = &head_wrapper->head;
 	ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
 	if (ret < 0)
 		goto out_free_head;
@@ -3784,7 +3793,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 out_ida_remove:
 	ida_simple_remove(&ctrl->subsys->ns_ida, head->instance);
 out_free_head:
-	kfree(head);
+	kfree(head_wrapper);
 out:
 	if (ret > 0)
 		ret = blk_status_to_errno(nvme_error_status(ret));
@@ -3960,6 +3969,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid,
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
+	bool last_path = false;
+
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
@@ -3968,8 +3979,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
 	mutex_lock(&ns->ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
-	if (list_empty(&ns->head->list))
+	if (list_empty(&ns->head->list)) {
 		list_del_init(&ns->head->entry);
+		last_path = true;
+	}
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
 	synchronize_rcu(); /* guarantee not available in head->list */
@@ -3987,7 +4000,8 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	list_del_init(&ns->list);
 	up_write(&ns->ctrl->namespaces_rwsem);
 
-	nvme_mpath_check_last_path(ns);
+	if (last_path)
+		nvme_mpath_shutdown_disk(ns->head);
 	nvme_put_ns(ns);
 }
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 379d6818a0635d69319346348165142dd59c132d..c79b67989a22122d900e1cd78364cfd4ce9c9b58 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -277,6 +277,9 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 {
 	struct nvme_ns *ns;
 
+	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+		return false;
+
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
 		switch (ns->ctrl->state) {
 		case NVME_CTRL_LIVE:
@@ -332,6 +335,27 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
 	return ret;
 }
 
+static void nvme_partition_scan_work(struct work_struct *work)
+{
+	struct nvme_ns_head_wrapper *head_wrapper =
+		container_of(work, struct nvme_ns_head_wrapper, partition_scan_work);
+	struct nvme_ns_head *head = &head_wrapper->head;
+	struct block_device *bdev;
+
+	if (WARN_ON_ONCE(!test_and_clear_bit(GENHD_FL_NO_PART_SCAN,
+					     &head->disk->state)))
+		return;
+
+	bdev = bdget_part(&head->disk->part0);
+	if (!bdev)
+		return;
+
+	mutex_lock(&bdev->bd_mutex);
+	bdev_disk_changed(bdev, false);
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+}
+
 static void nvme_requeue_work(struct work_struct *work)
 {
 	struct nvme_ns_head *head =
@@ -358,12 +382,15 @@ static void nvme_requeue_work(struct work_struct *work)
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 {
 	struct request_queue *q;
+	struct nvme_ns_head_wrapper *head_wrapper =
+		container_of(head, struct nvme_ns_head_wrapper, head);
 	bool vwc = false;
 
 	mutex_init(&head->lock);
 	bio_list_init(&head->requeue_list);
 	spin_lock_init(&head->requeue_lock);
 	INIT_WORK(&head->requeue_work, nvme_requeue_work);
+	INIT_WORK(&head_wrapper->partition_scan_work, nvme_partition_scan_work);
 
 	/*
 	 * Add a multipath node if the subsystems supports multiple controllers.
@@ -393,6 +420,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	head->disk->private_data = head;
 	head->disk->queue = q;
 	head->disk->flags = GENHD_FL_EXT_DEVT;
+
+	/*
+	 * We need to suppress the partition scan from occurring within the
+	 * controller's scan_work context. If a path error occurs here, the IO
+	 * will wait until a path becomes available or all paths are torn down,
+	 * but that action also occurs within scan_work, so it would deadlock.
+	 * Defer the partition scan to a different context that does not block
+	 * scan_work.
+	 */
+	set_bit(GENHD_FL_NO_PART_SCAN, &head->disk->state);
 	sprintf(head->disk->disk_name, "nvme%dn%d",
 			ctrl->subsys->instance, head->instance);
 	return 0;
@@ -406,13 +443,17 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 static void nvme_mpath_set_live(struct nvme_ns *ns)
 {
 	struct nvme_ns_head *head = ns->head;
+	struct nvme_ns_head_wrapper *head_wrapper =
+		container_of(head, struct nvme_ns_head_wrapper, head);
 
 	if (!head->disk)
 		return;
 
-	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
+	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		device_add_disk(&head->subsys->dev, head->disk,
 				nvme_ns_id_attr_groups);
+		kblockd_schedule_work(&head_wrapper->partition_scan_work);
+	}
 
 	mutex_lock(&head->lock);
 	if (nvme_path_is_optimized(ns)) {
@@ -709,16 +750,33 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
 #endif
 }
 
-void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
-	if (head->disk->flags & GENHD_FL_UP)
+	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+		/*
+		 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
+		 * to allow multipath to fail all I/O.
+		 */
+		synchronize_srcu(&head->srcu);
+		kblockd_schedule_work(&head->requeue_work);
 		del_gendisk(head->disk);
+	}
+}
+
+void nvme_mpath_remove_disk(struct nvme_ns_head *head)
+{
+	struct nvme_ns_head_wrapper *head_wrapper =
+		container_of(head, struct nvme_ns_head_wrapper, head);
+
+	if (!head->disk)
+		return;
 	blk_set_queue_dying(head->disk->queue);
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
 	flush_work(&head->requeue_work);
+	flush_work(&head_wrapper->partition_scan_work);
 	blk_cleanup_queue(head->disk->queue);
 	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
 		/*
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7bb773aca705e56fbd7a4f9fdc6c76cb0af4ff3f..61ce3d4ee64e87fa38b1ff48b3a875c36ff140d4 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -429,6 +429,13 @@ struct nvme_ns_head {
 #endif
 };
 
+struct nvme_ns_head_wrapper {
+#ifdef CONFIG_NVME_MULTIPATH
+	struct work_struct partition_scan_work;
+#endif
+	struct nvme_ns_head head;
+};
+
 enum nvme_ns_features {
 	NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
 	NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
@@ -730,14 +737,7 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
 blk_qc_t nvme_ns_head_submit_bio(struct bio *bio);
-
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
-{
-	struct nvme_ns_head *head = ns->head;
-
-	if (head->disk && list_empty(&head->list))
-		kblockd_schedule_work(&head->requeue_work);
-}
+void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
 
 static inline void nvme_trace_bio_complete(struct request *req,
 		blk_status_t status)
@@ -792,7 +792,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
-static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
+static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
 {
 }
 static inline void nvme_trace_bio_complete(struct request *req,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index a0e4d3ec300ea2e69592b040d2a395c0d17dae92..77fc1c0b546071d500fcacdaa77276571c83a999 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -976,6 +976,7 @@ struct block_device *bdget_part(struct hd_struct *part)
 {
 	return bdget(part_devt(part));
 }
+EXPORT_SYMBOL_GPL(bdget_part);
 
 long nr_blockdev_pages(void)
 {