diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c92d25b71a72844978661adc0a3ccf452b4a3dd1..0a37d15caae4f8bf29263f59a664d8ae66d324a1 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -131,6 +131,7 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 	do {
 		struct request *rq;
+		int budget_token;
 
 		if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
 			break;
 
@@ -140,12 +141,13 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 			break;
 		}
 
-		if (!blk_mq_get_dispatch_budget(q))
+		budget_token = blk_mq_get_dispatch_budget(q);
+		if (budget_token < 0)
 			break;
 
 		rq = e->type->ops.dispatch_request(hctx);
 		if (!rq) {
-			blk_mq_put_dispatch_budget(q);
+			blk_mq_put_dispatch_budget(q, budget_token);
 			/*
 			 * We're releasing without dispatching. Holding the
 			 * budget could have blocked any "hctx"s with the
@@ -157,6 +159,8 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
 			break;
 		}
 
+		blk_mq_set_rq_budget_token(rq, budget_token);
+
 		/*
 		 * Now this rq owns the budget which has to be released
 		 * if this rq won't be queued to driver via .queue_rq()
@@ -237,6 +241,8 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
 	struct request *rq;
 
 	do {
+		int budget_token;
+
 		if (!list_empty_careful(&hctx->dispatch)) {
 			ret = -EAGAIN;
 			break;
@@ -245,12 +251,13 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
 		if (!sbitmap_any_bit_set(&hctx->ctx_map))
 			break;
 
-		if (!blk_mq_get_dispatch_budget(q))
+		budget_token = blk_mq_get_dispatch_budget(q);
+		if (budget_token < 0)
 			break;
 
 		rq = blk_mq_dequeue_from_ctx(hctx, ctx);
 		if (!rq) {
-			blk_mq_put_dispatch_budget(q);
+			blk_mq_put_dispatch_budget(q, budget_token);
 			/*
 			 * We're releasing without dispatching. Holding the
 			 * budget could have blocked any "hctx"s with the
@@ -262,6 +269,8 @@ static int blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
 			break;
 		}
 
+		blk_mq_set_rq_budget_token(rq, budget_token);
+
 		/*
 		 * Now this rq owns the budget which has to be released
 		 * if this rq won't be queued to driver via .queue_rq()
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 376f1661645c0c6adf54198cd2e6a79d93bcbe0f..5a9c02d0199cb2296ec1e70ab4243be0676d1a84 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1391,10 +1391,15 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
 						  bool need_budget)
 {
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
+	int budget_token = -1;
 
-	if (need_budget && !blk_mq_get_dispatch_budget(rq->q)) {
-		blk_mq_put_driver_tag(rq);
-		return PREP_DISPATCH_NO_BUDGET;
+	if (need_budget) {
+		budget_token = blk_mq_get_dispatch_budget(rq->q);
+		if (budget_token < 0) {
+			blk_mq_put_driver_tag(rq);
+			return PREP_DISPATCH_NO_BUDGET;
+		}
+		blk_mq_set_rq_budget_token(rq, budget_token);
 	}
 
 	if (!blk_mq_get_driver_tag(rq)) {
@@ -1411,7 +1416,7 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
 		 * together during handling partial dispatch
 		 */
 		if (need_budget)
-			blk_mq_put_dispatch_budget(rq->q);
+			blk_mq_put_dispatch_budget(rq->q, budget_token);
 		return PREP_DISPATCH_NO_TAG;
 	}
 }
@@ -1421,12 +1426,16 @@ static enum prep_dispatch blk_mq_prep_dispatch_rq(struct request *rq,
 
 /* release all allocated budgets before calling to blk_mq_dispatch_rq_list */
 static void blk_mq_release_budgets(struct request_queue *q,
-		unsigned int nr_budgets)
+		struct list_head *list)
 {
-	int i;
+	struct request *rq;
 
-	for (i = 0; i < nr_budgets; i++)
-		blk_mq_put_dispatch_budget(q);
+	list_for_each_entry(rq, list, queuelist) {
+		int budget_token = blk_mq_get_rq_budget_token(rq);
+
+		if (budget_token >= 0)
+			blk_mq_put_dispatch_budget(q, budget_token);
+	}
 }
 
 /*
@@ -1529,7 +1538,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
 			((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
 			blk_mq_is_sbitmap_shared(hctx->flags));
 
-		blk_mq_release_budgets(q, nr_budgets);
+		if (nr_budgets)
+			blk_mq_release_budgets(q, list);
 
 		spin_lock(&hctx->lock);
 		list_splice_tail_init(list, &hctx->dispatch);
@@ -2179,6 +2189,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 {
 	struct request_queue *q = rq->q;
 	bool run_queue = true;
+	int budget_token;
 
 	/*
 	 * RCU or SRCU read lock is needed before checking quiesced flag.
@@ -2196,11 +2207,14 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	if (q->elevator && !bypass_insert)
 		goto insert;
 
-	if (!blk_mq_get_dispatch_budget(q))
+	budget_token = blk_mq_get_dispatch_budget(q);
+	if (budget_token < 0)
 		goto insert;
 
+	blk_mq_set_rq_budget_token(rq, budget_token);
+
 	if (!blk_mq_get_driver_tag(rq)) {
-		blk_mq_put_dispatch_budget(q);
+		blk_mq_put_dispatch_budget(q, budget_token);
 		goto insert;
 	}
 
@@ -2967,7 +2981,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
 		goto free_cpumask;
 
 	if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
-				gfp, node))
+				gfp, node, false, false))
 		goto free_ctxs;
 
 	hctx->nr_ctx = 0;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 7bb0b82bfbe972f522959c85d411787fec761bcc..20bceea10081b4cd87836e0cd62614613cc16bcf 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -203,17 +203,34 @@ unsigned int blk_mq_in_flight_with_stat(struct request_queue *q,
 					struct hd_struct *part);
 #endif
 
-static inline void blk_mq_put_dispatch_budget(struct request_queue *q)
+static inline void blk_mq_put_dispatch_budget(struct request_queue *q,
+					      int budget_token)
 {
 	if (q->mq_ops->put_budget)
-		q->mq_ops->put_budget(q);
+		q->mq_ops->put_budget(q, budget_token);
 }
 
-static inline bool blk_mq_get_dispatch_budget(struct request_queue *q)
+static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
 {
 	if (q->mq_ops->get_budget)
 		return q->mq_ops->get_budget(q);
-	return true;
+	return 0;
+}
+
+static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
+{
+	if (token < 0)
+		return;
+
+	if (rq->q->mq_ops->set_rq_budget_token)
+		rq->q->mq_ops->set_rq_budget_token(rq, token);
+}
+
+static inline int blk_mq_get_rq_budget_token(struct request *rq)
+{
+	if (rq->q->mq_ops->get_rq_budget_token)
+		return rq->q->mq_ops->get_rq_budget_token(rq);
+	return -1;
 }
 
 static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 3036f087ec7bd5706d21401ddef2636954b0d9f3..fbf8f7e0024122ce161322d14bfe07e1373a8297 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -476,7 +476,8 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 	for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
 		if (sbitmap_init_node(&khd->kcq_map[i], hctx->nr_ctx,
-				      ilog2(8), GFP_KERNEL, hctx->numa_node)) {
+				      ilog2(8), GFP_KERNEL, hctx->numa_node,
+				      false, false)) {
 			while (--i >= 0)
 				sbitmap_free(&khd->kcq_map[i]);
 			goto err_kcqs;
 		}
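
Note on the core contract above: .get_budget now returns an integer token (>= 0) instead of a bool, and every release must hand the same token back through .put_budget. The token travels with the request via the new .set_rq_budget_token/.get_rq_budget_token ops, which is what lets the partial-dispatch cleanup in blk_mq_release_budgets() recover it from each queued request. A minimal sketch of what a budget-aware driver would wire up, assuming a driver-private sbitmap; all my_* names are illustrative, not part of this patch:

static int my_get_budget(struct request_queue *q)
{
	/* token >= 0 on success, negative when the budget is exhausted */
	return sbitmap_get(my_driver_data(q)->budget_map);
}

static void my_put_budget(struct request_queue *q, int budget_token)
{
	sbitmap_put(my_driver_data(q)->budget_map, budget_token);
}

static const struct blk_mq_ops my_mq_ops = {
	.queue_rq		= my_queue_rq,
	.get_budget		= my_get_budget,
	.put_budget		= my_put_budget,
	.set_rq_budget_token	= my_set_rq_budget_token,	/* stash token in the rq pdu */
	.get_rq_budget_token	= my_get_rq_budget_token,
};

In this series SCSI is the only in-tree consumer of the two token ops (see scsi_lib.c below); drivers without a budget leave all four callbacks NULL, in which case blk_mq_get_dispatch_budget() returns 0 and blk_mq_get_rq_budget_token() returns -1.
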
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 18b91ea1a353f21c7ec85936cfc6faf51267af4e..25dd9b2e12abdb302ea463e1e114130b4310c64b 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -3756,7 +3756,7 @@ mptsas_send_link_status_event(struct fw_event_work *fw_event)
 			printk(MYIOC_s_DEBUG_FMT
 			       "SDEV OUTSTANDING CMDS"
 			       "%d\n", ioc->name,
-			       atomic_read(&sdev->device_busy)));
+			       scsi_device_busy(sdev)));
 		}
 	}
 
diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
index f78cb87e46fa4a710c08f999abbdf3275c027cb0..40767eface53ee903de1d8820e4a83bd26b43dba 100644
--- a/drivers/scsi/megaraid/megaraid_sas.h
+++ b/drivers/scsi/megaraid/megaraid_sas.h
@@ -2021,10 +2021,12 @@ union megasas_frame {
  * struct MR_PRIV_DEVICE - sdev private hostdata
  * @is_tm_capable: firmware managed tm_capable flag
  * @tm_busy: TM request is in progress
+ * @sdev_priv_busy: pending command count per sdev
  */
 struct MR_PRIV_DEVICE {
 	bool is_tm_capable;
 	bool tm_busy;
+	atomic_t sdev_priv_busy;
 	atomic_t r1_ldio_hint;
 	u8 interface_type;
 	u8 task_abort_tmo;
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 85942c145502080ecc7827e75de9efe5d865ea1b..badbff720df7fe094cceac5749e7fe301dd6219f 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -220,6 +220,40 @@ megasas_clear_intr_fusion(struct megasas_instance *instance)
 	return 1;
 }
 
+static inline void
+megasas_sdev_busy_inc(struct megasas_instance *instance,
+		      struct scsi_cmnd *scmd)
+{
+	if (instance->perf_mode == MR_BALANCED_PERF_MODE) {
+		struct MR_PRIV_DEVICE *mr_device_priv_data =
+			scmd->device->hostdata;
+
+		atomic_inc(&mr_device_priv_data->sdev_priv_busy);
+	}
+}
+
+static inline void
+megasas_sdev_busy_dec(struct megasas_instance *instance,
+		      struct scsi_cmnd *scmd)
+{
+	if (instance->perf_mode == MR_BALANCED_PERF_MODE) {
+		struct MR_PRIV_DEVICE *mr_device_priv_data =
+			scmd->device->hostdata;
+
+		atomic_dec(&mr_device_priv_data->sdev_priv_busy);
+	}
+}
+
+static inline int
+megasas_sdev_busy_read(struct megasas_instance *instance,
+		       struct scsi_cmnd *scmd)
+{
+	if (instance->perf_mode == MR_BALANCED_PERF_MODE) {
+		struct MR_PRIV_DEVICE *mr_device_priv_data =
+			scmd->device->hostdata;
+
+		return atomic_read(&mr_device_priv_data->sdev_priv_busy);
+	}
+	return 0;
+}
+
 /**
  * megasas_get_cmd_fusion -	Get a command from the free pool
  * @instance:		Adapter soft state
@@ -357,15 +391,9 @@ megasas_get_msix_index(struct megasas_instance *instance,
 		       struct megasas_cmd_fusion *cmd,
 		       u8 data_arms)
 {
-	int sdev_busy;
-
-	/* TBD - if sml remove device_busy in future, driver
-	 * should track counter in internal structure.
-	 */
-	sdev_busy = atomic_read(&scmd->device->device_busy);
-
 	if (instance->perf_mode == MR_BALANCED_PERF_MODE &&
-	    sdev_busy > (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH)) {
+	    (megasas_sdev_busy_read(instance, scmd) >
+	     (data_arms * MR_DEVICE_HIGH_IOPS_DEPTH))) {
 		cmd->request_desc->SCSIIO.MSIxIndex =
 			mega_mod64((atomic64_add_return(1, &instance->high_iops_outstanding) /
 			MR_HIGH_IOPS_BATCH_COUNT), instance->low_latency_index_start);
@@ -3390,6 +3418,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance,
 	 * Issue the command to the FW
 	 */
 
+	megasas_sdev_busy_inc(instance, scmd);
 	megasas_fire_cmd_fusion(instance, req_desc);
 
 	if (r1_cmd)
@@ -3450,6 +3479,7 @@ megasas_complete_r1_command(struct megasas_instance *instance,
 		scmd_local->SCp.ptr = NULL;
 		megasas_return_cmd_fusion(instance, cmd);
 		scsi_dma_unmap(scmd_local);
+		megasas_sdev_busy_dec(instance, scmd_local);
 		scmd_local->scsi_done(scmd_local);
 	}
 }
@@ -3558,6 +3588,7 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex,
 			scmd_local->SCp.ptr = NULL;
 			megasas_return_cmd_fusion(instance, cmd_fusion);
 			scsi_dma_unmap(scmd_local);
+			megasas_sdev_busy_dec(instance, scmd_local);
 			scmd_local->scsi_done(scmd_local);
 		} else	/* Optimal VD - R1 FP command completion. */
 			megasas_complete_r1_command(instance, cmd_fusion);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index c4a322c4b83ca61ba7a4841b3575f75cfc1444b7..e69394c00b39197f2f45f814bffeef0a484651b5 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3255,7 +3255,7 @@ scsih_dev_reset(struct scsi_cmnd *scmd)
 		MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 0,
 		tr_timeout, tr_method);
 	/* Check for busy commands after reset */
-	if (r == SUCCESS && atomic_read(&scmd->device->device_busy))
+	if (r == SUCCESS && scsi_device_busy(scmd->device))
 		r = FAILED;
  out:
 	sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(0x%p)\n",
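
The driver conversions above split two ways: megaraid_sas needs the per-LUN in-flight count on its submission fast path (MSI-X steering in MR_BALANCED_PERF_MODE), so it now keeps a private atomic counter instead of relying on the midlayer field; mptsas and mpt3sas only read the count occasionally, so they switch to the scsi_device_busy() helper this patch adds to scsi_device.h. For any other LLD the read-side conversion is mechanical; a sketch, with my_* purely illustrative:

/* Before this patch: atomic_read(&sdev->device_busy). The field is gone. */
static bool my_lun_is_idle(struct scsi_device *sdev)
{
	/* Counts budget bits that are set and not deferred-cleared. */
	return scsi_device_busy(sdev) == 0;
}

scsi_device_busy() walks the budget map via sbitmap_weight(), so it is somewhat more expensive than the old atomic read, which is presumably why the megaraid_sas hot path keeps its own counter rather than calling it per I/O.
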
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 57b0dc08afeb45e8e20e1d5027af86c936e89fc9..77cb26bb4df4aacd7d79484ba39ccb7b2b53fde6 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -214,6 +214,15 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 		scsi_io_completion(cmd, good_bytes);
 }
 
+
+/*
+ * 1024 is big enough for saturating fast SCSI LUNs.
+ */
+int scsi_device_max_queue_depth(struct scsi_device *sdev)
+{
+	return min_t(int, sdev->host->can_queue, 1024);
+}
+
 /**
  * scsi_change_queue_depth - change a device's queue depth
  * @sdev: SCSI Device in question
@@ -223,6 +232,8 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
  */
 int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
 {
+	depth = min_t(int, depth, scsi_device_max_queue_depth(sdev));
+
 	if (depth > 0) {
 		sdev->queue_depth = depth;
 		wmb();
@@ -231,6 +242,8 @@ int scsi_change_queue_depth(struct scsi_device *sdev, int depth)
 	if (sdev->request_queue)
 		blk_set_queue_depth(sdev->request_queue, depth);
 
+	sbitmap_resize(sdev->budget_map, sdev->queue_depth);
+
 	return sdev->queue_depth;
 }
 EXPORT_SYMBOL(scsi_change_queue_depth);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 2732b0c3f0e2cabba62e195ca10fcb44bd8fd389..a8c115643af335ccfe07ef0cfbde090bf1faa96c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -333,7 +333,8 @@ void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd)
 	if (starget->can_queue > 0)
 		atomic_dec(&starget->target_busy);
 
-	atomic_dec(&sdev->device_busy);
+	sbitmap_put(sdev->budget_map, cmd->budget_token);
+	cmd->budget_token = -1;
 }
 
 static void scsi_kick_queue(struct request_queue *q)
@@ -389,7 +390,7 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 
 static inline bool scsi_device_is_busy(struct scsi_device *sdev)
 {
-	if (atomic_read(&sdev->device_busy) >= sdev->queue_depth)
+	if (scsi_device_busy(sdev) >= sdev->queue_depth)
 		return true;
 	if (atomic_read(&sdev->device_blocked) > 0)
 		return true;
@@ -1145,6 +1146,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 	unsigned long jiffies_at_alloc;
 	int retries, to_clear;
 	bool in_flight;
+	int budget_token = cmd->budget_token;
 
 	if (!blk_rq_is_scsi(rq) && !(flags & SCMD_INITIALIZED)) {
 		flags |= SCMD_INITIALIZED;
@@ -1174,6 +1176,7 @@ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd)
 	cmd->retries = retries;
 	if (in_flight)
 		__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+	cmd->budget_token = budget_token;
 
 }
@@ -1255,19 +1258,20 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req)
 }
 
 /*
- * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
- * return 0.
- *
- * Called with the queue_lock held.
+ * scsi_dev_queue_ready: if we can send requests to sdev, grab one budget
+ * token and return it; otherwise return -1.
  */
 static inline int scsi_dev_queue_ready(struct request_queue *q,
 				  struct scsi_device *sdev)
 {
-	unsigned int busy;
+	int token;
 
-	busy = atomic_inc_return(&sdev->device_busy) - 1;
+	token = sbitmap_get(sdev->budget_map);
 	if (atomic_read(&sdev->device_blocked)) {
-		if (busy)
+		if (token < 0)
+			goto out;
+
+		if (scsi_device_busy(sdev) > 1)
 			goto out_dec;
 
 		/*
@@ -1279,13 +1283,12 @@ static inline int scsi_dev_queue_ready(struct request_queue *q,
 				   "unblocking device at zero depth\n"));
 	}
 
-	if (busy >= sdev->queue_depth)
-		goto out_dec;
-
-	return 1;
+	return token;
 out_dec:
-	atomic_dec(&sdev->device_busy);
-	return 0;
+	if (token >= 0)
+		sbitmap_put(sdev->budget_map, token);
+out:
+	return -1;
 }
 
 /*
@@ -1608,19 +1611,20 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static void scsi_mq_put_budget(struct request_queue *q)
+static void scsi_mq_put_budget(struct request_queue *q, int budget_token)
 {
 	struct scsi_device *sdev = q->queuedata;
 
-	atomic_dec(&sdev->device_busy);
+	sbitmap_put(sdev->budget_map, budget_token);
 }
 
-static bool scsi_mq_get_budget(struct request_queue *q)
+static int scsi_mq_get_budget(struct request_queue *q)
 {
 	struct scsi_device *sdev = q->queuedata;
+	int token = scsi_dev_queue_ready(q, sdev);
 
-	if (scsi_dev_queue_ready(q, sdev))
-		return true;
+	if (token >= 0)
+		return token;
 
 	atomic_inc(&sdev->restarts);
 
@@ -1639,10 +1643,24 @@ static int scsi_mq_get_budget(struct request_queue *q)
 	 * the .restarts flag, and the request queue will be run for handling
 	 * this request, see scsi_end_request().
 	 */
-	if (unlikely(atomic_read(&sdev->device_busy) == 0 &&
+	if (unlikely(scsi_device_busy(sdev) == 0 &&
 				!scsi_device_blocked(sdev)))
 		blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);
-	return false;
+	return -1;
+}
+
+static void scsi_mq_set_rq_budget_token(struct request *req, int token)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+
+	cmd->budget_token = token;
+}
+
+static int scsi_mq_get_rq_budget_token(struct request *req)
+{
+	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
+
+	return cmd->budget_token;
 }
 
 static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1656,6 +1674,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_status_t ret;
 	int reason;
 
+	WARN_ON_ONCE(cmd->budget_token < 0);
+
 	/*
 	 * If the device is not in running state we will reject some or all
 	 * commands.
@@ -1707,7 +1727,8 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (scsi_target(sdev)->can_queue > 0)
 		atomic_dec(&scsi_target(sdev)->target_busy);
 out_put_budget:
-	scsi_mq_put_budget(q);
+	scsi_mq_put_budget(q, cmd->budget_token);
+	cmd->budget_token = -1;
 	switch (ret) {
 	case BLK_STS_OK:
 		break;
@@ -1854,6 +1875,8 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = {
 	.cleanup_rq	= scsi_cleanup_rq,
 	.busy		= scsi_mq_lld_busy,
 	.map_queues	= scsi_map_queues,
+	.set_rq_budget_token = scsi_mq_set_rq_budget_token,
+	.get_rq_budget_token = scsi_mq_get_rq_budget_token,
 };
 
@@ -1882,6 +1905,8 @@ static const struct blk_mq_ops scsi_mq_ops = {
 	.cleanup_rq	= scsi_cleanup_rq,
 	.busy		= scsi_mq_lld_busy,
 	.map_queues	= scsi_map_queues,
+	.set_rq_budget_token = scsi_mq_set_rq_budget_token,
+	.get_rq_budget_token = scsi_mq_get_rq_budget_token,
 };
 
 struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev)
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index ba39c21196f502c8a3e83ad3e3fe3c07eea5affc..f2004e8478223ec2afd75325d995c9dff91b6bb4 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -5,6 +5,7 @@
 #include <linux/device.h>
 #include <linux/async.h>
 #include <scsi/scsi_device.h>
+#include <linux/sbitmap.h>
 
 struct request_queue;
 struct request;
@@ -182,6 +183,8 @@ static inline void scsi_dh_add_device(struct scsi_device *sdev) { }
 static inline void scsi_dh_release_device(struct scsi_device *sdev) { }
 #endif
 
+extern int scsi_device_max_queue_depth(struct scsi_device *sdev);
+
 /*
  * internal scsi timeout functions: for use by mid-layer and transport
  * classes.
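
The midlayer hunks above carry the whole token lifecycle: the per-device atomic counter becomes a per-device sbitmap (the "budget map"), and the allocated bit number is the budget token, cached in scsi_cmnd::budget_token. Condensed into one flow, as a sketch of the code above rather than new API:

/* acquisition: scsi_mq_get_budget() -> scsi_dev_queue_ready() */
token = sbitmap_get(sdev->budget_map);	/* negative when depth is exhausted */
if (token < 0)
	return -1;			/* hw queues re-run via sdev->restarts */
cmd->budget_token = token;		/* stored through .set_rq_budget_token */

/* ... ->queue_rq() dispatches the command ... */

/* release: scsi_device_unbusy() on completion, or the out_put_budget path */
sbitmap_put(sdev->budget_map, cmd->budget_token);
cmd->budget_token = -1;

Because sbitmap_get() fails once all bits are taken, the old "busy >= sdev->queue_depth" comparison disappears entirely: the map's current depth, kept in sync by sbitmap_resize() in scsi_change_queue_depth(), is the limit.
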
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 1e052c1570c492fbf6e47c76f5b55f65f289dc8f..1be912544932bc1997a049bab8572cb32fcd4e29 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -198,6 +198,38 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
 			 SCSI_TIMEOUT, 3, NULL);
 }
 
+static int scsi_alloc_and_init_budget_map(struct scsi_device *sdev, unsigned int depth)
+{
+	sdev->budget_map = kzalloc(sizeof(*sdev->budget_map), GFP_KERNEL);
+	if (!sdev->budget_map)
+		return -ENOMEM;
+
+	/*
+	 * Use .can_queue as the budget map's depth because we have to
+	 * support adjusting the queue depth from sysfs. Meanwhile use the
+	 * default device queue depth to figure out the sbitmap shift,
+	 * since we use this queue depth most of the time.
+	 */
+	if (sbitmap_init_node(sdev->budget_map,
+				scsi_device_max_queue_depth(sdev),
+				sbitmap_calculate_shift(depth),
+				GFP_KERNEL, sdev->request_queue->node,
+				false, true)) {
+		kfree(sdev->budget_map);
+		sdev->budget_map = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void scsi_free_budget_map(struct scsi_device *sdev)
+{
+	sbitmap_free(sdev->budget_map);
+	kfree(sdev->budget_map);
+	sdev->budget_map = NULL;
+}
+
 /**
  * scsi_alloc_sdev - allocate and setup a scsi_Device
  * @starget: which target to allocate a &scsi_device for
@@ -215,6 +247,7 @@ static void scsi_unlock_floptical(struct scsi_device *sdev,
 static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 					   u64 lun, void *hostdata)
 {
+	unsigned int depth;
 	struct scsi_device *sdev;
 	int display_failure_msg = 1, ret;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
@@ -276,8 +309,15 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	WARN_ON_ONCE(!blk_get_queue(sdev->request_queue));
 	sdev->request_queue->queuedata = sdev;
 
-	scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun ?
-					sdev->host->cmd_per_lun : 1);
+	depth = sdev->host->cmd_per_lun ?: 1;
+
+	if (scsi_alloc_and_init_budget_map(sdev, depth)) {
+		put_device(&starget->dev);
+		kfree(sdev);
+		goto out;
+	}
+
+	scsi_change_queue_depth(sdev, depth);
 
 	scsi_sysfs_device_initialize(sdev);
 
@@ -980,6 +1020,7 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	scsi_attach_vpd(sdev);
 
 	sdev->max_queue_depth = sdev->queue_depth;
+	WARN_ON_ONCE(sdev->max_queue_depth > sdev->budget_map->depth);
 	sdev->sdev_bflags = *bflags;
 
 	/*
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e15f561e756c0c6f3d1faea8cd8d25f0cdecb831..842d7ca99ddb79805f04083a2a04daa5af0b8d02 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -480,6 +480,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 	/* NULL queue means the device can't be used */
 	sdev->request_queue = NULL;
 
+	scsi_free_budget_map(sdev);
+
 	mutex_lock(&sdev->inquiry_mutex);
 	vpd_pg0 = rcu_replace_pointer(sdev->vpd_pg0, vpd_pg0,
 				      lockdep_is_held(&sdev->inquiry_mutex));
@@ -679,7 +681,7 @@ sdev_show_device_busy(struct device *dev, struct device_attribute *attr,
 		char *buf)
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
-	return snprintf(buf, 20, "%d\n", atomic_read(&sdev->device_busy));
+	return snprintf(buf, 20, "%d\n", scsi_device_busy(sdev));
 }
 static DEVICE_ATTR(device_busy, S_IRUGO, sdev_show_device_busy, NULL);
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index e1c086ac8a60ec216c2be4f9ccd50e5e13cc2154..ec89119ce5b798e989baac4862ad69284612f1f4 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -2517,7 +2517,7 @@ static int sg_proc_seq_show_dev(struct seq_file *s, void *v)
 			      scsidp->id, scsidp->lun, (int) scsidp->type,
 			      1,
 			      (int) scsidp->queue_depth,
-			      (int) atomic_read(&scsidp->device_busy),
+			      (int) scsi_device_busy(scsidp),
 			      (int) scsi_device_online(scsidp));
 	}
 	read_unlock_irqrestore(&sg_index_lock, iflags);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 4ce9f00ae10e84ba7ad6272a37e0d29bd0671e9b..8531751b28dd7fbfe76be10442b5edc09b471963 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -614,7 +614,7 @@ vhost_scsi_get_cmd(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
 		return ERR_PTR(-EIO);
 	}
 
-	tag = sbitmap_get(&svq->scsi_tags, 0, false);
+	tag = sbitmap_get(&svq->scsi_tags);
 	if (tag < 0) {
 		pr_err("Unable to obtain tag for vhost_scsi_cmd\n");
 		return ERR_PTR(-ENOMEM);
@@ -1512,7 +1512,7 @@ static int vhost_scsi_setup_vq_cmds(struct vhost_virtqueue *vq, int max_cmds)
 		return 0;
 
 	if (sbitmap_init_node(&svq->scsi_tags, max_cmds, -1, GFP_KERNEL,
-			      NUMA_NO_NODE))
+			      NUMA_NO_NODE, false, true))
 		return -ENOMEM;
 	svq->max_cmds = max_cmds;
 
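
sbitmap_init_node() grows two flags, and the call sites in this patch pick them deliberately: blk-mq's ctx_map and kyber's kcq_map pass (false, false) because they index bits by software queue rather than allocating free bits, while vhost-scsi's tag map and the SCSI budget map pass (false, true) to get the per-cpu allocation hint that previously existed only inside sbitmap_queue. Side by side, copied from the hunks above rather than new API:

/* pure bitmap, caller picks the bit: no round-robin, no alloc hint */
sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8), gfp, node,
		  false, false);

/* allocator-style user: no round-robin, keep a per-cpu alloc hint */
sbitmap_init_node(&svq->scsi_tags, max_cmds, -1, GFP_KERNEL,
		  NUMA_NO_NODE, false, true);
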
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 289ec99e376afaa329c9572c8a2eeda8b37aaef8..24ea13c4e88d3aeb5d2b15d180d51951b853ecb3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -333,12 +333,21 @@ struct blk_mq_ops {
 	 * reserved budget. Also we have to handle failure case
 	 * of .get_budget for avoiding I/O deadlock.
 	 */
-	bool (*get_budget)(struct request_queue *);
+	int (*get_budget)(struct request_queue *);
 
 	/**
 	 * @put_budget: Release the reserved budget.
 	 */
-	void (*put_budget)(struct request_queue *);
+	void (*put_budget)(struct request_queue *, int);
+
+	/**
+	 * @set_rq_budget_token: store rq's budget token
+	 */
+	void (*set_rq_budget_token)(struct request *, int);
+	/**
+	 * @get_rq_budget_token: retrieve rq's budget token
+	 */
+	int (*get_rq_budget_token)(struct request *);
 
 	/**
 	 * @timeout: Called on request timeout.
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index a0c8bd1c83ad5ecbe3e8f4a0068d44cd724b4e30..00789ea0f77c916916af4d48790861171c479657 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -40,6 +40,21 @@ struct sbitmap_word {
 	spinlock_t swap_lock;
 } ____cacheline_aligned_in_smp;
 
+struct sbitmap_extend {
+	/**
+	 * @round_robin: Allocate bits in strict round-robin order.
+	 */
+	bool round_robin;
+
+	/**
+	 * @alloc_hint: Cache of last successfully allocated or freed bit.
+	 *
+	 * This is per-cpu, which allows multiple users to stick to different
+	 * cachelines until the map is exhausted.
+	 */
+	unsigned int __percpu *alloc_hint;
+};
+
 /**
  * struct sbitmap - Scalable bitmap.
  *
@@ -67,7 +82,7 @@ struct sbitmap {
 	 */
 	struct sbitmap_word *map;
 
-	KABI_RESERVE(1)
+	KABI_USE(1, struct sbitmap_extend *extend)
 };
 
 #define SBQ_WAIT_QUEUES 8
@@ -103,13 +118,7 @@ struct sbitmap_queue {
 	 */
 	struct sbitmap sb;
 
-	/*
-	 * @alloc_hint: Cache of last successfully allocated or freed bit.
-	 *
-	 * This is per-cpu, which allows multiple users to stick to different
-	 * cachelines until the map is exhausted.
-	 */
-	unsigned int __percpu *alloc_hint;
+	KABI_DEPRECATE(unsigned int __percpu *, alloc_hint)
 
 	/**
 	 * @wake_batch: Number of bits which must be freed before we wake up any
@@ -132,10 +141,7 @@ struct sbitmap_queue {
 	 */
 	atomic_t ws_active;
 
-	/**
-	 * @round_robin: Allocate bits in strict round-robin order.
-	 */
-	bool round_robin;
+	KABI_DEPRECATE(bool, round_robin)
 
 	/**
 	 * @min_shallow_depth: The minimum shallow depth which may be passed to
@@ -152,11 +158,16 @@ struct sbitmap_queue {
  * given, a good default is chosen.
  * @flags: Allocation flags.
 * @node: Memory node to allocate on.
+ * @round_robin: If true, be stricter about allocation order; always allocate
+ *               starting from the last allocated bit. This is less efficient
+ *               than the default behavior (false).
+ * @alloc_hint: If true, apply percpu hint for where to start searching for
+ *              a free bit.
 *
 * Return: Zero on success or negative errno on failure.
 */
 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
-		      gfp_t flags, int node);
+		      gfp_t flags, int node, bool round_robin, bool alloc_hint);
 
 /**
 * sbitmap_free() - Free memory used by a &struct sbitmap.
@@ -164,6 +175,9 @@ int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
 */
 static inline void sbitmap_free(struct sbitmap *sb)
 {
+	free_percpu(sb->extend->alloc_hint);
+	kfree(sb->extend);
+	sb->extend = NULL;
 	kfree(sb->map);
 	sb->map = NULL;
 }
@@ -181,22 +195,17 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth);
 /**
 * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap.
 * @sb: Bitmap to allocate from.
- * @alloc_hint: Hint for where to start searching for a free bit.
- * @round_robin: If true, be stricter about allocation order; always allocate
- *               starting from the last allocated bit. This is less efficient
- *               than the default behavior (false).
 *
 * This operation provides acquire barrier semantics if it succeeds.
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
-int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin);
+int sbitmap_get(struct sbitmap *sb);
 
 /**
 * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap,
 * limiting the depth used from each word.
 * @sb: Bitmap to allocate from.
- * @alloc_hint: Hint for where to start searching for a free bit.
 * @shallow_depth: The maximum number of bits to allocate from a single word.
 *
 * This rather specific operation allows for having multiple users with
@@ -208,8 +217,7 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin);
 *
 * Return: Non-negative allocated bit number if successful, -1 otherwise.
 */
-int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
-			unsigned long shallow_depth);
+int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth);
 
 /**
 * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap.
@@ -323,10 +331,16 @@ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int b
 	set_bit(SB_NR_TO_BIT(sb, bitnr), addr);
 }
 
-static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb,
-					    unsigned int bitnr)
+/*
+ * Pairs with sbitmap_get(): applies both the deferred clear and the
+ * allocation-hint update.
+ */
+static inline void sbitmap_put(struct sbitmap *sb, unsigned int bitnr)
 {
-	clear_bit_unlock(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
+	sbitmap_deferred_clear_bit(sb, bitnr);
+
+	if (likely(sb->extend->alloc_hint && !sb->extend->round_robin && bitnr < sb->depth))
+		*raw_cpu_ptr(sb->extend->alloc_hint) = bitnr;
 }
 
 static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
@@ -334,6 +348,24 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
 	return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr));
 }
 
+static inline int sbitmap_calculate_shift(unsigned int depth)
+{
+	int shift = ilog2(BITS_PER_LONG);
+
+	/*
+	 * If the bitmap is small, shrink the number of bits per word so
+	 * we spread over a few cachelines, at least. If less than 4
+	 * bits, just forget about it, it's not going to work optimally
+	 * anyway.
+	 */
+	if (depth >= 4) {
+		while ((4U << shift) > depth)
+			shift--;
+	}
+
+	return shift;
+}
+
 /**
 * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file.
 * @sb: Bitmap to show.
@@ -343,6 +375,16 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr)
 */
 void sbitmap_show(struct sbitmap *sb, struct seq_file *m);
 
+
+/**
+ * sbitmap_weight() - Return the number of set, not-cleared bits in a
+ * &struct sbitmap.
+ * @sb: Bitmap to check.
+ *
+ * Return: How many bits are set and not deferred-cleared.
+ */
+unsigned int sbitmap_weight(const struct sbitmap *sb);
+
 /**
 * sbitmap_bitmap_show() - Write a hex dump of a &struct sbitmap to a &struct
 * seq_file.
@@ -377,7 +419,6 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
 {
 	kfree(sbq->ws);
-	free_percpu(sbq->alloc_hint);
 	sbitmap_free(&sbq->sb);
 }
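
sbitmap_calculate_shift() is hoisted into the header so scsi_scan.c can size the budget map from the default queue depth. Since it is a pure function, its behavior is easy to check in isolation; here is a standalone userspace rendition (ilog2 swapped for a GCC builtin, BITS_PER_LONG assumed 64; nothing below is kernel API):

#include <stdio.h>

static int sbitmap_calculate_shift(unsigned int depth)
{
	int shift = 31 - __builtin_clz(64);	/* ilog2(BITS_PER_LONG) on 64-bit */

	/* Small maps shrink bits-per-word so bits spread across cachelines. */
	if (depth >= 4) {
		while ((4U << shift) > depth)
			shift--;
	}
	return shift;
}

int main(void)
{
	unsigned int depths[] = { 1, 4, 31, 64, 256, 1024 };

	for (int i = 0; i < 6; i++) {
		int shift = sbitmap_calculate_shift(depths[i]);

		printf("depth=%4u -> shift=%d (%u bits/word)\n",
		       depths[i], shift, 1U << shift);
	}
	return 0;
}

For example, a default depth of 31 yields shift=2 (4 bits per word spread over 8 words), while any depth of 256 or more uses the full 64-bit word.
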
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 89999ebbb2911b082a32742b95a1f8bb068cc100..49e238cd477efc104b497bcbb071aa2fdafbf0b3 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -76,6 +76,8 @@ struct scsi_cmnd {
 
 	int eh_eflags;		/* Used by error handlr */
 
+	int budget_token;
+
 	/*
 	 * This is set to jiffies as it was when the command was first
 	 * allocated.  It is used to time how long the command has
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 8b9d1aac62d10a49602ecbb06816b79d3f889377..30cdc73175b59cb0eb28bcc121ccda320f8ab6cb 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -9,6 +9,7 @@
 #include <linux/blkdev.h>
 #include <scsi/scsi.h>
 #include <linux/atomic.h>
+#include <linux/sbitmap.h>
 
 struct device;
 struct request_queue;
@@ -107,7 +108,7 @@ struct scsi_device {
 	struct list_head    siblings;   /* list of all devices on this host */
 	struct list_head    same_target_siblings; /* just the devices sharing same target id */
 
-	atomic_t device_busy;		/* commands actually active on LLDD */
+	KABI_DEPRECATE(atomic_t, device_busy)
 	atomic_t device_blocked;	/* Device returned QUEUE_FULL. */
 
 	atomic_t restarts;
@@ -240,7 +241,7 @@ struct scsi_device {
 	enum scsi_device_state sdev_state;
 	struct task_struct	*quiesced_by;
 
-	KABI_RESERVE(1)
+	KABI_USE(1, struct sbitmap *budget_map)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
@@ -459,6 +460,9 @@ extern int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 			  unsigned char *sense, struct scsi_sense_hdr *sshdr,
 			  int timeout, int retries, u64 flags,
 			  req_flags_t rq_flags, int *resid);
+
+extern void scsi_free_budget_map(struct scsi_device *sdev);
+
 /* Make sure any sense buffer is the correct size. */
 #define scsi_execute(sdev, cmd, data_direction, buffer, bufflen, sense,	\
 		     sshdr, timeout, retries, flags, rq_flags, resid)	\
@@ -606,6 +610,11 @@ static inline int scsi_device_supports_vpd(struct scsi_device *sdev)
 	return 0;
 }
 
+static inline int scsi_device_busy(struct scsi_device *sdev)
+{
+	return sbitmap_weight(sdev->budget_map);
+}
+
 #define MODULE_ALIAS_SCSI_DEVICE(type) \
 	MODULE_ALIAS("scsi:t-" __stringify(type) "*")
 #define SCSI_DEVICE_MODALIAS_FMT "scsi:t-0x%02x"
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 3a3cc3fe9a87f8ff24ca267aaf9c4deffe51b4da..ca5634c213738248818c893f8dbd272cf48b8603 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -31,40 +31,15 @@ struct scsi_transport_template;
 #define MODE_TARGET 0x02
 
 struct scsi_host_template {
-	struct module *module;
-	const char *name;
-
 	/*
-	 * The info function will return whatever useful information the
-	 * developer sees fit. If not provided, then the name field will
-	 * be used instead.
-	 *
-	 * Status: OPTIONAL
+	 * Put fields referenced in the I/O submission path together in
+	 * the same cacheline.
 	 */
-	const char *(* info)(struct Scsi_Host *);
 
 	/*
-	 * Ioctl interface
-	 *
-	 * Status: OPTIONAL
-	 */
-	int (*ioctl)(struct scsi_device *dev, unsigned int cmd,
-		     void __user *arg);
-
-
-#ifdef CONFIG_COMPAT
-	/*
-	 * Compat handler. Handle 32bit ABI.
-	 * When unknown ioctl is passed return -ENOIOCTLCMD.
-	 *
-	 * Status: OPTIONAL
+	 * Additional per-command data allocated for the driver.
 	 */
-	int (*compat_ioctl)(struct scsi_device *dev, unsigned int cmd,
-			    void __user *arg);
-#endif
-
-	int (*init_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd);
-	int (*exit_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd);
+	unsigned int cmd_size;
 
 	/*
 	 * The queuecommand function is used to queue up a scsi
@@ -112,6 +87,41 @@ struct scsi_host_template {
 	 */
 	void (*commit_rqs)(struct Scsi_Host *, u16);
 
+	struct module *module;
+	const char *name;
+
+	/*
+	 * The info function will return whatever useful information the
+	 * developer sees fit. If not provided, then the name field will
+	 * be used instead.
+	 *
+	 * Status: OPTIONAL
+	 */
+	const char *(*info)(struct Scsi_Host *);
+
+	/*
+	 * Ioctl interface
+	 *
+	 * Status: OPTIONAL
+	 */
+	int (*ioctl)(struct scsi_device *dev, unsigned int cmd,
+		     void __user *arg);
+
+
+#ifdef CONFIG_COMPAT
+	/*
+	 * Compat handler. Handle 32bit ABI.
+	 * When unknown ioctl is passed return -ENOIOCTLCMD.
+	 *
+	 * Status: OPTIONAL
+	 */
+	int (*compat_ioctl)(struct scsi_device *dev, unsigned int cmd,
+			    void __user *arg);
+#endif
+
+	int (*init_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd);
+	int (*exit_cmd_priv)(struct Scsi_Host *shost, struct scsi_cmnd *cmd);
+
 	/*
 	 * This is an error handling strategy routine. You don't need to
 	 * define one of these if you don't want to - there is a default
@@ -479,10 +489,6 @@ struct scsi_host_template {
 	 */
 	u64 vendor_id;
 
-	/*
-	 * Additional per-command data allocated for the driver.
-	 */
-	unsigned int cmd_size;
 	struct scsi_host_cmd_pool *cmd_pool;
 
 	/* Delay for runtime autosuspend */
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 904697c64961a7eb8eea47af30f8f28a8667d97d..9fe2aebc13da81bd8a41a00483f6afedfb1bb04d 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -9,73 +9,131 @@
 #include <linux/sbitmap.h>
 #include <linux/seq_file.h>
 
+static int init_alloc_hint(struct sbitmap *sb, gfp_t flags)
+{
+	unsigned depth = sb->depth;
+
+	sb->extend->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
+	if (!sb->extend->alloc_hint)
+		return -ENOMEM;
+
+	if (depth && !sb->extend->round_robin) {
+		int i;
+
+		for_each_possible_cpu(i)
+			*per_cpu_ptr(sb->extend->alloc_hint, i) = prandom_u32() % depth;
+	}
+	return 0;
+}
+
+static inline unsigned update_alloc_hint_before_get(struct sbitmap *sb,
+						    unsigned int depth)
+{
+	unsigned hint;
+
+	hint = this_cpu_read(*sb->extend->alloc_hint);
+	if (unlikely(hint >= depth)) {
+		hint = depth ? prandom_u32() % depth : 0;
+		this_cpu_write(*sb->extend->alloc_hint, hint);
+	}
+
+	return hint;
+}
+
+static inline void update_alloc_hint_after_get(struct sbitmap *sb,
+					       unsigned int depth,
+					       unsigned int hint,
+					       unsigned int nr)
+{
+	if (nr == -1) {
+		/* If the map is full, a hint won't do us much good. */
+		this_cpu_write(*sb->extend->alloc_hint, 0);
+	} else if (nr == hint || unlikely(sb->extend->round_robin)) {
+		/* Only update the hint if we used it. */
+		hint = nr + 1;
+		if (hint >= depth - 1)
+			hint = 0;
+		this_cpu_write(*sb->extend->alloc_hint, hint);
+	}
+}
+
 /*
 * See if we have deferred clears that we can batch move
 */
-static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index)
+static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
 {
 	unsigned long mask, val;
 	bool ret = false;
 	unsigned long flags;
 
-	spin_lock_irqsave(&sb->map[index].swap_lock, flags);
+	spin_lock_irqsave(&map->swap_lock, flags);
 
-	if (!sb->map[index].cleared)
+	if (!map->cleared)
 		goto out_unlock;
 
 	/*
 	 * First get a stable cleared mask, setting the old mask to 0.
 	 */
-	mask = xchg(&sb->map[index].cleared, 0);
+	mask = xchg(&map->cleared, 0);
 
 	/*
 	 * Now clear the masked bits in our free word
 	 */
 	do {
-		val = sb->map[index].word;
-	} while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val);
+		val = map->word;
+	} while (cmpxchg(&map->word, val, val & ~mask) != val);
 
 	ret = true;
 out_unlock:
-	spin_unlock_irqrestore(&sb->map[index].swap_lock, flags);
+	spin_unlock_irqrestore(&map->swap_lock, flags);
 	return ret;
 }
 
 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
-		      gfp_t flags, int node)
+		      gfp_t flags, int node, bool round_robin,
+		      bool alloc_hint)
 {
 	unsigned int bits_per_word;
 	unsigned int i;
 
-	if (shift < 0) {
-		shift = ilog2(BITS_PER_LONG);
-		/*
-		 * If the bitmap is small, shrink the number of bits per word so
-		 * we spread over a few cachelines, at least. If less than 4
-		 * bits, just forget about it, it's not going to work optimally
-		 * anyway.
-		 */
-		if (depth >= 4) {
-			while ((4U << shift) > depth)
-				shift--;
-		}
-	}
+	if (shift < 0)
+		shift = sbitmap_calculate_shift(depth);
+
 	bits_per_word = 1U << shift;
 	if (bits_per_word > BITS_PER_LONG)
 		return -EINVAL;
 
+	sb->extend = kzalloc(sizeof(*sb->extend), flags);
+	if (!sb->extend)
+		return -ENOMEM;
+
 	sb->shift = shift;
 	sb->depth = depth;
 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
+	sb->extend->round_robin = round_robin;
 
 	if (depth == 0) {
 		sb->map = NULL;
 		return 0;
 	}
 
+	if (alloc_hint) {
+		if (init_alloc_hint(sb, flags)) {
+			kfree(sb->extend);
+			sb->extend = NULL;
+			return -ENOMEM;
+		}
+	} else {
+		sb->extend->alloc_hint = NULL;
+	}
+
 	sb->map = kcalloc_node(sb->map_nr, sizeof(*sb->map), flags, node);
-	if (!sb->map)
+	if (!sb->map) {
+		free_percpu(sb->extend->alloc_hint);
+		kfree(sb->extend);
+		sb->extend = NULL;
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < sb->map_nr; i++) {
 		sb->map[i].depth = min(depth, bits_per_word);
@@ -92,7 +150,7 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
 	unsigned int i;
 
 	for (i = 0; i < sb->map_nr; i++)
-		sbitmap_deferred_clear(sb, i);
+		sbitmap_deferred_clear(&sb->map[i]);
 
 	sb->depth = depth;
 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
@@ -137,24 +195,24 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
 }
 
 static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
-				     unsigned int alloc_hint, bool round_robin)
+				     unsigned int alloc_hint)
 {
+	struct sbitmap_word *map = &sb->map[index];
 	int nr;
 
 	do {
-		nr = __sbitmap_get_word(&sb->map[index].word,
-					sb->map[index].depth, alloc_hint,
-					!round_robin);
+		nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint,
+					!sb->extend->round_robin);
 		if (nr != -1)
 			break;
-		if (!sbitmap_deferred_clear(sb, index))
+		if (!sbitmap_deferred_clear(map))
 			break;
 	} while (1);
 
 	return nr;
 }
 
-int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
+static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint)
 {
 	unsigned int i, index;
 	int nr = -1;
@@ -166,14 +224,13 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 	 * alloc_hint to find the right word index. No point in looping
 	 * twice in find_next_zero_bit() for that case.
 	 */
-	if (round_robin)
+	if (sb->extend->round_robin)
 		alloc_hint = SB_NR_TO_BIT(sb, alloc_hint);
 	else
 		alloc_hint = 0;
 
 	for (i = 0; i < sb->map_nr; i++) {
-		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint,
-					       round_robin);
+		nr = sbitmap_find_bit_in_index(sb, index, alloc_hint);
 		if (nr != -1) {
 			nr += index << sb->shift;
 			break;
@@ -187,10 +244,27 @@ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
 
 	return nr;
 }
+
+int sbitmap_get(struct sbitmap *sb)
+{
+	int nr;
+	unsigned int hint, depth;
+
+	if (WARN_ON_ONCE(unlikely(!sb->extend->alloc_hint)))
+		return -1;
+
+	depth = READ_ONCE(sb->depth);
+	hint = update_alloc_hint_before_get(sb, depth);
+	nr = __sbitmap_get(sb, hint);
+	update_alloc_hint_after_get(sb, depth, hint, nr);
+
+	return nr;
+}
 EXPORT_SYMBOL_GPL(sbitmap_get);
 
-int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
-			unsigned long shallow_depth)
+static int __sbitmap_get_shallow(struct sbitmap *sb,
+				 unsigned int alloc_hint,
+				 unsigned long shallow_depth)
 {
 	unsigned int i, index;
 	int nr = -1;
@@ -207,7 +281,7 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
 			break;
 		}
 
-		if (sbitmap_deferred_clear(sb, index))
+		if (sbitmap_deferred_clear(&sb->map[index]))
 			goto again;
 
 		/* Jump to next index. */
@@ -222,6 +296,22 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
 
 	return nr;
 }
+
+int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth)
+{
+	int nr;
+	unsigned int hint, depth;
+
+	if (WARN_ON_ONCE(unlikely(!sb->extend->alloc_hint)))
+		return -1;
+
+	depth = READ_ONCE(sb->depth);
+	hint = update_alloc_hint_before_get(sb, depth);
+	nr = __sbitmap_get_shallow(sb, hint, shallow_depth);
+	update_alloc_hint_after_get(sb, depth, hint, nr);
+
+	return nr;
+}
 EXPORT_SYMBOL_GPL(sbitmap_get_shallow);
 
 bool sbitmap_any_bit_set(const struct sbitmap *sb)
@@ -251,20 +341,21 @@ static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
 	return weight;
 }
 
-static unsigned int sbitmap_weight(const struct sbitmap *sb)
+static unsigned int sbitmap_cleared(const struct sbitmap *sb)
 {
-	return __sbitmap_weight(sb, true);
+	return __sbitmap_weight(sb, false);
 }
 
-static unsigned int sbitmap_cleared(const struct sbitmap *sb)
+unsigned int sbitmap_weight(const struct sbitmap *sb)
 {
-	return __sbitmap_weight(sb, false);
+	return __sbitmap_weight(sb, true) - sbitmap_cleared(sb);
 }
+EXPORT_SYMBOL_GPL(sbitmap_weight);
 
 void sbitmap_show(struct sbitmap *sb, struct seq_file *m)
 {
 	seq_printf(m, "depth=%u\n", sb->depth);
-	seq_printf(m, "busy=%u\n", sbitmap_weight(sb) - sbitmap_cleared(sb));
+	seq_printf(m, "busy=%u\n", sbitmap_weight(sb));
 	seq_printf(m, "cleared=%u\n", sbitmap_cleared(sb));
 	seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift);
 	seq_printf(m, "map_nr=%u\n", sb->map_nr);
@@ -358,21 +449,11 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 	int ret;
 	int i;
 
-	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
+	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node,
+				round_robin, true);
 	if (ret)
 		return ret;
 
-	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
-	if (!sbq->alloc_hint) {
-		sbitmap_free(&sbq->sb);
-		return -ENOMEM;
-	}
-
-	if (depth && !round_robin) {
-		for_each_possible_cpu(i)
-			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
-	}
-
 	sbq->min_shallow_depth = UINT_MAX;
 	sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
 	atomic_set(&sbq->wake_index, 0);
@@ -380,7 +461,6 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
 	if (!sbq->ws) {
-		free_percpu(sbq->alloc_hint);
 		sbitmap_free(&sbq->sb);
 		return -ENOMEM;
 	}
@@ -390,7 +470,6 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
 		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
 	}
 
-	sbq->round_robin = round_robin;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
@@ -443,60 +522,16 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
 
 int __sbitmap_queue_get(struct sbitmap_queue *sbq)
 {
-	unsigned int hint, depth;
-	int nr;
-
-	hint = this_cpu_read(*sbq->alloc_hint);
-	depth = READ_ONCE(sbq->sb.depth);
-	if (unlikely(hint >= depth)) {
-		hint = depth ? prandom_u32() % depth : 0;
-		this_cpu_write(*sbq->alloc_hint, hint);
-	}
-	nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
-
-	if (nr == -1) {
-		/* If the map is full, a hint won't do us much good. */
-		this_cpu_write(*sbq->alloc_hint, 0);
-	} else if (nr == hint || unlikely(sbq->round_robin)) {
-		/* Only update the hint if we used it. */
-		hint = nr + 1;
-		if (hint >= depth - 1)
-			hint = 0;
-		this_cpu_write(*sbq->alloc_hint, hint);
-	}
-
-	return nr;
+	return sbitmap_get(&sbq->sb);
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
 
 int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 				unsigned int shallow_depth)
 {
-	unsigned int hint, depth;
-	int nr;
-
 	WARN_ON_ONCE(shallow_depth < sbq->min_shallow_depth);
 
-	hint = this_cpu_read(*sbq->alloc_hint);
-	depth = READ_ONCE(sbq->sb.depth);
-	if (unlikely(hint >= depth)) {
-		hint = depth ? prandom_u32() % depth : 0;
-		this_cpu_write(*sbq->alloc_hint, hint);
-	}
-	nr = sbitmap_get_shallow(&sbq->sb, hint, shallow_depth);
-
-	if (nr == -1) {
-		/* If the map is full, a hint won't do us much good. */
-		this_cpu_write(*sbq->alloc_hint, 0);
-	} else if (nr == hint || unlikely(sbq->round_robin)) {
-		/* Only update the hint if we used it. */
-		hint = nr + 1;
-		if (hint >= depth - 1)
-			hint = 0;
-		this_cpu_write(*sbq->alloc_hint, hint);
-	}
-
-	return nr;
+	return sbitmap_get_shallow(&sbq->sb, shallow_depth);
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get_shallow);
 
@@ -627,8 +662,8 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 	smp_mb__after_atomic();
 	sbitmap_queue_wake_up(sbq);
 
-	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
-		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
+	if (likely(!sbq->sb.extend->round_robin && nr < sbq->sb.depth))
+		*per_cpu_ptr(sbq->sb.extend->alloc_hint, cpu) = nr;
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
 
@@ -666,7 +701,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 		if (!first)
 			seq_puts(m, ", ");
 		first = false;
-		seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i));
+		seq_printf(m, "%u", *per_cpu_ptr(sbq->sb.extend->alloc_hint, i));
 	}
 	seq_puts(m, "}\n");
 
@@ -684,7 +719,7 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
 	}
 	seq_puts(m, "}\n");
 
-	seq_printf(m, "round_robin=%d\n", sbq->round_robin);
+	seq_printf(m, "round_robin=%d\n", sbq->sb.extend->round_robin);
 	seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
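
Taken together, the sbitmap changes hide the allocation hint entirely; a user that previously carried its own per-cpu hint (as sbitmap_queue did) now only needs the calls below. A kernel-context sketch using just the signatures introduced in this patch:

static int example_budget_map_use(void)
{
	struct sbitmap sb;
	int nr;

	/* depth 128, auto shift, no round-robin, per-cpu alloc hint on */
	if (sbitmap_init_node(&sb, 128, -1, GFP_KERNEL, NUMA_NO_NODE,
			      false, true))
		return -ENOMEM;

	nr = sbitmap_get(&sb);		/* hint read and updated internally */
	if (nr >= 0)
		sbitmap_put(&sb, nr);	/* deferred clear + hint update */

	sbitmap_free(&sb);		/* also frees ->extend and the hint */
	return 0;
}

One KABI note on the layout: because struct sbitmap had only KABI_RESERVE(1) to spend, round_robin and alloc_hint live behind the single extend pointer, which is why every accessor above pays an sb->extend dereference that the upstream version of this series does not.
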