| | | |
|---|---|---|
| author | Mike Christie <michael.christie@oracle.com> | 2025-04-23 22:26:33 -0500 |
| committer | Martin K. Petersen <martin.petersen@oracle.com> | 2025-04-28 21:47:55 -0400 |
| commit | 268975a87c7b6f6b0ceb62df236c1e1b08b89379 | |
| tree | c15403df63b052c45e9df54253adaaaca7d956e8 /drivers/target/target_core_device.c | |
| parent | 9cf2317b795d6cde0fccb8744b5a080a9586020e | |
scsi: target: Move delayed/ordered tracking to per CPU
The atomics used for delayed/ordered command tracking cause perf issues
with higher-performance backend devices and multiple queues. Move the
values to per-CPU counters. Combined with the per-CPU stats patch, this
improves IOPS by up to 33% for 8K IOs when 4 or more queues are used
from the initiator.
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Link: https://lore.kernel.org/r/20250424032741.16216-3-michael.christie@oracle.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
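The diff below replaces the shared atomic with a percpu_ref whose release
callback kicks the delayed-command worker. As a rough illustration of that
pattern, here is a minimal sketch; this is not code from the patch, and
`my_dev`, `my_dev_init`, `my_submit`, and `my_complete` are invented names.
Only the `percpu_ref_*` calls mirror the real change:

```c
/*
 * Minimal sketch of the percpu_ref pattern this patch relies on.
 * Hypothetical example: my_dev and friends are invented names;
 * the inflight ref plays the role of se_device->non_ordered.
 */
#include <linux/percpu-refcount.h>
#include <linux/workqueue.h>

struct my_dev {
	struct percpu_ref inflight;	/* analogous to se_device->non_ordered */
	struct work_struct drain_work;	/* analogous to delayed_cmd_work */
};

static void my_dev_release(struct percpu_ref *ref)
{
	struct my_dev *d = container_of(ref, struct my_dev, inflight);

	/* Last in-flight reference dropped: run any queued ordered work. */
	schedule_work(&d->drain_work);
}

static int my_dev_init(struct my_dev *d)
{
	/* ALLOW_REINIT so the counter can be re-armed after each drain. */
	return percpu_ref_init(&d->inflight, my_dev_release,
			       PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
}

static void my_submit(struct my_dev *d)
{
	percpu_ref_get(&d->inflight);	/* per-CPU increment, no shared atomic */
	/* ... issue the command to the backend ... */
}

static void my_complete(struct my_dev *d)
{
	percpu_ref_put(&d->inflight);	/* release fires only after a kill */
}
```

In percpu mode, percpu_ref_get()/percpu_ref_put() touch only a per-CPU
counter, which is what removes the cross-CPU atomic contention the commit
message describes.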
Diffstat (limited to 'drivers/target/target_core_device.c')
| -rw-r--r-- | drivers/target/target_core_device.c | 20 |
1 file changed, 20 insertions(+), 0 deletions(-)
```diff
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 39aad464c0bf..7bb711b24c0d 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -700,6 +700,18 @@ static void scsi_dump_inquiry(struct se_device *dev)
 	pr_debug("  Type:   %s ", scsi_device_type(device_type));
 }
 
+static void target_non_ordered_release(struct percpu_ref *ref)
+{
+	struct se_device *dev = container_of(ref, struct se_device,
+					     non_ordered);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->delayed_cmd_lock, flags);
+	if (!list_empty(&dev->delayed_cmd_list))
+		schedule_work(&dev->delayed_cmd_work);
+	spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
+}
+
 struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 {
 	struct se_device *dev;
@@ -730,6 +742,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 		INIT_WORK(&q->sq.work, target_queued_submit_work);
 	}
 
+	if (percpu_ref_init(&dev->non_ordered, target_non_ordered_release,
+			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+		goto free_queues;
 
 	dev->se_hba = hba;
 	dev->transport = hba->backend->ops;
@@ -816,6 +831,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 
 	return dev;
 
+free_queues:
+	kfree(dev->queues);
 free_stats:
 	free_percpu(dev->stats);
 free_device:
@@ -1010,6 +1027,9 @@ void target_free_device(struct se_device *dev)
 
 	WARN_ON(!list_empty(&dev->dev_sep_list));
 
+	percpu_ref_exit(&dev->non_ordered);
+	cancel_work_sync(&dev->delayed_cmd_work);
+
 	if (target_dev_configured(dev)) {
 		dev->transport->destroy_device(dev);
```
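The release callback only fires after the ref is killed; the kill/reinit
side of the cycle lives in the companion target_core_transport.c change,
which is not part of this diff. As a hedged sketch of that lifecycle,
continuing the hypothetical names from the example above:

```c
/*
 * Hypothetical continuation of the sketch above: how a driver drains
 * the per-CPU counter before an ordered command and re-arms it after.
 * The real TCM wiring lives in target_core_transport.c, outside this diff.
 */
static void my_begin_ordered(struct my_dev *d)
{
	/*
	 * Switch the ref to atomic mode and drop the initial reference;
	 * once every in-flight my_complete() has put its ref,
	 * my_dev_release() fires and schedules drain_work.
	 */
	percpu_ref_kill(&d->inflight);
}

static void my_drain_work_fn(struct work_struct *work)
{
	struct my_dev *d = container_of(work, struct my_dev, drain_work);

	/* ... run the queued ordered command to completion ... */

	/*
	 * Re-arm the counter for the next batch of simple commands; only
	 * valid because the ref was created with PERCPU_REF_ALLOW_REINIT.
	 */
	percpu_ref_reinit(&d->inflight);
}
```

Teardown then mirrors the target_free_device() hunk in the diff:
percpu_ref_exit() frees the per-CPU counters and cancel_work_sync()
ensures the drain worker is no longer queued or running.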