author    Mike Christie <michael.christie@oracle.com>    2025-04-23 22:26:33 -0500
committer Martin K. Petersen <martin.petersen@oracle.com>    2025-04-28 21:47:55 -0400
commit    268975a87c7b6f6b0ceb62df236c1e1b08b89379 (patch)
tree      c15403df63b052c45e9df54253adaaaca7d956e8 /drivers/target/target_core_device.c
parent    9cf2317b795d6cde0fccb8744b5a080a9586020e (diff)
scsi: target: Move delayed/ordered tracking to per CPU
The atomic use from the delayed/ordered tracking is causing perf issues
when using higher perf backend devices and multiple queues. This moves
the values to a per CPU counter. Combined with the per CPU stats patch,
this improves IOPS by up to 33% for 8K IOs when using 4 or more queues
from the initiator.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Link: https://lore.kernel.org/r/20250424032741.16216-3-michael.christie@oracle.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
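For readers unfamiliar with the pattern this patch switches to, the sketch below
illustrates how a percpu_ref can replace a shared atomic counter for tracking
in-flight non-ordered commands: gets and puts stay per CPU on the fast path, and
the release callback fires only after the ref has been killed (e.g. when an
ordered command needs the queue drained) and the count reaches zero. This is a
minimal, self-contained illustration under assumed names (demo_device, demo_*),
not the actual target core code.

    #include <linux/percpu-refcount.h>
    #include <linux/workqueue.h>
    #include <linux/gfp.h>

    struct demo_device {
            struct percpu_ref non_ordered;   /* in-flight non-ordered commands */
            struct work_struct delayed_work; /* runs queued ordered commands */
    };

    static void demo_run_delayed(struct work_struct *work)
    {
            /* Dispatch the ordered commands that were held back. */
    }

    static void demo_non_ordered_release(struct percpu_ref *ref)
    {
            struct demo_device *dev = container_of(ref, struct demo_device,
                                                   non_ordered);

            /* Every outstanding non-ordered command has completed. */
            schedule_work(&dev->delayed_work);
    }

    static int demo_device_init(struct demo_device *dev)
    {
            INIT_WORK(&dev->delayed_work, demo_run_delayed);
            /* ALLOW_REINIT lets the ref be re-armed after it drains. */
            return percpu_ref_init(&dev->non_ordered, demo_non_ordered_release,
                                   PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
    }

    static void demo_submit_non_ordered(struct demo_device *dev)
    {
            percpu_ref_get(&dev->non_ordered);  /* per CPU, no shared atomic */
            /* ... queue the command to the backend ... */
    }

    static void demo_complete_non_ordered(struct demo_device *dev)
    {
            percpu_ref_put(&dev->non_ordered);
    }

    static void demo_begin_ordered_drain(struct demo_device *dev)
    {
            /*
             * Switch to atomic mode; once all prior gets have been put,
             * the release callback above is invoked.
             */
            percpu_ref_kill(&dev->non_ordered);
    }

    static void demo_end_ordered_drain(struct demo_device *dev)
    {
            /* Re-arm the per CPU fast path (needs PERCPU_REF_ALLOW_REINIT). */
            percpu_ref_resurrect(&dev->non_ordered);
    }

The hunks below show the same shape in the driver: target_non_ordered_release()
checks dev->delayed_cmd_list under delayed_cmd_lock and schedules
dev->delayed_cmd_work, and target_alloc_device() initializes the ref with
PERCPU_REF_ALLOW_REINIT.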
Diffstat (limited to 'drivers/target/target_core_device.c')
-rw-r--r--  drivers/target/target_core_device.c  20
1 file changed, 20 insertions(+), 0 deletions(-)
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 39aad464c0bf..7bb711b24c0d 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -700,6 +700,18 @@ static void scsi_dump_inquiry(struct se_device *dev)
pr_debug(" Type: %s ", scsi_device_type(device_type));
}
+static void target_non_ordered_release(struct percpu_ref *ref)
+{
+ struct se_device *dev = container_of(ref, struct se_device,
+ non_ordered);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->delayed_cmd_lock, flags);
+ if (!list_empty(&dev->delayed_cmd_list))
+ schedule_work(&dev->delayed_cmd_work);
+ spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
+}
+
struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
{
struct se_device *dev;
@@ -730,6 +742,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
INIT_WORK(&q->sq.work, target_queued_submit_work);
}
+ if (percpu_ref_init(&dev->non_ordered, target_non_ordered_release,
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+ goto free_queues;
dev->se_hba = hba;
dev->transport = hba->backend->ops;
@@ -816,6 +831,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
return dev;
+free_queues:
+ kfree(dev->queues);
free_stats:
free_percpu(dev->stats);
free_device:
@@ -1010,6 +1027,9 @@ void target_free_device(struct se_device *dev)
WARN_ON(!list_empty(&dev->dev_sep_list));
+ percpu_ref_exit(&dev->non_ordered);
+ cancel_work_sync(&dev->delayed_cmd_work);
+
if (target_dev_configured(dev)) {
dev->transport->destroy_device(dev);