summaryrefslogtreecommitdiff
path: root/drivers/target/target_core_device.c
diff options
context:
space:
mode:
author Mike Christie <michael.christie@oracle.com> 2025-04-23 22:26:32 -0500
committer Martin K. Petersen <martin.petersen@oracle.com> 2025-04-28 21:47:55 -0400
commit 9cf2317b795d6cde0fccb8744b5a080a9586020e (patch)
tree 3081c6a4f985736babdceda2ff8596ab000d5752 /drivers/target/target_core_device.c
parent 0af2f6be1b4281385b618cb86ad946eded089ac8 (diff)
scsi: target: Move I/O path stats to per CPU
The use of atomics in the main I/O path is causing perf issues when using higher performance backend devices and multiple queues. This moves the stats to per CPU. Combined with the next patch that moves the non_ordered/delayed_cmd_count to per CPU, this improves IOPS by up to 33% for 8K I/Os when using 4 or more queues. Signed-off-by: Mike Christie <michael.christie@oracle.com> Link: https://lore.kernel.org/r/20250424032741.16216-2-michael.christie@oracle.com Reviewed-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/target/target_core_device.c')
-rw-r--r-- drivers/target/target_core_device.c 69
1 files changed, 50 insertions, 19 deletions
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index cc2da086f96e..39aad464c0bf 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -55,14 +55,14 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd)
rcu_read_lock();
deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun);
if (deve) {
- atomic_long_inc(&deve->total_cmds);
+ this_cpu_inc(deve->stats->total_cmds);
if (se_cmd->data_direction == DMA_TO_DEVICE)
- atomic_long_add(se_cmd->data_length,
- &deve->write_bytes);
+ this_cpu_add(deve->stats->write_bytes,
+ se_cmd->data_length);
else if (se_cmd->data_direction == DMA_FROM_DEVICE)
- atomic_long_add(se_cmd->data_length,
- &deve->read_bytes);
+ this_cpu_add(deve->stats->read_bytes,
+ se_cmd->data_length);
if ((se_cmd->data_direction == DMA_TO_DEVICE) &&
deve->lun_access_ro) {
@@ -126,14 +126,14 @@ out_unlock:
* target_core_fabric_configfs.c:target_fabric_port_release
*/
se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev);
- atomic_long_inc(&se_cmd->se_dev->num_cmds);
+ this_cpu_inc(se_cmd->se_dev->stats->total_cmds);
if (se_cmd->data_direction == DMA_TO_DEVICE)
- atomic_long_add(se_cmd->data_length,
- &se_cmd->se_dev->write_bytes);
+ this_cpu_add(se_cmd->se_dev->stats->write_bytes,
+ se_cmd->data_length);
else if (se_cmd->data_direction == DMA_FROM_DEVICE)
- atomic_long_add(se_cmd->data_length,
- &se_cmd->se_dev->read_bytes);
+ this_cpu_add(se_cmd->se_dev->stats->read_bytes,
+ se_cmd->data_length);
return ret;
}
@@ -322,6 +322,7 @@ int core_enable_device_list_for_node(
struct se_portal_group *tpg)
{
struct se_dev_entry *orig, *new;
+ int ret = 0;
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new) {
@@ -329,6 +330,12 @@ int core_enable_device_list_for_node(
return -ENOMEM;
}
+ new->stats = alloc_percpu(struct se_dev_entry_io_stats);
+ if (!new->stats) {
+ ret = -ENOMEM;
+ goto free_deve;
+ }
+
spin_lock_init(&new->ua_lock);
INIT_LIST_HEAD(&new->ua_list);
INIT_LIST_HEAD(&new->lun_link);
@@ -351,8 +358,8 @@ int core_enable_device_list_for_node(
" for dynamic -> explicit NodeACL conversion:"
" %s\n", nacl->initiatorname);
mutex_unlock(&nacl->lun_entry_mutex);
- kfree(new);
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_stats;
}
if (orig->se_lun_acl != NULL) {
pr_warn_ratelimited("Detected existing explicit"
@@ -360,8 +367,8 @@ int core_enable_device_list_for_node(
" mapped_lun: %llu, failing\n",
nacl->initiatorname, mapped_lun);
mutex_unlock(&nacl->lun_entry_mutex);
- kfree(new);
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_stats;
}
new->se_lun = lun;
@@ -394,6 +401,20 @@ int core_enable_device_list_for_node(
target_luns_data_has_changed(nacl, new, true);
return 0;
+
+free_stats:
+ free_percpu(new->stats);
+free_deve:
+ kfree(new);
+ return ret;
+}
+
+static void target_free_dev_entry(struct rcu_head *head)
+{
+ struct se_dev_entry *deve = container_of(head, struct se_dev_entry,
+ rcu_head);
+ free_percpu(deve->stats);
+ kfree(deve);
}
void core_disable_device_list_for_node(
@@ -443,7 +464,7 @@ void core_disable_device_list_for_node(
kref_put(&orig->pr_kref, target_pr_kref_release);
wait_for_completion(&orig->pr_comp);
- kfree_rcu(orig, rcu_head);
+ call_rcu(&orig->rcu_head, target_free_dev_entry);
core_scsi3_free_pr_reg_from_nacl(dev, nacl);
target_luns_data_has_changed(nacl, NULL, false);
@@ -689,11 +710,13 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
if (!dev)
return NULL;
+ dev->stats = alloc_percpu(struct se_dev_io_stats);
+ if (!dev->stats)
+ goto free_device;
+
dev->queues = kcalloc(nr_cpu_ids, sizeof(*dev->queues), GFP_KERNEL);
- if (!dev->queues) {
- hba->backend->ops->free_device(dev);
- return NULL;
- }
+ if (!dev->queues)
+ goto free_stats;
dev->queue_cnt = nr_cpu_ids;
for (i = 0; i < dev->queue_cnt; i++) {
@@ -707,6 +730,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
INIT_WORK(&q->sq.work, target_queued_submit_work);
}
+
dev->se_hba = hba;
dev->transport = hba->backend->ops;
dev->transport_flags = dev->transport->transport_flags_default;
@@ -791,6 +815,12 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
sizeof(dev->t10_wwn.revision));
return dev;
+
+free_stats:
+ free_percpu(dev->stats);
+free_device:
+ hba->backend->ops->free_device(dev);
+ return NULL;
}
/*
@@ -1001,6 +1031,7 @@ void target_free_device(struct se_device *dev)
dev->transport->free_prot(dev);
kfree(dev->queues);
+ free_percpu(dev->stats);
dev->transport->free_device(dev);
}