diff options
| author | Mike Christie <michael.christie@oracle.com> | 2025-04-23 22:26:32 -0500 |
|---|---|---|
| committer | Martin K. Petersen <martin.petersen@oracle.com> | 2025-04-28 21:47:55 -0400 |
| commit | 9cf2317b795d6cde0fccb8744b5a080a9586020e (patch) | |
| tree | 3081c6a4f985736babdceda2ff8596ab000d5752 /drivers/target/target_core_device.c | |
| parent | 0af2f6be1b4281385b618cb86ad946eded089ac8 (diff) | |
scsi: target: Move I/O path stats to per CPU
The atomic use in the main I/O path is causing perf issues when using
higher performance backend devices and multiple queues. This moves the
stats to per CPU. Combined with the next patch that moves the
non_ordered/delayed_cmd_count to per CPU, IOPS by up to 33% for 8K IOS
when using 4 or more queues.
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Link: https://lore.kernel.org/r/20250424032741.16216-2-michael.christie@oracle.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/target/target_core_device.c')
| -rw-r--r-- | drivers/target/target_core_device.c | 69 |
1 files changed, 50 insertions, 19 deletions
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index cc2da086f96e..39aad464c0bf 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -55,14 +55,14 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd) rcu_read_lock(); deve = target_nacl_find_deve(nacl, se_cmd->orig_fe_lun); if (deve) { - atomic_long_inc(&deve->total_cmds); + this_cpu_inc(deve->stats->total_cmds); if (se_cmd->data_direction == DMA_TO_DEVICE) - atomic_long_add(se_cmd->data_length, - &deve->write_bytes); + this_cpu_add(deve->stats->write_bytes, + se_cmd->data_length); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - atomic_long_add(se_cmd->data_length, - &deve->read_bytes); + this_cpu_add(deve->stats->read_bytes, + se_cmd->data_length); if ((se_cmd->data_direction == DMA_TO_DEVICE) && deve->lun_access_ro) { @@ -126,14 +126,14 @@ out_unlock: * target_core_fabric_configfs.c:target_fabric_port_release */ se_cmd->se_dev = rcu_dereference_raw(se_lun->lun_se_dev); - atomic_long_inc(&se_cmd->se_dev->num_cmds); + this_cpu_inc(se_cmd->se_dev->stats->total_cmds); if (se_cmd->data_direction == DMA_TO_DEVICE) - atomic_long_add(se_cmd->data_length, - &se_cmd->se_dev->write_bytes); + this_cpu_add(se_cmd->se_dev->stats->write_bytes, + se_cmd->data_length); else if (se_cmd->data_direction == DMA_FROM_DEVICE) - atomic_long_add(se_cmd->data_length, - &se_cmd->se_dev->read_bytes); + this_cpu_add(se_cmd->se_dev->stats->read_bytes, + se_cmd->data_length); return ret; } @@ -322,6 +322,7 @@ int core_enable_device_list_for_node( struct se_portal_group *tpg) { struct se_dev_entry *orig, *new; + int ret = 0; new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) { @@ -329,6 +330,12 @@ int core_enable_device_list_for_node( return -ENOMEM; } + new->stats = alloc_percpu(struct se_dev_entry_io_stats); + if (!new->stats) { + ret = -ENOMEM; + goto free_deve; + } + spin_lock_init(&new->ua_lock); INIT_LIST_HEAD(&new->ua_list); INIT_LIST_HEAD(&new->lun_link); @@ -351,8 +358,8 @@ int core_enable_device_list_for_node( " for dynamic -> explicit NodeACL conversion:" " %s\n", nacl->initiatorname); mutex_unlock(&nacl->lun_entry_mutex); - kfree(new); - return -EINVAL; + ret = -EINVAL; + goto free_stats; } if (orig->se_lun_acl != NULL) { pr_warn_ratelimited("Detected existing explicit" @@ -360,8 +367,8 @@ int core_enable_device_list_for_node( " mapped_lun: %llu, failing\n", nacl->initiatorname, mapped_lun); mutex_unlock(&nacl->lun_entry_mutex); - kfree(new); - return -EINVAL; + ret = -EINVAL; + goto free_stats; } new->se_lun = lun; @@ -394,6 +401,20 @@ int core_enable_device_list_for_node( target_luns_data_has_changed(nacl, new, true); return 0; + +free_stats: + free_percpu(new->stats); +free_deve: + kfree(new); + return ret; +} + +static void target_free_dev_entry(struct rcu_head *head) +{ + struct se_dev_entry *deve = container_of(head, struct se_dev_entry, + rcu_head); + free_percpu(deve->stats); + kfree(deve); } void core_disable_device_list_for_node( @@ -443,7 +464,7 @@ void core_disable_device_list_for_node( kref_put(&orig->pr_kref, target_pr_kref_release); wait_for_completion(&orig->pr_comp); - kfree_rcu(orig, rcu_head); + call_rcu(&orig->rcu_head, target_free_dev_entry); core_scsi3_free_pr_reg_from_nacl(dev, nacl); target_luns_data_has_changed(nacl, NULL, false); @@ -689,11 +710,13 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) if (!dev) return NULL; + dev->stats = alloc_percpu(struct se_dev_io_stats); + if (!dev->stats) + goto free_device; + dev->queues = kcalloc(nr_cpu_ids, sizeof(*dev->queues), GFP_KERNEL); - if (!dev->queues) { - hba->backend->ops->free_device(dev); - return NULL; - } + if (!dev->queues) + goto free_stats; dev->queue_cnt = nr_cpu_ids; for (i = 0; i < dev->queue_cnt; i++) { @@ -707,6 +730,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_WORK(&q->sq.work, target_queued_submit_work); } + dev->se_hba = hba; dev->transport = hba->backend->ops; dev->transport_flags = dev->transport->transport_flags_default; @@ -791,6 +815,12 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) sizeof(dev->t10_wwn.revision)); return dev; + +free_stats: + free_percpu(dev->stats); +free_device: + hba->backend->ops->free_device(dev); + return NULL; } /* @@ -1001,6 +1031,7 @@ void target_free_device(struct se_device *dev) dev->transport->free_prot(dev); kfree(dev->queues); + free_percpu(dev->stats); dev->transport->free_device(dev); } |