summaryrefslogtreecommitdiff
path: root/drivers/nvme/target
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/target')
-rw-r--r--drivers/nvme/target/core.c40
-rw-r--r--drivers/nvme/target/nvmet.h15
-rw-r--r--drivers/nvme/target/pci-epf.c67
-rw-r--r--drivers/nvme/target/rdma.c33
-rw-r--r--drivers/nvme/target/tcp.c15
5 files changed, 103 insertions, 67 deletions
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index cdc4a09a6e8a..2e741696f371 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -606,6 +606,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
goto out_dev_put;
}
+ if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
+ goto out_pr_exit;
+
nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
@@ -613,6 +616,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
+out_pr_exit:
+ if (ns->pr.enable)
+ nvmet_pr_exit_ns(ns);
out_dev_put:
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
@@ -638,6 +644,19 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
mutex_unlock(&subsys->lock);
+ /*
+ * Now that we removed the namespaces from the lookup list, we
+ * can kill the per_cpu ref and wait for any remaining references
+ * to be dropped, as well as a RCU grace period for anyone only
+ * using the namepace under rcu_read_lock(). Note that we can't
+ * use call_rcu here as we need to ensure the namespaces have
+ * been fully destroyed before unloading the module.
+ */
+ percpu_ref_kill(&ns->ref);
+ synchronize_rcu();
+ wait_for_completion(&ns->disable_done);
+ percpu_ref_exit(&ns->ref);
+
if (ns->pr.enable)
nvmet_pr_exit_ns(ns);
@@ -660,22 +679,6 @@ void nvmet_ns_free(struct nvmet_ns *ns)
if (ns->nsid == subsys->max_nsid)
subsys->max_nsid = nvmet_max_nsid(subsys);
- mutex_unlock(&subsys->lock);
-
- /*
- * Now that we removed the namespaces from the lookup list, we
- * can kill the per_cpu ref and wait for any remaining references
- * to be dropped, as well as a RCU grace period for anyone only
- * using the namepace under rcu_read_lock(). Note that we can't
- * use call_rcu here as we need to ensure the namespaces have
- * been fully destroyed before unloading the module.
- */
- percpu_ref_kill(&ns->ref);
- synchronize_rcu();
- wait_for_completion(&ns->disable_done);
- percpu_ref_exit(&ns->ref);
-
- mutex_lock(&subsys->lock);
subsys->nr_namespaces--;
mutex_unlock(&subsys->lock);
@@ -705,9 +708,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->nsid = nsid;
ns->subsys = subsys;
- if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
- goto out_free;
-
if (ns->nsid > subsys->max_nsid)
subsys->max_nsid = nsid;
@@ -730,8 +730,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
return ns;
out_exit:
subsys->max_nsid = nvmet_max_nsid(subsys);
- percpu_ref_exit(&ns->ref);
-out_free:
kfree(ns);
out_unlock:
mutex_unlock(&subsys->lock);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 4be8d22d2d8d..fcf4f460dc9a 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -647,7 +647,6 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
struct nvmet_host *host);
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
u8 event_info, u8 log_page);
-bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid);
#define NVMET_MIN_QUEUE_SIZE 16
#define NVMET_MAX_QUEUE_SIZE 1024
@@ -784,37 +783,37 @@ u16 nvmet_report_invalid_opcode(struct nvmet_req *req);
static inline bool nvmet_cc_en(u32 cc)
{
- return (cc >> NVME_CC_EN_SHIFT) & 0x1;
+ return (cc & NVME_CC_ENABLE) >> NVME_CC_EN_SHIFT;
}
static inline u8 nvmet_cc_css(u32 cc)
{
- return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
+ return (cc & NVME_CC_CSS_MASK) >> NVME_CC_CSS_SHIFT;
}
static inline u8 nvmet_cc_mps(u32 cc)
{
- return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
+ return (cc & NVME_CC_MPS_MASK) >> NVME_CC_MPS_SHIFT;
}
static inline u8 nvmet_cc_ams(u32 cc)
{
- return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
+ return (cc & NVME_CC_AMS_MASK) >> NVME_CC_AMS_SHIFT;
}
static inline u8 nvmet_cc_shn(u32 cc)
{
- return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
+ return (cc & NVME_CC_SHN_MASK) >> NVME_CC_SHN_SHIFT;
}
static inline u8 nvmet_cc_iosqes(u32 cc)
{
- return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
+ return (cc & NVME_CC_IOSQES_MASK) >> NVME_CC_IOSQES_SHIFT;
}
static inline u8 nvmet_cc_iocqes(u32 cc)
{
- return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
+ return (cc & NVME_CC_IOCQES_MASK) >> NVME_CC_IOCQES_SHIFT;
}
/* Convert a 32-bit number to a 16-bit 0's based number */
diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index ac30b42cc622..b1e31483f157 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -46,7 +46,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex);
/*
* BAR CC register and SQ polling intervals.
*/
-#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(5)
+#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(10)
#define NVMET_PCI_EPF_SQ_POLL_INTERVAL msecs_to_jiffies(5)
#define NVMET_PCI_EPF_SQ_POLL_IDLE msecs_to_jiffies(5000)
@@ -1265,15 +1265,12 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
- if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
+ if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
if (!(flags & NVME_QUEUE_PHYS_CONTIG))
return NVME_SC_INVALID_QUEUE | NVME_STATUS_DNR;
- if (flags & NVME_CQ_IRQ_ENABLED)
- set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags);
-
cq->pci_addr = pci_addr;
cq->qid = cqid;
cq->depth = qsize + 1;
@@ -1290,24 +1287,27 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
cq->qes = ctrl->io_cqes;
cq->pci_size = cq->qes * cq->depth;
- cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector);
- if (!cq->iv) {
- status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
- goto err;
+ if (flags & NVME_CQ_IRQ_ENABLED) {
+ cq->iv = nvmet_pci_epf_add_irq_vector(ctrl, vector);
+ if (!cq->iv)
+ return NVME_SC_INTERNAL | NVME_STATUS_DNR;
+ set_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags);
}
status = nvmet_cq_create(tctrl, &cq->nvme_cq, cqid, cq->depth);
if (status != NVME_SC_SUCCESS)
goto err;
+ set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
+
dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
cqid, qsize, cq->qes, cq->vector);
return NVME_SC_SUCCESS;
err:
- clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags);
- clear_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
+ if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
+ nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
return status;
}
@@ -1333,7 +1333,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_queue *sq = &ctrl->sq[sqid];
u16 status;
- if (test_and_set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
+ if (test_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
if (!(flags & NVME_QUEUE_PHYS_CONTIG))
@@ -1355,7 +1355,7 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
status = nvmet_sq_create(tctrl, &sq->nvme_sq, sqid, sq->depth);
if (status != NVME_SC_SUCCESS)
- goto out_clear_bit;
+ return status;
sq->iod_wq = alloc_workqueue("sq%d_wq", WQ_UNBOUND,
min_t(int, sq->depth, WQ_MAX_ACTIVE), sqid);
@@ -1365,6 +1365,8 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
goto out_destroy_sq;
}
+ set_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags);
+
dev_dbg(ctrl->dev, "SQ[%u]: %u entries of %zu B\n",
sqid, qsize, sq->qes);
@@ -1372,8 +1374,6 @@ static u16 nvmet_pci_epf_create_sq(struct nvmet_ctrl *tctrl,
out_destroy_sq:
nvmet_sq_destroy(&sq->nvme_sq);
-out_clear_bit:
- clear_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags);
return status;
}
@@ -1694,6 +1694,7 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work)
struct nvmet_pci_epf_ctrl *ctrl =
container_of(work, struct nvmet_pci_epf_ctrl, poll_sqs.work);
struct nvmet_pci_epf_queue *sq;
+ unsigned long limit = jiffies;
unsigned long last = 0;
int i, nr_sqs;
@@ -1708,6 +1709,16 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work)
nr_sqs++;
}
+ /*
+ * If we have been running for a while, reschedule to let other
+ * tasks run and to avoid RCU stalls.
+ */
+ if (time_is_before_jiffies(limit + secs_to_jiffies(1))) {
+ cond_resched();
+ limit = jiffies;
+ continue;
+ }
+
if (nr_sqs) {
last = jiffies;
continue;
@@ -1822,14 +1833,14 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
if (ctrl->io_sqes < sizeof(struct nvme_command)) {
dev_err(ctrl->dev, "Unsupported I/O SQES %zu (need %zu)\n",
ctrl->io_sqes, sizeof(struct nvme_command));
- return -EINVAL;
+ goto err;
}
ctrl->io_cqes = 1UL << nvmet_cc_iocqes(ctrl->cc);
if (ctrl->io_cqes < sizeof(struct nvme_completion)) {
dev_err(ctrl->dev, "Unsupported I/O CQES %zu (need %zu)\n",
ctrl->io_sqes, sizeof(struct nvme_completion));
- return -EINVAL;
+ goto err;
}
/* Create the admin queue. */
@@ -1844,7 +1855,7 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
qsize, pci_addr, 0);
if (status != NVME_SC_SUCCESS) {
dev_err(ctrl->dev, "Failed to create admin completion queue\n");
- return -EINVAL;
+ goto err;
}
qsize = aqa & 0x00000fff;
@@ -1854,17 +1865,22 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
if (status != NVME_SC_SUCCESS) {
dev_err(ctrl->dev, "Failed to create admin submission queue\n");
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
- return -EINVAL;
+ goto err;
}
ctrl->sq_ab = NVMET_PCI_EPF_SQ_AB;
ctrl->irq_vector_threshold = NVMET_PCI_EPF_IV_THRESHOLD;
ctrl->enabled = true;
+ ctrl->csts = NVME_CSTS_RDY;
/* Start polling the controller SQs. */
schedule_delayed_work(&ctrl->poll_sqs, 0);
return 0;
+
+err:
+ ctrl->csts = 0;
+ return -EINVAL;
}
static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
@@ -1889,6 +1905,8 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl)
/* Delete the admin queue last. */
nvmet_pci_epf_delete_sq(ctrl->tctrl, 0);
nvmet_pci_epf_delete_cq(ctrl->tctrl, 0);
+
+ ctrl->csts &= ~NVME_CSTS_RDY;
}
static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
@@ -1903,19 +1921,19 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
old_cc = ctrl->cc;
new_cc = nvmet_pci_epf_bar_read32(ctrl, NVME_REG_CC);
+ if (new_cc == old_cc)
+ goto reschedule_work;
+
ctrl->cc = new_cc;
if (nvmet_cc_en(new_cc) && !nvmet_cc_en(old_cc)) {
ret = nvmet_pci_epf_enable_ctrl(ctrl);
if (ret)
- return;
- ctrl->csts |= NVME_CSTS_RDY;
+ goto reschedule_work;
}
- if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) {
+ if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc))
nvmet_pci_epf_disable_ctrl(ctrl);
- ctrl->csts &= ~NVME_CSTS_RDY;
- }
if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) {
nvmet_pci_epf_disable_ctrl(ctrl);
@@ -1928,6 +1946,7 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work)
nvmet_update_cc(ctrl->tctrl, ctrl->cc);
nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts);
+reschedule_work:
schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL);
}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 1afd93026f9b..2a4536ef6184 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -996,6 +996,27 @@ out_err:
nvmet_req_complete(&cmd->req, status);
}
+static bool nvmet_rdma_recv_not_live(struct nvmet_rdma_queue *queue,
+ struct nvmet_rdma_rsp *rsp)
+{
+ unsigned long flags;
+ bool ret = true;
+
+ spin_lock_irqsave(&queue->state_lock, flags);
+ /*
+ * recheck queue state is not live to prevent a race condition
+ * with RDMA_CM_EVENT_ESTABLISHED handler.
+ */
+ if (queue->state == NVMET_RDMA_Q_LIVE)
+ ret = false;
+ else if (queue->state == NVMET_RDMA_Q_CONNECTING)
+ list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
+ else
+ nvmet_rdma_put_rsp(rsp);
+ spin_unlock_irqrestore(&queue->state_lock, flags);
+ return ret;
+}
+
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct nvmet_rdma_cmd *cmd =
@@ -1038,17 +1059,9 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
rsp->n_rdma = 0;
rsp->invalidate_rkey = 0;
- if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) {
- unsigned long flags;
-
- spin_lock_irqsave(&queue->state_lock, flags);
- if (queue->state == NVMET_RDMA_Q_CONNECTING)
- list_add_tail(&rsp->wait_list, &queue->rsp_wait_list);
- else
- nvmet_rdma_put_rsp(rsp);
- spin_unlock_irqrestore(&queue->state_lock, flags);
+ if (unlikely(queue->state != NVMET_RDMA_Q_LIVE) &&
+ nvmet_rdma_recv_not_live(queue, rsp))
return;
- }
nvmet_rdma_handle_command(queue, rsp);
}
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 7c51c2a8c109..4f9cac8a5abe 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -571,10 +571,16 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
struct nvmet_tcp_cmd *cmd =
container_of(req, struct nvmet_tcp_cmd, req);
struct nvmet_tcp_queue *queue = cmd->queue;
+ enum nvmet_tcp_recv_state queue_state;
+ struct nvmet_tcp_cmd *queue_cmd;
struct nvme_sgl_desc *sgl;
u32 len;
- if (unlikely(cmd == queue->cmd)) {
+ /* Pairs with store_release in nvmet_prepare_receive_pdu() */
+ queue_state = smp_load_acquire(&queue->rcv_state);
+ queue_cmd = READ_ONCE(queue->cmd);
+
+ if (unlikely(cmd == queue_cmd)) {
sgl = &cmd->req.cmd->common.dptr.sgl;
len = le32_to_cpu(sgl->length);
@@ -583,7 +589,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
* Avoid using helpers, this might happen before
* nvmet_req_init is completed.
*/
- if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
+ if (queue_state == NVMET_TCP_RECV_PDU &&
len && len <= cmd->req.port->inline_data_size &&
nvme_is_write(cmd->req.cmd))
return;
@@ -847,8 +853,9 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue)
{
queue->offset = 0;
queue->left = sizeof(struct nvme_tcp_hdr);
- queue->cmd = NULL;
- queue->rcv_state = NVMET_TCP_RECV_PDU;
+ WRITE_ONCE(queue->cmd, NULL);
+ /* Ensure rcv_state is visible only after queue->cmd is set */
+ smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU);
}
static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue)