diff options
| author | Justin Tee <justin.tee@broadcom.com> | 2025-11-06 14:46:36 -0800 |
|---|---|---|
| committer | Martin K. Petersen <martin.petersen@oracle.com> | 2025-11-08 13:18:00 -0500 |
| commit | 07caedc6a3887938813727beafea40f07c497705 (patch) | |
| tree | 407ce8c71c5253d2b17c4f174ef572f098aa1986 | |
| parent | 0b8b15a0b74dafe4b184dfae120d715172960ae5 (diff) | |
scsi: lpfc: Fix reusing an ndlp that is marked NLP_DROPPED during FLOGI
It's possible for an unstable link to repeatedly bounce allowing a FLOGI
retry, but then bounce again forcing an abort of the FLOGI. Ensure that
the initial reference count on the FLOGI ndlp is restored in this faulty
link scenario.
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://patch.msgid.link/20251106224639.139176-8-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
| -rw-r--r-- | drivers/scsi/lpfc/lpfc_els.c | 36 | ||||
| -rw-r--r-- | drivers/scsi/lpfc/lpfc_hbadisc.c | 4 |
2 files changed, 32 insertions, 8 deletions
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 00cfd4ac4ccd..0045c1e29619 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -934,10 +934,15 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Check to see if link went down during discovery */ if (lpfc_els_chk_latt(vport)) { /* One additional decrement on node reference count to - * trigger the release of the node + * trigger the release of the node. Make sure the ndlp + * is marked NLP_DROPPED. */ - if (!(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) + if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) && + !test_bit(NLP_DROPPED, &ndlp->nlp_flag) && + !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) { + set_bit(NLP_DROPPED, &ndlp->nlp_flag); lpfc_nlp_put(ndlp); + } goto out; } @@ -995,9 +1000,10 @@ stop_rr_fcf_flogi: IOERR_LOOP_OPEN_FAILURE))) lpfc_vlog_msg(vport, KERN_WARNING, LOG_ELS, "2858 FLOGI Status:x%x/x%x TMO" - ":x%x Data x%lx x%x\n", + ":x%x Data x%lx x%x x%lx x%x\n", ulp_status, ulp_word4, tmo, - phba->hba_flag, phba->fcf.fcf_flag); + phba->hba_flag, phba->fcf.fcf_flag, + ndlp->nlp_flag, ndlp->fc4_xpt_flags); /* Check for retry */ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { @@ -1015,14 +1021,17 @@ stop_rr_fcf_flogi: * reference to trigger node release. */ if (!test_bit(NLP_IN_DEV_LOSS, &ndlp->nlp_flag) && - !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) + !test_bit(NLP_DROPPED, &ndlp->nlp_flag) && + !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD)) { + set_bit(NLP_DROPPED, &ndlp->nlp_flag); lpfc_nlp_put(ndlp); + } lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, "0150 FLOGI Status:x%x/x%x " - "xri x%x TMO:x%x refcnt %d\n", + "xri x%x iotag x%x TMO:x%x refcnt %d\n", ulp_status, ulp_word4, cmdiocb->sli4_xritag, - tmo, kref_read(&ndlp->kref)); + cmdiocb->iotag, tmo, kref_read(&ndlp->kref)); /* If this is not a loop open failure, bail out */ if (!(ulp_status == IOSTAT_LOCAL_REJECT && @@ -1279,6 +1288,19 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint32_t tmo, did; int rc; + /* It's possible for lpfc to reissue a FLOGI on an ndlp that is marked + * NLP_DROPPED. This happens when the FLOGI completed with the XB bit + * set causing lpfc to reference the ndlp until the XRI_ABORTED CQE is + * issued. The time window for the XRI_ABORTED CQE can be as much as + * 2*2*RA_TOV allowing for ndlp reuse of this type when the link is + * cycling quickly. When true, restore the initial reference and remove + * the NLP_DROPPED flag as lpfc is retrying. + */ + if (test_and_clear_bit(NLP_DROPPED, &ndlp->nlp_flag)) { + if (!lpfc_nlp_get(ndlp)) + return 1; + } + cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, ndlp->nlp_DID, ELS_CMD_FLOGI); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 43d246c5c049..717ae56c8e4b 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -424,6 +424,7 @@ lpfc_check_nlp_post_devloss(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { if (test_and_clear_bit(NLP_IN_RECOV_POST_DEV_LOSS, &ndlp->save_flags)) { + clear_bit(NLP_DROPPED, &ndlp->nlp_flag); lpfc_nlp_get(ndlp); lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY | LOG_NODE, "8438 Devloss timeout reversed on DID x%x " @@ -566,7 +567,8 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) return fcf_inuse; } - lpfc_nlp_put(ndlp); + if (!test_and_set_bit(NLP_DROPPED, &ndlp->nlp_flag)) + lpfc_nlp_put(ndlp); return fcf_inuse; } |