On Jun 10 13:46, Jakub Jermář wrote: >An IRQ vector used by a completion queue cannot be deasserted without >first checking if the same vector does not need to stay asserted for >some other completion queue. > >Signed-off-by: Jakub Jermar >--- > hw/nvme/ctrl.c | 21 +++++++++++++++++++-- > 1 file changed, 19 insertions(+), 2 deletions(-) > >diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c >index 0bcaf7192f..c0980929eb 100644 >--- a/hw/nvme/ctrl.c >+++ b/hw/nvme/ctrl.c >@@ -473,6 +473,21 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq) > } > } > >+/* >+ * Check if the vector used by the cq can be deasserted, i.e. it needn't be >+ * asserted for some other cq. >+ */ >+static bool nvme_irq_can_deassert(NvmeCtrl *n, NvmeCQueue *cq) >+{ >+ for (unsigned qid = 0; qid < n->params.max_ioqpairs + 1; qid++) { >+ NvmeCQueue *q = n->cq[qid]; >+ >+ if (q && q->vector == cq->vector && q->head != q->tail) >+ return false; /* some queue needs this to stay asserted */ >+ } >+ return true; >+} >+ > static void nvme_req_clear(NvmeRequest *req) > { > req->ns = NULL; >@@ -4089,7 +4104,9 @@ static uint16_t nvme_del_cq(NvmeCtrl *n, NvmeRequest *req) > trace_pci_nvme_err_invalid_del_cq_notempty(qid); > return NVME_INVALID_QUEUE_DEL; > } >- nvme_irq_deassert(n, cq); >+ if (nvme_irq_can_deassert(n, cq)) { >+ nvme_irq_deassert(n, cq); >+ } > trace_pci_nvme_del_cq(qid); > nvme_free_cq(cq, n); > return NVME_SUCCESS; >@@ -5757,7 +5774,7 @@ static void nvme_process_db(NvmeCtrl *n, hwaddr addr, int val) > timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500); > } > >- if (cq->tail == cq->head) { >+ if (nvme_irq_can_deassert(n, cq)) { > nvme_irq_deassert(n, cq); > } > } else { >-- >2.31.1 > This is actually an artifact of commit ca247d35098d3 ("hw/block/nvme: fix pin-based interrupt behavior") that I did a year ago. Prior to that fix, the completion queue id was used to index the internal IS register (irq_status), which, while wrong spec-wise, had the effect of... 
actually working. Anyway, I agree that the logic is flawed right now, since we should only deassert when all outstanding CQEs have been acknowledged by the host. nvme_irq_can_deassert() should be guarded with a check on msix_enabled(), but in any case I am not happy about looping over all completion queues on each CQ doorbell write. I think this could be reference counted instead? I.e., decrement the count when cq->tail == cq->head on the CQ doorbell write, and increment it only when a queue goes from empty to non-empty in nvme_post_cqes().