From: Frank Chang <frank.chang@sifive.com>
To: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Cc: qemu-devel@nongnu.org, qemu-riscv@nongnu.org,
alistair.francis@wdc.com, bmeng@tinylab.org,
liwei1518@gmail.com, zhiwei_liu@linux.alibaba.com,
palmer@rivosinc.com, ajones@ventanamicro.com,
tjeznach@rivosinc.com
Subject: Re: [PATCH v2 15/15] hw/misc: EDU: add ATS/PRI capability
Date: Tue, 7 May 2024 23:32:34 +0800 [thread overview]
Message-ID: <CANzO1D3sxSqXS6x1WkQDpyZ=T9nLQwHJCJRm=2FJn5OtrVN_mw@mail.gmail.com> (raw)
In-Reply-To: <20240307160319.675044-16-dbarboza@ventanamicro.com>
Hi Daniel,
Daniel Henrique Barboza <dbarboza@ventanamicro.com> 於 2024年3月8日 週五 上午12:05寫道:
>
> From: Tomasz Jeznach <tjeznach@rivosinc.com>
>
> Mimic ATS interface with IOMMU translate request with IOMMU_NONE. If
> mapping exists, translation service will return current permission
> flags, otherwise will report no permissions.
>
> Implement and register the IOMMU memory region listener to be notified
> whenever an ATS invalidation request is sent from the IOMMU.
>
> Implement and register the IOMMU memory region listener to be notified
> whenever an ATS page request group response is triggered from the IOMMU.
>
> Introduces a retry mechanism to the timer design so that any page that's
> not available should be only accessed after the PRGR notification has
> been received.
>
> Signed-off-by: Tomasz Jeznach <tjeznach@rivosinc.com>
> Signed-off-by: Sebastien Boeuf <seb@rivosinc.com>
> ---
> hw/misc/edu.c | 258 ++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 251 insertions(+), 7 deletions(-)
>
> diff --git a/hw/misc/edu.c b/hw/misc/edu.c
> index 522cec85b3..f4f6c15ec6 100644
> --- a/hw/misc/edu.c
> +++ b/hw/misc/edu.c
> @@ -45,6 +45,14 @@ DECLARE_INSTANCE_CHECKER(EduState, EDU,
> #define DMA_START 0x40000
> #define DMA_SIZE 4096
>
> +/*
> + * Number of tries before giving up on page request group response.
> + * Given the timer callback is scheduled to be run again after 100ms,
> + * 10 tries give roughly a second for the PRGR notification to be
> + * received.
> + */
> +#define NUM_TRIES 10
> +
> struct EduState {
> PCIDevice pdev;
> MemoryRegion mmio;
> @@ -55,6 +63,7 @@ struct EduState {
> bool stopping;
>
> bool enable_pasid;
> + uint32_t try;
>
> uint32_t addr4;
> uint32_t fact;
> @@ -81,6 +90,20 @@ struct EduState {
> QEMUTimer dma_timer;
> char dma_buf[DMA_SIZE];
> uint64_t dma_mask;
> +
> + MemoryListener iommu_listener;
> + QLIST_HEAD(, edu_iommu) iommu_list;
> +
> + bool prgr_rcvd;
> + bool prgr_success;
> +};
> +
> +struct edu_iommu {
> + EduState *edu;
> + IOMMUMemoryRegion *iommu_mr;
> + hwaddr iommu_offset;
> + IOMMUNotifier n;
> + QLIST_ENTRY(edu_iommu) iommu_next;
> };
>
> static bool edu_msi_enabled(EduState *edu)
> @@ -136,11 +159,65 @@ static dma_addr_t edu_clamp_addr(const EduState *edu, dma_addr_t addr)
> return res;
> }
>
> +static bool __find_iommu_mr_cb(Int128 start, Int128 len, const MemoryRegion *mr,
> + hwaddr offset_in_region, void *opaque)
> +{
> + IOMMUMemoryRegion **iommu_mr = opaque;
> + *iommu_mr = memory_region_get_iommu((MemoryRegion *)mr);
> + return *iommu_mr != NULL;
> +}
> +
> +static int pci_dma_perm(PCIDevice *pdev, dma_addr_t iova, MemTxAttrs attrs)
> +{
> + IOMMUMemoryRegion *iommu_mr = NULL;
> + IOMMUMemoryRegionClass *imrc;
> + int iommu_idx;
> + FlatView *fv;
> + EduState *edu = EDU(pdev);
> + struct edu_iommu *iommu;
> +
> + RCU_READ_LOCK_GUARD();
> +
> + fv = address_space_to_flatview(pci_get_address_space(pdev));
> +
> + /* Find first IOMMUMemoryRegion */
> + flatview_for_each_range(fv, __find_iommu_mr_cb, &iommu_mr);
> +
> + if (iommu_mr) {
> + imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
> +
> + /* IOMMU Index is mapping to memory attributes (PASID, etc) */
> + iommu_idx = imrc->attrs_to_index ?
> + imrc->attrs_to_index(iommu_mr, attrs) : 0;
> +
> + /* Update IOMMU notifiers with proper index */
> + QLIST_FOREACH(iommu, &edu->iommu_list, iommu_next) {
> + if (iommu->iommu_mr == iommu_mr &&
> + iommu->n.iommu_idx != iommu_idx) {
> + memory_region_unregister_iommu_notifier(
> + MEMORY_REGION(iommu->iommu_mr), &iommu->n);
> + iommu->n.iommu_idx = iommu_idx;
> + memory_region_register_iommu_notifier(
> + MEMORY_REGION(iommu->iommu_mr), &iommu->n, NULL);
> + }
> + }
> +
> + /* Translate request with IOMMU_NONE is an ATS request */
> + IOMMUTLBEntry iotlb = imrc->translate(iommu_mr, iova, IOMMU_NONE,
> + iommu_idx);
> +
> + return iotlb.perm;
> + }
> +
> + return IOMMU_NONE;
> +}
> +
> static void edu_dma_timer(void *opaque)
> {
> EduState *edu = opaque;
> bool raise_irq = false;
> MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
> + MemTxResult res;
>
> if (!(edu->dma.cmd & EDU_DMA_RUN)) {
> return;
> @@ -155,18 +232,70 @@ static void edu_dma_timer(void *opaque)
>
> if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
> uint64_t dst = edu->dma.dst;
> + uint64_t src = edu_clamp_addr(edu, edu->dma.src);
> edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
> dst -= DMA_START;
> - pci_dma_rw(&edu->pdev, edu_clamp_addr(edu, edu->dma.src),
> - edu->dma_buf + dst, edu->dma.cnt,
> - DMA_DIRECTION_TO_DEVICE, attrs);
> + if (edu->try-- == NUM_TRIES) {
> + edu->prgr_rcvd = false;
> + if (!(pci_dma_perm(&edu->pdev, src, attrs) & IOMMU_RO)) {
> + timer_mod(&edu->dma_timer,
> + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> + return;
> + }
> + } else if (edu->try) {
> + if (!edu->prgr_rcvd) {
> + timer_mod(&edu->dma_timer,
> + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> + return;
> + }
> + if (!edu->prgr_success) {
> + /* PRGR failure, fail DMA. */
> + edu->dma.cmd &= ~EDU_DMA_RUN;
> + return;
> + }
> + } else {
> + /* timeout, fail DMA. */
> + edu->dma.cmd &= ~EDU_DMA_RUN;
> + return;
> + }
> + res = pci_dma_rw(&edu->pdev, src, edu->dma_buf + dst, edu->dma.cnt,
> + DMA_DIRECTION_TO_DEVICE, attrs);
> + if (res != MEMTX_OK) {
> + hw_error("EDU: DMA transfer TO 0x%"PRIx64" failed.\n", dst);
> + }
> } else {
> uint64_t src = edu->dma.src;
> + uint64_t dst = edu_clamp_addr(edu, edu->dma.dst);
> edu_check_range(src, edu->dma.cnt, DMA_START, DMA_SIZE);
> src -= DMA_START;
> - pci_dma_rw(&edu->pdev, edu_clamp_addr(edu, edu->dma.dst),
> - edu->dma_buf + src, edu->dma.cnt,
> - DMA_DIRECTION_FROM_DEVICE, attrs);
> + if (edu->try-- == NUM_TRIES) {
> + edu->prgr_rcvd = false;
> + if (!(pci_dma_perm(&edu->pdev, dst, attrs) & IOMMU_WO)) {
> + timer_mod(&edu->dma_timer,
> + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> + return;
> + }
> + } else if (edu->try) {
> + if (!edu->prgr_rcvd) {
> + timer_mod(&edu->dma_timer,
> + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> + return;
> + }
> + if (!edu->prgr_success) {
> + /* PRGR failure, fail DMA. */
> + edu->dma.cmd &= ~EDU_DMA_RUN;
> + return;
> + }
> + } else {
> + /* timeout, fail DMA. */
> + edu->dma.cmd &= ~EDU_DMA_RUN;
> + return;
> + }
> + res = pci_dma_rw(&edu->pdev, dst, edu->dma_buf + src, edu->dma.cnt,
> + DMA_DIRECTION_FROM_DEVICE, attrs);
> + if (res != MEMTX_OK) {
> + hw_error("EDU: DMA transfer FROM 0x%"PRIx64" failed.\n", src);
> + }
> }
>
> edu->dma.cmd &= ~EDU_DMA_RUN;
> @@ -193,6 +322,7 @@ static void dma_rw(EduState *edu, bool write, dma_addr_t *val, dma_addr_t *dma,
> }
>
> if (timer) {
> + edu->try = NUM_TRIES;
> timer_mod(&edu->dma_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> }
> }
> @@ -376,9 +506,92 @@ static void *edu_fact_thread(void *opaque)
> return NULL;
> }
>
> +static void edu_iommu_ats_prgr_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +{
> + struct edu_iommu *iommu = container_of(n, struct edu_iommu, n);
> + EduState *edu = iommu->edu;
> + edu->prgr_success = (iotlb->perm != IOMMU_NONE);
> + barrier();
> + edu->prgr_rcvd = true;
> +}
> +
> +static void edu_iommu_ats_inval_notify(IOMMUNotifier *n,
> + IOMMUTLBEntry *iotlb)
> +{
> +
> +}
> +
> +static void edu_iommu_region_add(MemoryListener *listener,
> + MemoryRegionSection *section)
> +{
> + EduState *edu = container_of(listener, EduState, iommu_listener);
> + struct edu_iommu *iommu;
> + Int128 end;
> + int iommu_idx;
> + IOMMUMemoryRegion *iommu_mr;
> +
> + if (!memory_region_is_iommu(section->mr)) {
> + return;
> + }
> +
> + iommu_mr = IOMMU_MEMORY_REGION(section->mr);
> +
> + /* Register ATS.INVAL notifier */
> + iommu = g_malloc0(sizeof(*iommu));
> + iommu->iommu_mr = iommu_mr;
> + iommu->iommu_offset = section->offset_within_address_space -
> + section->offset_within_region;
> + iommu->edu = edu;
> + end = int128_add(int128_make64(section->offset_within_region),
> + section->size);
> + end = int128_sub(end, int128_one());
> + iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
> + MEMTXATTRS_UNSPECIFIED);
> + iommu_notifier_init(&iommu->n, edu_iommu_ats_inval_notify,
> + IOMMU_NOTIFIER_DEVIOTLB_UNMAP,
> + section->offset_within_region,
> + int128_get64(end),
> + iommu_idx);
> + memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> + QLIST_INSERT_HEAD(&edu->iommu_list, iommu, iommu_next);
> +
> + /* Register ATS.PRGR notifier */
> + iommu = g_memdup2(iommu, sizeof(*iommu));
> + iommu_notifier_init(&iommu->n, edu_iommu_ats_prgr_notify,
> + IOMMU_NOTIFIER_MAP,
> + section->offset_within_region,
> + int128_get64(end),
> + iommu_idx);
> + memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> + QLIST_INSERT_HEAD(&edu->iommu_list, iommu, iommu_next);
> +}
> +
> +static void edu_iommu_region_del(MemoryListener *listener,
> + MemoryRegionSection *section)
> +{
> + EduState *edu = container_of(listener, EduState, iommu_listener);
> + struct edu_iommu *iommu;
> +
> + if (!memory_region_is_iommu(section->mr)) {
> + return;
> + }
> +
> + QLIST_FOREACH(iommu, &edu->iommu_list, iommu_next) {
> + if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
> + iommu->n.start == section->offset_within_region) {
> + memory_region_unregister_iommu_notifier(section->mr,
> + &iommu->n);
> + QLIST_REMOVE(iommu, iommu_next);
> + g_free(iommu);
> + break;
> + }
> + }
> +}
> +
> static void pci_edu_realize(PCIDevice *pdev, Error **errp)
> {
> EduState *edu = EDU(pdev);
> + AddressSpace *dma_as = NULL;
> uint8_t *pci_conf = pdev->config;
> int pos;
>
> @@ -390,9 +603,28 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
> pos = PCI_CONFIG_SPACE_SIZE;
> if (edu->enable_pasid) {
> /* PCIe Spec 7.8.9 PASID Extended Capability Structure */
> - pcie_add_capability(pdev, 0x1b, 1, pos, 8);
> + pcie_add_capability(pdev, PCI_EXT_CAP_ID_PASID, 1, pos, 8);
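This change (replacing the magic number 0x1b with PCI_EXT_CAP_ID_PASID) should be included in the 14th commit ("hw/misc: EDU: added PASID support").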
> pci_set_long(pdev->config + pos + 4, 0x00001400);
> pci_set_long(pdev->wmask + pos + 4, 0xfff0ffff);
> + pos += 8;
> +
> + /* ATS Capability */
> + pcie_ats_init(pdev, pos, true);
> + pos += PCI_EXT_CAP_ATS_SIZEOF;
> +
> + /* PRI Capability */
> + pcie_add_capability(pdev, PCI_EXT_CAP_ID_PRI, 1, pos, 16);
> + /* PRI STOPPED */
> + pci_set_long(pdev->config + pos + 4, 0x01000000);
> + /* PRI ENABLE bit writable */
> + pci_set_long(pdev->wmask + pos + 4, 0x00000001);
> + /* PRI Capacity Supported */
> + pci_set_long(pdev->config + pos + 8, 0x00000080);
> + /* PRI Allocations Allowed, 32 */
> + pci_set_long(pdev->config + pos + 12, 0x00000040);
> + pci_set_long(pdev->wmask + pos + 12, 0x0000007f);
We should use the defines declared in
include/standard-headers/linux/pci_regs.h instead of hard-coded values here,
for readability, though some of the bitfields are not defined in that header file.
Regards,
Frank Chang
> +
> + pos += 8;
> }
>
> if (msi_init(pdev, 0, 1, true, false, errp)) {
> @@ -409,12 +641,24 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
> memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
> "edu-mmio", 1 * MiB);
> pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
> +
> + /* Register IOMMU listener */
> + edu->iommu_listener = (MemoryListener) {
> + .name = "edu-iommu",
> + .region_add = edu_iommu_region_add,
> + .region_del = edu_iommu_region_del,
> + };
> +
> + dma_as = pci_device_iommu_address_space(pdev);
> + memory_listener_register(&edu->iommu_listener, dma_as);
> }
>
> static void pci_edu_uninit(PCIDevice *pdev)
> {
> EduState *edu = EDU(pdev);
>
> + memory_listener_unregister(&edu->iommu_listener);
> +
> qemu_mutex_lock(&edu->thr_mutex);
> edu->stopping = true;
> qemu_mutex_unlock(&edu->thr_mutex);
> --
> 2.43.2
>
>
next prev parent reply other threads:[~2024-05-07 15:33 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-03-07 16:03 [PATCH v2 00/15] riscv: QEMU RISC-V IOMMU Support Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 01/15] exec/memtxattr: add process identifier to the transaction attributes Daniel Henrique Barboza
2024-04-23 16:33 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 02/15] hw/riscv: add riscv-iommu-bits.h Daniel Henrique Barboza
2024-05-10 11:01 ` Frank Chang
2024-05-15 10:02 ` Eric Cheng
2024-05-15 14:28 ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation Daniel Henrique Barboza
2024-05-01 11:57 ` Jason Chien
2024-05-14 20:06 ` Daniel Henrique Barboza
2024-05-02 11:37 ` Frank Chang
2024-05-08 11:15 ` Daniel Henrique Barboza
2024-05-10 10:58 ` Frank Chang
2024-05-13 12:41 ` Daniel Henrique Barboza
2024-05-13 12:37 ` Daniel Henrique Barboza
2024-05-16 7:13 ` Frank Chang
2024-05-20 16:17 ` Daniel Henrique Barboza
2024-05-21 10:52 ` Frank Chang
2024-05-21 12:28 ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 04/15] hw/riscv: add riscv-iommu-pci device Daniel Henrique Barboza
2024-04-29 7:21 ` Frank Chang
2024-05-02 9:37 ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 05/15] hw/riscv: add riscv-iommu-sys platform device Daniel Henrique Barboza
2024-04-30 1:35 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 06/15] hw/riscv/virt.c: support for RISC-V IOMMU PCIDevice hotplug Daniel Henrique Barboza
2024-04-30 2:17 ` Frank Chang
2024-05-15 6:25 ` Eric Cheng
2024-05-15 7:16 ` Andrew Jones
2024-03-07 16:03 ` [PATCH v2 07/15] test/qtest: add riscv-iommu-pci tests Daniel Henrique Barboza
2024-04-30 3:33 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 08/15] hw/riscv/riscv-iommu: add Address Translation Cache (IOATC) Daniel Henrique Barboza
2024-05-08 7:26 ` Frank Chang
2024-05-16 21:45 ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 09/15] hw/riscv/riscv-iommu: add s-stage and g-stage support Daniel Henrique Barboza
2024-05-10 10:36 ` Frank Chang
2024-05-10 11:14 ` Andrew Jones
2024-05-16 19:41 ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 10/15] hw/riscv/riscv-iommu: add ATS support Daniel Henrique Barboza
2024-05-08 2:57 ` Frank Chang
2024-05-17 9:29 ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 11/15] hw/riscv/riscv-iommu: add DBG support Daniel Henrique Barboza
2024-05-06 4:09 ` Frank Chang
2024-05-06 13:05 ` Daniel Henrique Barboza
2024-05-10 10:59 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 12/15] hw/riscv/riscv-iommu: Add another irq for mrif notifications Daniel Henrique Barboza
2024-05-06 6:12 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 13/15] qtest/riscv-iommu-test: add init queues test Daniel Henrique Barboza
2024-05-07 8:01 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 14/15] hw/misc: EDU: added PASID support Daniel Henrique Barboza
2024-05-07 9:06 ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 15/15] hw/misc: EDU: add ATS/PRI capability Daniel Henrique Barboza
2024-05-07 15:32 ` Frank Chang [this message]
2024-05-16 13:59 ` Daniel Henrique Barboza
2024-05-10 11:14 ` [PATCH v2 00/15] riscv: QEMU RISC-V IOMMU Support Frank Chang
2024-05-20 16:26 ` Daniel Henrique Barboza
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CANzO1D3sxSqXS6x1WkQDpyZ=T9nLQwHJCJRm=2FJn5OtrVN_mw@mail.gmail.com' \
--to=frank.chang@sifive.com \
--cc=ajones@ventanamicro.com \
--cc=alistair.francis@wdc.com \
--cc=bmeng@tinylab.org \
--cc=dbarboza@ventanamicro.com \
--cc=liwei1518@gmail.com \
--cc=palmer@rivosinc.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-riscv@nongnu.org \
--cc=tjeznach@rivosinc.com \
--cc=zhiwei_liu@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).