QEMU-Devel Archive mirror
 help / color / mirror / Atom feed
From: Frank Chang <frank.chang@sifive.com>
To: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Cc: qemu-devel@nongnu.org, qemu-riscv@nongnu.org,
	alistair.francis@wdc.com,  bmeng@tinylab.org,
	liwei1518@gmail.com, zhiwei_liu@linux.alibaba.com,
	 palmer@rivosinc.com, ajones@ventanamicro.com,
	tjeznach@rivosinc.com
Subject: Re: [PATCH v2 15/15] hw/misc: EDU: add ATS/PRI capability
Date: Tue, 7 May 2024 23:32:34 +0800	[thread overview]
Message-ID: <CANzO1D3sxSqXS6x1WkQDpyZ=T9nLQwHJCJRm=2FJn5OtrVN_mw@mail.gmail.com> (raw)
In-Reply-To: <20240307160319.675044-16-dbarboza@ventanamicro.com>

Hi Daniel,

Daniel Henrique Barboza <dbarboza@ventanamicro.com> 於 2024年3月8日 週五 上午12:05寫道:
>
> From: Tomasz Jeznach <tjeznach@rivosinc.com>
>
> Mimic the ATS interface by issuing an IOMMU translate request with
> IOMMU_NONE.  If a mapping exists, the translation service returns the
> current permission flags; otherwise it reports no permissions.
>
> Implement and register the IOMMU memory region listener to be notified
> whenever an ATS invalidation request is sent from the IOMMU.
>
> Implement and register the IOMMU memory region listener to be notified
> whenever an ATS page request group response is triggered from the IOMMU.
>
> Introduce a retry mechanism to the timer design so that any page that's
> not available is only accessed after the PRGR notification has been
> received.
>
> Signed-off-by: Tomasz Jeznach <tjeznach@rivosinc.com>
> Signed-off-by: Sebastien Boeuf <seb@rivosinc.com>
> ---
>  hw/misc/edu.c | 258 ++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 251 insertions(+), 7 deletions(-)
>
> diff --git a/hw/misc/edu.c b/hw/misc/edu.c
> index 522cec85b3..f4f6c15ec6 100644
> --- a/hw/misc/edu.c
> +++ b/hw/misc/edu.c
> @@ -45,6 +45,14 @@ DECLARE_INSTANCE_CHECKER(EduState, EDU,
>  #define DMA_START       0x40000
>  #define DMA_SIZE        4096
>
> +/*
> + * Number of tries before giving up on page request group response.
> + * Given the timer callback is scheduled to be run again after 100ms,
> + * 10 tries give roughly a second for the PRGR notification to be
> + * received.
> + */
> +#define NUM_TRIES       10
> +
>  struct EduState {
>      PCIDevice pdev;
>      MemoryRegion mmio;
> @@ -55,6 +63,7 @@ struct EduState {
>      bool stopping;
>
>      bool enable_pasid;
> +    uint32_t try;
>
>      uint32_t addr4;
>      uint32_t fact;
> @@ -81,6 +90,20 @@ struct EduState {
>      QEMUTimer dma_timer;
>      char dma_buf[DMA_SIZE];
>      uint64_t dma_mask;
> +
> +    MemoryListener iommu_listener;
> +    QLIST_HEAD(, edu_iommu) iommu_list;
> +
> +    bool prgr_rcvd;
> +    bool prgr_success;
> +};
> +
> +struct edu_iommu {
> +    EduState *edu;
> +    IOMMUMemoryRegion *iommu_mr;
> +    hwaddr iommu_offset;
> +    IOMMUNotifier n;
> +    QLIST_ENTRY(edu_iommu) iommu_next;
>  };
>
>  static bool edu_msi_enabled(EduState *edu)
> @@ -136,11 +159,65 @@ static dma_addr_t edu_clamp_addr(const EduState *edu, dma_addr_t addr)
>      return res;
>  }
>
> +static bool __find_iommu_mr_cb(Int128 start, Int128 len, const MemoryRegion *mr,
> +    hwaddr offset_in_region, void *opaque)
> +{
> +    IOMMUMemoryRegion **iommu_mr = opaque;
> +    *iommu_mr = memory_region_get_iommu((MemoryRegion *)mr);
> +    return *iommu_mr != NULL;
> +}
> +
> +static int pci_dma_perm(PCIDevice *pdev, dma_addr_t iova, MemTxAttrs attrs)
> +{
> +    IOMMUMemoryRegion *iommu_mr = NULL;
> +    IOMMUMemoryRegionClass *imrc;
> +    int iommu_idx;
> +    FlatView *fv;
> +    EduState *edu = EDU(pdev);
> +    struct edu_iommu *iommu;
> +
> +    RCU_READ_LOCK_GUARD();
> +
> +    fv = address_space_to_flatview(pci_get_address_space(pdev));
> +
> +    /* Find first IOMMUMemoryRegion */
> +    flatview_for_each_range(fv, __find_iommu_mr_cb, &iommu_mr);
> +
> +    if (iommu_mr) {
> +        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
> +
> +        /* IOMMU Index is mapping to memory attributes (PASID, etc) */
> +        iommu_idx = imrc->attrs_to_index ?
> +                    imrc->attrs_to_index(iommu_mr, attrs) : 0;
> +
> +        /* Update IOMMU notifiers with proper index */
> +        QLIST_FOREACH(iommu, &edu->iommu_list, iommu_next) {
> +            if (iommu->iommu_mr == iommu_mr &&
> +                iommu->n.iommu_idx != iommu_idx) {
> +                memory_region_unregister_iommu_notifier(
> +                    MEMORY_REGION(iommu->iommu_mr), &iommu->n);
> +                iommu->n.iommu_idx = iommu_idx;
> +                memory_region_register_iommu_notifier(
> +                    MEMORY_REGION(iommu->iommu_mr), &iommu->n, NULL);
> +            }
> +        }
> +
> +        /* Translate request with IOMMU_NONE is an ATS request */
> +        IOMMUTLBEntry iotlb = imrc->translate(iommu_mr, iova, IOMMU_NONE,
> +                                              iommu_idx);
> +
> +        return iotlb.perm;
> +    }
> +
> +    return IOMMU_NONE;
> +}
> +
>  static void edu_dma_timer(void *opaque)
>  {
>      EduState *edu = opaque;
>      bool raise_irq = false;
>      MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
> +    MemTxResult res;
>
>      if (!(edu->dma.cmd & EDU_DMA_RUN)) {
>          return;
> @@ -155,18 +232,70 @@ static void edu_dma_timer(void *opaque)
>
>      if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
>          uint64_t dst = edu->dma.dst;
> +        uint64_t src = edu_clamp_addr(edu, edu->dma.src);
>          edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
>          dst -= DMA_START;
> -        pci_dma_rw(&edu->pdev, edu_clamp_addr(edu, edu->dma.src),
> -                edu->dma_buf + dst, edu->dma.cnt,
> -                DMA_DIRECTION_TO_DEVICE, attrs);
> +        if (edu->try-- == NUM_TRIES) {
> +            edu->prgr_rcvd = false;
> +            if (!(pci_dma_perm(&edu->pdev, src, attrs) & IOMMU_RO)) {
> +                timer_mod(&edu->dma_timer,
> +                          qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> +                return;
> +            }
> +        } else if (edu->try) {
> +            if (!edu->prgr_rcvd) {
> +                timer_mod(&edu->dma_timer,
> +                          qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> +                return;
> +            }
> +            if (!edu->prgr_success) {
> +                /* PRGR failure, fail DMA. */
> +                edu->dma.cmd &= ~EDU_DMA_RUN;
> +                return;
> +            }
> +        } else {
> +            /* timeout, fail DMA. */
> +            edu->dma.cmd &= ~EDU_DMA_RUN;
> +            return;
> +        }
> +        res = pci_dma_rw(&edu->pdev, src, edu->dma_buf + dst, edu->dma.cnt,
> +            DMA_DIRECTION_TO_DEVICE, attrs);
> +        if (res != MEMTX_OK) {
> +            hw_error("EDU: DMA transfer TO 0x%"PRIx64" failed.\n", dst);
> +        }
>      } else {
>          uint64_t src = edu->dma.src;
> +        uint64_t dst = edu_clamp_addr(edu, edu->dma.dst);
>          edu_check_range(src, edu->dma.cnt, DMA_START, DMA_SIZE);
>          src -= DMA_START;
> -        pci_dma_rw(&edu->pdev, edu_clamp_addr(edu, edu->dma.dst),
> -                edu->dma_buf + src, edu->dma.cnt,
> -                DMA_DIRECTION_FROM_DEVICE, attrs);
> +        if (edu->try-- == NUM_TRIES) {
> +            edu->prgr_rcvd = false;
> +            if (!(pci_dma_perm(&edu->pdev, dst, attrs) & IOMMU_WO)) {
> +                timer_mod(&edu->dma_timer,
> +                          qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> +                return;
> +            }
> +        } else if (edu->try) {
> +            if (!edu->prgr_rcvd) {
> +                timer_mod(&edu->dma_timer,
> +                          qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
> +                return;
> +            }
> +            if (!edu->prgr_success) {
> +                /* PRGR failure, fail DMA. */
> +                edu->dma.cmd &= ~EDU_DMA_RUN;
> +                return;
> +            }
> +        } else {
> +            /* timeout, fail DMA. */
> +            edu->dma.cmd &= ~EDU_DMA_RUN;
> +            return;
> +        }
> +        res = pci_dma_rw(&edu->pdev, dst, edu->dma_buf + src, edu->dma.cnt,
> +            DMA_DIRECTION_FROM_DEVICE, attrs);
> +        if (res != MEMTX_OK) {
> +            hw_error("EDU: DMA transfer FROM 0x%"PRIx64" failed.\n", src);
> +        }
>      }
>
>      edu->dma.cmd &= ~EDU_DMA_RUN;
> @@ -193,6 +322,7 @@ static void dma_rw(EduState *edu, bool write, dma_addr_t *val, dma_addr_t *dma,
>      }
>
>      if (timer) {
> +        edu->try = NUM_TRIES;
>          timer_mod(&edu->dma_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
>      }
>  }
> @@ -376,9 +506,92 @@ static void *edu_fact_thread(void *opaque)
>      return NULL;
>  }
>
> +static void edu_iommu_ats_prgr_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> +{
> +    struct edu_iommu *iommu = container_of(n, struct edu_iommu, n);
> +    EduState *edu = iommu->edu;
> +    edu->prgr_success = (iotlb->perm != IOMMU_NONE);
> +    barrier();
> +    edu->prgr_rcvd = true;
> +}
> +
> +static void edu_iommu_ats_inval_notify(IOMMUNotifier *n,
> +                                       IOMMUTLBEntry *iotlb)
> +{
> +
> +}
> +
> +static void edu_iommu_region_add(MemoryListener *listener,
> +                                   MemoryRegionSection *section)
> +{
> +    EduState *edu = container_of(listener, EduState, iommu_listener);
> +    struct edu_iommu *iommu;
> +    Int128 end;
> +    int iommu_idx;
> +    IOMMUMemoryRegion *iommu_mr;
> +
> +    if (!memory_region_is_iommu(section->mr)) {
> +        return;
> +    }
> +
> +    iommu_mr = IOMMU_MEMORY_REGION(section->mr);
> +
> +    /* Register ATS.INVAL notifier */
> +    iommu = g_malloc0(sizeof(*iommu));
> +    iommu->iommu_mr = iommu_mr;
> +    iommu->iommu_offset = section->offset_within_address_space -
> +                          section->offset_within_region;
> +    iommu->edu = edu;
> +    end = int128_add(int128_make64(section->offset_within_region),
> +                     section->size);
> +    end = int128_sub(end, int128_one());
> +    iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr,
> +                                                   MEMTXATTRS_UNSPECIFIED);
> +    iommu_notifier_init(&iommu->n, edu_iommu_ats_inval_notify,
> +                        IOMMU_NOTIFIER_DEVIOTLB_UNMAP,
> +                        section->offset_within_region,
> +                        int128_get64(end),
> +                        iommu_idx);
> +    memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> +    QLIST_INSERT_HEAD(&edu->iommu_list, iommu, iommu_next);
> +
> +    /* Register ATS.PRGR notifier */
> +    iommu = g_memdup2(iommu, sizeof(*iommu));
> +    iommu_notifier_init(&iommu->n, edu_iommu_ats_prgr_notify,
> +                        IOMMU_NOTIFIER_MAP,
> +                        section->offset_within_region,
> +                        int128_get64(end),
> +                        iommu_idx);
> +    memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL);
> +    QLIST_INSERT_HEAD(&edu->iommu_list, iommu, iommu_next);
> +}
> +
> +static void edu_iommu_region_del(MemoryListener *listener,
> +                                   MemoryRegionSection *section)
> +{
> +    EduState *edu = container_of(listener, EduState, iommu_listener);
> +    struct edu_iommu *iommu;
> +
> +    if (!memory_region_is_iommu(section->mr)) {
> +        return;
> +    }
> +
> +    QLIST_FOREACH(iommu, &edu->iommu_list, iommu_next) {
> +        if (MEMORY_REGION(iommu->iommu_mr) == section->mr &&
> +            iommu->n.start == section->offset_within_region) {
> +            memory_region_unregister_iommu_notifier(section->mr,
> +                                                    &iommu->n);
> +            QLIST_REMOVE(iommu, iommu_next);
> +            g_free(iommu);
> +            break;
> +        }
> +    }
> +}
> +
>  static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>  {
>      EduState *edu = EDU(pdev);
> +    AddressSpace *dma_as = NULL;
>      uint8_t *pci_conf = pdev->config;
>      int pos;
>
> @@ -390,9 +603,28 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>      pos = PCI_CONFIG_SPACE_SIZE;
>      if (edu->enable_pasid) {
>          /* PCIe Spec 7.8.9 PASID Extended Capability Structure */
> -        pcie_add_capability(pdev, 0x1b, 1, pos, 8);
> +        pcie_add_capability(pdev, PCI_EXT_CAP_ID_PASID, 1, pos, 8);

This change (using PCI_EXT_CAP_ID_PASID instead of the literal 0x1b)
should be included in the 14th commit.

>          pci_set_long(pdev->config + pos + 4, 0x00001400);
>          pci_set_long(pdev->wmask + pos + 4,  0xfff0ffff);
> +        pos += 8;
> +
> +        /* ATS Capability */
> +        pcie_ats_init(pdev, pos, true);
> +        pos += PCI_EXT_CAP_ATS_SIZEOF;
> +
> +        /* PRI Capability */
> +        pcie_add_capability(pdev, PCI_EXT_CAP_ID_PRI, 1, pos, 16);
> +        /* PRI STOPPED */
> +        pci_set_long(pdev->config + pos +  4, 0x01000000);
> +        /* PRI ENABLE bit writable */
> +        pci_set_long(pdev->wmask  + pos +  4, 0x00000001);
> +        /* PRI Capacity Supported */
> +        pci_set_long(pdev->config + pos +  8, 0x00000080);
> +        /* PRI Allocations Allowed, 32 */
> +        pci_set_long(pdev->config + pos + 12, 0x00000040);
> +        pci_set_long(pdev->wmask  + pos + 12, 0x0000007f);

We should use the defines declared in
include/standard-headers/linux/pci_regs.h instead of these literal
offsets and values for readability, even though some of the bitfields
are not defined in that header file.

Regards,
Frank Chang

> +
> +        pos += 8;
>      }
>
>      if (msi_init(pdev, 0, 1, true, false, errp)) {
> @@ -409,12 +641,24 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>      memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
>                      "edu-mmio", 1 * MiB);
>      pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
> +
> +    /* Register IOMMU listener */
> +    edu->iommu_listener = (MemoryListener) {
> +        .name = "edu-iommu",
> +        .region_add = edu_iommu_region_add,
> +        .region_del = edu_iommu_region_del,
> +    };
> +
> +    dma_as = pci_device_iommu_address_space(pdev);
> +    memory_listener_register(&edu->iommu_listener, dma_as);
>  }
>
>  static void pci_edu_uninit(PCIDevice *pdev)
>  {
>      EduState *edu = EDU(pdev);
>
> +    memory_listener_unregister(&edu->iommu_listener);
> +
>      qemu_mutex_lock(&edu->thr_mutex);
>      edu->stopping = true;
>      qemu_mutex_unlock(&edu->thr_mutex);
> --
> 2.43.2
>
>


  reply	other threads:[~2024-05-07 15:33 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-07 16:03 [PATCH v2 00/15] riscv: QEMU RISC-V IOMMU Support Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 01/15] exec/memtxattr: add process identifier to the transaction attributes Daniel Henrique Barboza
2024-04-23 16:33   ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 02/15] hw/riscv: add riscv-iommu-bits.h Daniel Henrique Barboza
2024-05-10 11:01   ` Frank Chang
2024-05-15 10:02   ` Eric Cheng
2024-05-15 14:28     ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 03/15] hw/riscv: add RISC-V IOMMU base emulation Daniel Henrique Barboza
2024-05-01 11:57   ` Jason Chien
2024-05-14 20:06     ` Daniel Henrique Barboza
2024-05-02 11:37   ` Frank Chang
2024-05-08 11:15     ` Daniel Henrique Barboza
2024-05-10 10:58       ` Frank Chang
2024-05-13 12:41         ` Daniel Henrique Barboza
2024-05-13 12:37       ` Daniel Henrique Barboza
2024-05-16  7:13         ` Frank Chang
2024-05-20 16:17           ` Daniel Henrique Barboza
2024-05-21 10:52             ` Frank Chang
2024-05-21 12:28               ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 04/15] hw/riscv: add riscv-iommu-pci device Daniel Henrique Barboza
2024-04-29  7:21   ` Frank Chang
2024-05-02  9:37     ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 05/15] hw/riscv: add riscv-iommu-sys platform device Daniel Henrique Barboza
2024-04-30  1:35   ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 06/15] hw/riscv/virt.c: support for RISC-V IOMMU PCIDevice hotplug Daniel Henrique Barboza
2024-04-30  2:17   ` Frank Chang
2024-05-15  6:25   ` Eric Cheng
2024-05-15  7:16     ` Andrew Jones
2024-03-07 16:03 ` [PATCH v2 07/15] test/qtest: add riscv-iommu-pci tests Daniel Henrique Barboza
2024-04-30  3:33   ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 08/15] hw/riscv/riscv-iommu: add Address Translation Cache (IOATC) Daniel Henrique Barboza
2024-05-08  7:26   ` Frank Chang
2024-05-16 21:45     ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 09/15] hw/riscv/riscv-iommu: add s-stage and g-stage support Daniel Henrique Barboza
2024-05-10 10:36   ` Frank Chang
2024-05-10 11:14     ` Andrew Jones
2024-05-16 19:41       ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 10/15] hw/riscv/riscv-iommu: add ATS support Daniel Henrique Barboza
2024-05-08  2:57   ` Frank Chang
2024-05-17  9:29     ` Daniel Henrique Barboza
2024-03-07 16:03 ` [PATCH v2 11/15] hw/riscv/riscv-iommu: add DBG support Daniel Henrique Barboza
2024-05-06  4:09   ` Frank Chang
2024-05-06 13:05     ` Daniel Henrique Barboza
2024-05-10 10:59       ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 12/15] hw/riscv/riscv-iommu: Add another irq for mrif notifications Daniel Henrique Barboza
2024-05-06  6:12   ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 13/15] qtest/riscv-iommu-test: add init queues test Daniel Henrique Barboza
2024-05-07  8:01   ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 14/15] hw/misc: EDU: added PASID support Daniel Henrique Barboza
2024-05-07  9:06   ` Frank Chang
2024-03-07 16:03 ` [PATCH v2 15/15] hw/misc: EDU: add ATS/PRI capability Daniel Henrique Barboza
2024-05-07 15:32   ` Frank Chang [this message]
2024-05-16 13:59     ` Daniel Henrique Barboza
2024-05-10 11:14 ` [PATCH v2 00/15] riscv: QEMU RISC-V IOMMU Support Frank Chang
2024-05-20 16:26   ` Daniel Henrique Barboza

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CANzO1D3sxSqXS6x1WkQDpyZ=T9nLQwHJCJRm=2FJn5OtrVN_mw@mail.gmail.com' \
    --to=frank.chang@sifive.com \
    --cc=ajones@ventanamicro.com \
    --cc=alistair.francis@wdc.com \
    --cc=bmeng@tinylab.org \
    --cc=dbarboza@ventanamicro.com \
    --cc=liwei1518@gmail.com \
    --cc=palmer@rivosinc.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-riscv@nongnu.org \
    --cc=tjeznach@rivosinc.com \
    --cc=zhiwei_liu@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).