* [PATCH RFC v2 1/5] dma-mapping: replace zone_dma_bits by zone_dma_limit
2024-04-09 6:17 [PATCH RFC v2 0/5] arm64: support DMA zone starting above 4GB Baruch Siach
@ 2024-04-09 6:17 ` Baruch Siach
2024-06-18 18:26 ` Catalin Marinas
2024-04-09 6:17 ` [PATCH RFC v2 2/5] of: get dma area lower limit Baruch Siach
` (3 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Baruch Siach @ 2024-04-09 6:17 UTC (permalink / raw
To: Christoph Hellwig, Marek Szyprowski, Rob Herring, Saravana Kannan,
Catalin Marinas, Will Deacon
Cc: devicetree, Baruch Siach, Ramon Fried, linux-s390,
Petr Tesařík, linuxppc-dev, linux-kernel, iommu,
Elad Nachman, Robin Murphy, linux-arm-kernel
From: Catalin Marinas <catalin.marinas@arm.com>
Hardware DMA limit might not be a power of 2. When the RAM range starts
above 0, say 4GB, a DMA limit of 30 bits should end at 5GB. A single high
bit cannot encode this limit.
Use direct phys_addr_t limit address for DMA zone limit.
Following commits will add explicit base address to DMA zone.
---
Catalin,
This is taken almost verbatim from your email:
https://lore.kernel.org/all/ZZ2HnHJV3gdzu1Aj@arm.com/
Would you provide your sign-off?
Thanks,
baruch
---
arch/arm64/mm/init.c | 32 ++++++++++----------------------
arch/powerpc/mm/mem.c | 9 ++++-----
arch/s390/mm/init.c | 2 +-
include/linux/dma-direct.h | 2 +-
kernel/dma/direct.c | 6 +++---
kernel/dma/pool.c | 2 +-
kernel/dma/swiotlb.c | 4 ++--
7 files changed, 22 insertions(+), 35 deletions(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 03efd86dce0a..00508c69ca9e 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -113,36 +113,24 @@ static void __init arch_reserve_crashkernel(void)
low_size, high);
}
-/*
- * Return the maximum physical address for a zone accessible by the given bits
- * limit. If DRAM starts above 32-bit, expand the zone to the maximum
- * available memory, otherwise cap it at 32-bit.
- */
-static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
+static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit)
{
- phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
- phys_addr_t phys_start = memblock_start_of_DRAM();
-
- if (phys_start > U32_MAX)
- zone_mask = PHYS_ADDR_MAX;
- else if (phys_start > zone_mask)
- zone_mask = U32_MAX;
-
- return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
+ return min(zone_limit, memblock_end_of_DRAM() - 1) + 1;
}
static void __init zone_sizes_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
- unsigned int __maybe_unused acpi_zone_dma_bits;
- unsigned int __maybe_unused dt_zone_dma_bits;
- phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
+ phys_addr_t __maybe_unused acpi_zone_dma_limit;
+ phys_addr_t __maybe_unused dt_zone_dma_limit;
+ phys_addr_t __maybe_unused dma32_phys_limit =
+ max_zone_phys(DMA_BIT_MASK(32));
#ifdef CONFIG_ZONE_DMA
- acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
- dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
- zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
- arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
+ acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address();
+ dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL);
+ zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit);
+ arm64_dma_phys_limit = max_zone_phys(zone_dma_limit);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 3a440004b97d..4d6f575fd354 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -214,7 +214,7 @@ static int __init mark_nonram_nosave(void)
* everything else. GFP_DMA32 page allocations automatically fall back to
* ZONE_DMA.
*
- * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * By using 31-bit unconditionally, we can exploit zone_dma_limit to inform the
* generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
* anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
* ZONE_DMA.
@@ -250,13 +250,12 @@ void __init paging_init(void)
* powerbooks.
*/
if (IS_ENABLED(CONFIG_PPC32))
- zone_dma_bits = 30;
+ zone_dma_limit = DMA_BIT_MASK(30);
else
- zone_dma_bits = 31;
+ zone_dma_limit = DMA_BIT_MASK(31);
#ifdef CONFIG_ZONE_DMA
- max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (zone_dma_bits - PAGE_SHIFT));
+ max_zone_pfns[ZONE_DMA] = min(max_low_pfn, zone_dma_limit >> PAGE_SHIFT);
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f6391442c0c2..5feaa60933b7 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -95,7 +95,7 @@ void __init paging_init(void)
vmem_map_init();
sparse_init();
- zone_dma_bits = 31;
+ zone_dma_limit = DMA_BIT_MASK(31);
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 3eb3589ff43e..7cf76f1d3239 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -12,7 +12,7 @@
#include <linux/mem_encrypt.h>
#include <linux/swiotlb.h>
-extern unsigned int zone_dma_bits;
+extern phys_addr_t zone_dma_limit;
/*
* Record the mapping of CPU physical to DMA addresses for a given region.
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 4d543b1e9d57..3b2ebcd4f576 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -20,7 +20,7 @@
* it for entirely different regions. In that case the arch code needs to
* override the variable below for dma-direct to work properly.
*/
-unsigned int zone_dma_bits __ro_after_init = 24;
+phys_addr_t zone_dma_limit __ro_after_init = DMA_BIT_MASK(24);
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
@@ -59,7 +59,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit)
* zones.
*/
*phys_limit = dma_to_phys(dev, dma_limit);
- if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
+ if (*phys_limit <= zone_dma_limit)
return GFP_DMA;
if (*phys_limit <= DMA_BIT_MASK(32))
return GFP_DMA32;
@@ -584,7 +584,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
* part of the check.
*/
if (IS_ENABLED(CONFIG_ZONE_DMA))
- min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
+ min_mask = min_t(u64, min_mask, zone_dma_limit);
return mask >= phys_to_dma_unencrypted(dev, min_mask);
}
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index d10613eb0f63..410a7b40e496 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -70,7 +70,7 @@ static bool cma_in_zone(gfp_t gfp)
/* CMA can't cross zone boundaries, see cma_activate_area() */
end = cma_get_base(cma) + size - 1;
if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
- return end <= DMA_BIT_MASK(zone_dma_bits);
+ return end <= zone_dma_limit;
if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
return end <= DMA_BIT_MASK(32);
return true;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 86fe172b5958..96d6eee7d215 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -446,7 +446,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
if (!remap)
io_tlb_default_mem.can_grow = true;
if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA))
- io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits);
+ io_tlb_default_mem.phys_limit = zone_dma_limit;
else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32))
io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32);
else
@@ -625,7 +625,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
}
gfp &= ~GFP_ZONEMASK;
- if (phys_limit <= DMA_BIT_MASK(zone_dma_bits))
+ if (phys_limit <= zone_dma_limit)
gfp |= __GFP_DMA;
else if (phys_limit <= DMA_BIT_MASK(32))
gfp |= __GFP_DMA32;
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH RFC v2 1/5] dma-mapping: replace zone_dma_bits by zone_dma_limit
2024-04-09 6:17 ` [PATCH RFC v2 1/5] dma-mapping: replace zone_dma_bits by zone_dma_limit Baruch Siach
@ 2024-06-18 18:26 ` Catalin Marinas
0 siblings, 0 replies; 9+ messages in thread
From: Catalin Marinas @ 2024-06-18 18:26 UTC (permalink / raw
To: Baruch Siach
Cc: Rob Herring, linux-s390, Ramon Fried, Saravana Kannan, devicetree,
Petr Tesařík, Will Deacon, linuxppc-dev, linux-kernel,
iommu, Elad Nachman, Robin Murphy, Christoph Hellwig,
linux-arm-kernel, Marek Szyprowski
(finally getting around to looking at this series, sorry for the delay)
On Tue, Apr 09, 2024 at 09:17:54AM +0300, Baruch Siach wrote:
> From: Catalin Marinas <catalin.marinas@arm.com>
>
> Hardware DMA limit might not be power of 2. When RAM range starts above
> 0, say 4GB, DMA limit of 30 bits should end at 5GB. A single high bit
> can not encode this limit.
>
> Use direct phys_addr_t limit address for DMA zone limit.
>
> Following commits will add explicit base address to DMA zone.
>
> ---
> Catalin,
>
> This is taken almost verbatim from your email:
>
> https://lore.kernel.org/all/ZZ2HnHJV3gdzu1Aj@arm.com/
>
> Would you provide your sign-off?
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Thanks for writing a commit log. However, I think more work is needed.
See below.
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 03efd86dce0a..00508c69ca9e 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -113,36 +113,24 @@ static void __init arch_reserve_crashkernel(void)
> low_size, high);
> }
>
> -/*
> - * Return the maximum physical address for a zone accessible by the given bits
> - * limit. If DRAM starts above 32-bit, expand the zone to the maximum
> - * available memory, otherwise cap it at 32-bit.
> - */
> -static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
> +static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit)
> {
> - phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits);
> - phys_addr_t phys_start = memblock_start_of_DRAM();
> -
> - if (phys_start > U32_MAX)
> - zone_mask = PHYS_ADDR_MAX;
> - else if (phys_start > zone_mask)
> - zone_mask = U32_MAX;
> -
> - return min(zone_mask, memblock_end_of_DRAM() - 1) + 1;
> + return min(zone_limit, memblock_end_of_DRAM() - 1) + 1;
> }
>
> static void __init zone_sizes_init(void)
> {
> unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
> - unsigned int __maybe_unused acpi_zone_dma_bits;
> - unsigned int __maybe_unused dt_zone_dma_bits;
> - phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
> + phys_addr_t __maybe_unused acpi_zone_dma_limit;
> + phys_addr_t __maybe_unused dt_zone_dma_limit;
> + phys_addr_t __maybe_unused dma32_phys_limit =
> + max_zone_phys(DMA_BIT_MASK(32));
>
> #ifdef CONFIG_ZONE_DMA
> - acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
> - dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL));
> - zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits);
> - arm64_dma_phys_limit = max_zone_phys(zone_dma_bits);
> + acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address();
> + dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL);
> + zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit);
> + arm64_dma_phys_limit = max_zone_phys(zone_dma_limit);
> max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
> #endif
> #ifdef CONFIG_ZONE_DMA32
I think this goes wrong if zone_dma_limit ends up above 32-bit (e.g. no
restrictive dma-ranges properties) but the start of RAM is below 4G.
We'd simply reduce ZONE_DMA32 to zero and ZONE_DMA potentially covering
the whole RAM. Prior to this change, we capped zone_dma_bits to 32 via
min3(). I think we should maintain this cap if memblock_start_of_DRAM()
is below 4G.
We could fix this up in max_zone_phys() above:
if (memblock_start_of_DRAM() < U32_MAX)
zone_limit = min(U32_MAX, zone_limit);
return min(zone_limit, memblock_end_of_DRAM() - 1) + 1;
> diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
> index 4d543b1e9d57..3b2ebcd4f576 100644
> --- a/kernel/dma/direct.c
> +++ b/kernel/dma/direct.c
> @@ -20,7 +20,7 @@
> * it for entirely different regions. In that case the arch code needs to
> * override the variable below for dma-direct to work properly.
> */
> -unsigned int zone_dma_bits __ro_after_init = 24;
> +phys_addr_t zone_dma_limit __ro_after_init = DMA_BIT_MASK(24);
>
> static inline dma_addr_t phys_to_dma_direct(struct device *dev,
> phys_addr_t phys)
> @@ -59,7 +59,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit)
> * zones.
> */
> *phys_limit = dma_to_phys(dev, dma_limit);
> - if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
> + if (*phys_limit <= zone_dma_limit)
> return GFP_DMA;
> if (*phys_limit <= DMA_BIT_MASK(32))
> return GFP_DMA32;
It's worth noting that if ZONE_DMA ends up entirely above 32-bit, there
won't be any ZONE_DMA32. Thinking about it, this could be a potential
problem. For example, if a device has a 32-bit DMA mask and an offset
that lifts this into the 32-36G range, the above may fail to set
GFP_DMA32.
Actually, I think these checks can go wrong even with the current
implementation, assuming RAM below 4G and no DMA offsets. For example,
we have two devices, one with a coherent mask of 30 bits, the other 31
bits. zone_dma_bits would be set to the smaller of the two, so 30 bit
(as per of_dma_get_max_cpu_address()). For the second device, phys_limit
would be ((1 << 31) - 1) but that's higher than DMA_BIT_MASK(30) so we
fail to set GFP_DMA. We do set GFP_DMA32 because of the second test but
that's not sufficient since that's 32-bit rather than 31-bit as the
device needs. Similarly if we have some weird device with a 33-bit DMA
coherent mask but the RAM is addressed by more bits. We'd fail to set
GFP_DMA32.
Ignoring this patch, I think the checks above in mainline should be
something like:
if (*phys_limit < DMA_BIT_MASK(32))
return GFP_DMA;
if (*phys_limit < memblock_end_of_DRAM())
return GFP_DMA32;
IOW, zone_dma_bits is pretty useless for this check IMHO. It gives us
the minimum hence not sufficient to test for devices that fall between
ZONE_DMA and ZONE_DMA32 coherent masks.
With your series, the above test wouldn't work since we don't have a
zone_dma32_limit and zone_dma_limit is above DMA_BIT_MASK(32). We might
need to introduce zone_dma32_limit and maybe drop zone_dma_limit
altogether.
--
Catalin
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH RFC v2 2/5] of: get dma area lower limit
2024-04-09 6:17 [PATCH RFC v2 0/5] arm64: support DMA zone starting above 4GB Baruch Siach
2024-04-09 6:17 ` [PATCH RFC v2 1/5] dma-mapping: replace zone_dma_bits by zone_dma_limit Baruch Siach
@ 2024-04-09 6:17 ` Baruch Siach
2024-06-18 21:38 ` Catalin Marinas
2024-04-09 6:17 ` [PATCH RFC v2 3/5] of: unittest: add test for of_dma_get_cpu_limits() 'min' param Baruch Siach
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Baruch Siach @ 2024-04-09 6:17 UTC (permalink / raw
To: Christoph Hellwig, Marek Szyprowski, Rob Herring, Saravana Kannan,
Catalin Marinas, Will Deacon
Cc: devicetree, Baruch Siach, Ramon Fried, linux-s390,
Petr Tesařík, linuxppc-dev, linux-kernel, iommu,
Elad Nachman, Robin Murphy, linux-arm-kernel
of_dma_get_max_cpu_address() returns the highest CPU address that
devices can use for DMA. The implicit assumption is that all CPU
addresses below that limit are suitable for DMA. However the
'dma-ranges' property this code uses also encodes a lower limit for DMA
that is potentially non-zero.
Rename to of_dma_get_cpu_limits(), and extend to retrieve also the lower
limit for the same 'dma-ranges' property describing the high limit.
Update callers of of_dma_get_max_cpu_address(). No functional change
intended.
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
arch/arm64/mm/init.c | 2 +-
drivers/of/address.c | 38 +++++++++++++++++++++++++++-----------
drivers/of/unittest.c | 8 ++++----
include/linux/of.h | 11 ++++++++---
4 files changed, 40 insertions(+), 19 deletions(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 00508c69ca9e..77e942ca578b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -128,7 +128,7 @@ static void __init zone_sizes_init(void)
#ifdef CONFIG_ZONE_DMA
acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address();
- dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL);
+ of_dma_get_cpu_limits(NULL, &dt_zone_dma_limit, NULL);
zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit);
arm64_dma_phys_limit = max_zone_phys(zone_dma_limit);
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
diff --git a/drivers/of/address.c b/drivers/of/address.c
index ae46a3605904..ac009b3bb63b 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -964,21 +964,25 @@ int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map)
#endif /* CONFIG_HAS_DMA */
/**
- * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA
+ * of_dma_get_cpu_limits - Gets the CPU address limits suitable for DMA
* @np: The node to start searching from or NULL to start from the root
+ * @max: Pointer to high address limit or NULL if not needed
+ * @min: Pointer to low address limit or NULL if not needed
*
* Gets the highest CPU physical address that is addressable by all DMA masters
- * in the sub-tree pointed by np, or the whole tree if NULL is passed. If no
- * DMA constrained device is found, it returns PHYS_ADDR_MAX.
+ * in the sub-tree pointed by @np, or the whole tree if @np is NULL. If no
+ * DMA constrained device is found, @*max is PHYS_ADDR_MAX, and @*min is 0.
*/
-phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
+void __init of_dma_get_cpu_limits(struct device_node *np,
+ phys_addr_t *max, phys_addr_t *min)
{
phys_addr_t max_cpu_addr = PHYS_ADDR_MAX;
struct of_range_parser parser;
- phys_addr_t subtree_max_addr;
+ phys_addr_t min_cpu_addr = 0;
struct device_node *child;
struct of_range range;
const __be32 *ranges;
+ u64 cpu_start = 0;
u64 cpu_end = 0;
int len;
@@ -988,21 +992,33 @@ phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np)
ranges = of_get_property(np, "dma-ranges", &len);
if (ranges && len) {
of_dma_range_parser_init(&parser, np);
- for_each_of_range(&parser, &range)
- if (range.cpu_addr + range.size > cpu_end)
+ for_each_of_range(&parser, &range) {
+ if (range.cpu_addr + range.size > cpu_end) {
cpu_end = range.cpu_addr + range.size - 1;
+ cpu_start = range.cpu_addr;
+ }
+ }
- if (max_cpu_addr > cpu_end)
+ if (max_cpu_addr > cpu_end) {
max_cpu_addr = cpu_end;
+ min_cpu_addr = cpu_start;
+ }
}
for_each_available_child_of_node(np, child) {
- subtree_max_addr = of_dma_get_max_cpu_address(child);
- if (max_cpu_addr > subtree_max_addr)
+ phys_addr_t subtree_max_addr, subtree_min_addr;
+
+ of_dma_get_cpu_limits(child, &subtree_max_addr, &subtree_min_addr);
+ if (max_cpu_addr > subtree_max_addr) {
max_cpu_addr = subtree_max_addr;
+ min_cpu_addr = subtree_min_addr;
+ }
}
- return max_cpu_addr;
+ if (max)
+ *max = max_cpu_addr;
+ if (min)
+ *min = min_cpu_addr;
}
/**
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 6b5c36b6a758..2d632d4ec5b1 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -921,7 +921,7 @@ static void __init of_unittest_changeset(void)
#endif
}
-static void __init of_unittest_dma_get_max_cpu_address(void)
+static void __init of_unittest_dma_get_cpu_limits(void)
{
struct device_node *np;
phys_addr_t cpu_addr;
@@ -935,9 +935,9 @@ static void __init of_unittest_dma_get_max_cpu_address(void)
return;
}
- cpu_addr = of_dma_get_max_cpu_address(np);
+ of_dma_get_cpu_limits(np, &cpu_addr, NULL);
unittest(cpu_addr == 0x4fffffff,
- "of_dma_get_max_cpu_address: wrong CPU addr %pad (expecting %x)\n",
+ "of_dma_get_cpu_limits: wrong CPU addr %pad (expecting %x)\n",
&cpu_addr, 0x4fffffff);
}
@@ -4109,7 +4109,7 @@ static int __init of_unittest(void)
of_unittest_changeset();
of_unittest_parse_interrupts();
of_unittest_parse_interrupts_extended();
- of_unittest_dma_get_max_cpu_address();
+ of_unittest_dma_get_cpu_limits();
of_unittest_parse_dma_ranges();
of_unittest_pci_dma_ranges();
of_unittest_bus_ranges();
diff --git a/include/linux/of.h b/include/linux/of.h
index a0bedd038a05..7756441d3ce0 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -454,7 +454,8 @@ int of_map_id(struct device_node *np, u32 id,
const char *map_name, const char *map_mask_name,
struct device_node **target, u32 *id_out);
-phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+void of_dma_get_cpu_limits(struct device_node *np, phys_addr_t *max,
+ phys_addr_t *min);
struct kimage;
void *of_kexec_alloc_and_setup_fdt(const struct kimage *image,
@@ -880,9 +881,13 @@ static inline int of_map_id(struct device_node *np, u32 id,
return -EINVAL;
}
-static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+static inline void of_dma_get_cpu_limits(struct device_node *np,
+ phys_addr_t *max, phys_addr_t *min)
{
- return PHYS_ADDR_MAX;
+ if (max)
+ *max = PHYS_ADDR_MAX;
+ if (min)
+ *min = 0;
}
static inline const void *of_device_get_match_data(const struct device *dev)
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH RFC v2 2/5] of: get dma area lower limit
2024-04-09 6:17 ` [PATCH RFC v2 2/5] of: get dma area lower limit Baruch Siach
@ 2024-06-18 21:38 ` Catalin Marinas
0 siblings, 0 replies; 9+ messages in thread
From: Catalin Marinas @ 2024-06-18 21:38 UTC (permalink / raw
To: Baruch Siach
Cc: Rob Herring, linux-s390, Ramon Fried, Saravana Kannan, devicetree,
Petr Tesařík, Will Deacon, linuxppc-dev, linux-kernel,
iommu, Elad Nachman, Robin Murphy, Christoph Hellwig,
linux-arm-kernel, Marek Szyprowski
On Tue, Apr 09, 2024 at 09:17:55AM +0300, Baruch Siach wrote:
> of_dma_get_max_cpu_address() returns the highest CPU address that
> devices can use for DMA. The implicit assumption is that all CPU
> addresses below that limit are suitable for DMA. However the
> 'dma-ranges' property this code uses also encodes a lower limit for DMA
> that is potentially non zero.
>
> Rename to of_dma_get_cpu_limits(), and extend to retrieve also the lower
> limit for the same 'dma-ranges' property describing the high limit.
I don't understand the reason for the lower limit. The way the Linux
zones work is that ZONE_DMA always starts from the start of the RAM. It
doesn't matter whether it's 0 or not, you'd not allocate below the start
of RAM anyway. If you have a device that cannot use the bottom of the
RAM, it is pretty broken and not supported by Linux.
I think you added this limit before we tried to move away from
zone_dma_bits to a non-power-of-two limit (zone_dma_limit). With the
latter, we no longer need tricks with the lower limit,
of_dma_get_max_cpu_address() should capture the smallest upper CPU
address limit supported by all devices (and that's where ZONE_DMA should
end).
--
Catalin
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH RFC v2 3/5] of: unittest: add test for of_dma_get_cpu_limits() 'min' param
2024-04-09 6:17 [PATCH RFC v2 0/5] arm64: support DMA zone starting above 4GB Baruch Siach
2024-04-09 6:17 ` [PATCH RFC v2 1/5] dma-mapping: replace zone_dma_bits by zone_dma_limit Baruch Siach
2024-04-09 6:17 ` [PATCH RFC v2 2/5] of: get dma area lower limit Baruch Siach
@ 2024-04-09 6:17 ` Baruch Siach
2024-04-09 6:17 ` [PATCH RFC v2 4/5] dma-direct: add base offset to zone_dma_bits Baruch Siach
2024-04-09 6:17 ` [PATCH RFC v2 5/5] arm64: mm: take DMA zone offset into account Baruch Siach
4 siblings, 0 replies; 9+ messages in thread
From: Baruch Siach @ 2024-04-09 6:17 UTC (permalink / raw
To: Christoph Hellwig, Marek Szyprowski, Rob Herring, Saravana Kannan,
Catalin Marinas, Will Deacon
Cc: devicetree, Baruch Siach, Ramon Fried, linux-s390,
Petr Tesařík, linuxppc-dev, linux-kernel, iommu,
Elad Nachman, Robin Murphy, linux-arm-kernel
Verify that of_dma_get_cpu_limits() sets this new parameter to the
expected result.
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
drivers/of/unittest.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 2d632d4ec5b1..8fabb445a62a 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -924,7 +924,7 @@ static void __init of_unittest_changeset(void)
static void __init of_unittest_dma_get_cpu_limits(void)
{
struct device_node *np;
- phys_addr_t cpu_addr;
+ phys_addr_t cpu_addr_max, cpu_addr_min;
if (!IS_ENABLED(CONFIG_OF_ADDRESS))
return;
@@ -935,10 +935,13 @@ static void __init of_unittest_dma_get_cpu_limits(void)
return;
}
- of_dma_get_cpu_limits(np, &cpu_addr, NULL);
- unittest(cpu_addr == 0x4fffffff,
- "of_dma_get_cpu_limits: wrong CPU addr %pad (expecting %x)\n",
- &cpu_addr, 0x4fffffff);
+ of_dma_get_cpu_limits(np, &cpu_addr_max, &cpu_addr_min);
+ unittest(cpu_addr_max == 0x4fffffff,
+ "of_dma_get_cpu_limits: wrong CPU max addr %pad (expecting %x)\n",
+ &cpu_addr_max, 0x4fffffff);
+ unittest(cpu_addr_min == 0x40000000,
+ "of_dma_get_cpu_limits: wrong CPU min addr %pad (expecting %x)\n",
+ &cpu_addr_min, 0x40000000);
}
static void __init of_unittest_dma_ranges_one(const char *path,
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH RFC v2 4/5] dma-direct: add base offset to zone_dma_bits
2024-04-09 6:17 [PATCH RFC v2 0/5] arm64: support DMA zone starting above 4GB Baruch Siach
` (2 preceding siblings ...)
2024-04-09 6:17 ` [PATCH RFC v2 3/5] of: unittest: add test for of_dma_get_cpu_limits() 'min' param Baruch Siach
@ 2024-04-09 6:17 ` Baruch Siach
2024-06-18 21:40 ` Catalin Marinas
2024-04-09 6:17 ` [PATCH RFC v2 5/5] arm64: mm: take DMA zone offset into account Baruch Siach
4 siblings, 1 reply; 9+ messages in thread
From: Baruch Siach @ 2024-04-09 6:17 UTC (permalink / raw
To: Christoph Hellwig, Marek Szyprowski, Rob Herring, Saravana Kannan,
Catalin Marinas, Will Deacon
Cc: devicetree, Baruch Siach, Ramon Fried, linux-s390,
Petr Tesařík, linuxppc-dev, linux-kernel, iommu,
Elad Nachman, Robin Murphy, linux-arm-kernel
Current code using zone_dma_bits assumes that all addresses in the bit
mask range are suitable for DMA. For some existing platforms this
assumption is not correct. The DMA range might have a non-zero lower limit.
Add 'zone_dma_base' for platform code to set base address for DMA zone.
Rename the dma_direct_supported() local 'min_mask' variable to better
describe its use as limit.
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
include/linux/dma-direct.h | 1 +
kernel/dma/direct.c | 9 +++++----
kernel/dma/pool.c | 2 +-
kernel/dma/swiotlb.c | 4 ++--
4 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 7cf76f1d3239..dd0330cbef81 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -13,6 +13,7 @@
#include <linux/swiotlb.h>
extern phys_addr_t zone_dma_limit;
+extern phys_addr_t zone_dma_base;
/*
* Record the mapping of CPU physical to DMA addresses for a given region.
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 3b2ebcd4f576..92bb241645d6 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -21,6 +21,7 @@
* override the variable below for dma-direct to work properly.
*/
phys_addr_t zone_dma_limit __ro_after_init = DMA_BIT_MASK(24);
+phys_addr_t zone_dma_base __ro_after_init;
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
@@ -59,7 +60,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit)
* zones.
*/
*phys_limit = dma_to_phys(dev, dma_limit);
- if (*phys_limit <= zone_dma_limit)
+ if (*phys_limit <= zone_dma_base + zone_dma_limit)
return GFP_DMA;
if (*phys_limit <= DMA_BIT_MASK(32))
return GFP_DMA32;
@@ -567,7 +568,7 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
int dma_direct_supported(struct device *dev, u64 mask)
{
- u64 min_mask = (max_pfn - 1) << PAGE_SHIFT;
+ u64 min_limit = (max_pfn - 1) << PAGE_SHIFT;
/*
* Because 32-bit DMA masks are so common we expect every architecture
@@ -584,8 +585,8 @@ int dma_direct_supported(struct device *dev, u64 mask)
* part of the check.
*/
if (IS_ENABLED(CONFIG_ZONE_DMA))
- min_mask = min_t(u64, min_mask, zone_dma_limit);
- return mask >= phys_to_dma_unencrypted(dev, min_mask);
+ min_limit = min_t(u64, min_limit, zone_dma_base + zone_dma_limit);
+ return mask >= phys_to_dma_unencrypted(dev, min_limit);
}
/*
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 410a7b40e496..61a86f3d83ae 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -70,7 +70,7 @@ static bool cma_in_zone(gfp_t gfp)
/* CMA can't cross zone boundaries, see cma_activate_area() */
end = cma_get_base(cma) + size - 1;
if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
- return end <= zone_dma_limit;
+ return end <= zone_dma_base + zone_dma_limit;
if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
return end <= DMA_BIT_MASK(32);
return true;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 96d6eee7d215..814052df07c5 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -446,7 +446,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
if (!remap)
io_tlb_default_mem.can_grow = true;
if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA))
- io_tlb_default_mem.phys_limit = zone_dma_limit;
+ io_tlb_default_mem.phys_limit = zone_dma_base + zone_dma_limit;
else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32))
io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32);
else
@@ -625,7 +625,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
}
gfp &= ~GFP_ZONEMASK;
- if (phys_limit <= zone_dma_limit)
+ if (phys_limit <= zone_dma_base + zone_dma_limit)
gfp |= __GFP_DMA;
else if (phys_limit <= DMA_BIT_MASK(32))
gfp |= __GFP_DMA32;
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH RFC v2 4/5] dma-direct: add base offset to zone_dma_bits
2024-04-09 6:17 ` [PATCH RFC v2 4/5] dma-direct: add base offset to zone_dma_bits Baruch Siach
@ 2024-06-18 21:40 ` Catalin Marinas
0 siblings, 0 replies; 9+ messages in thread
From: Catalin Marinas @ 2024-06-18 21:40 UTC (permalink / raw
To: Baruch Siach
Cc: Rob Herring, linux-s390, Ramon Fried, Saravana Kannan, devicetree,
Petr Tesařík, Will Deacon, linuxppc-dev, linux-kernel,
iommu, Elad Nachman, Robin Murphy, Christoph Hellwig,
linux-arm-kernel, Marek Szyprowski
On Tue, Apr 09, 2024 at 09:17:57AM +0300, Baruch Siach wrote:
> Current code using zone_dma_bits assume that all addresses range in the
> bits mask are suitable for DMA. For some existing platforms this
> assumption is not correct. DMA range might have non zero lower limit.
[...]
> @@ -59,7 +60,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit)
> * zones.
> */
> *phys_limit = dma_to_phys(dev, dma_limit);
> - if (*phys_limit <= zone_dma_limit)
> + if (*phys_limit <= zone_dma_base + zone_dma_limit)
> return GFP_DMA;
> if (*phys_limit <= DMA_BIT_MASK(32))
> return GFP_DMA32;
As I said previously, we no longer have zone_dma_bits after the first
patch, so adding this limit no longer make sense. In v1, you wanted a
limit like 32G to be added to the 30-bit zone_dma_bits to give you 33G
upper limit for ZONE_DMA. But since the first patch sets zone_dma_limit
to 33G already, this is no longer needed.
--
Catalin
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH RFC v2 5/5] arm64: mm: take DMA zone offset into account
2024-04-09 6:17 [PATCH RFC v2 0/5] arm64: support DMA zone starting above 4GB Baruch Siach
` (3 preceding siblings ...)
2024-04-09 6:17 ` [PATCH RFC v2 4/5] dma-direct: add base offset to zone_dma_bits Baruch Siach
@ 2024-04-09 6:17 ` Baruch Siach
4 siblings, 0 replies; 9+ messages in thread
From: Baruch Siach @ 2024-04-09 6:17 UTC (permalink / raw
To: Christoph Hellwig, Marek Szyprowski, Rob Herring, Saravana Kannan,
Catalin Marinas, Will Deacon
Cc: devicetree, Baruch Siach, Ramon Fried, linux-s390,
Petr Tesařík, linuxppc-dev, linux-kernel, iommu,
Elad Nachman, Robin Murphy, linux-arm-kernel
Commit 791ab8b2e3db ("arm64: Ignore any DMA offsets in the
max_zone_phys() calculation") made DMA/DMA32 zones span the entire RAM
when RAM starts above 32 bits. This breaks hardware with a DMA area that
starts above 32 bits. But the commit log says that "we haven't noticed
any such hardware". It turns out that such hardware does exist.
One such platform has RAM starting at 32GB with an internal bus that has
the following DMA limits:
#address-cells = <2>;
#size-cells = <2>;
dma-ranges = <0x00 0xc0000000 0x08 0x00000000 0x00 0x40000000>;
Devices under this bus can see 1GB of DMA range between 3GB-4GB in each
device address space. This range is mapped to CPU memory at 32GB-33GB.
With current code DMA allocations for devices under this bus are not
limited to DMA area, leading to run-time allocation failure.
Modify 'zone_dma_limit' calculation (via dt_zone_dma_limit) to only cover
the actual DMA area starting at 'zone_dma_base'. Use the newly introduced
'min' parameter of of_dma_get_cpu_limits() to set 'zone_dma_base'.
DMA32 zone is useless in this configuration, so make its limit the same
as the DMA zone when the lower DMA limit is higher than 32-bits.
The result is DMA zone that properly reflects the hardware constraints
as follows:
[ 0.000000] Zone ranges:
[ 0.000000] DMA [mem 0x0000000800000000-0x000000083fffffff]
[ 0.000000] DMA32 empty
[ 0.000000] Normal [mem 0x0000000840000000-0x0000000bffffffff]
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
---
arch/arm64/mm/init.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 77e942ca578b..cd283ae0178d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -128,9 +128,11 @@ static void __init zone_sizes_init(void)
#ifdef CONFIG_ZONE_DMA
acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address();
- of_dma_get_cpu_limits(NULL, &dt_zone_dma_limit, NULL);
+ of_dma_get_cpu_limits(NULL, &dt_zone_dma_limit, &zone_dma_base);
zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit);
arm64_dma_phys_limit = max_zone_phys(zone_dma_limit);
+ if (zone_dma_base > U32_MAX)
+ dma32_phys_limit = arm64_dma_phys_limit;
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread