LinuxPPC-Dev Archive mirror
* [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity
@ 2024-04-24 11:09 Hari Bathini
  2024-04-24 11:09 ` [PATCH 2/2] radix/kfence: support late __kfence_pool allocation Hari Bathini
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Hari Bathini @ 2024-04-24 11:09 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman
  Cc: Marco Elver, Aneesh Kumar K.V, Nicholas Piggin,
	Alexander Potapenko, Naveen N. Rao, Dmitry Vyukov

When KFENCE is enabled, total system memory is mapped at page level
granularity. But in radix MMU mode, ~3GB additional memory is needed
to map 100GB of system memory at page level granularity when compared
to using 2MB direct mapping. This is not desired considering KFENCE is
designed to be enabled in production kernels [1]. Also, mapping memory
allocated for KFENCE pool at page granularity seems sufficient
to enable KFENCE support. So, allocate __kfence_pool during bootup and
map it at page granularity instead of mapping all system memory at
page granularity.

Without patch:
    # cat /proc/meminfo
    MemTotal:       101201920 kB

With patch:
    # cat /proc/meminfo
    MemTotal:       104483904 kB

All kfence_test.c testcases passed with this patch.

[1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/include/asm/kfence.h        |  5 ++++
 arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
 arch/powerpc/mm/init_64.c                | 14 ++++++++++
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index 424ceef82ae6..18ec2b06ba1e 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -8,6 +8,7 @@
 #ifndef __ASM_POWERPC_KFENCE_H
 #define __ASM_POWERPC_KFENCE_H
 
+#include <linux/kfence.h>
 #include <linux/mm.h>
 #include <asm/pgtable.h>
 
@@ -15,6 +16,10 @@
 #define ARCH_FUNC_PREFIX "."
 #endif
 
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+#endif
+
 static inline bool arch_kfence_init_pool(void)
 {
 	return true;
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 15e88f1439ec..fccbf92f279b 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -31,6 +31,7 @@
 #include <asm/uaccess.h>
 #include <asm/ultravisor.h>
 #include <asm/set_memory.h>
+#include <asm/kfence.h>
 
 #include <trace/events/thp.h>
 
@@ -291,9 +292,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
 	return end;
 }
 
-static int __meminit create_physical_mapping(unsigned long start,
-					     unsigned long end,
-					     int nid, pgprot_t _prot)
+static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
+					     pgprot_t _prot, unsigned long mapping_sz_limit)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
 	bool prev_exec, exec = false;
@@ -301,7 +301,10 @@ static int __meminit create_physical_mapping(unsigned long start,
 	int psize;
 	unsigned long max_mapping_size = memory_block_size;
 
-	if (debug_pagealloc_enabled_or_kfence())
+	if (mapping_sz_limit < max_mapping_size)
+		max_mapping_size = mapping_sz_limit;
+
+	if (debug_pagealloc_enabled())
 		max_mapping_size = PAGE_SIZE;
 
 	start = ALIGN(start, PAGE_SIZE);
@@ -358,6 +361,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 
 static void __init radix_init_pgtable(void)
 {
+	phys_addr_t kfence_pool __maybe_unused;
 	unsigned long rts_field;
 	phys_addr_t start, end;
 	u64 i;
@@ -365,6 +369,13 @@ static void __init radix_init_pgtable(void)
 	/* We don't support slb for radix */
 	slb_set_size(0);
 
+#ifdef CONFIG_KFENCE
+	if (kfence_early_init) {
+		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+	}
+#endif
+
 	/*
 	 * Create the linear mapping
 	 */
@@ -380,10 +391,18 @@ static void __init radix_init_pgtable(void)
 			continue;
 		}
 
-		WARN_ON(create_physical_mapping(start, end,
-						-1, PAGE_KERNEL));
+		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
 	}
 
+#ifdef CONFIG_KFENCE
+	if (kfence_early_init) {
+		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
+					PAGE_KERNEL, PAGE_SIZE);
+		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+		__kfence_pool = __va(kfence_pool);
+	}
+#endif
+
 	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
 			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
 		/*
@@ -874,8 +893,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
 		return -1;
 	}
 
-	return create_physical_mapping(__pa(start), __pa(end),
-				       nid, prot);
+	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
 }
 
 int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index d96bbc001e73..8155bfd6c16b 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -64,6 +64,20 @@
 
 #include <mm/mmu_decl.h>
 
+#ifdef CONFIG_KFENCE
+bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
+
+static int __init parse_kfence_early_init(char *arg)
+{
+	int val;
+
+	if (get_option(&arg, &val))
+		kfence_early_init = !!val;
+	return 0;
+}
+early_param("kfence.sample_interval", parse_kfence_early_init);
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * Given an address within the vmemmap, determine the page that
-- 
2.44.0



* [PATCH 2/2] radix/kfence: support late __kfence_pool allocation
  2024-04-24 11:09 [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity Hari Bathini
@ 2024-04-24 11:09 ` Hari Bathini
  2024-05-01  7:03   ` Ritesh Harjani
  2024-05-01  5:45 ` [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity Ritesh Harjani
  2024-05-07 12:33 ` Christophe Leroy
  2 siblings, 1 reply; 5+ messages in thread
From: Hari Bathini @ 2024-04-24 11:09 UTC (permalink / raw)
  To: linuxppc-dev, Michael Ellerman
  Cc: Marco Elver, Aneesh Kumar K.V, Nicholas Piggin,
	Alexander Potapenko, Naveen N. Rao, Dmitry Vyukov

With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
startup"), KFENCE pool can be allocated after system startup via the
page allocator. This can lead to problems as all memory is no longer
mapped at page granularity with CONFIG_KFENCE. Address this by direct
mapping all memory at PMD level and splitting the mapping for PMD pages
that overlap with __kfence_pool to page level granularity if and when
__kfence_pool is allocated after system startup.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h |  2 +
 arch/powerpc/include/asm/kfence.h          | 14 +++++-
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 50 +++++++++++++++++++++-
 3 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 8f55ff74bb68..0423ddbcf73c 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -340,6 +340,8 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
 extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 				 pgprot_t flags, unsigned int psz);
 
+extern bool radix_kfence_init_pool(void);
+
 static inline unsigned long radix__get_tree_size(void)
 {
 	unsigned long rts_field;
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
index 18ec2b06ba1e..c5d2fb2f9ecb 100644
--- a/arch/powerpc/include/asm/kfence.h
+++ b/arch/powerpc/include/asm/kfence.h
@@ -18,12 +18,24 @@
 
 #ifdef CONFIG_KFENCE
 extern bool kfence_early_init;
-#endif
+
+static inline bool kfence_alloc_pool_late(void)
+{
+	return !kfence_early_init;
+}
 
 static inline bool arch_kfence_init_pool(void)
 {
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (radix_enabled())
+		return radix_kfence_init_pool();
+#endif
+
 	return true;
 }
+#else
+static inline bool kfence_alloc_pool_late(void) { return false; }
+#endif
 
 #ifdef CONFIG_PPC64
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fccbf92f279b..f4374e3e31e1 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -253,6 +253,53 @@ void radix__mark_initmem_nx(void)
 }
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
+#ifdef CONFIG_KFENCE
+static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte = pte_alloc_one_kernel(&init_mm);
+	unsigned long pfn = PFN_DOWN(__pa(addr));
+	int i;
+
+	if (!pte)
+		return -ENOMEM;
+
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		__set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
+		asm volatile("ptesync": : :"memory");
+	}
+	pmd_populate_kernel(&init_mm, pmd, pte);
+
+	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+	return 0;
+}
+
+bool radix_kfence_init_pool(void)
+{
+	unsigned int page_psize, pmd_psize;
+	unsigned long addr;
+	pmd_t *pmd;
+
+	if (!kfence_alloc_pool_late())
+		return true;
+
+	page_psize = shift_to_mmu_psize(PAGE_SHIFT);
+	pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
+	for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+	     addr += PAGE_SIZE) {
+		pmd = pmd_off_k(addr);
+
+		if (pmd_leaf(*pmd)) {
+			if (radix_split_pmd_page(pmd, addr & PMD_MASK))
+				return false;
+			update_page_count(pmd_psize, -1);
+			update_page_count(page_psize, PTRS_PER_PTE);
+		}
+	}
+
+	return true;
+}
+#endif
+
 static inline void __meminit
 print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
 {
@@ -391,7 +438,8 @@ static void __init radix_init_pgtable(void)
 			continue;
 		}
 
-		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
+		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
+						kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));
 	}
 
 #ifdef CONFIG_KFENCE
-- 
2.44.0



* Re: [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity
  2024-04-24 11:09 [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity Hari Bathini
  2024-04-24 11:09 ` [PATCH 2/2] radix/kfence: support late __kfence_pool allocation Hari Bathini
@ 2024-05-01  5:45 ` Ritesh Harjani
  2024-05-07 12:33 ` Christophe Leroy
  2 siblings, 0 replies; 5+ messages in thread
From: Ritesh Harjani @ 2024-05-01  5:45 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, Michael Ellerman
  Cc: Marco Elver, Aneesh Kumar K.V, Nicholas Piggin,
	Alexander Potapenko, Naveen N. Rao, Dmitry Vyukov

Hari Bathini <hbathini@linux.ibm.com> writes:

> When KFENCE is enabled, total system memory is mapped at page level
> granularity. But in radix MMU mode, ~3GB additional memory is needed
> to map 100GB of system memory at page level granularity when compared
> to using 2MB direct mapping. This is not desired considering KFENCE is
> designed to be enabled in production kernels [1]. Also, mapping memory
> allocated for KFENCE pool at page granularity seems sufficient
> to enable KFENCE support. So, allocate __kfence_pool during bootup and
> map it at page granularity instead of mapping all system memory at
> page granularity.
>
> Without patch:
>     # cat /proc/meminfo
>     MemTotal:       101201920 kB
>
> With patch:
>     # cat /proc/meminfo
>     MemTotal:       104483904 kB
>
> All kfence_test.c testcases passed with this patch.
>
> [1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/kfence.h        |  5 ++++
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
>  arch/powerpc/mm/init_64.c                | 14 ++++++++++

I am new at this, but the patch looked interesting, hence my review comments.

>  3 files changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 424ceef82ae6..18ec2b06ba1e 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -8,6 +8,7 @@
>  #ifndef __ASM_POWERPC_KFENCE_H
>  #define __ASM_POWERPC_KFENCE_H
>  
> +#include <linux/kfence.h>
>  #include <linux/mm.h>
>  #include <asm/pgtable.h>
>  
> @@ -15,6 +16,10 @@
>  #define ARCH_FUNC_PREFIX "."
>  #endif
>  
> +#ifdef CONFIG_KFENCE
> +extern bool kfence_early_init;
> +#endif
> +
>  static inline bool arch_kfence_init_pool(void)
>  {
>  	return true;

Shouldn't we return false for !kfence_early_init?
Because otherwise, this patch may break the late init case which your
next patch is fixing, and maybe git bisect will break?
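
For illustration, a minimal sketch of what that could look like on top of
this patch (hypothetical, not part of the posted series; ignoring the
!CONFIG_KFENCE stub):

static inline bool arch_kfence_init_pool(void)
{
        /*
         * Until late pool allocation is supported (next patch), only a
         * pool that was allocated and page-mapped at boot is usable.
         */
        return kfence_early_init;
}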


> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 15e88f1439ec..fccbf92f279b 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -31,6 +31,7 @@
>  #include <asm/uaccess.h>
>  #include <asm/ultravisor.h>
>  #include <asm/set_memory.h>
> +#include <asm/kfence.h>
>  
>  #include <trace/events/thp.h>
>  
> @@ -291,9 +292,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
>  	return end;
>  }
>  
> -static int __meminit create_physical_mapping(unsigned long start,
> -					     unsigned long end,
> -					     int nid, pgprot_t _prot)
> +static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
> +					     pgprot_t _prot, unsigned long mapping_sz_limit)

lines over 80 chars.

>  {
>  	unsigned long vaddr, addr, mapping_size = 0;
>  	bool prev_exec, exec = false;
> @@ -301,7 +301,10 @@ static int __meminit create_physical_mapping(unsigned long start,
>  	int psize;
>  	unsigned long max_mapping_size = memory_block_size;
>  
> -	if (debug_pagealloc_enabled_or_kfence())
> +	if (mapping_sz_limit < max_mapping_size)
> +		max_mapping_size = mapping_sz_limit;
> +
> +	if (debug_pagealloc_enabled())
>  		max_mapping_size = PAGE_SIZE;
>  
>  	start = ALIGN(start, PAGE_SIZE);
> @@ -358,6 +361,7 @@ static int __meminit create_physical_mapping(unsigned long start,
>  
>  static void __init radix_init_pgtable(void)
>  {
> +	phys_addr_t kfence_pool __maybe_unused;
>  	unsigned long rts_field;
>  	phys_addr_t start, end;
>  	u64 i;
> @@ -365,6 +369,13 @@ static void __init radix_init_pgtable(void)
>  	/* We don't support slb for radix */
>  	slb_set_size(0);
>  
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

What if memblock_phys_alloc() failed? error handling?
> +		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +	}
> +#endif
> +

Instead of #ifdef CONFIG_KFENCE in the function,
maybe we can define radix_kfence_alloc_pool()? Then we won't need
__maybe_unused too.
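
Something along these lines, as a rough sketch of that suggestion (the
helper name and the error handling policy are made up here, not taken from
the patch):

#ifdef CONFIG_KFENCE
static phys_addr_t __init radix_kfence_alloc_pool(void)
{
        phys_addr_t kfence_pool;

        if (!kfence_early_init)
                return 0;

        kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
        if (!kfence_pool) {
                pr_err("kfence: failed to allocate pool, disabling early init\n");
                kfence_early_init = false;
                return 0;
        }

        memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
        return kfence_pool;
}
#else
static inline phys_addr_t radix_kfence_alloc_pool(void) { return 0; }
#endif

radix_init_pgtable() could then just do kfence_pool = radix_kfence_alloc_pool()
with no #ifdef and no __maybe_unused.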

>  	/*
>  	 * Create the linear mapping
>  	 */
> @@ -380,10 +391,18 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>  
> -		WARN_ON(create_physical_mapping(start, end,
> -						-1, PAGE_KERNEL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
>  	}
>  
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
> +					PAGE_KERNEL, PAGE_SIZE);

Even this can return an error. Maybe WARN_ON_ONCE()? Or disable kfence
on error?

> +		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +		__kfence_pool = __va(kfence_pool);
> +	}
> +#endif
> +

This #ifdef block could then become a radix_kfence_map_pool() helper?
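
A rough sketch of that (again, purely illustrative; the error handling is
my assumption, not something the patch does):

#ifdef CONFIG_KFENCE
static void __init radix_kfence_map_pool(phys_addr_t kfence_pool)
{
        if (!kfence_pool)
                return;

        if (WARN_ON(create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
                                            -1, PAGE_KERNEL, PAGE_SIZE))) {
                /*
                 * Mapping failed: don't hand the pool to KFENCE. Clearing
                 * kfence_early_init here (and rejecting the pool in
                 * arch_kfence_init_pool(), as suggested above) would keep
                 * KFENCE off rather than let it run on an unmapped pool.
                 */
                kfence_early_init = false;
                return;
        }

        memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
        __kfence_pool = __va(kfence_pool);
}
#else
static inline void radix_kfence_map_pool(phys_addr_t kfence_pool) { }
#endif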


>  	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
>  			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
>  		/*
> @@ -874,8 +893,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
>  		return -1;
>  	}
>  
> -	return create_physical_mapping(__pa(start), __pa(end),
> -				       nid, prot);
> +	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
>  }
>  
>  int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index d96bbc001e73..8155bfd6c16b 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -64,6 +64,20 @@
>  
>  #include <mm/mmu_decl.h>
>  
> +#ifdef CONFIG_KFENCE
> +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> +
> +static int __init parse_kfence_early_init(char *arg)
> +{
> +	int val;
> +
> +	if (get_option(&arg, &val))
> +		kfence_early_init = !!val;
> +	return 0;
> +}
> +early_param("kfence.sample_interval", parse_kfence_early_init);
> +#endif
> +
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP
>  /*
>   * Given an address within the vmemmap, determine the page that
> -- 
> 2.44.0


* Re: [PATCH 2/2] radix/kfence: support late __kfence_pool allocation
  2024-04-24 11:09 ` [PATCH 2/2] radix/kfence: support late __kfence_pool allocation Hari Bathini
@ 2024-05-01  7:03   ` Ritesh Harjani
  0 siblings, 0 replies; 5+ messages in thread
From: Ritesh Harjani @ 2024-05-01  7:03 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, Michael Ellerman
  Cc: Marco Elver, Aneesh Kumar K.V, Nicholas Piggin,
	Alexander Potapenko, Naveen N. Rao, Dmitry Vyukov

Hari Bathini <hbathini@linux.ibm.com> writes:

> With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
> startup"), KFENCE pool can be allocated after system startup via the
> page allocator. This can lead to problems as all memory is no longer
> mapped at page granularity with CONFIG_KFENCE. Address this by direct
> mapping all memory at PMD level and splitting the mapping for PMD pages
> that overlap with __kfence_pool to page level granularity if and when
> __kfence_pool is allocated after system startup.
>
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |  2 +
>  arch/powerpc/include/asm/kfence.h          | 14 +++++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 50 +++++++++++++++++++++-
>  3 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 8f55ff74bb68..0423ddbcf73c 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -340,6 +340,8 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
>  extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
>  				 pgprot_t flags, unsigned int psz);
>  
> +extern bool radix_kfence_init_pool(void);
> +
>  static inline unsigned long radix__get_tree_size(void)
>  {
>  	unsigned long rts_field;
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 18ec2b06ba1e..c5d2fb2f9ecb 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -18,12 +18,24 @@
>  
>  #ifdef CONFIG_KFENCE
>  extern bool kfence_early_init;
> -#endif
> +
> +static inline bool kfence_alloc_pool_late(void)
> +{
> +	return !kfence_early_init;
> +}

Minor nit, but do we need kfence_alloc_pool_late()?
The function name looks confusing. Can we not just use
!kfence_early_init? If not then maybe bool kfence_late_init?

>  
>  static inline bool arch_kfence_init_pool(void)
>  {
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	if (radix_enabled())
> +		return radix_kfence_init_pool();

Can we directly check...
        if (radix_enabled() && !kfence_early_init)
... instead of embedding the check inside radix_kfence_late_init_pool()
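
i.e. roughly (just a sketch of the suggestion, ignoring the !CONFIG_KFENCE
stub):

static inline bool arch_kfence_init_pool(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
        /* Only a late-allocated pool needs the radix PMD split. */
        if (radix_enabled() && !kfence_early_init)
                return radix_kfence_init_pool();
#endif
        return true;
}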

> +#endif
> +
>  	return true;
>  }
> +#else
> +static inline bool kfence_alloc_pool_late(void) { return false; }
> +#endif
>  
>  #ifdef CONFIG_PPC64
>  static inline bool kfence_protect_page(unsigned long addr, bool protect)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index fccbf92f279b..f4374e3e31e1 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -253,6 +253,53 @@ void radix__mark_initmem_nx(void)
>  }
>  #endif /* CONFIG_STRICT_KERNEL_RWX */
>  
> +#ifdef CONFIG_KFENCE
> +static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
> +{
> +	pte_t *pte = pte_alloc_one_kernel(&init_mm);
> +	unsigned long pfn = PFN_DOWN(__pa(addr));

Minor nit: since addr will always be page aligned, maybe PHYS_PFN() is better
suited. Although it does not matter.

> +	int i;
> +
> +	if (!pte)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < PTRS_PER_PTE; i++) {
> +		__set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
> +		asm volatile("ptesync": : :"memory");
> +	}

Maybe add a comment above the loop on why __set_pte_at() is OK for late
kfence init, and why not pte_update()? [1]

[1]: https://lore.kernel.org/linuxppc-dev/87y318wp9r.fsf@linux.ibm.com/
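
If I read the code correctly, the comment could say something like this
above the loop (my wording; please double-check the reasoning):

        /*
         * The new PTE table is not yet linked into the kernel page tables;
         * it only becomes visible once pmd_populate_kernel() replaces the
         * huge PMD below. No valid PTE is modified in place here, so plain
         * __set_pte_at() (rather than a pte_update() read-modify-write on
         * a live PTE) should be sufficient.
         */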


> +	pmd_populate_kernel(&init_mm, pmd, pte);
> +
> +	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
> +	return 0;
> +}
> +
> +bool radix_kfence_init_pool(void)
> +{
> +	unsigned int page_psize, pmd_psize;
> +	unsigned long addr;
> +	pmd_t *pmd;
> +
> +	if (!kfence_alloc_pool_late())
> +		return true;
> +
> +	page_psize = shift_to_mmu_psize(PAGE_SHIFT);
> +	pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
> +	for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
> +	     addr += PAGE_SIZE) {
> +		pmd = pmd_off_k(addr);
> +
> +		if (pmd_leaf(*pmd)) {
> +			if (radix_split_pmd_page(pmd, addr & PMD_MASK))
> +				return false;
> +			update_page_count(pmd_psize, -1);
> +			update_page_count(page_psize, PTRS_PER_PTE);
> +		}
> +	}
> +
> +	return true;
> +}
> +#endif
> +
>  static inline void __meminit
>  print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
>  {
> @@ -391,7 +438,8 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>  
> -		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
> +						kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));

So every time !kfence_early_init is true, we always use PMD_SIZE.
So do we never use 1G mappings for the direct map?

>  	}
>  
>  #ifdef CONFIG_KFENCE
> -- 
> 2.44.0


* Re: [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity
  2024-04-24 11:09 [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity Hari Bathini
  2024-04-24 11:09 ` [PATCH 2/2] radix/kfence: support late __kfence_pool allocation Hari Bathini
  2024-05-01  5:45 ` [PATCH 1/2] radix/kfence: map __kfence_pool at page granularity Ritesh Harjani
@ 2024-05-07 12:33 ` Christophe Leroy
  2 siblings, 0 replies; 5+ messages in thread
From: Christophe Leroy @ 2024-05-07 12:33 UTC (permalink / raw)
  To: Hari Bathini, linuxppc-dev, Michael Ellerman
  Cc: Marco Elver, Aneesh Kumar K.V, Nicholas Piggin,
	Alexander Potapenko, Naveen N. Rao, Dmitry Vyukov



On 24/04/2024 at 13:09, Hari Bathini wrote:
> When KFENCE is enabled, total system memory is mapped at page level
> granularity. But in radix MMU mode, ~3GB additional memory is needed
> to map 100GB of system memory at page level granularity when compared
> to using 2MB direct mapping. This is not desired considering KFENCE is
> designed to be enabled in production kernels [1]. Also, mapping memory
> allocated for KFENCE pool at page granularity seems sufficient enough
> to enable KFENCE support. So, allocate __kfence_pool during bootup and
> map it at page granularity instead of mapping all system memory at
> page granularity.


That seems to be more or less copied from ARM64? Is that the best
approach?

Can't you implement arch_kfence_init_pool() instead ?

Also, it seems your patch only addresses PPC64. The same should be done 
for PPC32 and there are probably parts that should be common.

> 
> Without patch:
>      # cat /proc/meminfo
>      MemTotal:       101201920 kB
> 
> With patch:
>      # cat /proc/meminfo
>      MemTotal:       104483904 kB
> 
> All kfence_test.c testcases passed with this patch.
> 
> [1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/kfence.h        |  5 ++++
>   arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
>   arch/powerpc/mm/init_64.c                | 14 ++++++++++
>   3 files changed, 45 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 424ceef82ae6..18ec2b06ba1e 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -8,6 +8,7 @@
>   #ifndef __ASM_POWERPC_KFENCE_H
>   #define __ASM_POWERPC_KFENCE_H
>   
> +#include <linux/kfence.h>

Why do you need that? It can't be needed by the extern bool you are
adding below.

If it is needed by some C file that includes asm/kfence.h, it should 
include linux/kfence.h by itself, see for instance 
mm/kfence/kfence_test.c and mm/kfence/core.c
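
(I assume the include is there because radix_pgtable.c needs
KFENCE_POOL_SIZE, which comes from linux/kfence.h. If so, per the above it
could simply be included from radix_pgtable.c directly, e.g.:

        #include <linux/kfence.h>       /* KFENCE_POOL_SIZE */
        #include <asm/kfence.h>         /* kfence_early_init */

instead of pulling linux/kfence.h into asm/kfence.h.)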

>   #include <linux/mm.h>
>   #include <asm/pgtable.h>
>   
> @@ -15,6 +16,10 @@
>   #define ARCH_FUNC_PREFIX "."
>   #endif
>   
> +#ifdef CONFIG_KFENCE
> +extern bool kfence_early_init;
> +#endif
> +
>   static inline bool arch_kfence_init_pool(void)
>   {
>   	return true;
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 15e88f1439ec..fccbf92f279b 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -31,6 +31,7 @@
>   #include <asm/uaccess.h>
>   #include <asm/ultravisor.h>
>   #include <asm/set_memory.h>
> +#include <asm/kfence.h>
>   
>   #include <trace/events/thp.h>
>   
> @@ -291,9 +292,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
>   	return end;
>   }
>   
> -static int __meminit create_physical_mapping(unsigned long start,
> -					     unsigned long end,
> -					     int nid, pgprot_t _prot)
> +static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
> +					     pgprot_t _prot, unsigned long mapping_sz_limit)
>   {
>   	unsigned long vaddr, addr, mapping_size = 0;
>   	bool prev_exec, exec = false;
> @@ -301,7 +301,10 @@ static int __meminit create_physical_mapping(unsigned long start,
>   	int psize;
>   	unsigned long max_mapping_size = memory_block_size;
>   
> -	if (debug_pagealloc_enabled_or_kfence())
> +	if (mapping_sz_limit < max_mapping_size)
> +		max_mapping_size = mapping_sz_limit;
> +
> +	if (debug_pagealloc_enabled())
>   		max_mapping_size = PAGE_SIZE;
>   
>   	start = ALIGN(start, PAGE_SIZE);
> @@ -358,6 +361,7 @@ static int __meminit create_physical_mapping(unsigned long start,
>   
>   static void __init radix_init_pgtable(void)
>   {
> +	phys_addr_t kfence_pool __maybe_unused;

Don't do that. Avoid using __maybe_unused.

Instead, declare this var where it is used.

>   	unsigned long rts_field;
>   	phys_addr_t start, end;
>   	u64 i;
> @@ -365,6 +369,13 @@ static void __init radix_init_pgtable(void)
>   	/* We don't support slb for radix */
>   	slb_set_size(0);
>   
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {

Declare kfence_pool here.

> +		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
> +		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +	}
> +#endif
> +
>   	/*
>   	 * Create the linear mapping
>   	 */
> @@ -380,10 +391,18 @@ static void __init radix_init_pgtable(void)
>   			continue;
>   		}
>   
> -		WARN_ON(create_physical_mapping(start, end,
> -						-1, PAGE_KERNEL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
>   	}
>   
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
> +					PAGE_KERNEL, PAGE_SIZE);
> +		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +		__kfence_pool = __va(kfence_pool);
> +	}
> +#endif
> +
>   	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
>   			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
>   		/*
> @@ -874,8 +893,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
>   		return -1;
>   	}
>   
> -	return create_physical_mapping(__pa(start), __pa(end),
> -				       nid, prot);
> +	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
>   }
>   
>   int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index d96bbc001e73..8155bfd6c16b 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -64,6 +64,20 @@
>   
>   #include <mm/mmu_decl.h>
>   
> +#ifdef CONFIG_KFENCE
> +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> +
> +static int __init parse_kfence_early_init(char *arg)
> +{
> +	int val;
> +
> +	if (get_option(&arg, &val))
> +		kfence_early_init = !!val;
> +	return 0;
> +}
> +early_param("kfence.sample_interval", parse_kfence_early_init);
> +#endif
> +
>   #ifdef CONFIG_SPARSEMEM_VMEMMAP
>   /*
>    * Given an address within the vmemmap, determine the page that

