All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
@ 2023-03-30 14:04 ` Shanker Donthineni
  0 siblings, 0 replies; 6+ messages in thread
From: Shanker Donthineni @ 2023-03-30 14:04 UTC (permalink / raw
  To: Catalin Marinas, Will Deacon, Jonathan Corbet, Ard Biesheuvel,
	Mark Rutland
  Cc: Anshuman Khandual, Kalesh Singh, Zhou Guanghui, linux-arm-kernel,
	linux-doc, linux-kernel, Shanker Donthineni, Vikram Sethi,
	Thierry Reding

The allocation of modules occurs in two regions. The first region
is MODULES_VSIZE, which is 128MB in size and shared with the core
kernel when the KASLR feature is unavailable or disabled through
a boot parameter. The second region, which is 2GB in size, is
shared with the other vmalloc callers. Depending on the size of
the core kernel, the 128MB region may quickly fill up after
loading a few modules, causing the system to switch to the 2GB
region. Unfortunately, even the 2GB region can run out of space
if previously loaded modules and other kernel subsystems consume
the entire area, leaving no space for additional modules.

This issue usually occurs when the system has a large number of
CPU cores, PCIe host-bridge controllers, and I/O devices. For
instance, the ECAM region of one host-bridge controller can use
up to 256MB of vmalloc space, while eight controllers can occupy
the entire 2GB.

To address this problem, a possible solution would be to increase
the MODULES_VSIZE to 2GB. This would improve the system's ability
to accommodate a greater number of dynamically loaded modules and
drivers when KASLR is not enabled. However, prior to switching to
the 2GB region, it is advisable to allocate modules within the
128MB space that covers the core kernel, in order to benefit from
the direct branches.

Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
---
Changes since v1:
 - Included Ard's recommendations.
 - Revised the commit message.

dmesg:
 On a NVIDIA T241 system with Ubuntu-22.04, hitting boot failures
 due to vmalloc/vmap allocation errors when loading modules.

 [   64.181308] ipmi_ssif: IPMI SSIF Interface driver
 [   64.184494] usbcore: registered new interface driver r8152
 [   64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
 [   64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
 [   64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G           OE      6.2-generic-64k 
 [   64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
 [   64.242515] Call trace:
 [   64.242516]  dump_backtrace+0xe0/0x130
 [   64.242523]  show_stack+0x20/0x60
 [   64.242525]  dump_stack_lvl+0x68/0x84
 [   64.242530]  dump_stack+0x18/0x34
 [   64.242532]  warn_alloc+0x11c/0x1b0
 [   64.242537]  __vmalloc_node_range+0xe0/0x20c
 [   64.242540]  module_alloc+0x118/0x160
 [   64.242543]  move_module+0x2c/0x190
 [   64.242546]  layout_and_allocate+0xfc/0x160
 [   64.242548]  load_module+0x260/0xbc4
 [   64.242549]  __do_sys_finit_module+0xac/0x130
 [   64.242551]  __arm64_sys_finit_module+0x28/0x34
 [   64.242552]  invoke_syscall+0x78/0x100
 [   64.242553]  el0_svc_common.constprop.0+0x170/0x194
 [   64.242555]  do_el0_svc+0x38/0x4c
 [   64.242556]  el0_svc+0x2c/0xc0
 [   64.242558]  el0t_64_sync_handler+0xbc/0x13c
 [   64.242560]  el0t_64_sync+0x1a0/0x1a4

 Documentation/arm64/memory.rst  | 8 ++++----
 arch/arm64/include/asm/memory.h | 2 +-
 arch/arm64/kernel/module.c      | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 2a641ba7be3b..55a55f30eed8 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   0000000000000000	0000ffffffffffff	 256TB		user
   ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
  [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   0000000000000000	000fffffffffffff	   4PB		user
   fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
  [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 78e5163836a0..b58c3127323e 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
 #define KIMAGE_VADDR		(MODULES_END)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE		(SZ_128M)
+#define MODULES_VSIZE		(SZ_2G)
 #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
 #define PCI_IO_END		(VMEMMAP_START - SZ_8M)
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb5..b4affe775f23 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
 		/* don't exceed the static module region - see below */
 		module_alloc_end = MODULES_END;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
 				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
-- 
2.25.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
@ 2023-03-30 14:04 ` Shanker Donthineni
  0 siblings, 0 replies; 6+ messages in thread
From: Shanker Donthineni @ 2023-03-30 14:04 UTC (permalink / raw
  To: Catalin Marinas, Will Deacon, Jonathan Corbet, Ard Biesheuvel,
	Mark Rutland
  Cc: Anshuman Khandual, Kalesh Singh, Zhou Guanghui, linux-arm-kernel,
	linux-doc, linux-kernel, Shanker Donthineni, Vikram Sethi,
	Thierry Reding

The allocation of modules occurs in two regions. The first region
is MODULES_VSIZE, which is 128MB in size and shared with the core
kernel when the KASLR feature is unavailable or disabled through
a boot parameter. The second region, which is 2GB in size, is
shared with the other vmalloc callers. Depending on the size of
the core kernel, the 128MB region may quickly fill up after
loading a few modules, causing the system to switch to the 2GB
region. Unfortunately, even the 2GB region can run out of space
if previously loaded modules and other kernel subsystems consume
the entire area, leaving no space for additional modules.

This issue usually occurs when the system has a large number of
CPU cores, PCIe host-bridge controllers, and I/O devices. For
instance, the ECAM region of one host-bridge controller can use
up to 256MB of vmalloc space, while eight controllers can occupy
the entire 2GB.

To address this problem, a possible solution would be to increase
the MODULES_VSIZE to 2GB. This would improve the system's ability
to accommodate a greater number of dynamically loaded modules and
drivers when KASLR is not enabled. However, prior to switching to
the 2GB region, it is advisable to allocate modules within the
128MB space that covers the core kernel, in order to benefit from
the direct branches.

Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
---
Changes since v1:
 - Included Ard's recommendations.
 - Revised the commit message.

dmesg:
 On a NVIDIA T241 system with Ubuntu-22.04, hitting boot failures
 due to vmalloc/vmap allocation errors when loading modules.

 [   64.181308] ipmi_ssif: IPMI SSIF Interface driver
 [   64.184494] usbcore: registered new interface driver r8152
 [   64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
 [   64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
 [   64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G           OE      6.2-generic-64k 
 [   64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
 [   64.242515] Call trace:
 [   64.242516]  dump_backtrace+0xe0/0x130
 [   64.242523]  show_stack+0x20/0x60
 [   64.242525]  dump_stack_lvl+0x68/0x84
 [   64.242530]  dump_stack+0x18/0x34
 [   64.242532]  warn_alloc+0x11c/0x1b0
 [   64.242537]  __vmalloc_node_range+0xe0/0x20c
 [   64.242540]  module_alloc+0x118/0x160
 [   64.242543]  move_module+0x2c/0x190
 [   64.242546]  layout_and_allocate+0xfc/0x160
 [   64.242548]  load_module+0x260/0xbc4
 [   64.242549]  __do_sys_finit_module+0xac/0x130
 [   64.242551]  __arm64_sys_finit_module+0x28/0x34
 [   64.242552]  invoke_syscall+0x78/0x100
 [   64.242553]  el0_svc_common.constprop.0+0x170/0x194
 [   64.242555]  do_el0_svc+0x38/0x4c
 [   64.242556]  el0_svc+0x2c/0xc0
 [   64.242558]  el0t_64_sync_handler+0xbc/0x13c
 [   64.242560]  el0t_64_sync+0x1a0/0x1a4

 Documentation/arm64/memory.rst  | 8 ++++----
 arch/arm64/include/asm/memory.h | 2 +-
 arch/arm64/kernel/module.c      | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 2a641ba7be3b..55a55f30eed8 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   0000000000000000	0000ffffffffffff	 256TB		user
   ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
  [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   0000000000000000	000fffffffffffff	   4PB		user
   fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
  [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 78e5163836a0..b58c3127323e 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
 #define KIMAGE_VADDR		(MODULES_END)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE		(SZ_128M)
+#define MODULES_VSIZE		(SZ_2G)
 #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
 #define PCI_IO_END		(VMEMMAP_START - SZ_8M)
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb5..b4affe775f23 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
 		/* don't exceed the static module region - see below */
 		module_alloc_end = MODULES_END;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
 				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
  2023-03-30 14:04 ` Shanker Donthineni
@ 2023-03-31 21:48   ` Shanker Donthineni
  -1 siblings, 0 replies; 6+ messages in thread
From: Shanker Donthineni @ 2023-03-31 21:48 UTC (permalink / raw
  To: Catalin Marinas, Will Deacon, Jonathan Corbet, Ard Biesheuvel,
	Mark Rutland
  Cc: Anshuman Khandual, Kalesh Singh, Zhou Guanghui, linux-arm-kernel,
	linux-doc, linux-kernel, Vikram Sethi, Thierry Reding

Hi Ard,

On 3/30/23 09:04, Shanker Donthineni wrote:
> The allocation of modules occurs in two regions. The first region
> is MODULES_VSIZE, which is 128MB in size and shared with the core
> kernel when the KASLR feature is unavailable or disabled through
> a boot parameter. The second region, which is 2GB in size, is
> shared with the other vmalloc callers. Depending on the size of
> the core kernel, the 128MB region may quickly fill up after
> loading a few modules, causing the system to switch to the 2GB
> region. Unfortunately, even the 2GB region can run out of space
> if previously loaded modules and other kernel subsystems consume
> the entire area, leaving no space for additional modules.
> 
> This issue usually occurs when the system has a large number of
> CPU cores, PCIe host-bridge controllers, and I/O devices. For
> instance, the ECAM region of one host-bridge controller can use
> up to 256MB of vmalloc space, while eight controllers can occupy
> the entire 2GB.
> 
> To address this problem, a possible solution would be to increase
> the MODULES_VSIZE to 2GB. This would improve the system's ability
> to accommodate a greater number of dynamically loaded modules and
> drivers when KASLR is not enabled. However, prior to switching to
> the 2GB region, it is advisable to allocate modules within the
> 128MB space that covers the core kernel, in order to benefit from
> the direct branches.
> 
> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
> ---
> Changes since v1:
>   - Included Ard's recommendations.
>   - Revised the commit message.
> 
> dmesg:
>   On a NVIDIA T241 system with Ubuntu-22.04, hitting boot failures
>   due to vmalloc/vmap allocation errors when loading modules.
> 
>   [   64.181308] ipmi_ssif: IPMI SSIF Interface driver
>   [   64.184494] usbcore: registered new interface driver r8152
>   [   64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
>   [   64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
>   [   64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G           OE      6.2-generic-64k
>   [   64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
>   [   64.242515] Call trace:
>   [   64.242516]  dump_backtrace+0xe0/0x130
>   [   64.242523]  show_stack+0x20/0x60
>   [   64.242525]  dump_stack_lvl+0x68/0x84
>   [   64.242530]  dump_stack+0x18/0x34
>   [   64.242532]  warn_alloc+0x11c/0x1b0
>   [   64.242537]  __vmalloc_node_range+0xe0/0x20c
>   [   64.242540]  module_alloc+0x118/0x160
>   [   64.242543]  move_module+0x2c/0x190
>   [   64.242546]  layout_and_allocate+0xfc/0x160
>   [   64.242548]  load_module+0x260/0xbc4
>   [   64.242549]  __do_sys_finit_module+0xac/0x130
>   [   64.242551]  __arm64_sys_finit_module+0x28/0x34
>   [   64.242552]  invoke_syscall+0x78/0x100
>   [   64.242553]  el0_svc_common.constprop.0+0x170/0x194
>   [   64.242555]  do_el0_svc+0x38/0x4c
>   [   64.242556]  el0_svc+0x2c/0xc0
>   [   64.242558]  el0t_64_sync_handler+0xbc/0x13c
>   [   64.242560]  el0t_64_sync+0x1a0/0x1a4
> 
>   Documentation/arm64/memory.rst  | 8 ++++----
>   arch/arm64/include/asm/memory.h | 2 +-
>   arch/arm64/kernel/module.c      | 2 +-
>   3 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
> index 2a641ba7be3b..55a55f30eed8 100644
> --- a/Documentation/arm64/memory.rst
> +++ b/Documentation/arm64/memory.rst
> @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
>     0000000000000000	0000ffffffffffff	 256TB		user
>     ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
>    [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
> -  ffff800000000000	ffff800007ffffff	 128MB		modules
> -  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
> +  ffff800000000000	ffff80007fffffff	   2GB		modules
> +  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
>     fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
>     fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
>     fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
> @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
>     0000000000000000	000fffffffffffff	   4PB		user
>     fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
>    [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
> -  ffff800000000000	ffff800007ffffff	 128MB		modules
> -  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
> +  ffff800000000000	ffff80007fffffff	   2GB		modules
> +  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
>     fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
>     fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
>     fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index 78e5163836a0..b58c3127323e 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -46,7 +46,7 @@
>   #define KIMAGE_VADDR		(MODULES_END)
>   #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
>   #define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
> -#define MODULES_VSIZE		(SZ_128M)
> +#define MODULES_VSIZE		(SZ_2G)
>   #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
>   #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
>   #define PCI_IO_END		(VMEMMAP_START - SZ_8M)
> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 5af4975caeb5..b4affe775f23 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
>   		/* don't exceed the static module region - see below */
>   		module_alloc_end = MODULES_END;
>   
> -	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
> +	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
>   				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
>   				NUMA_NO_NODE, __builtin_return_address(0));
>   
I occasionally encounter the "overflow in relocation type 261" error, which suggests that
the address 'module_alloc_end - SZ_128M' may be outside the 2GB range starting from the
beginning of _stext. I used the following code to resolve the issue and also to enable
randomization of the module base within 128MB if CONFIG_RANDOMIZE_MODULE_REGION_FULL is
not defined.


--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
  #define KIMAGE_VADDR           (MODULES_END)
  #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
  #define MODULES_VADDR          (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE          (SZ_128M)
+#define MODULES_VSIZE          (SZ_2G)
  #define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
  #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
  #define PCI_IO_END             (VMEMMAP_START - SZ_8M)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f21a4c9..4f92fc511b85 100644
@@ -70,18 +70,19 @@ static int __init kaslr_init(void)
                  * resolved normally.)
                  */
                 module_range = SZ_2G - (u64)(_end - _stext);
-               module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
+               module_alloc_base = max((u64)_etext - SZ_2G, (u64)MODULES_VADDR);
         } else {
                 /*
                  * Randomize the module region by setting module_alloc_base to
-                * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+                * a PAGE_SIZE multiple in the range [_etext - SZ_128M,
                  * _stext) . This guarantees that the resulting region still
                  * covers [_stext, _etext], and that all relative branches can
                  * be resolved without veneers unless this region is exhausted
                  * and we fall back to a larger 2GB window in module_alloc()
                  * when ARM64_MODULE_PLTS is enabled.
                  */
-               module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+               module_range = SZ_128M - (u64)(_etext - _stext);
+               module_alloc_base = (u64)_etext - SZ_128M;
         }

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb5..b4affe775f23 100644
@@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
                 /* don't exceed the static module region - see below */
                 module_alloc_end = MODULES_END;

-       p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+       p = __vmalloc_node_range(size, MODULE_ALIGN, kaslr_enabled() ?
+                               module_alloc_base : module_alloc_end - SZ_128M,
                                 module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
                                 NUMA_NO_NODE, __builtin_return_address(0));



_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
@ 2023-03-31 21:48   ` Shanker Donthineni
  0 siblings, 0 replies; 6+ messages in thread
From: Shanker Donthineni @ 2023-03-31 21:48 UTC (permalink / raw
  To: Catalin Marinas, Will Deacon, Jonathan Corbet, Ard Biesheuvel,
	Mark Rutland
  Cc: Anshuman Khandual, Kalesh Singh, Zhou Guanghui, linux-arm-kernel,
	linux-doc, linux-kernel, Vikram Sethi, Thierry Reding

Hi Ard,

On 3/30/23 09:04, Shanker Donthineni wrote:
> The allocation of modules occurs in two regions. The first region
> is MODULES_VSIZE, which is 128MB in size and shared with the core
> kernel when the KASLR feature is unavailable or disabled through
> a boot parameter. The second region, which is 2GB in size, is
> shared with the other vmalloc callers. Depending on the size of
> the core kernel, the 128MB region may quickly fill up after
> loading a few modules, causing the system to switch to the 2GB
> region. Unfortunately, even the 2GB region can run out of space
> if previously loaded modules and other kernel subsystems consume
> the entire area, leaving no space for additional modules.
> 
> This issue usually occurs when the system has a large number of
> CPU cores, PCIe host-bridge controllers, and I/O devices. For
> instance, the ECAM region of one host-bridge controller can use
> up to 256MB of vmalloc space, while eight controllers can occupy
> the entire 2GB.
> 
> To address this problem, a possible solution would be to increase
> the MODULES_VSIZE to 2GB. This would improve the system's ability
> to accommodate a greater number of dynamically loaded modules and
> drivers when KASLR is not enabled. However, prior to switching to
> the 2GB region, it is advisable to allocate modules within the
> 128MB space that covers the core kernel, in order to benefit from
> the direct branches.
> 
> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
> ---
> Changes since v1:
>   - Included Ard's recommendations.
>   - Revised the commit message.
> 
> dmesg:
>   On a NVIDIA T241 system with Ubuntu-22.04, hitting boot failures
>   due to vmalloc/vmap allocation errors when loading modules.
> 
>   [   64.181308] ipmi_ssif: IPMI SSIF Interface driver
>   [   64.184494] usbcore: registered new interface driver r8152
>   [   64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
>   [   64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
>   [   64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G           OE      6.2-generic-64k
>   [   64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
>   [   64.242515] Call trace:
>   [   64.242516]  dump_backtrace+0xe0/0x130
>   [   64.242523]  show_stack+0x20/0x60
>   [   64.242525]  dump_stack_lvl+0x68/0x84
>   [   64.242530]  dump_stack+0x18/0x34
>   [   64.242532]  warn_alloc+0x11c/0x1b0
>   [   64.242537]  __vmalloc_node_range+0xe0/0x20c
>   [   64.242540]  module_alloc+0x118/0x160
>   [   64.242543]  move_module+0x2c/0x190
>   [   64.242546]  layout_and_allocate+0xfc/0x160
>   [   64.242548]  load_module+0x260/0xbc4
>   [   64.242549]  __do_sys_finit_module+0xac/0x130
>   [   64.242551]  __arm64_sys_finit_module+0x28/0x34
>   [   64.242552]  invoke_syscall+0x78/0x100
>   [   64.242553]  el0_svc_common.constprop.0+0x170/0x194
>   [   64.242555]  do_el0_svc+0x38/0x4c
>   [   64.242556]  el0_svc+0x2c/0xc0
>   [   64.242558]  el0t_64_sync_handler+0xbc/0x13c
>   [   64.242560]  el0t_64_sync+0x1a0/0x1a4
> 
>   Documentation/arm64/memory.rst  | 8 ++++----
>   arch/arm64/include/asm/memory.h | 2 +-
>   arch/arm64/kernel/module.c      | 2 +-
>   3 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
> index 2a641ba7be3b..55a55f30eed8 100644
> --- a/Documentation/arm64/memory.rst
> +++ b/Documentation/arm64/memory.rst
> @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
>     0000000000000000	0000ffffffffffff	 256TB		user
>     ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
>    [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
> -  ffff800000000000	ffff800007ffffff	 128MB		modules
> -  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
> +  ffff800000000000	ffff80007fffffff	   2GB		modules
> +  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
>     fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
>     fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
>     fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
> @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
>     0000000000000000	000fffffffffffff	   4PB		user
>     fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
>    [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
> -  ffff800000000000	ffff800007ffffff	 128MB		modules
> -  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
> +  ffff800000000000	ffff80007fffffff	   2GB		modules
> +  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
>     fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
>     fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
>     fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index 78e5163836a0..b58c3127323e 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -46,7 +46,7 @@
>   #define KIMAGE_VADDR		(MODULES_END)
>   #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
>   #define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
> -#define MODULES_VSIZE		(SZ_128M)
> +#define MODULES_VSIZE		(SZ_2G)
>   #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
>   #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
>   #define PCI_IO_END		(VMEMMAP_START - SZ_8M)
> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 5af4975caeb5..b4affe775f23 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
>   		/* don't exceed the static module region - see below */
>   		module_alloc_end = MODULES_END;
>   
> -	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
> +	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
>   				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
>   				NUMA_NO_NODE, __builtin_return_address(0));
>   
I occasionally encounter the "overflow in relocation type 261" error, which suggests that
the address 'module_alloc_end - SZ_128M' may be outside the 2GB range starting from the
beginning of _stext. I used the following code to resolve the issue and also to enable
randomization of the module base within 128MB if CONFIG_RANDOMIZE_MODULE_REGION_FULL is
not defined.


--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
  #define KIMAGE_VADDR           (MODULES_END)
  #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
  #define MODULES_VADDR          (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE          (SZ_128M)
+#define MODULES_VSIZE          (SZ_2G)
  #define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
  #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
  #define PCI_IO_END             (VMEMMAP_START - SZ_8M)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f21a4c9..4f92fc511b85 100644
@@ -70,18 +70,19 @@ static int __init kaslr_init(void)
                  * resolved normally.)
                  */
                 module_range = SZ_2G - (u64)(_end - _stext);
-               module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
+               module_alloc_base = max((u64)_etext - SZ_2G, (u64)MODULES_VADDR);
         } else {
                 /*
                  * Randomize the module region by setting module_alloc_base to
-                * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+                * a PAGE_SIZE multiple in the range [_etext - SZ_128M,
                  * _stext) . This guarantees that the resulting region still
                  * covers [_stext, _etext], and that all relative branches can
                  * be resolved without veneers unless this region is exhausted
                  * and we fall back to a larger 2GB window in module_alloc()
                  * when ARM64_MODULE_PLTS is enabled.
                  */
-               module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+               module_range = SZ_128M - (u64)(_etext - _stext);
+               module_alloc_base = (u64)_etext - SZ_128M;
         }

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb5..b4affe775f23 100644
@@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
                 /* don't exceed the static module region - see below */
                 module_alloc_end = MODULES_END;

-       p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+       p = __vmalloc_node_range(size, MODULE_ALIGN, kaslr_enabled() ?
+                               module_alloc_base : module_alloc_end - SZ_128M,
                                 module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
                                 NUMA_NO_NODE, __builtin_return_address(0));



^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
  2023-03-31 21:48   ` Shanker Donthineni
@ 2023-04-01 19:50     ` Shanker Donthineni
  -1 siblings, 0 replies; 6+ messages in thread
From: Shanker Donthineni @ 2023-04-01 19:50 UTC (permalink / raw
  To: Catalin Marinas, Will Deacon, Jonathan Corbet, Ard Biesheuvel,
	Mark Rutland
  Cc: Anshuman Khandual, Kalesh Singh, Zhou Guanghui, linux-arm-kernel,
	linux-doc, linux-kernel, Vikram Sethi, Thierry Reding

Hi Ard,

On 3/31/23 16:48, Shanker Donthineni wrote:
> Hi Ard,
> 
> On 3/30/23 09:04, Shanker Donthineni wrote:
>> The allocation of modules occurs in two regions. The first region
>> is MODULES_VSIZE, which is 128MB in size and shared with the core
>> kernel when the KASLR feature is unavailable or disabled through
>> a boot parameter. The second region, which is 2GB in size, is
>> shared with the other vmalloc callers. Depending on the size of
>> the core kernel, the 128MB region may quickly fill up after
>> loading a few modules, causing the system to switch to the 2GB
>> region. Unfortunately, even the 2GB region can run out of space
>> if previously loaded modules and other kernel subsystems consume
>> the entire area, leaving no space for additional modules.
>>
>> This issue usually occurs when the system has a large number of
>> CPU cores, PCIe host-bridge controllers, and I/O devices. For
>> instance, the ECAM region of one host-bridge controller can use
>> up to 256MB of vmalloc space, while eight controllers can occupy
>> the entire 2GB.
>>
>> To address this problem, a possible solution would be to increase
>> the MODULES_VSIZE to 2GB. This would improve the system's ability
>> to accommodate a greater number of dynamically loaded modules and
>> drivers when KASLR is not enabled. However, prior to switching to
>> the 2GB region, it is advisable to allocate modules within the
>> 128MB space that covers the core kernel, in order to benefit from
>> the direct branches.
>>
>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>> ---
>> Changes since v1:
>>   - Included Ard's recommendations.
>>   - Revised the commit message.
>>
>> dmesg:
>>   On a NVIDIA T241 system with Ubuntu-22.04, hitting boot failures
>>   due to vmalloc/vmap allocation errors when loading modules.
>>
>>   [   64.181308] ipmi_ssif: IPMI SSIF Interface driver
>>   [   64.184494] usbcore: registered new interface driver r8152
>>   [   64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
>>   [   64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
>>   [   64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G           OE      6.2-generic-64k
>>   [   64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
>>   [   64.242515] Call trace:
>>   [   64.242516]  dump_backtrace+0xe0/0x130
>>   [   64.242523]  show_stack+0x20/0x60
>>   [   64.242525]  dump_stack_lvl+0x68/0x84
>>   [   64.242530]  dump_stack+0x18/0x34
>>   [   64.242532]  warn_alloc+0x11c/0x1b0
>>   [   64.242537]  __vmalloc_node_range+0xe0/0x20c
>>   [   64.242540]  module_alloc+0x118/0x160
>>   [   64.242543]  move_module+0x2c/0x190
>>   [   64.242546]  layout_and_allocate+0xfc/0x160
>>   [   64.242548]  load_module+0x260/0xbc4
>>   [   64.242549]  __do_sys_finit_module+0xac/0x130
>>   [   64.242551]  __arm64_sys_finit_module+0x28/0x34
>>   [   64.242552]  invoke_syscall+0x78/0x100
>>   [   64.242553]  el0_svc_common.constprop.0+0x170/0x194
>>   [   64.242555]  do_el0_svc+0x38/0x4c
>>   [   64.242556]  el0_svc+0x2c/0xc0
>>   [   64.242558]  el0t_64_sync_handler+0xbc/0x13c
>>   [   64.242560]  el0t_64_sync+0x1a0/0x1a4
>>
>>   Documentation/arm64/memory.rst  | 8 ++++----
>>   arch/arm64/include/asm/memory.h | 2 +-
>>   arch/arm64/kernel/module.c      | 2 +-
>>   3 files changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
>> index 2a641ba7be3b..55a55f30eed8 100644
>> --- a/Documentation/arm64/memory.rst
>> +++ b/Documentation/arm64/memory.rst
>> @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
>>     0000000000000000    0000ffffffffffff     256TB        user
>>     ffff000000000000    ffff7fffffffffff     128TB        kernel logical memory map
>>    [ffff600000000000    ffff7fffffffffff]      32TB        [kasan shadow region]
>> -  ffff800000000000    ffff800007ffffff     128MB        modules
>> -  ffff800008000000    fffffbffefffffff     124TB        vmalloc
>> +  ffff800000000000    ffff80007fffffff       2GB        modules
>> +  ffff800080000000    fffffbffefffffff     124TB        vmalloc
>>     fffffbfff0000000    fffffbfffdffffff     224MB        fixed mappings (top down)
>>     fffffbfffe000000    fffffbfffe7fffff       8MB        [guard region]
>>     fffffbfffe800000    fffffbffff7fffff      16MB        PCI I/O space
>> @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
>>     0000000000000000    000fffffffffffff       4PB        user
>>     fff0000000000000    ffff7fffffffffff      ~4PB        kernel logical memory map
>>    [fffd800000000000    ffff7fffffffffff]     512TB        [kasan shadow region]
>> -  ffff800000000000    ffff800007ffffff     128MB        modules
>> -  ffff800008000000    fffffbffefffffff     124TB        vmalloc
>> +  ffff800000000000    ffff80007fffffff       2GB        modules
>> +  ffff800080000000    fffffbffefffffff     124TB        vmalloc
>>     fffffbfff0000000    fffffbfffdffffff     224MB        fixed mappings (top down)
>>     fffffbfffe000000    fffffbfffe7fffff       8MB        [guard region]
>>     fffffbfffe800000    fffffbffff7fffff      16MB        PCI I/O space
>> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
>> index 78e5163836a0..b58c3127323e 100644
>> --- a/arch/arm64/include/asm/memory.h
>> +++ b/arch/arm64/include/asm/memory.h
>> @@ -46,7 +46,7 @@
>>   #define KIMAGE_VADDR        (MODULES_END)
>>   #define MODULES_END        (MODULES_VADDR + MODULES_VSIZE)
>>   #define MODULES_VADDR        (_PAGE_END(VA_BITS_MIN))
>> -#define MODULES_VSIZE        (SZ_128M)
>> +#define MODULES_VSIZE        (SZ_2G)
>>   #define VMEMMAP_START        (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
>>   #define VMEMMAP_END        (VMEMMAP_START + VMEMMAP_SIZE)
>>   #define PCI_IO_END        (VMEMMAP_START - SZ_8M)
>> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
>> index 5af4975caeb5..b4affe775f23 100644
>> --- a/arch/arm64/kernel/module.c
>> +++ b/arch/arm64/kernel/module.c
>> @@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
>>           /* don't exceed the static module region - see below */
>>           module_alloc_end = MODULES_END;
>> -    p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
>> +    p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
>>                   module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
>>                   NUMA_NO_NODE, __builtin_return_address(0));
> Occasionally encountering the "overflow in relocation type 261" error suggests that the
> expression 'module_alloc_end - SZ_128M' may be outside the 2GB range starting from the
> beginning of _stext. I used the following code to resolve the issue and also to enable
> randomization of the module base within 128MB if CONFIG_RANDOMIZE_MODULE_REGION_FULL is
> not defined.
> 
> 
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -46,7 +46,7 @@
>   #define KIMAGE_VADDR           (MODULES_END)
>   #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
>   #define MODULES_VADDR          (_PAGE_END(VA_BITS_MIN))
> -#define MODULES_VSIZE          (SZ_128M)
> +#define MODULES_VSIZE          (SZ_2G)
>   #define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
>   #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
>   #define PCI_IO_END             (VMEMMAP_START - SZ_8M)
> diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
> index e7477f21a4c9..4f92fc511b85 100644
> @@ -70,18 +70,19 @@ static int __init kaslr_init(void)
>                   * resolved normally.)
>                   */
>                  module_range = SZ_2G - (u64)(_end - _stext);
> -               module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
> +               module_alloc_base = max((u64)_etext - SZ_2G, (u64)MODULES_VADDR);
>          } else {


Upon careful review, I have determined that this change, which computes the module range
from _end but the module base from _etext, is incorrect and not required. Please review
the following code and advise me whether I have overlooked any necessary changes, or
whether it is appropriate to post the v3 patch.

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
  #define KIMAGE_VADDR           (MODULES_END)
  #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
  #define MODULES_VADDR          (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE          (SZ_128M)
+#define MODULES_VSIZE          (SZ_2G)
  #define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
  #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
  #define PCI_IO_END             (VMEMMAP_START - SZ_8M)

diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
@@ -74,14 +74,15 @@ static int __init kaslr_init(void)
         } else {
                 /*
                  * Randomize the module region by setting module_alloc_base to
-                * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+                * a PAGE_SIZE multiple in the range [_etext - SZ_128M,
                  * _stext) . This guarantees that the resulting region still
                  * covers [_stext, _etext], and that all relative branches can
                  * be resolved without veneers unless this region is exhausted
                  * and we fall back to a larger 2GB window in module_alloc()
                  * when ARM64_MODULE_PLTS is enabled.
                  */
-               module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+               module_range = SZ_128M - (u64)(_etext - _stext);
+               module_alloc_base = (u64)_etext - SZ_128M;
         }

         /* use the lower 21 bits to randomize the base of the module region */

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
@@ -37,7 +37,8 @@ void *module_alloc(unsigned long size)
                 /* don't exceed the static module region - see below */
                 module_alloc_end = MODULES_END;

-       p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+       p = __vmalloc_node_range(size, MODULE_ALIGN, kaslr_enabled() ?
+                               module_alloc_base : module_alloc_end - SZ_128M,
                                 module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
                                 NUMA_NO_NODE, __builtin_return_address(0));

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
@ 2023-04-01 19:50     ` Shanker Donthineni
  0 siblings, 0 replies; 6+ messages in thread
From: Shanker Donthineni @ 2023-04-01 19:50 UTC (permalink / raw
  To: Catalin Marinas, Will Deacon, Jonathan Corbet, Ard Biesheuvel,
	Mark Rutland
  Cc: Anshuman Khandual, Kalesh Singh, Zhou Guanghui, linux-arm-kernel,
	linux-doc, linux-kernel, Vikram Sethi, Thierry Reding

Hi Ard,

On 3/31/23 16:48, Shanker Donthineni wrote:
> Hi Ard,
> 
> On 3/30/23 09:04, Shanker Donthineni wrote:
>> The allocation of modules occurs in two regions. The first region
>> is MODULES_VSIZE, which is 128MB in size and shared with the core
>> kernel when the KASLR feature is unavailable or disabled through
>> a boot parameter. The second region, which is 2GB in size, is
>> shared with the other vmalloc callers. Depending on the size of
>> the core kernel, the 128MB region may quickly fill up after
>> loading a few modules, causing the system to switch to the 2GB
>> region. Unfortunately, even the 2GB region can run out of space
>> if previously loaded modules and other kernel subsystems consume
>> the entire area, leaving no space for additional modules.
>>
>> This issue usually occurs when the system has a large number of
>> CPU cores, PCIe host-bridge controllers, and I/O devices. For
>> instance, the ECAM region of one host-bridge controller can use
>> up to 256MB of vmalloc space, while eight controllers can occupy
>> the entire 2GB.
>>
>> To address this problem, a possible solution would be to increase
>> the MODULES_VSIZE to 2GB. This would improve the system's ability
>> to accommodate a greater number of dynamically loaded modules and
>> drivers when KASLR is not enabled. However, prior to switching to
>> the 2GB region, it is advisable to allocate modules within the
>> 128MB space that covers the core kernel, in order to benefit from
>> the direct branches.
>>
>> Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
>> ---
>> Changes since v1:
>>   - Included Ard's recommendations.
>>   - Revised the commit message.
>>
>> dmesg:
>>   On a NVIDIA T241 system with Ubuntu-22.04, hitting boot failures
>>   due to vmalloc/vmap allocation errors when loading modules.
>>
>>   [   64.181308] ipmi_ssif: IPMI SSIF Interface driver
>>   [   64.184494] usbcore: registered new interface driver r8152
>>   [   64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
>>   [   64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
>>   [   64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G           OE      6.2-generic-64k
>>   [   64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
>>   [   64.242515] Call trace:
>>   [   64.242516]  dump_backtrace+0xe0/0x130
>>   [   64.242523]  show_stack+0x20/0x60
>>   [   64.242525]  dump_stack_lvl+0x68/0x84
>>   [   64.242530]  dump_stack+0x18/0x34
>>   [   64.242532]  warn_alloc+0x11c/0x1b0
>>   [   64.242537]  __vmalloc_node_range+0xe0/0x20c
>>   [   64.242540]  module_alloc+0x118/0x160
>>   [   64.242543]  move_module+0x2c/0x190
>>   [   64.242546]  layout_and_allocate+0xfc/0x160
>>   [   64.242548]  load_module+0x260/0xbc4
>>   [   64.242549]  __do_sys_finit_module+0xac/0x130
>>   [   64.242551]  __arm64_sys_finit_module+0x28/0x34
>>   [   64.242552]  invoke_syscall+0x78/0x100
>>   [   64.242553]  el0_svc_common.constprop.0+0x170/0x194
>>   [   64.242555]  do_el0_svc+0x38/0x4c
>>   [   64.242556]  el0_svc+0x2c/0xc0
>>   [   64.242558]  el0t_64_sync_handler+0xbc/0x13c
>>   [   64.242560]  el0t_64_sync+0x1a0/0x1a4
>>
>>   Documentation/arm64/memory.rst  | 8 ++++----
>>   arch/arm64/include/asm/memory.h | 2 +-
>>   arch/arm64/kernel/module.c      | 2 +-
>>   3 files changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
>> index 2a641ba7be3b..55a55f30eed8 100644
>> --- a/Documentation/arm64/memory.rst
>> +++ b/Documentation/arm64/memory.rst
>> @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
>>     0000000000000000    0000ffffffffffff     256TB        user
>>     ffff000000000000    ffff7fffffffffff     128TB        kernel logical memory map
>>    [ffff600000000000    ffff7fffffffffff]      32TB        [kasan shadow region]
>> -  ffff800000000000    ffff800007ffffff     128MB        modules
>> -  ffff800008000000    fffffbffefffffff     124TB        vmalloc
>> +  ffff800000000000    ffff80007fffffff       2GB        modules
>> +  ffff800080000000    fffffbffefffffff     124TB        vmalloc
>>     fffffbfff0000000    fffffbfffdffffff     224MB        fixed mappings (top down)
>>     fffffbfffe000000    fffffbfffe7fffff       8MB        [guard region]
>>     fffffbfffe800000    fffffbffff7fffff      16MB        PCI I/O space
>> @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
>>     0000000000000000    000fffffffffffff       4PB        user
>>     fff0000000000000    ffff7fffffffffff      ~4PB        kernel logical memory map
>>    [fffd800000000000    ffff7fffffffffff]     512TB        [kasan shadow region]
>> -  ffff800000000000    ffff800007ffffff     128MB        modules
>> -  ffff800008000000    fffffbffefffffff     124TB        vmalloc
>> +  ffff800000000000    ffff80007fffffff       2GB        modules
>> +  ffff800080000000    fffffbffefffffff     124TB        vmalloc
>>     fffffbfff0000000    fffffbfffdffffff     224MB        fixed mappings (top down)
>>     fffffbfffe000000    fffffbfffe7fffff       8MB        [guard region]
>>     fffffbfffe800000    fffffbffff7fffff      16MB        PCI I/O space
>> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
>> index 78e5163836a0..b58c3127323e 100644
>> --- a/arch/arm64/include/asm/memory.h
>> +++ b/arch/arm64/include/asm/memory.h
>> @@ -46,7 +46,7 @@
>>   #define KIMAGE_VADDR        (MODULES_END)
>>   #define MODULES_END        (MODULES_VADDR + MODULES_VSIZE)
>>   #define MODULES_VADDR        (_PAGE_END(VA_BITS_MIN))
>> -#define MODULES_VSIZE        (SZ_128M)
>> +#define MODULES_VSIZE        (SZ_2G)
>>   #define VMEMMAP_START        (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
>>   #define VMEMMAP_END        (VMEMMAP_START + VMEMMAP_SIZE)
>>   #define PCI_IO_END        (VMEMMAP_START - SZ_8M)
>> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
>> index 5af4975caeb5..b4affe775f23 100644
>> --- a/arch/arm64/kernel/module.c
>> +++ b/arch/arm64/kernel/module.c
>> @@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
>>           /* don't exceed the static module region - see below */
>>           module_alloc_end = MODULES_END;
>> -    p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
>> +    p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
>>                   module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
>>                   NUMA_NO_NODE, __builtin_return_address(0));
> Occasionally encountering the "overflow in relocation type 261" error suggests that the
> expression 'module_alloc_end - SZ_128M' may be outside the 2GB range starting from the
> beginning of _stext. I used the following code to resolve the issue and also to enable
> randomization of the module base within 128MB if CONFIG_RANDOMIZE_MODULE_REGION_FULL is
> not defined.
> 
> 
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -46,7 +46,7 @@
>   #define KIMAGE_VADDR           (MODULES_END)
>   #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
>   #define MODULES_VADDR          (_PAGE_END(VA_BITS_MIN))
> -#define MODULES_VSIZE          (SZ_128M)
> +#define MODULES_VSIZE          (SZ_2G)
>   #define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
>   #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
>   #define PCI_IO_END             (VMEMMAP_START - SZ_8M)
> diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
> index e7477f21a4c9..4f92fc511b85 100644
> @@ -70,18 +70,19 @@ static int __init kaslr_init(void)
>                   * resolved normally.)
>                   */
>                  module_range = SZ_2G - (u64)(_end - _stext);
> -               module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
> +               module_alloc_base = max((u64)_etext - SZ_2G, (u64)MODULES_VADDR);
>          } else {


Upon careful review, I have determined that this change, which computes the module range
from _end but the module base from _etext, is incorrect and not required. Please review
the following code and advise me whether I have overlooked any necessary changes, or
whether it is appropriate to post the v3 patch.

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
  #define KIMAGE_VADDR           (MODULES_END)
  #define MODULES_END            (MODULES_VADDR + MODULES_VSIZE)
  #define MODULES_VADDR          (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE          (SZ_128M)
+#define MODULES_VSIZE          (SZ_2G)
  #define VMEMMAP_START          (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
  #define VMEMMAP_END            (VMEMMAP_START + VMEMMAP_SIZE)
  #define PCI_IO_END             (VMEMMAP_START - SZ_8M)

diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
@@ -74,14 +74,15 @@ static int __init kaslr_init(void)
         } else {
                 /*
                  * Randomize the module region by setting module_alloc_base to
-                * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+                * a PAGE_SIZE multiple in the range [_etext - SZ_128M,
                  * _stext) . This guarantees that the resulting region still
                  * covers [_stext, _etext], and that all relative branches can
                  * be resolved without veneers unless this region is exhausted
                  * and we fall back to a larger 2GB window in module_alloc()
                  * when ARM64_MODULE_PLTS is enabled.
                  */
-               module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+               module_range = SZ_128M - (u64)(_etext - _stext);
+               module_alloc_base = (u64)_etext - SZ_128M;
         }

         /* use the lower 21 bits to randomize the base of the module region */

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
@@ -37,7 +37,8 @@ void *module_alloc(unsigned long size)
                 /* don't exceed the static module region - see below */
                 module_alloc_end = MODULES_END;

-       p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+       p = __vmalloc_node_range(size, MODULE_ALIGN, kaslr_enabled() ?
+                               module_alloc_base : module_alloc_end - SZ_128M,
                                 module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
                                 NUMA_NO_NODE, __builtin_return_address(0));

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-04-01 19:52 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2023-03-30 14:04 [PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB Shanker Donthineni
2023-03-30 14:04 ` Shanker Donthineni
2023-03-31 21:48 ` Shanker Donthineni
2023-03-31 21:48   ` Shanker Donthineni
2023-04-01 19:50   ` Shanker Donthineni
2023-04-01 19:50     ` Shanker Donthineni

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.