xtensa/esp32s3: allow moving .bss data to the external PSRAM

This commit allows placing .bss data in the external PSRAM.
Previously, the PSRAM was allocated entirely to the heap. Now part
of it can hold .bss data, freeing up internal memory.
This commit is contained in:
Tiago Medicci Serrano 2025-01-24 12:53:45 -03:00 committed by Alan C. Assis
parent 32bc95182a
commit 97aa90570c
3 changed files with 96 additions and 58 deletions

View file

@ -122,6 +122,9 @@ static struct smp_call_data_s g_call_data =
SMP_CALL_INITIALIZER(pause_cpu_handler, NULL);
#endif
extern uint8_t _ext_ram_bss_start;
extern uint8_t _ext_ram_bss_end;
/****************************************************************************
* ROM Function Prototypes
****************************************************************************/
@ -393,6 +396,7 @@ int IRAM_ATTR esp_spiram_init_cache(void)
uint32_t mapped_vaddr_size;
uint32_t target_mapped_vaddr_start;
uint32_t target_mapped_vaddr_end;
uint32_t ext_bss_size;
int ret = psram_get_available_size(&psram_size);
if (ret != OK)
@ -473,10 +477,12 @@ int IRAM_ATTR esp_spiram_init_cache(void)
cache_resume_dcache(0);
/* Currently no non-heap stuff on ESP32S3 */
ext_bss_size = ((intptr_t)&_ext_ram_bss_end -
(intptr_t)&_ext_ram_bss_start);
g_allocable_vaddr_start = g_mapped_vaddr_start;
g_allocable_vaddr_end = g_mapped_vaddr_start + g_mapped_size;
g_allocable_vaddr_start = g_mapped_vaddr_start + ext_bss_size;
g_allocable_vaddr_end = g_mapped_vaddr_start + g_mapped_size -
ext_bss_size;
return ret;
}

View file

@ -438,9 +438,64 @@ SECTIONS
_sheap = ABSOLUTE(.);
} >dram0_0_seg AT>ROM
_image_drom_vma = ADDR(.flash.rodata);
_image_drom_lma = LOADADDR(.flash.rodata);
_image_drom_size = LOADADDR(.flash.rodata) + SIZEOF(.flash.rodata) - _image_drom_lma;
/* The alignment of the ".flash.text" output section is forced to
* 0x00010000 (64KB) to ensure that it will be allocated at the beginning
* of the next available Flash block.
* This is required to meet the following constraint from the external
* flash MMU:
* VMA % 64KB == LMA % 64KB
* i.e. the lower 16 bits of both the virtual address (address seen by the
* CPU) and the load address (physical address of the external flash) must
* be equal.
*/
.flash.text : ALIGN(0x00010000)
{
_stext = .;
_instruction_reserved_start = ABSOLUTE(.);
*(.literal .text .literal.* .text.* .stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
*(.irom0.text) /* catch stray ICACHE_RODATA_ATTR */
*(.fini.literal)
*(.fini)
*(.gnu.version)
*(EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifi0iram EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifi0iram.*)
*(.wifiextrairam .wifiextrairam.*)
*(EXCLUDE_FILE(*libpp.a) .wifiorslpiram EXCLUDE_FILE(*libpp.a) .wifiorslpiram.*)
*(EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifirxiram EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifirxiram.*)
*(.wifislpiram .wifislpiram.*)
*(EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifislprxiram EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifislprxiram.*)
/* CPU will try to prefetch up to 16 bytes of instructions.
* This means that any configuration (e.g. MMU, PMS) must allow
* safe access to up to 16 bytes after the last real instruction, add
* dummy bytes to ensure this
*/
. += 16;
_instruction_reserved_end = ABSOLUTE(.);
_etext = .;
} >irom0_0_seg AT>ROM
_image_irom_vma = ADDR(.flash.text);
_image_irom_lma = LOADADDR(.flash.text);
_image_irom_size = LOADADDR(.flash.text) + SIZEOF(.flash.text) - _image_irom_lma;
/* Dummy section represents the .flash.text section but in drom0_0_seg.
* Thus, it must have its alignment and (at least) its size.
*/
.flash.rodata_dummy (NOLOAD) :
{
_flash_rodata_dummy_start = ABSOLUTE(.);
. = ALIGN(ALIGNOF(.flash.text)) + SIZEOF(.flash.text);
/* Add alignment of MMU page size + 0x20 bytes for the mapping header. */
. = ALIGN(0x10000) + 0x20;
} > drom0_0_seg
/* The alignment of the ".flash.rodata" output section is forced to
* 0x00010000 (64KB) to ensure that it will be allocated at the beginning
@ -453,11 +508,6 @@ SECTIONS
* be equal.
*/
.flash.rodata_dummy (NOLOAD) :
{
. = ALIGN(0x10000);
} > ROM
.flash.rodata : ALIGN(0x10000)
{
_rodata_reserved_start = ABSOLUTE(.);
@ -544,62 +594,33 @@ SECTIONS
} >drom0_0_seg AT>ROM
_rodata_reserved_align = ALIGNOF(.flash.rodata);
_image_irom_vma = ADDR(.flash.text);
_image_irom_lma = LOADADDR(.flash.text);
_image_irom_size = LOADADDR(.flash.text) + SIZEOF(.flash.text) - _image_irom_lma;
_image_drom_vma = ADDR(.flash.rodata);
_image_drom_lma = LOADADDR(.flash.rodata);
_image_drom_size = LOADADDR(.flash.rodata) + SIZEOF(.flash.rodata) - _image_drom_lma;
/* The alignment of the ".flash.text" output section is forced to
* 0x00010000 (64KB) to ensure that it will be allocated at the beginning
* of the next available Flash block.
* This is required to meet the following constraint from the external
* flash MMU:
* VMA % 64KB == LMA % 64KB
* i.e. the lower 16 bits of both the virtual address (address seen by the
* CPU) and the load address (physical address of the external flash) must
* be equal.
/* Dummy section to skip the flash rodata sections. This is required
* because `extern_ram_seg` and `drom0_0_seg` are on the same bus.
*/
#ifndef CONFIG_ESP32S3_RUN_IRAM
.flash.text_dummy (NOLOAD) : ALIGN(0x10000)
.ext_ram.dummy (NOLOAD):
{
/* This section is required to skip .flash.rodata area because irom0_0_seg
* and drom0_0_seg reflect the same address space on different buses.
*/
. = ORIGIN(extern_ram_seg);
. = . + (_rodata_reserved_end - _flash_rodata_dummy_start);
. = ALIGN (0x10000);
} > extern_ram_seg
. += _image_drom_lma;
. += _image_drom_size;
} >irom0_0_seg
#endif
/* This section holds .ext_ram.bss data, and will be put in PSRAM */
.flash.text : ALIGN(0x00010000)
.ext_ram.bss (NOLOAD) :
{
_stext = .;
_instruction_reserved_start = ABSOLUTE(.);
_ext_ram_bss_start = ABSOLUTE(.);
*(.literal .text .literal.* .text.* .stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
*(.irom0.text) /* catch stray ICACHE_RODATA_ATTR */
*(.fini.literal)
*(.fini)
*(.gnu.version)
*(.ext_ram.bss .ext_ram.bss.*)
*libpython3.13.a:(*.PyRuntime)
*(EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifi0iram EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifi0iram.*)
*(.wifiextrairam .wifiextrairam.*)
*(EXCLUDE_FILE(*libpp.a) .wifiorslpiram EXCLUDE_FILE(*libpp.a) .wifiorslpiram.*)
*(EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifirxiram EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifirxiram.*)
*(.wifislpiram .wifislpiram.*)
*(EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifislprxiram EXCLUDE_FILE(*libnet80211.a *libpp.a) .wifislprxiram.*)
/* CPU will try to prefetch up to 16 bytes of instructions.
* This means that any configuration (e.g. MMU, PMS) must allow
* safe access to up to 16 bytes after the last real instruction, add
* dummy bytes to ensure this
*/
. += 16;
_instruction_reserved_end = ABSOLUTE(.);
_etext = .;
} >irom0_0_seg AT>ROM
. = ALIGN(4);
_ext_ram_bss_end = ABSOLUTE(.);
} > extern_ram_seg
.rtc.text :
{

View file

@ -184,6 +184,17 @@ MEMORY
rtc_slow_seg(RW) : org = 0x50000000 + CONFIG_ESP32S3_ULP_COPROC_RESERVE_MEM,
len = 0x2000 - CONFIG_ESP32S3_ULP_COPROC_RESERVE_MEM
/* `extern_ram_seg` and `drom0_0_seg` share the same bus and address region.
* A dummy section is used to avoid overlap. See `.ext_ram.dummy` in `esp32s3_sections.ld`.
*/
#ifdef CONFIG_ESP32S3_APP_FORMAT_MCUBOOT
extern_ram_seg(RWX) : org = 0x3c000000 + ORIGIN(ROM),
len = 0x2000000 - ORIGIN(ROM)
#else
extern_ram_seg(RWX) : org = 0x3c000020 , len = 0x2000000-0x20
#endif
}
#ifdef CONFIG_ESP32S3_RUN_IRAM