Skip to content

Commit b96a950

Browse files
committed
Improved heap and PSRAM handling
- Segment `allocateData()` uses more elaborate DRAM checking to reduce fragmentation and allow for larger setups to run on low heap - Segment data allocation fails if minimum contiguous block size runs low to keep the UI working - Increased `MAX_SEGMENT_DATA` to account for better segment data handling - Memory allocation functions try to keep enough DRAM for segment data - Added constant `PSRAM_THRESHOLD` to improve PSRAM usage - Increase MIN_HEAP_SIZE to reduce risk of breaking UI due to low memory for JSON response - ESP32 makes use of IRAM (no 8bit access) for pixel buffers, freeing up to 50kB of RAM - Fix to properly get available heap on all platforms: added function `getFreeHeapSize()` - Bugfix for effects that divide by SEGLEN: don't run FX in service() if segment is not active - Syntax fix in AR: calloc() uses (numelements, size) as arguments
1 parent ab5b6f9 commit b96a950

11 files changed

Lines changed: 178 additions & 69 deletions

File tree

usermods/audioreactive/audio_reactive.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,8 @@ void FFTcode(void * parameter)
224224
DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID());
225225

226226
// allocate FFT buffers on first call
227-
if (vReal == nullptr) vReal = (float*) calloc(sizeof(float), samplesFFT);
228-
if (vImag == nullptr) vImag = (float*) calloc(sizeof(float), samplesFFT);
227+
if (vReal == nullptr) vReal = (float*) calloc(samplesFFT, sizeof(float));
228+
if (vImag == nullptr) vImag = (float*) calloc(samplesFFT, sizeof(float));
229229
if ((vReal == nullptr) || (vImag == nullptr)) {
230230
// something went wrong
231231
if (vReal) free(vReal); vReal = nullptr;

wled00/FX.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,17 @@ extern byte realtimeMode; // used in getMappedPixelIndex()
8888
#endif
8989
#define FPS_CALC_SHIFT 7 // bit shift for fixed point math
9090

91-
/* each segment uses 82 bytes of SRAM memory, so if you're application fails because of
92-
insufficient memory, decreasing MAX_NUM_SEGMENTS may help */
91+
// heap memory limit for effects data, pixel buffers try to reserve it if PSRAM is available
9392
#ifdef ESP8266
9493
#define MAX_NUM_SEGMENTS 16
9594
/* How much data bytes all segments combined may allocate */
96-
#define MAX_SEGMENT_DATA 5120
95+
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*640) // 10k by default
9796
#elif defined(CONFIG_IDF_TARGET_ESP32S2)
9897
#define MAX_NUM_SEGMENTS 20
99-
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*512) // 10k by default (S2 is short on free RAM)
98+
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1024) // 20k by default (S2 is short on free RAM)
10099
#else
101100
#define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation
102-
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1280) // 40k by default
101+
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*2560) // 80k by default
103102
#endif
104103

105104
/* How much data bytes each segment should max allocate to leave enough space for other segments,
@@ -600,8 +599,12 @@ class Segment {
600599
, _t(nullptr)
601600
{
602601
DEBUGFX_PRINTF_P(PSTR("-- Creating segment: %p [%d,%d:%d,%d]\n"), this, (int)start, (int)stop, (int)startY, (int)stopY);
603-
// allocate render buffer (always entire segment)
604-
pixels = static_cast<uint32_t*>(d_calloc(sizeof(uint32_t), length())); // error handling is also done in isActive()
602+
// allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. Note: impact on FPS with PSRAM buffer is low (~2% with QSPI PSRAM)
603+
#ifdef CONFIG_IDF_TARGET_ESP32
604+
pixels = static_cast<uint32_t*>(pixelbuffer_malloc(sizeof(uint32_t) * length()));
605+
#else
606+
pixels = static_cast<uint32_t*>(p_calloc(length(), sizeof(uint32_t))); // prefer PSRAM. note: error handling is also done in isActive()
607+
#endif
605608
if (!pixels) {
606609
DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
607610
extern byte errorFlag;

wled00/FX_fcn.cpp

Lines changed: 68 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ Segment::Segment(const Segment &orig) {
7070
if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); }
7171
if (orig.pixels) {
7272
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * orig.length()));
73+
74+
// pixels = static_cast<uint32_t*>(heap_caps_malloc(orig.length()* sizeof(uint32_t), MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL)); // use this for ESP32
75+
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
76+
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT));
77+
78+
7379
if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length());
7480
else {
7581
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
@@ -111,6 +117,10 @@ Segment& Segment::operator= (const Segment &orig) {
111117
if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); }
112118
if (orig.pixels) {
113119
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * orig.length()));
120+
//TODO: also need to put this in 32bit memory on ESP32, maybe make that a function...
121+
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
122+
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT));
123+
114124
if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length());
115125
else {
116126
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
@@ -143,13 +153,22 @@ Segment& Segment::operator= (Segment &&orig) noexcept {
143153

144154
// allocates effect data buffer on heap and initialises (erases) it
145155
bool Segment::allocateData(size_t len) {
146-
if (len == 0) return false; // nothing to do
147-
if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation)
156+
if (len == 0) return false; // nothing to do
157+
if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation)
148158
if (call == 0) {
149-
//DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this);
150-
memset(data, 0, len); // erase buffer if called during effect initialisation
159+
if(checkHeapHealth()) {
160+
//DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this);
161+
memset(data, 0, len); // erase buffer if called during effect initialisation
162+
return true; // no need to reallocate
163+
}
164+
else {
165+
d_free(data); // free data and try to allocate again
166+
data = nullptr;
167+
Segment::addUsedSegmentData(-_dataLen); // subtract buffer size
168+
}
151169
}
152-
return true;
170+
else
171+
return true;
153172
}
154173
//DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this);
155174
if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) {
@@ -158,23 +177,29 @@ bool Segment::allocateData(size_t len) {
158177
errorFlag = ERR_NORAM;
159178
return false;
160179
}
161-
// prefer DRAM over SPI RAM on ESP32 since it is slow
180+
// prefer DRAM over PSRAM for speed
162181
if (data) {
163182
data = (byte*)d_realloc_malloc(data, len); // realloc with malloc fallback
164-
if (!data) {
165-
data = nullptr;
183+
if (data == nullptr) { // allocation failed
166184
Segment::addUsedSegmentData(-_dataLen); // subtract original buffer size
167185
_dataLen = 0; // reset data length
186+
return false;
168187
}
169188
}
170189
else data = (byte*)d_malloc(len);
171190

172191
if (data) {
173-
memset(data, 0, len); // erase buffer
174-
Segment::addUsedSegmentData(len - _dataLen);
175-
_dataLen = len;
176-
//DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data);
177-
return true;
192+
if(!checkHeapHealth()) {
193+
d_free(data);
194+
data = nullptr;
195+
}
196+
else {
197+
memset(data, 0, len); // erase buffer
198+
Segment::addUsedSegmentData(len);
199+
_dataLen = len;
200+
//DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data);
201+
return true;
202+
}
178203
}
179204
// allocation failed
180205
DEBUG_PRINTLN(F("!!! Allocation failed. !!!"));
@@ -205,7 +230,11 @@ void Segment::deallocateData() {
205230
void Segment::resetIfRequired() {
206231
if (!reset || !isActive()) return;
207232
//DEBUG_PRINTF_P(PSTR("-- Segment reset: %p\n"), this);
208-
if (data && _dataLen > 0) memset(data, 0, _dataLen); // prevent heap fragmentation (just erase buffer instead of deallocateData())
233+
if (data && _dataLen > 0) {
234+
if(_dataLen > FAIR_DATA_PER_SEG) deallocateData(); // do not keep large allocations
235+
else memset(data, 0, _dataLen); // can prevent heap fragmentation
236+
DEBUG_PRINTF_P(PSTR("-- Segment %p reset, data cleared\n"), this);
237+
}
209238
if (pixels) for (size_t i = 0; i < length(); i++) pixels[i] = BLACK; // clear pixel buffer
210239
next_time = 0; step = 0; call = 0; aux0 = 0; aux1 = 0;
211240
reset = false;
@@ -454,16 +483,26 @@ void Segment::setGeometry(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, ui
454483
stop = 0;
455484
return;
456485
}
457-
// re-allocate FX render buffer
486+
// allocate FX render buffer
458487
if (length() != oldLength) {
459-
if (pixels) d_free(pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it
460-
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * length()));
488+
if (pixels) free(pixels); // note: using realloc can block larger heap segments
489+
#ifdef CONFIG_IDF_TARGET_ESP32 // classic ESP32 only: pixelbuffer_malloc() is declared solely under this guard
490+
pixels = static_cast<uint32_t*>(pixelbuffer_malloc(sizeof(uint32_t) * length())); // buffer size in bytes: one uint32_t per pixel of the segment
491+
#else
492+
pixels = static_cast<uint32_t*>(p_malloc(sizeof(uint32_t) * length())); // all other targets: p_malloc() prefers PSRAM over DRAM
493+
#endif
494+
495+
if(!checkHeapHealth()) {
496+
d_free(pixels);
497+
pixels = nullptr;
498+
}
461499
if (!pixels) {
462500
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
463501
errorFlag = ERR_NORAM_PX;
464502
stop = 0;
465503
return;
466504
}
505+
467506
}
468507
refreshLightCapabilities();
469508
}
@@ -1198,7 +1237,7 @@ void WS2812FX::finalizeInit() {
11981237
bus->begin();
11991238
bus->setBrightness(bri);
12001239
}
1201-
DEBUG_PRINTF_P(PSTR("Heap after buses: %d\n"), ESP.getFreeHeap());
1240+
DEBUG_PRINTF_P(PSTR("Heap after buses: %d\n"), getFreeHeapSize());
12021241

12031242
Segment::maxWidth = _length;
12041243
Segment::maxHeight = 1;
@@ -1210,11 +1249,17 @@ void WS2812FX::finalizeInit() {
12101249
deserializeMap(); // (re)load default ledmap (will also setUpMatrix() if ledmap does not exist)
12111250

12121251
// allocate frame buffer after matrix has been set up (gaps!)
1213-
if (_pixels) d_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it
1214-
_pixels = static_cast<uint32_t*>(d_malloc(getLengthTotal() * sizeof(uint32_t)));
1252+
if (_pixels) d_free(_pixels);
1253+
#ifdef CONFIG_IDF_TARGET_ESP32 // classic ESP32 only: pixelbuffer_malloc() is declared solely under this guard
1254+
_pixels = static_cast<uint32_t*>(pixelbuffer_malloc(getLengthTotal() * sizeof(uint32_t), true)); // use 32bit RAM (IRAM) or PSRAM on ESP32
1255+
#elif !defined(ESP8266)
1256+
// use PSRAM on S2 and S3 if available (C3 defaults to DRAM). Note: there is no measurable performance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM
1257+
_pixels = static_cast<uint32_t*>(heap_caps_malloc_prefer(getLengthTotal() * sizeof(uint32_t), 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); // prefer PSRAM if it exists, fall back to internal RAM
1258+
#else
1259+
_pixels = static_cast<uint32_t*>(malloc(getLengthTotal() * sizeof(uint32_t))); // ESP8266 does not support advanced allocation API
1260+
#endif
12151261
DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t));
1216-
1217-
DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), ESP.getFreeHeap());
1262+
DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize());
12181263
}
12191264

12201265
void WS2812FX::service() {
@@ -1258,7 +1303,7 @@ void WS2812FX::service() {
12581303
// if segment is in transition and no old segment exists we don't need to run the old mode
12591304
// (blendSegments() takes care of On/Off transitions and clipping)
12601305
Segment *segO = seg.getOldSegment();
1261-
if (segO && (seg.mode != segO->mode || blendingStyle != BLEND_STYLE_FADE)) {
1306+
if (segO && (seg.mode != segO->mode || blendingStyle != BLEND_STYLE_FADE) && segO->isActive()) {
12621307
Segment::modeBlend(true); // set semaphore for beginDraw() to blend colors and palette
12631308
segO->beginDraw(prog); // set up palette & colors (also sets draw dimensions), parent segment has transition progress
12641309
_currentSegment = segO; // set current segment

wled00/cfg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ bool deserializeConfig(JsonObject doc, bool fromFS) {
201201
}
202202
#endif
203203

204-
DEBUG_PRINTF_P(PSTR("Heap before buses: %d\n"), ESP.getFreeHeap());
204+
DEBUG_PRINTF_P(PSTR("Heap before buses: %d\n"), getFreeHeapSize());
205205
JsonArray ins = hw_led["ins"];
206206
if (!ins.isNull()) {
207207
int s = 0; // bus iterator

wled00/const.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -546,8 +546,18 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit");
546546
#endif
547547
#endif
548548

549-
// minimum heap size required to process web requests
550-
#define MIN_HEAP_SIZE 8192
549+
// minimum heap size required to process web requests: try to keep free heap above this value
550+
#define MIN_HEAP_SIZE (12*1024)
551+
552+
// threshold for PSRAM use: if heap is running low, requests above PSRAM_THRESHOLD will be allocated in PSRAM
553+
// if heap is plenty, requests below PSRAM_THRESHOLD will be allocated in DRAM for speed
554+
#if defined(CONFIG_IDF_TARGET_ESP32S3)
555+
#define PSRAM_THRESHOLD 8192
556+
#elif defined(CONFIG_IDF_TARGET_ESP32)
557+
#define PSRAM_THRESHOLD 4096
558+
#else
559+
#define PSRAM_THRESHOLD 2048 // S2 does not have a lot of RAM, C3 and ESP8266 do not support PSRAM: the value is not used
560+
#endif
551561

552562
// Web server limits
553563
#ifdef ESP8266

wled00/fcn_declare.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,10 @@ inline uint8_t hw_random8() { return HW_RND_REGISTER; };
550550
// Scales one 8-bit hardware random value by upperlimit with a multiply and an 8-bit right shift. Input range 0-255.
inline uint8_t hw_random8(uint32_t upperlimit) {
  const uint32_t scaled = hw_random8() * upperlimit; // 8-bit random value times the limit
  return scaled >> 8;                                // drop the low 8 bits of the product
};
551551
// Offsets a scaled 8-bit hardware random value by lowerlimit; the scale is the span upperlimit - lowerlimit. Input range 0-255.
inline uint8_t hw_random8(uint32_t lowerlimit, uint32_t upperlimit) {
  return lowerlimit + hw_random8(upperlimit - lowerlimit);
};
552552

553-
// PSRAM allocation wrappers
553+
// memory allocation wrappers
554+
#ifdef CONFIG_IDF_TARGET_ESP32
555+
void *pixelbuffer_malloc(size_t size, bool enforcePSRAM = false); // prefer IRAM for pixel buffers if possible
556+
#endif
554557
#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3)
555558
extern "C" {
556559
void *p_malloc(size_t); // prefer PSRAM over DRAM
@@ -579,6 +582,12 @@ extern "C" {
579582
#define d_realloc_malloc realloc_malloc
580583
#define d_free free
581584
#endif
585+
bool checkHeapHealth(unsigned minFreeBlockSize = MIN_HEAP_SIZE); // checks heap fragmentation: returns true if contiguous free memory is larger than minFreeBlockSize
586+
#ifndef ESP8266
587+
// returns free heap (ESP.getFreeHeap() can include other memory types)
inline unsigned getFreeHeapSize() {
  return heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); // query restricted to the INTERNAL + 8BIT capability set
}
588+
#else
589+
// returns free heap
inline unsigned getFreeHeapSize() {
  return ESP.getFreeHeap(); // this build branch forwards directly to the ESP object
}
590+
#endif
582591

583592
// RAII guard class for the JSON Buffer lock
584593
// Modeled after std::lock_guard

wled00/json.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ void serializeInfo(JsonObject root)
829829
root[F("lwip")] = LWIP_VERSION_MAJOR;
830830
#endif
831831

832-
root[F("freeheap")] = ESP.getFreeHeap();
832+
root[F("freeheap")] = getFreeHeapSize();
833833
#if defined(ARDUINO_ARCH_ESP32)
834834
if (psramFound()) root[F("psram")] = ESP.getFreePsram();
835835
#endif

0 commit comments

Comments
 (0)