diff --git a/esphome/core/scheduler.cpp b/esphome/core/scheduler.cpp
index be4301e19b..20739dedbb 100644
--- a/esphome/core/scheduler.cpp
+++ b/esphome/core/scheduler.cpp
@@ -15,21 +15,24 @@ namespace esphome {
 static const char *const TAG = "scheduler";
 
 // Memory pool configuration constants
-// Pool size of 10 is a balance between memory usage and performance:
-// - Small enough to not waste memory on simple configs (1-2 timers)
-// - Large enough to handle complex setups with multiple sensors/components
-// - Prevents system-wide stalls from heap allocation/deallocation that can
-//   disrupt task synchronization and cause dropped events
-static constexpr size_t MAX_POOL_SIZE = 10;
-// Maximum number of cancelled items to keep in the heap before forcing a cleanup.
-// Set to 6 to trigger cleanup relatively frequently, ensuring cancelled items are
-// recycled to the pool in a timely manner to maintain pool efficiency.
-static const uint32_t MAX_LOGICALLY_DELETED_ITEMS = 6;
+// Pool size of 5 matches typical usage patterns (2-4 active timers)
+// - Minimal memory overhead (~250 bytes on ESP32)
+// - Sufficient for most configs with a couple sensors/components
+// - Still prevents heap fragmentation and allocation stalls
+// - Complex setups with many timers will just allocate beyond the pool
+// See https://github.com/esphome/backlog/issues/52
+static constexpr size_t MAX_POOL_SIZE = 5;
 
-// Ensure MAX_LOGICALLY_DELETED_ITEMS is at least 4 smaller than MAX_POOL_SIZE
-// This guarantees we have room in the pool for recycled items when cleanup occurs
-static_assert(MAX_LOGICALLY_DELETED_ITEMS + 4 <= MAX_POOL_SIZE,
-              "MAX_LOGICALLY_DELETED_ITEMS must be at least 4 smaller than MAX_POOL_SIZE");
+// Cleanup is performed when cancelled items exceed this percentage of total items.
+// Using integer math: cleanup when (cancelled * 100 / total) > 50
+// This balances cleanup frequency with performance - we avoid O(n) cleanup
+// on every cancellation but don't let cancelled items accumulate excessively.
+static constexpr uint32_t CLEANUP_PERCENTAGE = 50;
+
+// Minimum number of cancelled items before considering cleanup.
+// Even if the fraction is exceeded, we need at least this many cancelled items
+// to make the O(n) cleanup operation worthwhile.
+static constexpr uint32_t MIN_CANCELLED_ITEMS_FOR_CLEANUP = 3;
 
 // Half the 32-bit range - used to detect rollovers vs normal time progression
 static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max() / 2;
@@ -417,8 +420,18 @@ void HOT Scheduler::call(uint32_t now) {
   }
 #endif /* ESPHOME_DEBUG_SCHEDULER */
 
-  // If we have too many items to remove
-  if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) {
+  // Check if we should perform cleanup based on percentage of cancelled items
+  // Cleanup when: cancelled items >= MIN_CANCELLED_ITEMS_FOR_CLEANUP AND
+  //               cancelled percentage > CLEANUP_PERCENTAGE
+  size_t total_items = this->items_.size();
+  bool should_cleanup = false;
+
+  if (this->to_remove_ >= MIN_CANCELLED_ITEMS_FOR_CLEANUP && total_items > 0) {
+    // Use integer math to avoid floating point: (cancelled * 100 / total) > CLEANUP_PERCENTAGE
+    should_cleanup = (this->to_remove_ * 100) > (total_items * CLEANUP_PERCENTAGE);
+  }
+
+  if (should_cleanup) {
     // We hold the lock for the entire cleanup operation because:
     // 1. We're rebuilding the entire items_ list, so we need exclusive access throughout
     // 2. Other threads must see either the old state or the new state, not intermediate states
diff --git a/esphome/core/scheduler.h b/esphome/core/scheduler.h
index 34e1725963..f16a3814ec 100644
--- a/esphome/core/scheduler.h
+++ b/esphome/core/scheduler.h
@@ -344,8 +344,8 @@ class Scheduler {
   // Memory pool for recycling SchedulerItem objects to reduce heap churn.
   // Design decisions:
   // - std::vector is used instead of a fixed array because many systems only need 1-2 scheduler items
-  // - The vector grows dynamically up to MAX_POOL_SIZE (10) only when needed, saving memory on simple setups
-  // - This approach balances memory efficiency for simple configs with performance for complex ones
+  // - The vector grows dynamically up to MAX_POOL_SIZE (5) only when needed, saving memory on simple setups
+  // - Pool size of 5 matches typical usage (2-4 timers) while keeping memory overhead low (~250 bytes on ESP32)
   // - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
   //   can stall the entire system, causing timing issues and dropped events for any components that need
   //   to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
diff --git a/tests/integration/fixtures/scheduler_pool.yaml b/tests/integration/fixtures/scheduler_pool.yaml
index e488f38e2e..5389125188 100644
--- a/tests/integration/fixtures/scheduler_pool.yaml
+++ b/tests/integration/fixtures/scheduler_pool.yaml
@@ -27,6 +27,9 @@ api:
     - service: run_phase_6
      then:
        - script.execute: test_full_pool_reuse
+    - service: run_phase_7
+      then:
+        - script.execute: test_same_defer_optimization
    - service: run_complete
      then:
        - script.execute: complete_test
@@ -87,7 +90,8 @@ script:
           auto *component = id(test_sensor);
 
           // Multiple sensors with different update intervals
-          App.scheduler.set_interval(component, "temp_sensor", 100, []() {
+          // These should only allocate once and reuse the same item for each interval execution
+          App.scheduler.set_interval(component, "temp_sensor", 10, []() {
             ESP_LOGD("test", "Temperature sensor update");
             id(interval_counter)++;
             if (id(interval_counter) >= 3) {
@@ -96,7 +100,7 @@ script:
             }
           });
 
-          App.scheduler.set_interval(component, "humidity_sensor", 150, []() {
+          App.scheduler.set_interval(component, "humidity_sensor", 15, []() {
             ESP_LOGD("test", "Humidity sensor update");
             id(interval_counter)++;
             if (id(interval_counter) >= 5) {
@@ -105,7 +109,9 @@ script:
             }
           });
 
+          // Only 2 allocations for the intervals, no matter how many times they execute
           id(create_count) += 2;
+          ESP_LOGD("test", "Created 2 intervals - they will reuse same items for each execution");
           ESP_LOGI("test", "Phase 2 complete");
 
   - id: test_communication_patterns
@@ -215,11 +221,14 @@ script:
   - id: test_full_pool_reuse
     then:
       - lambda: |-
-          ESP_LOGI("test", "Phase 6: Testing full pool reuse after Phase 5 items complete");
+          ESP_LOGI("test", "Phase 6: Testing pool size limits after Phase 5 items complete");
 
           // At this point, all Phase 5 timeouts should have completed and been recycled.
-          // The pool should be at or near its maximum size (10).
-          // Creating 10 new items should reuse all from the pool.
+          // The pool should be at its maximum size (5).
+          // Creating 10 new items tests that:
+          // - First 5 items reuse from the pool
+          // - Remaining 5 items allocate new (pool empty)
+          // - Pool doesn't grow beyond MAX_POOL_SIZE of 5
 
           auto *component = id(test_sensor);
           int full_reuse_count = 10;
@@ -235,6 +244,28 @@ script:
           id(create_count) += full_reuse_count;
           ESP_LOGI("test", "Phase 6 complete");
 
+  - id: test_same_defer_optimization
+    then:
+      - lambda: |-
+          ESP_LOGI("test", "Phase 7: Testing same-named defer optimization");
+
+          auto *component = id(test_sensor);
+
+          // Create 10 defers with the same name - should optimize to update callback in-place
+          // This pattern is common in components like ratgdo that repeatedly defer state updates
+          for (int i = 0; i < 10; i++) {
+            App.scheduler.set_timeout(component, "repeated_defer", 0, [i]() {
+              ESP_LOGD("test", "Repeated defer executed with value: %d", i);
+            });
+          }
+
+          // Only the first should allocate, the rest should update in-place
+          // We expect only 1 allocation for all 10 operations
+          id(create_count) += 1;  // Only count 1 since others should be optimized
+
+          ESP_LOGD("test", "Created 10 same-named defers (should only allocate once)");
+          ESP_LOGI("test", "Phase 7 complete");
+
   - id: complete_test
     then:
       - lambda: |-
diff --git a/tests/integration/test_scheduler_pool.py b/tests/integration/test_scheduler_pool.py
index bd878be180..b5f9f12631 100644
--- a/tests/integration/test_scheduler_pool.py
+++ b/tests/integration/test_scheduler_pool.py
@@ -48,6 +48,7 @@ async def test_scheduler_pool(
         4: loop.create_future(),
         5: loop.create_future(),
         6: loop.create_future(),
+        7: loop.create_future(),
     }
 
     def check_output(line: str) -> None:
@@ -69,9 +70,10 @@ async def test_scheduler_pool(
                 new_alloc_count += 1
 
         # Track phase completion
-        for phase_num in range(1, 7):
+        for phase_num in range(1, 8):
             if (
                 f"Phase {phase_num} complete" in line
+                and phase_num in phase_futures
                 and not phase_futures[phase_num].done()
             ):
                 phase_futures[phase_num].set_result(True)
@@ -102,6 +104,7 @@ async def test_scheduler_pool(
         "run_phase_4",
         "run_phase_5",
         "run_phase_6",
+        "run_phase_7",
         "run_complete",
     }
     assert expected_services.issubset(service_names), (
@@ -111,7 +114,7 @@ async def test_scheduler_pool(
     # Get service objects
     phase_services = {
         num: next(s for s in services if s.name == f"run_phase_{num}")
-        for num in range(1, 7)
+        for num in range(1, 8)
     }
 
     complete_service = next(s for s in services if s.name == "run_complete")
@@ -146,6 +149,11 @@ async def test_scheduler_pool(
     await asyncio.wait_for(phase_futures[6], timeout=1.0)
     await asyncio.sleep(0.1)  # Let Phase 6 timeouts complete
 
+    # Phase 7: Same-named defer optimization
+    client.execute_service(phase_services[7], {})
+    await asyncio.wait_for(phase_futures[7], timeout=1.0)
+    await asyncio.sleep(0.05)  # Let the single defer execute
+
     # Complete test
     client.execute_service(complete_service, {})
     await asyncio.wait_for(test_complete_future, timeout=0.5)
@@ -166,7 +174,7 @@ async def test_scheduler_pool(
     )
 
     # Verify all test phases ran
-    for phase_num in range(1, 7):
+    for phase_num in range(1, 8):
         assert phase_futures[phase_num].done(), f"Phase {phase_num} did not complete"
 
     # Verify pool behavior
@@ -180,8 +188,8 @@ async def test_scheduler_pool(
             size = int(match.group(1))
             max_pool_size = max(max_pool_size, size)
 
-    # Pool can grow up to its maximum of 10
-    assert max_pool_size <= 10, f"Pool grew beyond maximum ({max_pool_size})"
+    # Pool can grow up to its maximum of 5
+    assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})"
 
     # Log summary for debugging
     print("\nScheduler Pool Test Summary (Python Orchestrated):")