J. Nick Koston
2025-09-02 18:59:54 -05:00
parent 43634257f6
commit 0c5b63c382
4 changed files with 80 additions and 28 deletions


@@ -15,21 +15,24 @@ namespace esphome {
static const char *const TAG = "scheduler";
// Memory pool configuration constants
// Pool size of 10 is a balance between memory usage and performance:
// - Small enough to not waste memory on simple configs (1-2 timers)
// - Large enough to handle complex setups with multiple sensors/components
// - Prevents system-wide stalls from heap allocation/deallocation that can
// disrupt task synchronization and cause dropped events
static constexpr size_t MAX_POOL_SIZE = 10;
// Maximum number of cancelled items to keep in the heap before forcing a cleanup.
// Set to 6 to trigger cleanup relatively frequently, ensuring cancelled items are
// recycled to the pool in a timely manner to maintain pool efficiency.
static const uint32_t MAX_LOGICALLY_DELETED_ITEMS = 6;
// Pool size of 5 matches typical usage patterns (2-4 active timers)
// - Minimal memory overhead (~250 bytes on ESP32)
// - Sufficient for most configs with a couple sensors/components
// - Still prevents heap fragmentation and allocation stalls
// - Complex setups with many timers will just allocate beyond the pool
// See https://github.com/esphome/backlog/issues/52
static constexpr size_t MAX_POOL_SIZE = 5;
// Ensure MAX_LOGICALLY_DELETED_ITEMS is at least 4 smaller than MAX_POOL_SIZE
// This guarantees we have room in the pool for recycled items when cleanup occurs
static_assert(MAX_LOGICALLY_DELETED_ITEMS + 4 <= MAX_POOL_SIZE,
"MAX_LOGICALLY_DELETED_ITEMS must be at least 4 smaller than MAX_POOL_SIZE");
// Cleanup is performed when cancelled items exceed this percentage of total items.
// Using integer math: cleanup when (cancelled * 100 / total) > 50
// This balances cleanup frequency with performance - we avoid O(n) cleanup
// on every cancellation but don't let cancelled items accumulate excessively.
static constexpr uint32_t CLEANUP_PERCENTAGE = 50;
// Minimum number of cancelled items before considering cleanup.
// Even if the fraction is exceeded, we need at least this many cancelled items
// to make the O(n) cleanup operation worthwhile.
static constexpr uint32_t MIN_CANCELLED_ITEMS_FOR_CLEANUP = 3;
// Half the 32-bit range - used to detect rollovers vs normal time progression
static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max() / 2;
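For illustration, a constant like HALF_MAX_UINT32 is typically used as in the minimal sketch below to tell a 32-bit millis() rollover apart from normal forward progression; this is a hypothetical helper, not the scheduler's exact code:

#include <cstdint>
#include <limits>

// Same definition as above, repeated so the sketch is self-contained.
static constexpr uint32_t HALF_MAX_UINT32 = std::numeric_limits<uint32_t>::max() / 2;

// Returns true when `now` appears to have jumped backwards by more than half the
// 32-bit range relative to `last`, which indicates the counter wrapped around
// rather than time simply progressing.
static bool millis_rolled_over(uint32_t last, uint32_t now) {
  return now < last && (last - now) > HALF_MAX_UINT32;
}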
@@ -417,8 +420,18 @@ void HOT Scheduler::call(uint32_t now) {
}
#endif /* ESPHOME_DEBUG_SCHEDULER */
// If we have too many items to remove
if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) {
// Check if we should perform cleanup based on percentage of cancelled items
// Cleanup when: cancelled items >= MIN_CANCELLED_ITEMS_FOR_CLEANUP AND
// cancelled percentage > CLEANUP_PERCENTAGE
size_t total_items = this->items_.size();
bool should_cleanup = false;
if (this->to_remove_ >= MIN_CANCELLED_ITEMS_FOR_CLEANUP && total_items > 0) {
// Integer math, no floating point: the percentage test is cross-multiplied as (cancelled * 100) > (total * CLEANUP_PERCENTAGE)
should_cleanup = (this->to_remove_ * 100) > (total_items * CLEANUP_PERCENTAGE);
}
if (should_cleanup) {
// We hold the lock for the entire cleanup operation because:
// 1. We're rebuilding the entire items_ list, so we need exclusive access throughout
// 2. Other threads must see either the old state or the new state, not intermediate states
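
The locking discipline described here follows the general pattern sketched below (illustrative only: std::mutex and the item layout are stand-ins for the scheduler's own lock and SchedulerItem list). The lock is acquired once and held across the whole rebuild, so other threads observe either the old list or the fully rebuilt one:

#include <mutex>
#include <utility>
#include <vector>

struct FakeItem {  // stand-in for SchedulerItem
  bool cancelled;
};

std::mutex items_lock_;
std::vector<FakeItem> items_;

void cleanup_cancelled_items() {
  std::lock_guard<std::mutex> guard(items_lock_);  // held for the entire rebuild
  std::vector<FakeItem> kept;
  kept.reserve(items_.size());
  for (const auto &item : items_) {
    if (!item.cancelled)
      kept.push_back(item);  // keep only live items
  }
  items_ = std::move(kept);  // no intermediate state is ever visible to readers
}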


@@ -344,8 +344,8 @@ class Scheduler {
// Memory pool for recycling SchedulerItem objects to reduce heap churn.
// Design decisions:
// - std::vector is used instead of a fixed array because many systems only need 1-2 scheduler items
// - The vector grows dynamically up to MAX_POOL_SIZE (10) only when needed, saving memory on simple setups
// - This approach balances memory efficiency for simple configs with performance for complex ones
// - The vector grows dynamically up to MAX_POOL_SIZE (5) only when needed, saving memory on simple setups
// - Pool size of 5 matches typical usage (2-4 timers) while keeping memory overhead low (~250 bytes on ESP32)
// - The pool significantly reduces heap fragmentation which is critical because heap allocation/deallocation
// can stall the entire system, causing timing issues and dropped events for any components that need
// to synchronize between tasks (see https://github.com/esphome/backlog/issues/52)
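
A minimal sketch of the bounded recycling-pool pattern described above; Item, acquire(), and release() are illustrative stand-ins, not the Scheduler's actual API:

#include <cstddef>
#include <memory>
#include <utility>
#include <vector>

struct Item {};  // stand-in for SchedulerItem

class ItemPool {
 public:
  static constexpr std::size_t MAX_POOL_SIZE = 5;

  // Reuse a pooled item when one is available, otherwise fall back to the heap.
  std::unique_ptr<Item> acquire() {
    if (!pool_.empty()) {
      auto item = std::move(pool_.back());
      pool_.pop_back();
      return item;
    }
    return std::make_unique<Item>();
  }

  // Return a finished item; beyond MAX_POOL_SIZE it is simply freed, so the
  // pool never grows past its cap even under heavy timer churn.
  void release(std::unique_ptr<Item> item) {
    if (pool_.size() < MAX_POOL_SIZE)
      pool_.push_back(std::move(item));
  }

 private:
  std::vector<std::unique_ptr<Item>> pool_;  // grows lazily up to MAX_POOL_SIZE
};

With this shape, a config that only ever has one or two timers never pays for unused preallocated slots, while busier configs stop churning the heap once the pool warms up.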


@@ -27,6 +27,9 @@ api:
- service: run_phase_6
then:
- script.execute: test_full_pool_reuse
- service: run_phase_7
then:
- script.execute: test_same_defer_optimization
- service: run_complete
then:
- script.execute: complete_test
@@ -87,7 +90,8 @@ script:
auto *component = id(test_sensor);
// Multiple sensors with different update intervals
App.scheduler.set_interval(component, "temp_sensor", 100, []() {
// These should only allocate once and reuse the same item for each interval execution
App.scheduler.set_interval(component, "temp_sensor", 10, []() {
ESP_LOGD("test", "Temperature sensor update");
id(interval_counter)++;
if (id(interval_counter) >= 3) {
@@ -96,7 +100,7 @@ script:
}
});
App.scheduler.set_interval(component, "humidity_sensor", 150, []() {
App.scheduler.set_interval(component, "humidity_sensor", 15, []() {
ESP_LOGD("test", "Humidity sensor update");
id(interval_counter)++;
if (id(interval_counter) >= 5) {
@@ -105,7 +109,9 @@ script:
}
});
// Only 2 allocations for the intervals, no matter how many times they execute
id(create_count) += 2;
ESP_LOGD("test", "Created 2 intervals - they will reuse same items for each execution");
ESP_LOGI("test", "Phase 2 complete");
- id: test_communication_patterns
@@ -215,11 +221,14 @@ script:
- id: test_full_pool_reuse
then:
- lambda: |-
ESP_LOGI("test", "Phase 6: Testing full pool reuse after Phase 5 items complete");
ESP_LOGI("test", "Phase 6: Testing pool size limits after Phase 5 items complete");
// At this point, all Phase 5 timeouts should have completed and been recycled.
// The pool should be at or near its maximum size (10).
// Creating 10 new items should reuse all from the pool.
// The pool should be at its maximum size (5).
// Creating 10 new items tests that:
// - First 5 items reuse from the pool
// - Remaining 5 items allocate new (pool empty)
// - Pool doesn't grow beyond MAX_POOL_SIZE of 5
auto *component = id(test_sensor);
int full_reuse_count = 10;
@@ -235,6 +244,28 @@ script:
id(create_count) += full_reuse_count;
ESP_LOGI("test", "Phase 6 complete");
- id: test_same_defer_optimization
then:
- lambda: |-
ESP_LOGI("test", "Phase 7: Testing same-named defer optimization");
auto *component = id(test_sensor);
// Create 10 defers with the same name - should optimize to update callback in-place
// This pattern is common in components like ratgdo that repeatedly defer state updates
for (int i = 0; i < 10; i++) {
App.scheduler.set_timeout(component, "repeated_defer", 0, [i]() {
ESP_LOGD("test", "Repeated defer executed with value: %d", i);
});
}
// Only the first should allocate, the rest should update in-place
// We expect only 1 allocation for all 10 operations
id(create_count) += 1; // Only count 1 since others should be optimized
ESP_LOGD("test", "Created 10 same-named defers (should only allocate once)");
ESP_LOGI("test", "Phase 7 complete");
- id: complete_test
then:
- lambda: |-

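For reference on Phase 7 above, the same-named defer fast path being exercised can be pictured roughly as below; this is a simplified illustration under assumed names (DeferItem, set_defer), not the scheduler's real implementation:

#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct DeferItem {  // stand-in for the scheduler's deferred-call item
  std::string name;
  std::function<void()> callback;
};

std::vector<std::unique_ptr<DeferItem>> defers_;

void set_defer(const std::string &name, std::function<void()> callback) {
  // If a defer with this name is already queued, swap the callback in place:
  // no cancellation, no new allocation.
  for (auto &item : defers_) {
    if (item->name == name) {
      item->callback = std::move(callback);
      return;
    }
  }
  // First use of this name allocates once (ideally from the item pool).
  auto item = std::make_unique<DeferItem>();
  item->name = name;
  item->callback = std::move(callback);
  defers_.push_back(std::move(item));
}

Under a scheme like this, the ten set_timeout calls in Phase 7 touch the heap only for the first one, which is why the test credits a single allocation.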

@@ -48,6 +48,7 @@ async def test_scheduler_pool(
4: loop.create_future(),
5: loop.create_future(),
6: loop.create_future(),
7: loop.create_future(),
}
def check_output(line: str) -> None:
@@ -69,9 +70,10 @@ async def test_scheduler_pool(
new_alloc_count += 1
# Track phase completion
for phase_num in range(1, 7):
for phase_num in range(1, 8):
if (
f"Phase {phase_num} complete" in line
and phase_num in phase_futures
and not phase_futures[phase_num].done()
):
phase_futures[phase_num].set_result(True)
@@ -102,6 +104,7 @@ async def test_scheduler_pool(
"run_phase_4",
"run_phase_5",
"run_phase_6",
"run_phase_7",
"run_complete",
}
assert expected_services.issubset(service_names), (
@@ -111,7 +114,7 @@ async def test_scheduler_pool(
# Get service objects
phase_services = {
num: next(s for s in services if s.name == f"run_phase_{num}")
for num in range(1, 7)
for num in range(1, 8)
}
complete_service = next(s for s in services if s.name == "run_complete")
@@ -146,6 +149,11 @@ async def test_scheduler_pool(
await asyncio.wait_for(phase_futures[6], timeout=1.0)
await asyncio.sleep(0.1) # Let Phase 6 timeouts complete
# Phase 7: Same-named defer optimization
client.execute_service(phase_services[7], {})
await asyncio.wait_for(phase_futures[7], timeout=1.0)
await asyncio.sleep(0.05) # Let the single defer execute
# Complete test
client.execute_service(complete_service, {})
await asyncio.wait_for(test_complete_future, timeout=0.5)
@@ -166,7 +174,7 @@ async def test_scheduler_pool(
)
# Verify all test phases ran
for phase_num in range(1, 7):
for phase_num in range(1, 8):
assert phase_futures[phase_num].done(), f"Phase {phase_num} did not complete"
# Verify pool behavior
@@ -180,8 +188,8 @@ async def test_scheduler_pool(
size = int(match.group(1))
max_pool_size = max(max_pool_size, size)
# Pool can grow up to its maximum of 10
assert max_pool_size <= 10, f"Pool grew beyond maximum ({max_pool_size})"
# Pool can grow up to its maximum of 5
assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})"
# Log summary for debugging
print("\nScheduler Pool Test Summary (Python Orchestrated):")