mirror of
https://github.com/esphome/esphome.git
synced 2025-10-30 06:33:51 +00:00
[core] Add memory pool to scheduler to reduce heap fragmentation (#10536)
This commit is contained in:
282
tests/integration/fixtures/scheduler_pool.yaml
Normal file
282
tests/integration/fixtures/scheduler_pool.yaml
Normal file
@@ -0,0 +1,282 @@
|
||||
esphome:
|
||||
name: scheduler-pool-test
|
||||
on_boot:
|
||||
priority: -100
|
||||
then:
|
||||
- logger.log: "Starting scheduler pool tests"
|
||||
debug_scheduler: true # Enable scheduler debug logging
|
||||
|
||||
host:
|
||||
api:
|
||||
services:
|
||||
- service: run_phase_1
|
||||
then:
|
||||
- script.execute: test_pool_recycling
|
||||
- service: run_phase_2
|
||||
then:
|
||||
- script.execute: test_sensor_polling
|
||||
- service: run_phase_3
|
||||
then:
|
||||
- script.execute: test_communication_patterns
|
||||
- service: run_phase_4
|
||||
then:
|
||||
- script.execute: test_defer_patterns
|
||||
- service: run_phase_5
|
||||
then:
|
||||
- script.execute: test_pool_reuse_verification
|
||||
- service: run_phase_6
|
||||
then:
|
||||
- script.execute: test_full_pool_reuse
|
||||
- service: run_phase_7
|
||||
then:
|
||||
- script.execute: test_same_defer_optimization
|
||||
- service: run_complete
|
||||
then:
|
||||
- script.execute: complete_test
|
||||
logger:
|
||||
level: VERY_VERBOSE # Need VERY_VERBOSE to see pool debug messages
|
||||
|
||||
globals:
|
||||
- id: create_count
|
||||
type: int
|
||||
initial_value: '0'
|
||||
- id: cancel_count
|
||||
type: int
|
||||
initial_value: '0'
|
||||
- id: interval_counter
|
||||
type: int
|
||||
initial_value: '0'
|
||||
- id: pool_test_done
|
||||
type: bool
|
||||
initial_value: 'false'
|
||||
|
||||
script:
|
||||
- id: test_pool_recycling
|
||||
then:
|
||||
- logger.log: "Testing scheduler pool recycling with realistic usage patterns"
|
||||
- lambda: |-
|
||||
auto *component = id(test_sensor);
|
||||
|
||||
// Simulate realistic component behavior with timeouts that complete naturally
|
||||
ESP_LOGI("test", "Phase 1: Simulating normal component lifecycle");
|
||||
|
||||
// Sensor update timeouts (common pattern)
|
||||
App.scheduler.set_timeout(component, "sensor_init", 10, []() {
|
||||
ESP_LOGD("test", "Sensor initialized");
|
||||
id(create_count)++;
|
||||
});
|
||||
|
||||
// Retry timeout (gets cancelled if successful)
|
||||
App.scheduler.set_timeout(component, "retry_timeout", 50, []() {
|
||||
ESP_LOGD("test", "Retry timeout executed");
|
||||
id(create_count)++;
|
||||
});
|
||||
|
||||
// Simulate successful operation - cancel retry
|
||||
App.scheduler.set_timeout(component, "success_sim", 20, []() {
|
||||
ESP_LOGD("test", "Operation succeeded, cancelling retry");
|
||||
App.scheduler.cancel_timeout(id(test_sensor), "retry_timeout");
|
||||
id(cancel_count)++;
|
||||
});
|
||||
|
||||
id(create_count) += 3;
|
||||
ESP_LOGI("test", "Phase 1 complete");
|
||||
|
||||
- id: test_sensor_polling
|
||||
then:
|
||||
- lambda: |-
|
||||
// Simulate sensor polling pattern
|
||||
ESP_LOGI("test", "Phase 2: Simulating sensor polling patterns");
|
||||
auto *component = id(test_sensor);
|
||||
|
||||
// Multiple sensors with different update intervals
|
||||
// These should only allocate once and reuse the same item for each interval execution
|
||||
App.scheduler.set_interval(component, "temp_sensor", 10, []() {
|
||||
ESP_LOGD("test", "Temperature sensor update");
|
||||
id(interval_counter)++;
|
||||
if (id(interval_counter) >= 3) {
|
||||
App.scheduler.cancel_interval(id(test_sensor), "temp_sensor");
|
||||
ESP_LOGD("test", "Temperature sensor stopped");
|
||||
}
|
||||
});
|
||||
|
||||
App.scheduler.set_interval(component, "humidity_sensor", 15, []() {
|
||||
ESP_LOGD("test", "Humidity sensor update");
|
||||
id(interval_counter)++;
|
||||
if (id(interval_counter) >= 5) {
|
||||
App.scheduler.cancel_interval(id(test_sensor), "humidity_sensor");
|
||||
ESP_LOGD("test", "Humidity sensor stopped");
|
||||
}
|
||||
});
|
||||
|
||||
// Only 2 allocations for the intervals, no matter how many times they execute
|
||||
id(create_count) += 2;
|
||||
ESP_LOGD("test", "Created 2 intervals - they will reuse same items for each execution");
|
||||
ESP_LOGI("test", "Phase 2 complete");
|
||||
|
||||
- id: test_communication_patterns
|
||||
then:
|
||||
- lambda: |-
|
||||
// Simulate communication patterns (WiFi/API reconnects, etc)
|
||||
ESP_LOGI("test", "Phase 3: Simulating communication patterns");
|
||||
auto *component = id(test_sensor);
|
||||
|
||||
// Connection timeout pattern
|
||||
App.scheduler.set_timeout(component, "connect_timeout", 200, []() {
|
||||
ESP_LOGD("test", "Connection timeout - would retry");
|
||||
id(create_count)++;
|
||||
|
||||
// Schedule retry
|
||||
App.scheduler.set_timeout(id(test_sensor), "connect_retry", 100, []() {
|
||||
ESP_LOGD("test", "Retrying connection");
|
||||
id(create_count)++;
|
||||
});
|
||||
});
|
||||
|
||||
// Heartbeat pattern
|
||||
App.scheduler.set_interval(component, "heartbeat", 50, []() {
|
||||
ESP_LOGD("test", "Heartbeat");
|
||||
id(interval_counter)++;
|
||||
if (id(interval_counter) >= 10) {
|
||||
App.scheduler.cancel_interval(id(test_sensor), "heartbeat");
|
||||
ESP_LOGD("test", "Heartbeat stopped");
|
||||
}
|
||||
});
|
||||
|
||||
id(create_count) += 2;
|
||||
ESP_LOGI("test", "Phase 3 complete");
|
||||
|
||||
- id: test_defer_patterns
|
||||
then:
|
||||
- lambda: |-
|
||||
// Simulate defer patterns (state changes, async operations)
|
||||
ESP_LOGI("test", "Phase 4: Simulating heavy defer patterns like ratgdo");
|
||||
|
||||
auto *component = id(test_sensor);
|
||||
|
||||
// Simulate a burst of defer operations like ratgdo does with state updates
|
||||
// These should execute immediately and recycle quickly to the pool
|
||||
for (int i = 0; i < 10; i++) {
|
||||
std::string defer_name = "defer_" + std::to_string(i);
|
||||
App.scheduler.set_timeout(component, defer_name, 0, [i]() {
|
||||
ESP_LOGD("test", "Defer %d executed", i);
|
||||
// Force a small delay between defer executions to see recycling
|
||||
if (i == 5) {
|
||||
ESP_LOGI("test", "Half of defers executed, checking pool status");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
id(create_count) += 10;
|
||||
ESP_LOGD("test", "Created 10 defer operations (0ms timeouts)");
|
||||
|
||||
// Also create some named defers that might get replaced
|
||||
App.scheduler.set_timeout(component, "state_update", 0, []() {
|
||||
ESP_LOGD("test", "State update 1");
|
||||
});
|
||||
|
||||
// Replace the same named defer (should cancel previous)
|
||||
App.scheduler.set_timeout(component, "state_update", 0, []() {
|
||||
ESP_LOGD("test", "State update 2 (replaced)");
|
||||
});
|
||||
|
||||
id(create_count) += 2;
|
||||
id(cancel_count) += 1; // One cancelled due to replacement
|
||||
|
||||
ESP_LOGI("test", "Phase 4 complete");
|
||||
|
||||
- id: test_pool_reuse_verification
|
||||
then:
|
||||
- lambda: |-
|
||||
ESP_LOGI("test", "Phase 5: Verifying pool reuse after everything settles");
|
||||
|
||||
// Cancel any remaining intervals
|
||||
auto *component = id(test_sensor);
|
||||
App.scheduler.cancel_interval(component, "temp_sensor");
|
||||
App.scheduler.cancel_interval(component, "humidity_sensor");
|
||||
App.scheduler.cancel_interval(component, "heartbeat");
|
||||
|
||||
ESP_LOGD("test", "Cancelled any remaining intervals");
|
||||
|
||||
// The pool should have items from completed timeouts in earlier phases.
|
||||
// Phase 1 had 3 timeouts that completed and were recycled.
|
||||
// Phase 3 had 1 timeout that completed and was recycled.
|
||||
// Phase 4 had 3 defers that completed and were recycled.
|
||||
// So we should have a decent pool size already from naturally completed items.
|
||||
|
||||
// Now create 8 new timeouts - they should reuse from pool when available
|
||||
int reuse_test_count = 8;
|
||||
|
||||
for (int i = 0; i < reuse_test_count; i++) {
|
||||
std::string name = "reuse_test_" + std::to_string(i);
|
||||
App.scheduler.set_timeout(component, name, 10 + i * 5, [i]() {
|
||||
ESP_LOGD("test", "Reuse test %d completed", i);
|
||||
});
|
||||
}
|
||||
|
||||
ESP_LOGI("test", "Created %d items for reuse verification", reuse_test_count);
|
||||
id(create_count) += reuse_test_count;
|
||||
ESP_LOGI("test", "Phase 5 complete");
|
||||
|
||||
- id: test_full_pool_reuse
|
||||
then:
|
||||
- lambda: |-
|
||||
ESP_LOGI("test", "Phase 6: Testing pool size limits after Phase 5 items complete");
|
||||
|
||||
// At this point, all Phase 5 timeouts should have completed and been recycled.
|
||||
// The pool should be at its maximum size (5).
|
||||
// Creating 10 new items tests that:
|
||||
// - First 5 items reuse from the pool
|
||||
// - Remaining 5 items allocate new (pool empty)
|
||||
// - Pool doesn't grow beyond MAX_POOL_SIZE of 5
|
||||
|
||||
auto *component = id(test_sensor);
|
||||
int full_reuse_count = 10;
|
||||
|
||||
for (int i = 0; i < full_reuse_count; i++) {
|
||||
std::string name = "full_reuse_" + std::to_string(i);
|
||||
App.scheduler.set_timeout(component, name, 10 + i * 5, [i]() {
|
||||
ESP_LOGD("test", "Full reuse test %d completed", i);
|
||||
});
|
||||
}
|
||||
|
||||
ESP_LOGI("test", "Created %d items for full pool reuse verification", full_reuse_count);
|
||||
id(create_count) += full_reuse_count;
|
||||
ESP_LOGI("test", "Phase 6 complete");
|
||||
|
||||
- id: test_same_defer_optimization
|
||||
then:
|
||||
- lambda: |-
|
||||
ESP_LOGI("test", "Phase 7: Testing same-named defer optimization");
|
||||
|
||||
auto *component = id(test_sensor);
|
||||
|
||||
// Create 10 defers with the same name - should optimize to update callback in-place
|
||||
// This pattern is common in components like ratgdo that repeatedly defer state updates
|
||||
for (int i = 0; i < 10; i++) {
|
||||
App.scheduler.set_timeout(component, "repeated_defer", 0, [i]() {
|
||||
ESP_LOGD("test", "Repeated defer executed with value: %d", i);
|
||||
});
|
||||
}
|
||||
|
||||
// Only the first should allocate, the rest should update in-place
|
||||
// We expect only 1 allocation for all 10 operations
|
||||
id(create_count) += 1; // Only count 1 since others should be optimized
|
||||
|
||||
ESP_LOGD("test", "Created 10 same-named defers (should only allocate once)");
|
||||
ESP_LOGI("test", "Phase 7 complete");
|
||||
|
||||
- id: complete_test
|
||||
then:
|
||||
- lambda: |-
|
||||
ESP_LOGI("test", "Pool recycling test complete - created %d items, cancelled %d, intervals %d",
|
||||
id(create_count), id(cancel_count), id(interval_counter));
|
||||
|
||||
sensor:
|
||||
- platform: template
|
||||
name: Test Sensor
|
||||
id: test_sensor
|
||||
lambda: return 1.0;
|
||||
update_interval: never
|
||||
|
||||
# No interval - tests will be triggered from Python via API services
|
||||
209
tests/integration/test_scheduler_pool.py
Normal file
209
tests/integration/test_scheduler_pool.py
Normal file
@@ -0,0 +1,209 @@
|
||||
"""Integration test for scheduler memory pool functionality."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from .types import APIClientConnectedFactory, RunCompiledFunction
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_pool(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""Test that the scheduler memory pool is working correctly with realistic usage.
|
||||
|
||||
This test simulates real-world scheduler usage patterns and verifies that:
|
||||
1. Items are recycled to the pool when timeouts complete naturally
|
||||
2. Items are recycled when intervals/timeouts are cancelled
|
||||
3. Items are reused from the pool for new scheduler operations
|
||||
4. The pool grows gradually based on actual usage patterns
|
||||
5. Pool operations are logged correctly with debug scheduler enabled
|
||||
"""
|
||||
# Track log messages to verify pool behavior
|
||||
log_lines: list[str] = []
|
||||
pool_reuse_count = 0
|
||||
pool_recycle_count = 0
|
||||
pool_full_count = 0
|
||||
new_alloc_count = 0
|
||||
|
||||
# Patterns to match pool operations
|
||||
reuse_pattern = re.compile(r"Reused item from pool \(pool size now: (\d+)\)")
|
||||
recycle_pattern = re.compile(r"Recycled item to pool \(pool size now: (\d+)\)")
|
||||
pool_full_pattern = re.compile(r"Pool full \(size: (\d+)\), deleting item")
|
||||
new_alloc_pattern = re.compile(r"Allocated new item \(pool empty\)")
|
||||
|
||||
# Futures to track when test phases complete
|
||||
loop = asyncio.get_running_loop()
|
||||
test_complete_future: asyncio.Future[bool] = loop.create_future()
|
||||
phase_futures = {
|
||||
1: loop.create_future(),
|
||||
2: loop.create_future(),
|
||||
3: loop.create_future(),
|
||||
4: loop.create_future(),
|
||||
5: loop.create_future(),
|
||||
6: loop.create_future(),
|
||||
7: loop.create_future(),
|
||||
}
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
"""Check log output for pool operations and phase completion."""
|
||||
nonlocal pool_reuse_count, pool_recycle_count, pool_full_count, new_alloc_count
|
||||
log_lines.append(line)
|
||||
|
||||
# Track pool operations
|
||||
if reuse_pattern.search(line):
|
||||
pool_reuse_count += 1
|
||||
|
||||
elif recycle_pattern.search(line):
|
||||
pool_recycle_count += 1
|
||||
|
||||
elif pool_full_pattern.search(line):
|
||||
pool_full_count += 1
|
||||
|
||||
elif new_alloc_pattern.search(line):
|
||||
new_alloc_count += 1
|
||||
|
||||
# Track phase completion
|
||||
for phase_num in range(1, 8):
|
||||
if (
|
||||
f"Phase {phase_num} complete" in line
|
||||
and phase_num in phase_futures
|
||||
and not phase_futures[phase_num].done()
|
||||
):
|
||||
phase_futures[phase_num].set_result(True)
|
||||
|
||||
# Check for test completion
|
||||
if "Pool recycling test complete" in line and not test_complete_future.done():
|
||||
test_complete_future.set_result(True)
|
||||
|
||||
# Run the test with log monitoring
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
# Verify device is running
|
||||
device_info = await client.device_info()
|
||||
assert device_info is not None
|
||||
assert device_info.name == "scheduler-pool-test"
|
||||
|
||||
# Get list of services
|
||||
entities, services = await client.list_entities_services()
|
||||
service_names = {s.name for s in services}
|
||||
|
||||
# Verify all test services are available
|
||||
expected_services = {
|
||||
"run_phase_1",
|
||||
"run_phase_2",
|
||||
"run_phase_3",
|
||||
"run_phase_4",
|
||||
"run_phase_5",
|
||||
"run_phase_6",
|
||||
"run_phase_7",
|
||||
"run_complete",
|
||||
}
|
||||
assert expected_services.issubset(service_names), (
|
||||
f"Missing services: {expected_services - service_names}"
|
||||
)
|
||||
|
||||
# Get service objects
|
||||
phase_services = {
|
||||
num: next(s for s in services if s.name == f"run_phase_{num}")
|
||||
for num in range(1, 8)
|
||||
}
|
||||
complete_service = next(s for s in services if s.name == "run_complete")
|
||||
|
||||
try:
|
||||
# Phase 1: Component lifecycle
|
||||
client.execute_service(phase_services[1], {})
|
||||
await asyncio.wait_for(phase_futures[1], timeout=1.0)
|
||||
await asyncio.sleep(0.05) # Let timeouts complete
|
||||
|
||||
# Phase 2: Sensor polling
|
||||
client.execute_service(phase_services[2], {})
|
||||
await asyncio.wait_for(phase_futures[2], timeout=1.0)
|
||||
await asyncio.sleep(0.1) # Let intervals run a bit
|
||||
|
||||
# Phase 3: Communication patterns
|
||||
client.execute_service(phase_services[3], {})
|
||||
await asyncio.wait_for(phase_futures[3], timeout=1.0)
|
||||
await asyncio.sleep(0.1) # Let heartbeat run
|
||||
|
||||
# Phase 4: Defer patterns
|
||||
client.execute_service(phase_services[4], {})
|
||||
await asyncio.wait_for(phase_futures[4], timeout=1.0)
|
||||
await asyncio.sleep(0.2) # Let everything settle and recycle
|
||||
|
||||
# Phase 5: Pool reuse verification
|
||||
client.execute_service(phase_services[5], {})
|
||||
await asyncio.wait_for(phase_futures[5], timeout=1.0)
|
||||
await asyncio.sleep(0.1) # Let Phase 5 timeouts complete and recycle
|
||||
|
||||
# Phase 6: Full pool reuse verification
|
||||
client.execute_service(phase_services[6], {})
|
||||
await asyncio.wait_for(phase_futures[6], timeout=1.0)
|
||||
await asyncio.sleep(0.1) # Let Phase 6 timeouts complete
|
||||
|
||||
# Phase 7: Same-named defer optimization
|
||||
client.execute_service(phase_services[7], {})
|
||||
await asyncio.wait_for(phase_futures[7], timeout=1.0)
|
||||
await asyncio.sleep(0.05) # Let the single defer execute
|
||||
|
||||
# Complete test
|
||||
client.execute_service(complete_service, {})
|
||||
await asyncio.wait_for(test_complete_future, timeout=0.5)
|
||||
|
||||
except TimeoutError as e:
|
||||
# Print debug info if test times out
|
||||
recent_logs = "\n".join(log_lines[-30:])
|
||||
phases_completed = [num for num, fut in phase_futures.items() if fut.done()]
|
||||
pytest.fail(
|
||||
f"Test timed out waiting for phase/completion. Error: {e}\n"
|
||||
f" Phases completed: {phases_completed}\n"
|
||||
f" Pool stats:\n"
|
||||
f" Reuse count: {pool_reuse_count}\n"
|
||||
f" Recycle count: {pool_recycle_count}\n"
|
||||
f" Pool full count: {pool_full_count}\n"
|
||||
f" New alloc count: {new_alloc_count}\n"
|
||||
f"Recent logs:\n{recent_logs}"
|
||||
)
|
||||
|
||||
# Verify all test phases ran
|
||||
for phase_num in range(1, 8):
|
||||
assert phase_futures[phase_num].done(), f"Phase {phase_num} did not complete"
|
||||
|
||||
# Verify pool behavior
|
||||
assert pool_recycle_count > 0, "Should have recycled items to pool"
|
||||
|
||||
# Check pool metrics
|
||||
if pool_recycle_count > 0:
|
||||
max_pool_size = 0
|
||||
for line in log_lines:
|
||||
if match := recycle_pattern.search(line):
|
||||
size = int(match.group(1))
|
||||
max_pool_size = max(max_pool_size, size)
|
||||
|
||||
# Pool can grow up to its maximum of 5
|
||||
assert max_pool_size <= 5, f"Pool grew beyond maximum ({max_pool_size})"
|
||||
|
||||
# Log summary for debugging
|
||||
print("\nScheduler Pool Test Summary (Python Orchestrated):")
|
||||
print(f" Items recycled to pool: {pool_recycle_count}")
|
||||
print(f" Items reused from pool: {pool_reuse_count}")
|
||||
print(f" Pool full events: {pool_full_count}")
|
||||
print(f" New allocations: {new_alloc_count}")
|
||||
print(" All phases completed successfully")
|
||||
|
||||
# Verify reuse happened
|
||||
if pool_reuse_count == 0 and pool_recycle_count > 3:
|
||||
pytest.fail("Pool had items recycled but none were reused")
|
||||
|
||||
# Success - pool is working
|
||||
assert pool_recycle_count > 0 or new_alloc_count < 15, (
|
||||
"Pool should either recycle items or limit new allocations"
|
||||
)
|
||||
Reference in New Issue
Block a user