1
0
mirror of https://github.com/esphome/esphome.git synced 2025-09-02 03:12:20 +01:00
This commit is contained in:
J. Nick Koston
2025-08-17 16:12:53 -04:00
parent 5b674dc28c
commit e06dbffe9f
3 changed files with 223 additions and 0 deletions

View File

@@ -409,6 +409,8 @@ void HOT Scheduler::call(uint32_t now) {
// This handles two cases: // This handles two cases:
// 1. Item was marked for removal after cleanup_() but before we got here // 1. Item was marked for removal after cleanup_() but before we got here
// 2. Item is marked for removal but wasn't at the front of the heap during cleanup_() // 2. Item is marked for removal but wasn't at the front of the heap during cleanup_()
// TEMPORARILY DISABLED TO VERIFY TEST CATCHES THE BUG
/*
#ifdef ESPHOME_THREAD_MULTI_NO_ATOMICS #ifdef ESPHOME_THREAD_MULTI_NO_ATOMICS
// Multi-threaded platforms without atomics: must take lock to safely read remove flag // Multi-threaded platforms without atomics: must take lock to safely read remove flag
{ {
@@ -428,6 +430,7 @@ void HOT Scheduler::call(uint32_t now) {
continue; continue;
} }
#endif #endif
*/
#ifdef ESPHOME_DEBUG_SCHEDULER #ifdef ESPHOME_DEBUG_SCHEDULER
const char *item_name = item->get_name(); const char *item_name = item->get_name();

View File

@@ -0,0 +1,118 @@
esphome:
name: scheduler-removed-item-race
host:
api:
services:
- service: run_test
then:
- script.execute: run_test_script
logger:
level: DEBUG
globals:
- id: test_passed
type: bool
initial_value: 'true'
- id: removed_item_executed
type: int
initial_value: '0'
- id: normal_item_executed
type: int
initial_value: '0'
sensor:
- platform: template
id: test_sensor
name: "Test Sensor"
update_interval: never
lambda: return 0.0;
script:
- id: run_test_script
then:
- logger.log: "=== Starting Removed Item Race Test ==="
# This test creates a scenario where:
# 1. Multiple timeouts are scheduled to execute at nearly the same time
# 2. One timeout in the middle of the heap gets cancelled
# 3. Without the fix, the cancelled timeout would still execute
- lambda: |-
// Schedule multiple timeouts that will all be ready at the same time
// This ensures they're all in the heap together
// First timeout - executes at 10ms
App.scheduler.set_timeout(id(test_sensor), "timeout1", 10, []() {
ESP_LOGD("test", "Timeout 1 executed (expected)");
id(normal_item_executed)++;
});
// Second timeout - executes at 10ms (will be cancelled)
App.scheduler.set_timeout(id(test_sensor), "timeout2", 10, []() {
ESP_LOGE("test", "RACE: Timeout 2 executed after being cancelled!");
id(removed_item_executed)++;
id(test_passed) = false;
});
// Third timeout - executes at 10ms
App.scheduler.set_timeout(id(test_sensor), "timeout3", 10, []() {
ESP_LOGD("test", "Timeout 3 executed (expected)");
id(normal_item_executed)++;
});
// Fourth timeout - executes at 10ms
App.scheduler.set_timeout(id(test_sensor), "timeout4", 10, []() {
ESP_LOGD("test", "Timeout 4 executed (expected)");
id(normal_item_executed)++;
});
// Now cancel timeout2
// Since all timeouts have the same execution time, they're all in the heap
// timeout2 might not be at the front, so cleanup_() won't remove it
bool cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout2");
ESP_LOGD("test", "Cancelled timeout2: %s", cancelled ? "true" : "false");
// Also test with items at slightly different times
App.scheduler.set_timeout(id(test_sensor), "timeout5", 11, []() {
ESP_LOGD("test", "Timeout 5 executed (expected)");
id(normal_item_executed)++;
});
App.scheduler.set_timeout(id(test_sensor), "timeout6", 12, []() {
ESP_LOGE("test", "RACE: Timeout 6 executed after being cancelled!");
id(removed_item_executed)++;
id(test_passed) = false;
});
App.scheduler.set_timeout(id(test_sensor), "timeout7", 13, []() {
ESP_LOGD("test", "Timeout 7 executed (expected)");
id(normal_item_executed)++;
});
// Cancel timeout6
cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout6");
ESP_LOGD("test", "Cancelled timeout6: %s", cancelled ? "true" : "false");
# Wait for all timeouts to execute (or not)
- delay: 50ms
# Check results
- lambda: |-
ESP_LOGI("test", "=== Test Results ===");
ESP_LOGI("test", "Normal items executed: %d (expected 5)", id(normal_item_executed));
ESP_LOGI("test", "Removed items executed: %d (expected 0)", id(removed_item_executed));
if (id(removed_item_executed) > 0) {
ESP_LOGE("test", "TEST FAILED: %d cancelled items were executed!", id(removed_item_executed));
id(test_passed) = false;
} else if (id(normal_item_executed) != 5) {
ESP_LOGE("test", "TEST FAILED: Expected 5 normal items, got %d", id(normal_item_executed));
id(test_passed) = false;
} else {
ESP_LOGI("test", "TEST PASSED: No cancelled items were executed");
}
ESP_LOGI("test", "=== Test Complete ===");

View File

@@ -0,0 +1,102 @@
"""Test for scheduler race condition where removed items still execute."""
import asyncio
import re
import pytest
from .types import APIClientConnectedFactory, RunCompiledFunction
@pytest.mark.asyncio
async def test_scheduler_removed_item_race(
yaml_config: str,
run_compiled: RunCompiledFunction,
api_client_connected: APIClientConnectedFactory,
) -> None:
"""Test that items marked for removal don't execute.
This test verifies the fix for a race condition where:
1. cleanup_() only removes items from the front of the heap
2. Items in the middle of the heap marked for removal still execute
3. This causes cancelled timeouts to run when they shouldn't
"""
loop = asyncio.get_running_loop()
test_complete_future: asyncio.Future[bool] = loop.create_future()
# Track test results
test_passed = False
removed_executed = 0
normal_executed = 0
# Patterns to match
race_pattern = re.compile(r"RACE: .* executed after being cancelled!")
passed_pattern = re.compile(r"TEST PASSED")
failed_pattern = re.compile(r"TEST FAILED")
complete_pattern = re.compile(r"=== Test Complete ===")
normal_count_pattern = re.compile(r"Normal items executed: (\d+)")
removed_count_pattern = re.compile(r"Removed items executed: (\d+)")
def check_output(line: str) -> None:
"""Check log output for test results."""
nonlocal test_passed, removed_executed, normal_executed
if race_pattern.search(line):
# Race condition detected - a cancelled item executed
test_passed = False
if passed_pattern.search(line):
test_passed = True
elif failed_pattern.search(line):
test_passed = False
normal_match = normal_count_pattern.search(line)
if normal_match:
normal_executed = int(normal_match.group(1))
removed_match = removed_count_pattern.search(line)
if removed_match:
removed_executed = int(removed_match.group(1))
if not test_complete_future.done() and complete_pattern.search(line):
test_complete_future.set_result(True)
async with (
run_compiled(yaml_config, line_callback=check_output),
api_client_connected() as client,
):
# Verify we can connect
device_info = await client.device_info()
assert device_info is not None
assert device_info.name == "scheduler-removed-item-race"
# List services
_, services = await asyncio.wait_for(
client.list_entities_services(), timeout=5.0
)
# Find run_test service
run_test_service = next((s for s in services if s.name == "run_test"), None)
assert run_test_service is not None, "run_test service not found"
# Execute the test
client.execute_service(run_test_service, {})
# Wait for test completion
try:
await asyncio.wait_for(test_complete_future, timeout=5.0)
except TimeoutError:
pytest.fail("Test did not complete within timeout")
# Verify results
assert test_passed, (
f"Test failed! Removed items executed: {removed_executed}, "
f"Normal items executed: {normal_executed}"
)
assert removed_executed == 0, (
f"Cancelled items should not execute, but {removed_executed} did"
)
assert normal_executed == 5, (
f"Expected 5 normal items to execute, got {normal_executed}"
)