mirror of
https://github.com/esphome/esphome.git
synced 2025-09-02 03:12:20 +01:00
fix
This commit is contained in:
@@ -409,6 +409,8 @@ void HOT Scheduler::call(uint32_t now) {
|
||||
// This handles two cases:
|
||||
// 1. Item was marked for removal after cleanup_() but before we got here
|
||||
// 2. Item is marked for removal but wasn't at the front of the heap during cleanup_()
|
||||
// TEMPORARILY DISABLED TO VERIFY TEST CATCHES THE BUG
|
||||
/*
|
||||
#ifdef ESPHOME_THREAD_MULTI_NO_ATOMICS
|
||||
// Multi-threaded platforms without atomics: must take lock to safely read remove flag
|
||||
{
|
||||
@@ -428,6 +430,7 @@ void HOT Scheduler::call(uint32_t now) {
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
const char *item_name = item->get_name();
|
||||
|
118
tests/integration/fixtures/scheduler_removed_item_race.yaml
Normal file
118
tests/integration/fixtures/scheduler_removed_item_race.yaml
Normal file
@@ -0,0 +1,118 @@
|
||||
esphome:
|
||||
name: scheduler-removed-item-race
|
||||
|
||||
host:
|
||||
|
||||
api:
|
||||
services:
|
||||
- service: run_test
|
||||
then:
|
||||
- script.execute: run_test_script
|
||||
|
||||
logger:
|
||||
level: DEBUG
|
||||
|
||||
globals:
|
||||
- id: test_passed
|
||||
type: bool
|
||||
initial_value: 'true'
|
||||
- id: removed_item_executed
|
||||
type: int
|
||||
initial_value: '0'
|
||||
- id: normal_item_executed
|
||||
type: int
|
||||
initial_value: '0'
|
||||
|
||||
sensor:
|
||||
- platform: template
|
||||
id: test_sensor
|
||||
name: "Test Sensor"
|
||||
update_interval: never
|
||||
lambda: return 0.0;
|
||||
|
||||
script:
|
||||
- id: run_test_script
|
||||
then:
|
||||
- logger.log: "=== Starting Removed Item Race Test ==="
|
||||
|
||||
# This test creates a scenario where:
|
||||
# 1. Multiple timeouts are scheduled to execute at nearly the same time
|
||||
# 2. One timeout in the middle of the heap gets cancelled
|
||||
# 3. Without the fix, the cancelled timeout would still execute
|
||||
|
||||
- lambda: |-
|
||||
// Schedule multiple timeouts that will all be ready at the same time
|
||||
// This ensures they're all in the heap together
|
||||
|
||||
// First timeout - executes at 10ms
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout1", 10, []() {
|
||||
ESP_LOGD("test", "Timeout 1 executed (expected)");
|
||||
id(normal_item_executed)++;
|
||||
});
|
||||
|
||||
// Second timeout - executes at 10ms (will be cancelled)
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout2", 10, []() {
|
||||
ESP_LOGE("test", "RACE: Timeout 2 executed after being cancelled!");
|
||||
id(removed_item_executed)++;
|
||||
id(test_passed) = false;
|
||||
});
|
||||
|
||||
// Third timeout - executes at 10ms
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout3", 10, []() {
|
||||
ESP_LOGD("test", "Timeout 3 executed (expected)");
|
||||
id(normal_item_executed)++;
|
||||
});
|
||||
|
||||
// Fourth timeout - executes at 10ms
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout4", 10, []() {
|
||||
ESP_LOGD("test", "Timeout 4 executed (expected)");
|
||||
id(normal_item_executed)++;
|
||||
});
|
||||
|
||||
// Now cancel timeout2
|
||||
// Since all timeouts have the same execution time, they're all in the heap
|
||||
// timeout2 might not be at the front, so cleanup_() won't remove it
|
||||
bool cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout2");
|
||||
ESP_LOGD("test", "Cancelled timeout2: %s", cancelled ? "true" : "false");
|
||||
|
||||
// Also test with items at slightly different times
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout5", 11, []() {
|
||||
ESP_LOGD("test", "Timeout 5 executed (expected)");
|
||||
id(normal_item_executed)++;
|
||||
});
|
||||
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout6", 12, []() {
|
||||
ESP_LOGE("test", "RACE: Timeout 6 executed after being cancelled!");
|
||||
id(removed_item_executed)++;
|
||||
id(test_passed) = false;
|
||||
});
|
||||
|
||||
App.scheduler.set_timeout(id(test_sensor), "timeout7", 13, []() {
|
||||
ESP_LOGD("test", "Timeout 7 executed (expected)");
|
||||
id(normal_item_executed)++;
|
||||
});
|
||||
|
||||
// Cancel timeout6
|
||||
cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout6");
|
||||
ESP_LOGD("test", "Cancelled timeout6: %s", cancelled ? "true" : "false");
|
||||
|
||||
# Wait for all timeouts to execute (or not)
|
||||
- delay: 50ms
|
||||
|
||||
# Check results
|
||||
- lambda: |-
|
||||
ESP_LOGI("test", "=== Test Results ===");
|
||||
ESP_LOGI("test", "Normal items executed: %d (expected 5)", id(normal_item_executed));
|
||||
ESP_LOGI("test", "Removed items executed: %d (expected 0)", id(removed_item_executed));
|
||||
|
||||
if (id(removed_item_executed) > 0) {
|
||||
ESP_LOGE("test", "TEST FAILED: %d cancelled items were executed!", id(removed_item_executed));
|
||||
id(test_passed) = false;
|
||||
} else if (id(normal_item_executed) != 5) {
|
||||
ESP_LOGE("test", "TEST FAILED: Expected 5 normal items, got %d", id(normal_item_executed));
|
||||
id(test_passed) = false;
|
||||
} else {
|
||||
ESP_LOGI("test", "TEST PASSED: No cancelled items were executed");
|
||||
}
|
||||
|
||||
ESP_LOGI("test", "=== Test Complete ===");
|
102
tests/integration/test_scheduler_removed_item_race.py
Normal file
102
tests/integration/test_scheduler_removed_item_race.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""Test for scheduler race condition where removed items still execute."""
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
from .types import APIClientConnectedFactory, RunCompiledFunction
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_scheduler_removed_item_race(
|
||||
yaml_config: str,
|
||||
run_compiled: RunCompiledFunction,
|
||||
api_client_connected: APIClientConnectedFactory,
|
||||
) -> None:
|
||||
"""Test that items marked for removal don't execute.
|
||||
|
||||
This test verifies the fix for a race condition where:
|
||||
1. cleanup_() only removes items from the front of the heap
|
||||
2. Items in the middle of the heap marked for removal still execute
|
||||
3. This causes cancelled timeouts to run when they shouldn't
|
||||
"""
|
||||
|
||||
loop = asyncio.get_running_loop()
|
||||
test_complete_future: asyncio.Future[bool] = loop.create_future()
|
||||
|
||||
# Track test results
|
||||
test_passed = False
|
||||
removed_executed = 0
|
||||
normal_executed = 0
|
||||
|
||||
# Patterns to match
|
||||
race_pattern = re.compile(r"RACE: .* executed after being cancelled!")
|
||||
passed_pattern = re.compile(r"TEST PASSED")
|
||||
failed_pattern = re.compile(r"TEST FAILED")
|
||||
complete_pattern = re.compile(r"=== Test Complete ===")
|
||||
normal_count_pattern = re.compile(r"Normal items executed: (\d+)")
|
||||
removed_count_pattern = re.compile(r"Removed items executed: (\d+)")
|
||||
|
||||
def check_output(line: str) -> None:
|
||||
"""Check log output for test results."""
|
||||
nonlocal test_passed, removed_executed, normal_executed
|
||||
|
||||
if race_pattern.search(line):
|
||||
# Race condition detected - a cancelled item executed
|
||||
test_passed = False
|
||||
|
||||
if passed_pattern.search(line):
|
||||
test_passed = True
|
||||
elif failed_pattern.search(line):
|
||||
test_passed = False
|
||||
|
||||
normal_match = normal_count_pattern.search(line)
|
||||
if normal_match:
|
||||
normal_executed = int(normal_match.group(1))
|
||||
|
||||
removed_match = removed_count_pattern.search(line)
|
||||
if removed_match:
|
||||
removed_executed = int(removed_match.group(1))
|
||||
|
||||
if not test_complete_future.done() and complete_pattern.search(line):
|
||||
test_complete_future.set_result(True)
|
||||
|
||||
async with (
|
||||
run_compiled(yaml_config, line_callback=check_output),
|
||||
api_client_connected() as client,
|
||||
):
|
||||
# Verify we can connect
|
||||
device_info = await client.device_info()
|
||||
assert device_info is not None
|
||||
assert device_info.name == "scheduler-removed-item-race"
|
||||
|
||||
# List services
|
||||
_, services = await asyncio.wait_for(
|
||||
client.list_entities_services(), timeout=5.0
|
||||
)
|
||||
|
||||
# Find run_test service
|
||||
run_test_service = next((s for s in services if s.name == "run_test"), None)
|
||||
assert run_test_service is not None, "run_test service not found"
|
||||
|
||||
# Execute the test
|
||||
client.execute_service(run_test_service, {})
|
||||
|
||||
# Wait for test completion
|
||||
try:
|
||||
await asyncio.wait_for(test_complete_future, timeout=5.0)
|
||||
except TimeoutError:
|
||||
pytest.fail("Test did not complete within timeout")
|
||||
|
||||
# Verify results
|
||||
assert test_passed, (
|
||||
f"Test failed! Removed items executed: {removed_executed}, "
|
||||
f"Normal items executed: {normal_executed}"
|
||||
)
|
||||
assert removed_executed == 0, (
|
||||
f"Cancelled items should not execute, but {removed_executed} did"
|
||||
)
|
||||
assert normal_executed == 5, (
|
||||
f"Expected 5 normal items to execute, got {normal_executed}"
|
||||
)
|
Reference in New Issue
Block a user