1
0
mirror of https://github.com/esphome/esphome.git synced 2025-09-02 03:12:20 +01:00
This commit is contained in:
J. Nick Koston
2025-08-17 16:28:32 -04:00
parent e06dbffe9f
commit c56fd00a7c
3 changed files with 69 additions and 51 deletions

View File

@@ -409,8 +409,6 @@ void HOT Scheduler::call(uint32_t now) {
// This handles two cases:
// 1. Item was marked for removal after cleanup_() but before we got here
// 2. Item is marked for removal but wasn't at the front of the heap during cleanup_()
// TEMPORARILY DISABLED TO VERIFY TEST CATCHES THE BUG
/*
#ifdef ESPHOME_THREAD_MULTI_NO_ATOMICS
// Multi-threaded platforms without atomics: must take lock to safely read remove flag
{
@@ -430,7 +428,6 @@ void HOT Scheduler::call(uint32_t now) {
continue;
}
#endif
*/
#ifdef ESPHOME_DEBUG_SCHEDULER
const char *item_name = item->get_name();

View File

@@ -36,80 +36,101 @@ script:
- logger.log: "=== Starting Removed Item Race Test ==="
# This test creates a scenario where:
# 1. Multiple timeouts are scheduled to execute at nearly the same time
# 2. One timeout in the middle of the heap gets cancelled
# 3. Without the fix, the cancelled timeout would still execute
# 1. First item in heap is NOT cancelled (cleanup stops immediately)
# 2. Items behind it ARE cancelled (remain in heap after cleanup)
# 3. All items execute at the same time, including cancelled ones
- lambda: |-
// Schedule multiple timeouts that will all be ready at the same time
// This ensures they're all in the heap together
// The key to hitting the race:
// 1. Add items in a specific order to control heap structure
// 2. Cancel ONLY items that won't be at the front
// 3. Ensure the first item stays non-cancelled so cleanup_() stops immediately
// First timeout - executes at 10ms
App.scheduler.set_timeout(id(test_sensor), "timeout1", 10, []() {
ESP_LOGD("test", "Timeout 1 executed (expected)");
// Schedule all items to execute at the SAME time (1ms from now)
// Using 1ms instead of 0 to avoid defer queue on multi-core platforms
// This ensures they'll all be ready together and go through the heap
const uint32_t exec_time = 1;
// CRITICAL: Add a non-cancellable item FIRST
// This will be at the front of the heap and block cleanup_()
App.scheduler.set_timeout(id(test_sensor), "blocker", exec_time, []() {
ESP_LOGD("test", "Blocker timeout executed (expected) - was at front of heap");
id(normal_item_executed)++;
});
// Second timeout - executes at 10ms (will be cancelled)
App.scheduler.set_timeout(id(test_sensor), "timeout2", 10, []() {
ESP_LOGE("test", "RACE: Timeout 2 executed after being cancelled!");
// Now add items that we WILL cancel
// These will be behind the blocker in the heap
App.scheduler.set_timeout(id(test_sensor), "cancel_1", exec_time, []() {
ESP_LOGE("test", "RACE: Cancelled timeout 1 executed after being cancelled!");
id(removed_item_executed)++;
id(test_passed) = false;
});
// Third timeout - executes at 10ms
App.scheduler.set_timeout(id(test_sensor), "timeout3", 10, []() {
ESP_LOGD("test", "Timeout 3 executed (expected)");
id(normal_item_executed)++;
});
// Fourth timeout - executes at 10ms
App.scheduler.set_timeout(id(test_sensor), "timeout4", 10, []() {
ESP_LOGD("test", "Timeout 4 executed (expected)");
id(normal_item_executed)++;
});
// Now cancel timeout2
// Since all timeouts have the same execution time, they're all in the heap
// timeout2 might not be at the front, so cleanup_() won't remove it
bool cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout2");
ESP_LOGD("test", "Cancelled timeout2: %s", cancelled ? "true" : "false");
// Also test with items at slightly different times
App.scheduler.set_timeout(id(test_sensor), "timeout5", 11, []() {
ESP_LOGD("test", "Timeout 5 executed (expected)");
id(normal_item_executed)++;
});
App.scheduler.set_timeout(id(test_sensor), "timeout6", 12, []() {
ESP_LOGE("test", "RACE: Timeout 6 executed after being cancelled!");
App.scheduler.set_timeout(id(test_sensor), "cancel_2", exec_time, []() {
ESP_LOGE("test", "RACE: Cancelled timeout 2 executed after being cancelled!");
id(removed_item_executed)++;
id(test_passed) = false;
});
App.scheduler.set_timeout(id(test_sensor), "timeout7", 13, []() {
ESP_LOGD("test", "Timeout 7 executed (expected)");
App.scheduler.set_timeout(id(test_sensor), "cancel_3", exec_time, []() {
ESP_LOGE("test", "RACE: Cancelled timeout 3 executed after being cancelled!");
id(removed_item_executed)++;
id(test_passed) = false;
});
// Add some more normal items
App.scheduler.set_timeout(id(test_sensor), "normal_1", exec_time, []() {
ESP_LOGD("test", "Normal timeout 1 executed (expected)");
id(normal_item_executed)++;
});
// Cancel timeout6
cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout6");
ESP_LOGD("test", "Cancelled timeout6: %s", cancelled ? "true" : "false");
App.scheduler.set_timeout(id(test_sensor), "normal_2", exec_time, []() {
ESP_LOGD("test", "Normal timeout 2 executed (expected)");
id(normal_item_executed)++;
});
App.scheduler.set_timeout(id(test_sensor), "normal_3", exec_time, []() {
ESP_LOGD("test", "Normal timeout 3 executed (expected)");
id(normal_item_executed)++;
});
// Force items into the heap before cancelling
App.scheduler.process_to_add();
// NOW cancel the items - they're behind "blocker" in the heap
// When cleanup_() runs, it will see "blocker" (not removed) at the front
// and stop immediately, leaving cancel_1, cancel_2, cancel_3 in the heap
bool c1 = App.scheduler.cancel_timeout(id(test_sensor), "cancel_1");
bool c2 = App.scheduler.cancel_timeout(id(test_sensor), "cancel_2");
bool c3 = App.scheduler.cancel_timeout(id(test_sensor), "cancel_3");
ESP_LOGD("test", "Cancelled items (behind blocker): %s, %s, %s",
c1 ? "true" : "false",
c2 ? "true" : "false",
c3 ? "true" : "false");
// The heap now has:
// - "blocker" at front (not cancelled)
// - cancelled items behind it (marked remove=true but still in heap)
// - When all execute at once, cleanup_() stops at "blocker"
// - The loop then executes ALL ready items including cancelled ones
ESP_LOGD("test", "Setup complete. Blocker at front prevents cleanup of cancelled items behind it");
# Wait for all timeouts to execute (or not)
- delay: 50ms
- delay: 20ms
# Check results
- lambda: |-
ESP_LOGI("test", "=== Test Results ===");
ESP_LOGI("test", "Normal items executed: %d (expected 5)", id(normal_item_executed));
ESP_LOGI("test", "Normal items executed: %d (expected 4)", id(normal_item_executed));
ESP_LOGI("test", "Removed items executed: %d (expected 0)", id(removed_item_executed));
if (id(removed_item_executed) > 0) {
ESP_LOGE("test", "TEST FAILED: %d cancelled items were executed!", id(removed_item_executed));
id(test_passed) = false;
} else if (id(normal_item_executed) != 5) {
ESP_LOGE("test", "TEST FAILED: Expected 5 normal items, got %d", id(normal_item_executed));
} else if (id(normal_item_executed) != 4) {
ESP_LOGE("test", "TEST FAILED: Expected 4 normal items, got %d", id(normal_item_executed));
id(test_passed) = false;
} else {
ESP_LOGI("test", "TEST PASSED: No cancelled items were executed");

View File

@@ -97,6 +97,6 @@ async def test_scheduler_removed_item_race(
assert removed_executed == 0, (
f"Cancelled items should not execute, but {removed_executed} did"
)
assert normal_executed == 5, (
f"Expected 5 normal items to execute, got {normal_executed}"
assert normal_executed == 4, (
f"Expected 4 normal items to execute, got {normal_executed}"
)