mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-31 15:12:06 +00:00 
			
		
		
		
	fix
This commit is contained in:
		| @@ -409,6 +409,8 @@ void HOT Scheduler::call(uint32_t now) { | |||||||
|       // This handles two cases: |       // This handles two cases: | ||||||
|       // 1. Item was marked for removal after cleanup_() but before we got here |       // 1. Item was marked for removal after cleanup_() but before we got here | ||||||
|       // 2. Item is marked for removal but wasn't at the front of the heap during cleanup_() |       // 2. Item is marked for removal but wasn't at the front of the heap during cleanup_() | ||||||
|  |       // TEMPORARILY DISABLED TO VERIFY TEST CATCHES THE BUG | ||||||
|  |       /* | ||||||
| #ifdef ESPHOME_THREAD_MULTI_NO_ATOMICS | #ifdef ESPHOME_THREAD_MULTI_NO_ATOMICS | ||||||
|       // Multi-threaded platforms without atomics: must take lock to safely read remove flag |       // Multi-threaded platforms without atomics: must take lock to safely read remove flag | ||||||
|       { |       { | ||||||
| @@ -428,6 +430,7 @@ void HOT Scheduler::call(uint32_t now) { | |||||||
|         continue; |         continue; | ||||||
|       } |       } | ||||||
| #endif | #endif | ||||||
|  |       */ | ||||||
|  |  | ||||||
| #ifdef ESPHOME_DEBUG_SCHEDULER | #ifdef ESPHOME_DEBUG_SCHEDULER | ||||||
|       const char *item_name = item->get_name(); |       const char *item_name = item->get_name(); | ||||||
|   | |||||||
							
								
								
									
										118
									
								
								tests/integration/fixtures/scheduler_removed_item_race.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										118
									
								
								tests/integration/fixtures/scheduler_removed_item_race.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,118 @@ | |||||||
|  | esphome: | ||||||
|  |   name: scheduler-removed-item-race | ||||||
|  |  | ||||||
|  | host: | ||||||
|  |  | ||||||
|  | api: | ||||||
|  |   services: | ||||||
|  |     - service: run_test | ||||||
|  |       then: | ||||||
|  |         - script.execute: run_test_script | ||||||
|  |  | ||||||
|  | logger: | ||||||
|  |   level: DEBUG | ||||||
|  |  | ||||||
|  | globals: | ||||||
|  |   - id: test_passed | ||||||
|  |     type: bool | ||||||
|  |     initial_value: 'true' | ||||||
|  |   - id: removed_item_executed | ||||||
|  |     type: int | ||||||
|  |     initial_value: '0' | ||||||
|  |   - id: normal_item_executed | ||||||
|  |     type: int | ||||||
|  |     initial_value: '0' | ||||||
|  |  | ||||||
|  | sensor: | ||||||
|  |   - platform: template | ||||||
|  |     id: test_sensor | ||||||
|  |     name: "Test Sensor" | ||||||
|  |     update_interval: never | ||||||
|  |     lambda: return 0.0; | ||||||
|  |  | ||||||
|  | script: | ||||||
|  |   - id: run_test_script | ||||||
|  |     then: | ||||||
|  |       - logger.log: "=== Starting Removed Item Race Test ===" | ||||||
|  |  | ||||||
|  |       # This test creates a scenario where: | ||||||
|  |       # 1. Multiple timeouts are scheduled to execute at nearly the same time | ||||||
|  |       # 2. One timeout in the middle of the heap gets cancelled | ||||||
|  |       # 3. Without the fix, the cancelled timeout would still execute | ||||||
|  |  | ||||||
|  |       - lambda: |- | ||||||
|  |           // Schedule multiple timeouts that will all be ready at the same time | ||||||
|  |           // This ensures they're all in the heap together | ||||||
|  |  | ||||||
|  |           // First timeout - executes at 10ms | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout1", 10, []() { | ||||||
|  |             ESP_LOGD("test", "Timeout 1 executed (expected)"); | ||||||
|  |             id(normal_item_executed)++; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           // Second timeout - executes at 10ms (will be cancelled) | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout2", 10, []() { | ||||||
|  |             ESP_LOGE("test", "RACE: Timeout 2 executed after being cancelled!"); | ||||||
|  |             id(removed_item_executed)++; | ||||||
|  |             id(test_passed) = false; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           // Third timeout - executes at 10ms | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout3", 10, []() { | ||||||
|  |             ESP_LOGD("test", "Timeout 3 executed (expected)"); | ||||||
|  |             id(normal_item_executed)++; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           // Fourth timeout - executes at 10ms | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout4", 10, []() { | ||||||
|  |             ESP_LOGD("test", "Timeout 4 executed (expected)"); | ||||||
|  |             id(normal_item_executed)++; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           // Now cancel timeout2 | ||||||
|  |           // Since all timeouts have the same execution time, they're all in the heap | ||||||
|  |           // timeout2 might not be at the front, so cleanup_() won't remove it | ||||||
|  |           bool cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout2"); | ||||||
|  |           ESP_LOGD("test", "Cancelled timeout2: %s", cancelled ? "true" : "false"); | ||||||
|  |  | ||||||
|  |           // Also test with items at slightly different times | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout5", 11, []() { | ||||||
|  |             ESP_LOGD("test", "Timeout 5 executed (expected)"); | ||||||
|  |             id(normal_item_executed)++; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout6", 12, []() { | ||||||
|  |             ESP_LOGE("test", "RACE: Timeout 6 executed after being cancelled!"); | ||||||
|  |             id(removed_item_executed)++; | ||||||
|  |             id(test_passed) = false; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           App.scheduler.set_timeout(id(test_sensor), "timeout7", 13, []() { | ||||||
|  |             ESP_LOGD("test", "Timeout 7 executed (expected)"); | ||||||
|  |             id(normal_item_executed)++; | ||||||
|  |           }); | ||||||
|  |  | ||||||
|  |           // Cancel timeout6 | ||||||
|  |           cancelled = App.scheduler.cancel_timeout(id(test_sensor), "timeout6"); | ||||||
|  |           ESP_LOGD("test", "Cancelled timeout6: %s", cancelled ? "true" : "false"); | ||||||
|  |  | ||||||
|  |       # Wait for all timeouts to execute (or not) | ||||||
|  |       - delay: 50ms | ||||||
|  |  | ||||||
|  |       # Check results | ||||||
|  |       - lambda: |- | ||||||
|  |           ESP_LOGI("test", "=== Test Results ==="); | ||||||
|  |           ESP_LOGI("test", "Normal items executed: %d (expected 5)", id(normal_item_executed)); | ||||||
|  |           ESP_LOGI("test", "Removed items executed: %d (expected 0)", id(removed_item_executed)); | ||||||
|  |  | ||||||
|  |           if (id(removed_item_executed) > 0) { | ||||||
|  |             ESP_LOGE("test", "TEST FAILED: %d cancelled items were executed!", id(removed_item_executed)); | ||||||
|  |             id(test_passed) = false; | ||||||
|  |           } else if (id(normal_item_executed) != 5) { | ||||||
|  |             ESP_LOGE("test", "TEST FAILED: Expected 5 normal items, got %d", id(normal_item_executed)); | ||||||
|  |             id(test_passed) = false; | ||||||
|  |           } else { | ||||||
|  |             ESP_LOGI("test", "TEST PASSED: No cancelled items were executed"); | ||||||
|  |           } | ||||||
|  |  | ||||||
|  |           ESP_LOGI("test", "=== Test Complete ==="); | ||||||
							
								
								
									
										102
									
								
								tests/integration/test_scheduler_removed_item_race.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								tests/integration/test_scheduler_removed_item_race.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,102 @@ | |||||||
|  | """Test for scheduler race condition where removed items still execute.""" | ||||||
|  |  | ||||||
|  | import asyncio | ||||||
|  | import re | ||||||
|  |  | ||||||
|  | import pytest | ||||||
|  |  | ||||||
|  | from .types import APIClientConnectedFactory, RunCompiledFunction | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @pytest.mark.asyncio | ||||||
|  | async def test_scheduler_removed_item_race( | ||||||
|  |     yaml_config: str, | ||||||
|  |     run_compiled: RunCompiledFunction, | ||||||
|  |     api_client_connected: APIClientConnectedFactory, | ||||||
|  | ) -> None: | ||||||
|  |     """Test that items marked for removal don't execute. | ||||||
|  |  | ||||||
|  |     This test verifies the fix for a race condition where: | ||||||
|  |     1. cleanup_() only removes items from the front of the heap | ||||||
|  |     2. Items in the middle of the heap marked for removal still execute | ||||||
|  |     3. This causes cancelled timeouts to run when they shouldn't | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     loop = asyncio.get_running_loop() | ||||||
|  |     test_complete_future: asyncio.Future[bool] = loop.create_future() | ||||||
|  |  | ||||||
|  |     # Track test results | ||||||
|  |     test_passed = False | ||||||
|  |     removed_executed = 0 | ||||||
|  |     normal_executed = 0 | ||||||
|  |  | ||||||
|  |     # Patterns to match | ||||||
|  |     race_pattern = re.compile(r"RACE: .* executed after being cancelled!") | ||||||
|  |     passed_pattern = re.compile(r"TEST PASSED") | ||||||
|  |     failed_pattern = re.compile(r"TEST FAILED") | ||||||
|  |     complete_pattern = re.compile(r"=== Test Complete ===") | ||||||
|  |     normal_count_pattern = re.compile(r"Normal items executed: (\d+)") | ||||||
|  |     removed_count_pattern = re.compile(r"Removed items executed: (\d+)") | ||||||
|  |  | ||||||
|  |     def check_output(line: str) -> None: | ||||||
|  |         """Check log output for test results.""" | ||||||
|  |         nonlocal test_passed, removed_executed, normal_executed | ||||||
|  |  | ||||||
|  |         if race_pattern.search(line): | ||||||
|  |             # Race condition detected - a cancelled item executed | ||||||
|  |             test_passed = False | ||||||
|  |  | ||||||
|  |         if passed_pattern.search(line): | ||||||
|  |             test_passed = True | ||||||
|  |         elif failed_pattern.search(line): | ||||||
|  |             test_passed = False | ||||||
|  |  | ||||||
|  |         normal_match = normal_count_pattern.search(line) | ||||||
|  |         if normal_match: | ||||||
|  |             normal_executed = int(normal_match.group(1)) | ||||||
|  |  | ||||||
|  |         removed_match = removed_count_pattern.search(line) | ||||||
|  |         if removed_match: | ||||||
|  |             removed_executed = int(removed_match.group(1)) | ||||||
|  |  | ||||||
|  |         if not test_complete_future.done() and complete_pattern.search(line): | ||||||
|  |             test_complete_future.set_result(True) | ||||||
|  |  | ||||||
|  |     async with ( | ||||||
|  |         run_compiled(yaml_config, line_callback=check_output), | ||||||
|  |         api_client_connected() as client, | ||||||
|  |     ): | ||||||
|  |         # Verify we can connect | ||||||
|  |         device_info = await client.device_info() | ||||||
|  |         assert device_info is not None | ||||||
|  |         assert device_info.name == "scheduler-removed-item-race" | ||||||
|  |  | ||||||
|  |         # List services | ||||||
|  |         _, services = await asyncio.wait_for( | ||||||
|  |             client.list_entities_services(), timeout=5.0 | ||||||
|  |         ) | ||||||
|  |  | ||||||
|  |         # Find run_test service | ||||||
|  |         run_test_service = next((s for s in services if s.name == "run_test"), None) | ||||||
|  |         assert run_test_service is not None, "run_test service not found" | ||||||
|  |  | ||||||
|  |         # Execute the test | ||||||
|  |         client.execute_service(run_test_service, {}) | ||||||
|  |  | ||||||
|  |         # Wait for test completion | ||||||
|  |         try: | ||||||
|  |             await asyncio.wait_for(test_complete_future, timeout=5.0) | ||||||
|  |         except TimeoutError: | ||||||
|  |             pytest.fail("Test did not complete within timeout") | ||||||
|  |  | ||||||
|  |         # Verify results | ||||||
|  |         assert test_passed, ( | ||||||
|  |             f"Test failed! Removed items executed: {removed_executed}, " | ||||||
|  |             f"Normal items executed: {normal_executed}" | ||||||
|  |         ) | ||||||
|  |         assert removed_executed == 0, ( | ||||||
|  |             f"Cancelled items should not execute, but {removed_executed} did" | ||||||
|  |         ) | ||||||
|  |         assert normal_executed == 5, ( | ||||||
|  |             f"Expected 5 normal items to execute, got {normal_executed}" | ||||||
|  |         ) | ||||||
		Reference in New Issue
	
	Block a user