From a859ecaad1cb64d7648c5293771c93109f58c2c5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 12 Nov 2025 11:56:19 -0600 Subject: [PATCH] [core] Fix wait_until hanging when used in on_boot automations (#11869) --- esphome/core/base_automation.h | 7 +- .../fixtures/wait_until_on_boot.yaml | 47 ++++++++++ tests/integration/test_wait_until_on_boot.py | 91 +++++++++++++++++++ 3 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 tests/integration/fixtures/wait_until_on_boot.yaml create mode 100644 tests/integration/test_wait_until_on_boot.py diff --git a/esphome/core/base_automation.h b/esphome/core/base_automation.h index 6f392c8959..a5e6139182 100644 --- a/esphome/core/base_automation.h +++ b/esphome/core/base_automation.h @@ -412,7 +412,12 @@ template class WaitUntilAction : public Action, public Co void setup() override { // Start with loop disabled - only enable when there's work to do - this->disable_loop(); + // IMPORTANT: Only disable if num_running_ is 0, otherwise play_complex() was already + // called before our setup() (e.g., from on_boot trigger at same priority level) + // and we must not undo its enable_loop() call + if (this->num_running_ == 0) { + this->disable_loop(); + } } void play_complex(const Ts &...x) override { diff --git a/tests/integration/fixtures/wait_until_on_boot.yaml b/tests/integration/fixtures/wait_until_on_boot.yaml new file mode 100644 index 0000000000..358bef971b --- /dev/null +++ b/tests/integration/fixtures/wait_until_on_boot.yaml @@ -0,0 +1,47 @@ +# Test for wait_until in on_boot automation +# Reproduces bug where wait_until in on_boot would hang forever +# because WaitUntilAction::setup() would disable_loop() after +# play_complex() had already enabled it. + +esphome: + name: wait-until-on-boot + on_boot: + then: + - logger.log: "on_boot: Starting wait_until test" + - globals.set: + id: on_boot_started + value: 'true' + - wait_until: + condition: + lambda: return id(test_flag); + timeout: 5s + - logger.log: "on_boot: wait_until completed successfully" + +host: + +logger: + level: DEBUG + +globals: + - id: on_boot_started + type: bool + initial_value: 'false' + - id: test_flag + type: bool + initial_value: 'false' + +api: + actions: + - action: set_test_flag + then: + - globals.set: + id: test_flag + value: 'true' + - action: check_on_boot_started + then: + - lambda: |- + if (id(on_boot_started)) { + ESP_LOGI("test", "on_boot has started"); + } else { + ESP_LOGI("test", "on_boot has NOT started"); + } diff --git a/tests/integration/test_wait_until_on_boot.py b/tests/integration/test_wait_until_on_boot.py new file mode 100644 index 0000000000..b42c530c54 --- /dev/null +++ b/tests/integration/test_wait_until_on_boot.py @@ -0,0 +1,91 @@ +"""Integration test for wait_until in on_boot automation. + +This test validates that wait_until works correctly when triggered from on_boot, +which runs at the same setup priority as WaitUntilAction itself. This was broken +before the fix because WaitUntilAction::setup() would unconditionally disable_loop(), +even if play_complex() had already been called and enabled the loop. + +The bug: on_boot fires during StartupTrigger::setup(), which calls WaitUntilAction::play_complex() +before WaitUntilAction::setup() has run. Then when WaitUntilAction::setup() runs, it calls +disable_loop(), undoing the enable_loop() from play_complex(), causing wait_until to hang forever. +""" + +from __future__ import annotations + +import asyncio +import re + +import pytest + +from .types import APIClientConnectedFactory, RunCompiledFunction + + +@pytest.mark.asyncio +async def test_wait_until_on_boot( + yaml_config: str, + run_compiled: RunCompiledFunction, + api_client_connected: APIClientConnectedFactory, +) -> None: + """Test that wait_until works in on_boot automation with a condition that becomes true later.""" + loop = asyncio.get_running_loop() + + on_boot_started = False + on_boot_completed = False + + on_boot_started_pattern = re.compile(r"on_boot: Starting wait_until test") + on_boot_complete_pattern = re.compile(r"on_boot: wait_until completed successfully") + + on_boot_started_future = loop.create_future() + on_boot_complete_future = loop.create_future() + + def check_output(line: str) -> None: + """Check log output for test progress.""" + nonlocal on_boot_started, on_boot_completed + + if on_boot_started_pattern.search(line): + on_boot_started = True + if not on_boot_started_future.done(): + on_boot_started_future.set_result(True) + + if on_boot_complete_pattern.search(line): + on_boot_completed = True + if not on_boot_complete_future.done(): + on_boot_complete_future.set_result(True) + + async with ( + run_compiled(yaml_config, line_callback=check_output), + api_client_connected() as client, + ): + # Wait for on_boot to start + await asyncio.wait_for(on_boot_started_future, timeout=10.0) + assert on_boot_started, "on_boot did not start" + + # At this point, on_boot is blocked in wait_until waiting for test_flag to become true + # If the bug exists, wait_until's loop is disabled and it will never complete + # even after we set the flag + + # Give a moment for setup to complete + await asyncio.sleep(0.5) + + # Now set the flag that wait_until is waiting for + _, services = await client.list_entities_services() + set_flag_service = next( + (s for s in services if s.name == "set_test_flag"), None + ) + assert set_flag_service is not None, "set_test_flag service not found" + + client.execute_service(set_flag_service, {}) + + # If the fix works, wait_until's loop() will check the condition and proceed + # If the bug exists, wait_until is stuck with disabled loop and will timeout + try: + await asyncio.wait_for(on_boot_complete_future, timeout=2.0) + assert on_boot_completed, ( + "on_boot wait_until did not complete after flag was set" + ) + except TimeoutError: + pytest.fail( + "wait_until in on_boot did not complete within 2s after condition became true. " + "This indicates the bug where WaitUntilAction::setup() disables the loop " + "after play_complex() has already enabled it." + )