From cef39e7c597dd1ee3398108931a9a4d5b97278c8 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 10 Aug 2025 04:44:23 -0500 Subject: [PATCH] [esp32_ble_tracker] Fix false reboots when event loop is blocked (#10144) --- .../esp32_ble_tracker/esp32_ble_tracker.cpp | 47 ++++++++++++++++--- .../esp32_ble_tracker/esp32_ble_tracker.h | 8 ++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/esphome/components/esp32_ble_tracker/esp32_ble_tracker.cpp b/esphome/components/esp32_ble_tracker/esp32_ble_tracker.cpp index 5e97c81044..0455d136df 100644 --- a/esphome/components/esp32_ble_tracker/esp32_ble_tracker.cpp +++ b/esphome/components/esp32_ble_tracker/esp32_ble_tracker.cpp @@ -101,6 +101,38 @@ void ESP32BLETracker::loop() { this->start_scan(); } } + + // Check for scan timeout - moved here from scheduler to avoid false reboots + // when the loop is blocked + if (this->scanner_state_ == ScannerState::RUNNING) { + switch (this->scan_timeout_state_) { + case ScanTimeoutState::MONITORING: { + uint32_t now = App.get_loop_component_start_time(); + uint32_t timeout_ms = this->scan_duration_ * 2000; + // Robust time comparison that handles rollover correctly + // This works because unsigned arithmetic wraps around predictably + if ((now - this->scan_start_time_) > timeout_ms) { + // First time we've seen the timeout exceeded - wait one more loop iteration + // This ensures all components have had a chance to process pending events + // This is because esp32_ble may not have run yet and called + // gap_scan_event_handler yet when the loop unblocks + ESP_LOGW(TAG, "Scan timeout exceeded"); + this->scan_timeout_state_ = ScanTimeoutState::EXCEEDED_WAIT; + } + break; + } + case ScanTimeoutState::EXCEEDED_WAIT: + // We've waited at least one full loop iteration, and scan is still running + ESP_LOGE(TAG, "Scan never terminated, rebooting"); + App.reboot(); + break; + + case ScanTimeoutState::INACTIVE: + // This case should be unreachable - scanner and timeout states are always synchronized + break; + } + } + ClientStateCounts counts = this->count_client_states_(); if (counts != this->client_state_counts_) { this->client_state_counts_ = counts; @@ -164,7 +196,8 @@ void ESP32BLETracker::stop_scan_() { ESP_LOGE(TAG, "Cannot stop scan: %s", this->scanner_state_to_string_(this->scanner_state_)); return; } - this->cancel_timeout("scan"); + // Reset timeout state machine when stopping scan + this->scan_timeout_state_ = ScanTimeoutState::INACTIVE; this->set_scanner_state_(ScannerState::STOPPING); esp_err_t err = esp_ble_gap_stop_scanning(); if (err != ESP_OK) { @@ -197,11 +230,10 @@ void ESP32BLETracker::start_scan_(bool first) { this->scan_params_.scan_interval = this->scan_interval_; this->scan_params_.scan_window = this->scan_window_; - // Start timeout before scan is started. Otherwise scan never starts if any error. - this->set_timeout("scan", this->scan_duration_ * 2000, []() { - ESP_LOGE(TAG, "Scan never terminated, rebooting to restore stack (IDF)"); - App.reboot(); - }); + // Start timeout monitoring in loop() instead of using scheduler + // This prevents false reboots when the loop is blocked + this->scan_start_time_ = App.get_loop_component_start_time(); + this->scan_timeout_state_ = ScanTimeoutState::MONITORING; esp_err_t err = esp_ble_gap_set_scan_params(&this->scan_params_); if (err != ESP_OK) { @@ -752,7 +784,8 @@ void ESP32BLETracker::cleanup_scan_state_(bool is_stop_complete) { #ifdef USE_ESP32_BLE_DEVICE this->already_discovered_.clear(); #endif - this->cancel_timeout("scan"); + // Reset timeout state machine instead of cancelling scheduler timeout + this->scan_timeout_state_ = ScanTimeoutState::INACTIVE; for (auto *listener : this->listeners_) listener->on_scan_end(); diff --git a/esphome/components/esp32_ble_tracker/esp32_ble_tracker.h b/esphome/components/esp32_ble_tracker/esp32_ble_tracker.h index 4b09d521b6..bf99026810 100644 --- a/esphome/components/esp32_ble_tracker/esp32_ble_tracker.h +++ b/esphome/components/esp32_ble_tracker/esp32_ble_tracker.h @@ -367,6 +367,14 @@ class ESP32BLETracker : public Component, #ifdef USE_ESP32_BLE_SOFTWARE_COEXISTENCE bool coex_prefer_ble_{false}; #endif + // Scan timeout state machine + enum class ScanTimeoutState : uint8_t { + INACTIVE, // No timeout monitoring + MONITORING, // Actively monitoring for timeout + EXCEEDED_WAIT, // Timeout exceeded, waiting one loop before reboot + }; + uint32_t scan_start_time_{0}; + ScanTimeoutState scan_timeout_state_{ScanTimeoutState::INACTIVE}; }; // NOLINTNEXTLINE