mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-31 15:12:06 +00:00 
			
		
		
		
	Implement proper API connection teardown before deep sleep/reboot (#9008)
This commit is contained in:
		| @@ -275,7 +275,8 @@ DisconnectResponse APIConnection::disconnect(const DisconnectRequest &msg) { | |||||||
|   return resp; |   return resp; | ||||||
| } | } | ||||||
| void APIConnection::on_disconnect_response(const DisconnectResponse &value) { | void APIConnection::on_disconnect_response(const DisconnectResponse &value) { | ||||||
|   // pass |   this->helper_->close(); | ||||||
|  |   this->remove_ = true; | ||||||
| } | } | ||||||
|  |  | ||||||
| #ifdef USE_BINARY_SENSOR | #ifdef USE_BINARY_SENSOR | ||||||
|   | |||||||
| @@ -88,6 +88,12 @@ void APIServer::setup() { | |||||||
| #ifdef USE_LOGGER | #ifdef USE_LOGGER | ||||||
|   if (logger::global_logger != nullptr) { |   if (logger::global_logger != nullptr) { | ||||||
|     logger::global_logger->add_on_log_callback([this](int level, const char *tag, const char *message) { |     logger::global_logger->add_on_log_callback([this](int level, const char *tag, const char *message) { | ||||||
|  |       if (this->shutting_down_) { | ||||||
|  |         // Don't try to send logs during shutdown | ||||||
|  |         // as it could result in a recursion and | ||||||
|  |         // we would be filling a buffer we are trying to clear | ||||||
|  |         return; | ||||||
|  |       } | ||||||
|       for (auto &c : this->clients_) { |       for (auto &c : this->clients_) { | ||||||
|         if (!c->remove_) |         if (!c->remove_) | ||||||
|           c->try_send_log_message(level, tag, message); |           c->try_send_log_message(level, tag, message); | ||||||
| @@ -112,8 +118,8 @@ void APIServer::setup() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void APIServer::loop() { | void APIServer::loop() { | ||||||
|   // Accept new clients only if the socket has incoming connections |   // Accept new clients only if the socket exists and has incoming connections | ||||||
|   if (this->socket_->ready()) { |   if (this->socket_ && this->socket_->ready()) { | ||||||
|     while (true) { |     while (true) { | ||||||
|       struct sockaddr_storage source_addr; |       struct sockaddr_storage source_addr; | ||||||
|       socklen_t addr_len = sizeof(source_addr); |       socklen_t addr_len = sizeof(source_addr); | ||||||
| @@ -474,10 +480,32 @@ void APIServer::request_time() { | |||||||
| bool APIServer::is_connected() const { return !this->clients_.empty(); } | bool APIServer::is_connected() const { return !this->clients_.empty(); } | ||||||
|  |  | ||||||
| void APIServer::on_shutdown() { | void APIServer::on_shutdown() { | ||||||
|   for (auto &c : this->clients_) { |   this->shutting_down_ = true; | ||||||
|     c->send_disconnect_request(DisconnectRequest()); |  | ||||||
|  |   // Close the listening socket to prevent new connections | ||||||
|  |   if (this->socket_) { | ||||||
|  |     this->socket_->close(); | ||||||
|  |     this->socket_ = nullptr; | ||||||
|   } |   } | ||||||
|   delay(10); |  | ||||||
|  |   // Send disconnect requests to all connected clients | ||||||
|  |   for (auto &c : this->clients_) { | ||||||
|  |     if (!c->send_disconnect_request(DisconnectRequest())) { | ||||||
|  |       // If we can't send the disconnect request, mark for immediate closure | ||||||
|  |       c->next_close_ = true; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool APIServer::teardown() { | ||||||
|  |   // If network is disconnected, no point trying to flush buffers | ||||||
|  |   if (!network::is_connected()) { | ||||||
|  |     return true; | ||||||
|  |   } | ||||||
|  |   this->loop(); | ||||||
|  |  | ||||||
|  |   // Return true only when all clients have been torn down | ||||||
|  |   return this->clients_.empty(); | ||||||
| } | } | ||||||
|  |  | ||||||
| }  // namespace api | }  // namespace api | ||||||
|   | |||||||
| @@ -34,6 +34,7 @@ class APIServer : public Component, public Controller { | |||||||
|   void loop() override; |   void loop() override; | ||||||
|   void dump_config() override; |   void dump_config() override; | ||||||
|   void on_shutdown() override; |   void on_shutdown() override; | ||||||
|  |   bool teardown() override; | ||||||
|   bool check_password(const std::string &password) const; |   bool check_password(const std::string &password) const; | ||||||
|   bool uses_password() const; |   bool uses_password() const; | ||||||
|   void set_port(uint16_t port); |   void set_port(uint16_t port); | ||||||
| @@ -136,6 +137,7 @@ class APIServer : public Component, public Controller { | |||||||
|   } |   } | ||||||
|  |  | ||||||
|  protected: |  protected: | ||||||
|  |   bool shutting_down_ = false; | ||||||
|   std::unique_ptr<socket::Socket> socket_ = nullptr; |   std::unique_ptr<socket::Socket> socket_ = nullptr; | ||||||
|   uint16_t port_{6053}; |   uint16_t port_{6053}; | ||||||
|   uint32_t reboot_timeout_{300000}; |   uint32_t reboot_timeout_{300000}; | ||||||
|   | |||||||
| @@ -6,6 +6,8 @@ namespace esphome { | |||||||
| namespace deep_sleep { | namespace deep_sleep { | ||||||
|  |  | ||||||
| static const char *const TAG = "deep_sleep"; | static const char *const TAG = "deep_sleep"; | ||||||
|  | // 5 seconds for deep sleep to ensure clean disconnect from Home Assistant | ||||||
|  | static const uint32_t TEARDOWN_TIMEOUT_DEEP_SLEEP_MS = 5000; | ||||||
|  |  | ||||||
| bool global_has_deep_sleep = false;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) | bool global_has_deep_sleep = false;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) | ||||||
|  |  | ||||||
| @@ -62,6 +64,9 @@ void DeepSleepComponent::begin_sleep(bool manual) { | |||||||
|     ESP_LOGI(TAG, "Sleeping for %" PRId64 "us", *this->sleep_duration_); |     ESP_LOGI(TAG, "Sleeping for %" PRId64 "us", *this->sleep_duration_); | ||||||
|   } |   } | ||||||
|   App.run_safe_shutdown_hooks(); |   App.run_safe_shutdown_hooks(); | ||||||
|  |   // It's critical to teardown components cleanly for deep sleep to ensure | ||||||
|  |   // Home Assistant sees a clean disconnect instead of marking the device unavailable | ||||||
|  |   App.teardown_components(TEARDOWN_TIMEOUT_DEEP_SLEEP_MS); | ||||||
|  |  | ||||||
|   this->deep_sleep_(); |   this->deep_sleep_(); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -43,7 +43,6 @@ class BLEServer : public Component, | |||||||
|   float get_setup_priority() const override; |   float get_setup_priority() const override; | ||||||
|   bool can_proceed() override; |   bool can_proceed() override; | ||||||
|  |  | ||||||
|   void teardown(); |  | ||||||
|   bool is_running(); |   bool is_running(); | ||||||
|  |  | ||||||
|   void set_manufacturer_data(const std::vector<uint8_t> &data) { |   void set_manufacturer_data(const std::vector<uint8_t> &data) { | ||||||
|   | |||||||
| @@ -126,63 +126,7 @@ void Application::loop() { | |||||||
|     next_schedule = std::max(next_schedule, delay_time / 2); |     next_schedule = std::max(next_schedule, delay_time / 2); | ||||||
|     delay_time = std::min(next_schedule, delay_time); |     delay_time = std::min(next_schedule, delay_time); | ||||||
|  |  | ||||||
| #ifdef USE_SOCKET_SELECT_SUPPORT |     this->delay_with_select_(delay_time); | ||||||
|     if (!this->socket_fds_.empty()) { |  | ||||||
|       // Use select() with timeout when we have sockets to monitor |  | ||||||
|  |  | ||||||
|       // Update fd_set if socket list has changed |  | ||||||
|       if (this->socket_fds_changed_) { |  | ||||||
|         FD_ZERO(&this->base_read_fds_); |  | ||||||
|         for (int fd : this->socket_fds_) { |  | ||||||
|           if (fd >= 0 && fd < FD_SETSIZE) { |  | ||||||
|             FD_SET(fd, &this->base_read_fds_); |  | ||||||
|           } |  | ||||||
|         } |  | ||||||
|         this->socket_fds_changed_ = false; |  | ||||||
|       } |  | ||||||
|  |  | ||||||
|       // Copy base fd_set before each select |  | ||||||
|       this->read_fds_ = this->base_read_fds_; |  | ||||||
|  |  | ||||||
|       // Convert delay_time (milliseconds) to timeval |  | ||||||
|       struct timeval tv; |  | ||||||
|       tv.tv_sec = delay_time / 1000; |  | ||||||
|       tv.tv_usec = (delay_time - tv.tv_sec * 1000) * 1000; |  | ||||||
|  |  | ||||||
|       // Call select with timeout |  | ||||||
| #if defined(USE_SOCKET_IMPL_LWIP_SOCKETS) || (defined(USE_ESP32) && defined(USE_SOCKET_IMPL_BSD_SOCKETS)) |  | ||||||
|       // Use lwip_select() on platforms with lwIP - it's faster |  | ||||||
|       // Note: On ESP32 with BSD sockets, select() is already mapped to lwip_select() via macros, |  | ||||||
|       // but we explicitly call lwip_select() for clarity and to ensure we get the optimized version |  | ||||||
|       int ret = lwip_select(this->max_fd_ + 1, &this->read_fds_, nullptr, nullptr, &tv); |  | ||||||
| #else |  | ||||||
|       // Use standard select() on other platforms (e.g., host/native builds) |  | ||||||
|       int ret = ::select(this->max_fd_ + 1, &this->read_fds_, nullptr, nullptr, &tv); |  | ||||||
| #endif |  | ||||||
|  |  | ||||||
|       // Process select() result: |  | ||||||
|       // ret < 0: error (except EINTR which is normal) |  | ||||||
|       // ret > 0: socket(s) have data ready - normal and expected |  | ||||||
|       // ret == 0: timeout occurred - normal and expected |  | ||||||
|       if (ret < 0) { |  | ||||||
|         if (errno == EINTR) { |  | ||||||
|           // Interrupted by signal - this is normal, just continue |  | ||||||
|           // No need to delay as some time has already passed |  | ||||||
|           ESP_LOGVV(TAG, "select() interrupted by signal"); |  | ||||||
|         } else { |  | ||||||
|           // Actual error - log and fall back to delay |  | ||||||
|           ESP_LOGW(TAG, "select() failed with errno %d", errno); |  | ||||||
|           delay(delay_time); |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|     } else { |  | ||||||
|       // No sockets registered, use regular delay |  | ||||||
|       delay(delay_time); |  | ||||||
|     } |  | ||||||
| #else |  | ||||||
|     // No select support, use regular delay |  | ||||||
|     delay(delay_time); |  | ||||||
| #endif |  | ||||||
|   } |   } | ||||||
|   this->last_loop_ = last_op_end_time; |   this->last_loop_ = last_op_end_time; | ||||||
|  |  | ||||||
| @@ -224,6 +168,7 @@ void Application::reboot() { | |||||||
| void Application::safe_reboot() { | void Application::safe_reboot() { | ||||||
|   ESP_LOGI(TAG, "Rebooting safely"); |   ESP_LOGI(TAG, "Rebooting safely"); | ||||||
|   run_safe_shutdown_hooks(); |   run_safe_shutdown_hooks(); | ||||||
|  |   teardown_components(TEARDOWN_TIMEOUT_REBOOT_MS); | ||||||
|   arch_restart(); |   arch_restart(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -236,6 +181,49 @@ void Application::run_safe_shutdown_hooks() { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void Application::teardown_components(uint32_t timeout_ms) { | ||||||
|  |   uint32_t start_time = millis(); | ||||||
|  |  | ||||||
|  |   // Copy all components in reverse order using reverse iterators | ||||||
|  |   // Reverse order matches the behavior of run_safe_shutdown_hooks() above and ensures | ||||||
|  |   // components are torn down in the opposite order of their setup_priority (which is | ||||||
|  |   // used to sort components during Application::setup()) | ||||||
|  |   std::vector<Component *> pending_components(this->components_.rbegin(), this->components_.rend()); | ||||||
|  |  | ||||||
|  |   uint32_t now = start_time; | ||||||
|  |   while (!pending_components.empty() && (now - start_time) < timeout_ms) { | ||||||
|  |     // Feed watchdog during teardown to prevent triggering | ||||||
|  |     this->feed_wdt(now); | ||||||
|  |  | ||||||
|  |     // Use iterator to safely erase elements | ||||||
|  |     for (auto it = pending_components.begin(); it != pending_components.end();) { | ||||||
|  |       if ((*it)->teardown()) { | ||||||
|  |         // Component finished teardown, erase it | ||||||
|  |         it = pending_components.erase(it); | ||||||
|  |       } else { | ||||||
|  |         // Component still needs time | ||||||
|  |         ++it; | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Give some time for I/O operations if components are still pending | ||||||
|  |     if (!pending_components.empty()) { | ||||||
|  |       this->delay_with_select_(1); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Update time for next iteration | ||||||
|  |     now = millis(); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if (!pending_components.empty()) { | ||||||
|  |     // Note: At this point, connections are either disconnected or in a bad state, | ||||||
|  |     // so this warning will only appear via serial rather than being transmitted to clients | ||||||
|  |     for (auto *component : pending_components) { | ||||||
|  |       ESP_LOGW(TAG, "%s did not complete teardown within %u ms", component->get_component_source(), timeout_ms); | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| void Application::calculate_looping_components_() { | void Application::calculate_looping_components_() { | ||||||
|   for (auto *obj : this->components_) { |   for (auto *obj : this->components_) { | ||||||
|     if (obj->has_overridden_loop()) |     if (obj->has_overridden_loop()) | ||||||
| @@ -304,6 +292,54 @@ bool Application::is_socket_ready(int fd) const { | |||||||
| } | } | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  | void Application::delay_with_select_(uint32_t delay_ms) { | ||||||
|  | #ifdef USE_SOCKET_SELECT_SUPPORT | ||||||
|  |   if (!this->socket_fds_.empty()) { | ||||||
|  |     // Update fd_set if socket list has changed | ||||||
|  |     if (this->socket_fds_changed_) { | ||||||
|  |       FD_ZERO(&this->base_read_fds_); | ||||||
|  |       for (int fd : this->socket_fds_) { | ||||||
|  |         if (fd >= 0 && fd < FD_SETSIZE) { | ||||||
|  |           FD_SET(fd, &this->base_read_fds_); | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |       this->socket_fds_changed_ = false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Copy base fd_set before each select | ||||||
|  |     this->read_fds_ = this->base_read_fds_; | ||||||
|  |  | ||||||
|  |     // Convert delay_ms to timeval | ||||||
|  |     struct timeval tv; | ||||||
|  |     tv.tv_sec = delay_ms / 1000; | ||||||
|  |     tv.tv_usec = (delay_ms - tv.tv_sec * 1000) * 1000; | ||||||
|  |  | ||||||
|  |     // Call select with timeout | ||||||
|  | #if defined(USE_SOCKET_IMPL_LWIP_SOCKETS) || (defined(USE_ESP32) && defined(USE_SOCKET_IMPL_BSD_SOCKETS)) | ||||||
|  |     int ret = lwip_select(this->max_fd_ + 1, &this->read_fds_, nullptr, nullptr, &tv); | ||||||
|  | #else | ||||||
|  |     int ret = ::select(this->max_fd_ + 1, &this->read_fds_, nullptr, nullptr, &tv); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |     // Process select() result: | ||||||
|  |     // ret < 0: error (except EINTR which is normal) | ||||||
|  |     // ret > 0: socket(s) have data ready - normal and expected | ||||||
|  |     // ret == 0: timeout occurred - normal and expected | ||||||
|  |     if (ret < 0 && errno != EINTR) { | ||||||
|  |       // Actual error - log and fall back to delay | ||||||
|  |       ESP_LOGW(TAG, "select() failed with errno %d", errno); | ||||||
|  |       delay(delay_ms); | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     // No sockets registered, use regular delay | ||||||
|  |     delay(delay_ms); | ||||||
|  |   } | ||||||
|  | #else | ||||||
|  |   // No select support, use regular delay | ||||||
|  |   delay(delay_ms); | ||||||
|  | #endif | ||||||
|  | } | ||||||
|  |  | ||||||
| Application App;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) | Application App;  // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) | ||||||
|  |  | ||||||
| }  // namespace esphome | }  // namespace esphome | ||||||
|   | |||||||
| @@ -79,6 +79,12 @@ | |||||||
|  |  | ||||||
| namespace esphome { | namespace esphome { | ||||||
|  |  | ||||||
|  | // Teardown timeout constant (in milliseconds) | ||||||
|  | // For reboots, it's more important to shut down quickly than disconnect cleanly | ||||||
|  | // since we're not entering deep sleep. The only consequence of not shutting down | ||||||
|  | // cleanly is a warning in the log. | ||||||
|  | static const uint32_t TEARDOWN_TIMEOUT_REBOOT_MS = 1000;  // 1 second for quick reboot | ||||||
|  |  | ||||||
| class Application { | class Application { | ||||||
|  public: |  public: | ||||||
|   void pre_setup(const std::string &name, const std::string &friendly_name, const std::string &area, |   void pre_setup(const std::string &name, const std::string &friendly_name, const std::string &area, | ||||||
| @@ -251,6 +257,12 @@ class Application { | |||||||
|  |  | ||||||
|   void run_safe_shutdown_hooks(); |   void run_safe_shutdown_hooks(); | ||||||
|  |  | ||||||
|  |   /** Teardown all components with a timeout. | ||||||
|  |    * | ||||||
|  |    * @param timeout_ms Maximum time to wait for teardown in milliseconds | ||||||
|  |    */ | ||||||
|  |   void teardown_components(uint32_t timeout_ms); | ||||||
|  |  | ||||||
|   uint32_t get_app_state() const { return this->app_state_; } |   uint32_t get_app_state() const { return this->app_state_; } | ||||||
|  |  | ||||||
| #ifdef USE_BINARY_SENSOR | #ifdef USE_BINARY_SENSOR | ||||||
| @@ -493,6 +505,9 @@ class Application { | |||||||
|  |  | ||||||
|   void feed_wdt_arch_(); |   void feed_wdt_arch_(); | ||||||
|  |  | ||||||
|  |   /// Perform a delay while also monitoring socket file descriptors for readiness | ||||||
|  |   void delay_with_select_(uint32_t delay_ms); | ||||||
|  |  | ||||||
|   std::vector<Component *> components_{}; |   std::vector<Component *> components_{}; | ||||||
|   std::vector<Component *> looping_components_{}; |   std::vector<Component *> looping_components_{}; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -110,6 +110,12 @@ class Component { | |||||||
|   virtual void on_shutdown() {} |   virtual void on_shutdown() {} | ||||||
|   virtual void on_safe_shutdown() {} |   virtual void on_safe_shutdown() {} | ||||||
|  |  | ||||||
|  |   /** Called during teardown to allow component to gracefully finish operations. | ||||||
|  |    * | ||||||
|  |    * @return true if teardown is complete, false if more time is needed | ||||||
|  |    */ | ||||||
|  |   virtual bool teardown() { return true; } | ||||||
|  |  | ||||||
|   uint32_t get_component_state() const; |   uint32_t get_component_state() const; | ||||||
|  |  | ||||||
|   /** Mark this component as failed. Any future timeouts/intervals/setup/loop will no longer be called. |   /** Mark this component as failed. Any future timeouts/intervals/setup/loop will no longer be called. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user