mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-30 22:53:59 +00:00 
			
		
		
		
	[esphome] Fix OTA watchdog resets during port scanning and network delays (#10152)
This commit is contained in:
		| @@ -19,7 +19,9 @@ | ||||
| namespace esphome { | ||||
|  | ||||
| static const char *const TAG = "esphome.ota"; | ||||
| static constexpr u_int16_t OTA_BLOCK_SIZE = 8192; | ||||
| static constexpr uint16_t OTA_BLOCK_SIZE = 8192; | ||||
| static constexpr uint32_t OTA_SOCKET_TIMEOUT_HANDSHAKE = 10000;  // milliseconds for initial handshake | ||||
| static constexpr uint32_t OTA_SOCKET_TIMEOUT_DATA = 90000;       // milliseconds for data transfer | ||||
|  | ||||
| void ESPHomeOTAComponent::setup() { | ||||
| #ifdef USE_OTA_STATE_CALLBACK | ||||
| @@ -28,19 +30,19 @@ void ESPHomeOTAComponent::setup() { | ||||
|  | ||||
|   this->server_ = socket::socket_ip_loop_monitored(SOCK_STREAM, 0);  // monitored for incoming connections | ||||
|   if (this->server_ == nullptr) { | ||||
|     ESP_LOGW(TAG, "Could not create socket"); | ||||
|     this->log_socket_error_("creation"); | ||||
|     this->mark_failed(); | ||||
|     return; | ||||
|   } | ||||
|   int enable = 1; | ||||
|   int err = this->server_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)); | ||||
|   if (err != 0) { | ||||
|     ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", err); | ||||
|     this->log_socket_error_("reuseaddr"); | ||||
|     // we can still continue | ||||
|   } | ||||
|   err = this->server_->setblocking(false); | ||||
|   if (err != 0) { | ||||
|     ESP_LOGW(TAG, "Socket unable to set nonblocking mode: errno %d", err); | ||||
|     this->log_socket_error_("non-blocking"); | ||||
|     this->mark_failed(); | ||||
|     return; | ||||
|   } | ||||
| @@ -49,21 +51,21 @@ void ESPHomeOTAComponent::setup() { | ||||
|  | ||||
|   socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), this->port_); | ||||
|   if (sl == 0) { | ||||
|     ESP_LOGW(TAG, "Socket unable to set sockaddr: errno %d", errno); | ||||
|     this->log_socket_error_("set sockaddr"); | ||||
|     this->mark_failed(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   err = this->server_->bind((struct sockaddr *) &server, sizeof(server)); | ||||
|   if (err != 0) { | ||||
|     ESP_LOGW(TAG, "Socket unable to bind: errno %d", errno); | ||||
|     this->log_socket_error_("bind"); | ||||
|     this->mark_failed(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   err = this->server_->listen(4); | ||||
|   if (err != 0) { | ||||
|     ESP_LOGW(TAG, "Socket unable to listen: errno %d", errno); | ||||
|     this->log_socket_error_("listen"); | ||||
|     this->mark_failed(); | ||||
|     return; | ||||
|   } | ||||
| @@ -83,17 +85,93 @@ void ESPHomeOTAComponent::dump_config() { | ||||
| } | ||||
|  | ||||
| void ESPHomeOTAComponent::loop() { | ||||
|   // Skip handle_() call if no client connected and no incoming connections | ||||
|   // Skip handle_handshake_() call if no client connected and no incoming connections | ||||
|   // This optimization reduces idle loop overhead when OTA is not active | ||||
|   // Note: No need to check server_ for null as the component is marked failed in setup() if server_ creation fails | ||||
|   // Note: No need to check server_ for null as the component is marked failed in setup() | ||||
|   // if server_ creation fails | ||||
|   if (this->client_ != nullptr || this->server_->ready()) { | ||||
|     this->handle_(); | ||||
|     this->handle_handshake_(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| static const uint8_t FEATURE_SUPPORTS_COMPRESSION = 0x01; | ||||
|  | ||||
| void ESPHomeOTAComponent::handle_() { | ||||
| void ESPHomeOTAComponent::handle_handshake_() { | ||||
|   /// First, non-blocking stage of an OTA session. | ||||
|   /// | ||||
|   /// Accepts a pending client when none is attached, then polls for the first | ||||
|   /// magic byte (0x6C). handle_data_() is entered only after that byte has been | ||||
|   /// read; when no data is available yet the call returns immediately. A client | ||||
|   /// that has not sent the byte within OTA_SOCKET_TIMEOUT_HANDSHAKE is dropped. | ||||
|  | ||||
|   if (this->client_ == nullptr) { | ||||
|     // loop() calls us without a client only after server_->ready() was true, | ||||
|     // so the pending connection can be accepted right away | ||||
|     struct sockaddr_storage peer_addr; | ||||
|     socklen_t peer_addr_len = sizeof(peer_addr); | ||||
|  | ||||
|     this->client_ = this->server_->accept_loop_monitored((struct sockaddr *) &peer_addr, &peer_addr_len); | ||||
|     if (this->client_ == nullptr) | ||||
|       return; | ||||
|  | ||||
|     int nodelay = 1; | ||||
|     if (this->client_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(int)) != 0) { | ||||
|       this->log_socket_error_("nodelay"); | ||||
|       this->cleanup_connection_(); | ||||
|       return; | ||||
|     } | ||||
|     if (this->client_->setblocking(false) != 0) { | ||||
|       this->log_socket_error_("non-blocking"); | ||||
|       this->cleanup_connection_(); | ||||
|       return; | ||||
|     } | ||||
|     this->log_start_("handshake"); | ||||
|     // Reference point for the handshake timeout below | ||||
|     this->client_connect_time_ = App.get_loop_component_start_time(); | ||||
|   } | ||||
|  | ||||
|   // Give up on clients that have not completed the handshake in time | ||||
|   const uint32_t current_time = App.get_loop_component_start_time(); | ||||
|   if (current_time - this->client_connect_time_ > OTA_SOCKET_TIMEOUT_HANDSHAKE) { | ||||
|     ESP_LOGW(TAG, "Handshake timeout"); | ||||
|     this->cleanup_connection_(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // Poll for the first of the magic bytes | ||||
|   uint8_t magic_start; | ||||
|   const ssize_t received = this->client_->read(&magic_start, 1); | ||||
|  | ||||
|   if (received <= 0) { | ||||
|     if (received == -1) { | ||||
|       if (errno == EAGAIN || errno == EWOULDBLOCK) | ||||
|         return;  // Nothing to read yet, poll again on the next loop | ||||
|       this->log_socket_error_("reading first byte"); | ||||
|     } else { | ||||
|       // Peer went away before sending anything | ||||
|       ESP_LOGW(TAG, "Remote closed during handshake"); | ||||
|     } | ||||
|     this->cleanup_connection_(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // One byte arrived: anything other than the magic start byte ends the session | ||||
|   if (magic_start != 0x6C) { | ||||
|     ESP_LOGW(TAG, "Invalid initial byte: 0x%02X", magic_start); | ||||
|     this->cleanup_connection_(); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   // Magic start byte confirmed, hand over to the data stage | ||||
|   this->handle_data_(); | ||||
| } | ||||
|  | ||||
| void ESPHomeOTAComponent::handle_data_() { | ||||
|   /// Handle the OTA data transfer and update process. | ||||
|   /// | ||||
|   /// This method is blocking and will not return until the OTA update completes, | ||||
|   /// fails, or times out. It handles authentication, receives the firmware data, | ||||
|   /// writes it to flash, and reboots on success. | ||||
|   ota::OTAResponseTypes error_code = ota::OTA_RESPONSE_ERROR_UNKNOWN; | ||||
|   bool update_started = false; | ||||
|   size_t total = 0; | ||||
| @@ -108,38 +186,14 @@ void ESPHomeOTAComponent::handle_() { | ||||
|   size_t size_acknowledged = 0; | ||||
| #endif | ||||
|  | ||||
|   if (this->client_ == nullptr) { | ||||
|     // We already checked server_->ready() in loop(), so we can accept directly | ||||
|     struct sockaddr_storage source_addr; | ||||
|     socklen_t addr_len = sizeof(source_addr); | ||||
|     this->client_ = this->server_->accept((struct sockaddr *) &source_addr, &addr_len); | ||||
|     if (this->client_ == nullptr) | ||||
|       return; | ||||
|   } | ||||
|  | ||||
|   int enable = 1; | ||||
|   int err = this->client_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(int)); | ||||
|   if (err != 0) { | ||||
|     ESP_LOGW(TAG, "Socket could not enable TCP nodelay, errno %d", errno); | ||||
|     this->client_->close(); | ||||
|     this->client_ = nullptr; | ||||
|     return; | ||||
|   } | ||||
|  | ||||
|   ESP_LOGD(TAG, "Starting update from %s", this->client_->getpeername().c_str()); | ||||
|   this->status_set_warning(); | ||||
| #ifdef USE_OTA_STATE_CALLBACK | ||||
|   this->state_callback_.call(ota::OTA_STARTED, 0.0f, 0); | ||||
| #endif | ||||
|  | ||||
|   if (!this->readall_(buf, 5)) { | ||||
|     ESP_LOGW(TAG, "Reading magic bytes failed"); | ||||
|   // Read remaining 4 bytes of magic (we already read the first byte 0x6C in handle_handshake_) | ||||
|   if (!this->readall_(buf, 4)) { | ||||
|     this->log_read_error_("magic bytes"); | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|   } | ||||
|   // 0x6C, 0x26, 0xF7, 0x5C, 0x45 | ||||
|   if (buf[0] != 0x6C || buf[1] != 0x26 || buf[2] != 0xF7 || buf[3] != 0x5C || buf[4] != 0x45) { | ||||
|     ESP_LOGW(TAG, "Magic bytes do not match! 0x%02X-0x%02X-0x%02X-0x%02X-0x%02X", buf[0], buf[1], buf[2], buf[3], | ||||
|              buf[4]); | ||||
|   // Check remaining magic bytes: 0x26, 0xF7, 0x5C, 0x45 | ||||
|   if (buf[0] != 0x26 || buf[1] != 0xF7 || buf[2] != 0x5C || buf[3] != 0x45) { | ||||
|     ESP_LOGW(TAG, "Magic bytes mismatch! 0x6C-0x%02X-0x%02X-0x%02X-0x%02X", buf[0], buf[1], buf[2], buf[3]); | ||||
|     error_code = ota::OTA_RESPONSE_ERROR_MAGIC; | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|   } | ||||
| @@ -153,7 +207,7 @@ void ESPHomeOTAComponent::handle_() { | ||||
|  | ||||
|   // Read features - 1 byte | ||||
|   if (!this->readall_(buf, 1)) { | ||||
|     ESP_LOGW(TAG, "Reading features failed"); | ||||
|     this->log_read_error_("features"); | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|   } | ||||
|   ota_features = buf[0];  // NOLINT | ||||
| @@ -232,7 +286,7 @@ void ESPHomeOTAComponent::handle_() { | ||||
|  | ||||
|   // Read size, 4 bytes MSB first | ||||
|   if (!this->readall_(buf, 4)) { | ||||
|     ESP_LOGW(TAG, "Reading size failed"); | ||||
|     this->log_read_error_("size"); | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|   } | ||||
|   ota_size = 0; | ||||
| @@ -242,6 +296,17 @@ void ESPHomeOTAComponent::handle_() { | ||||
|   } | ||||
|   ESP_LOGV(TAG, "Size is %u bytes", ota_size); | ||||
|  | ||||
|   // Now that we've passed authentication and are actually | ||||
|   // starting the update, set the warning status and notify | ||||
|   // listeners. This ensures that port scanners do not | ||||
|   // accidentally trigger the update process. | ||||
|   this->log_start_("update"); | ||||
|   this->status_set_warning(); | ||||
| #ifdef USE_OTA_STATE_CALLBACK | ||||
|   this->state_callback_.call(ota::OTA_STARTED, 0.0f, 0); | ||||
| #endif | ||||
|  | ||||
|   // This will block for a few seconds as it locks flash | ||||
|   error_code = backend->begin(ota_size); | ||||
|   if (error_code != ota::OTA_RESPONSE_OK) | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
| @@ -253,7 +318,7 @@ void ESPHomeOTAComponent::handle_() { | ||||
|  | ||||
|   // Read binary MD5, 32 bytes | ||||
|   if (!this->readall_(buf, 32)) { | ||||
|     ESP_LOGW(TAG, "Reading binary MD5 checksum failed"); | ||||
|     this->log_read_error_("MD5 checksum"); | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|   } | ||||
|   sbuf[32] = '\0'; | ||||
| @@ -270,23 +335,22 @@ void ESPHomeOTAComponent::handle_() { | ||||
|     ssize_t read = this->client_->read(buf, requested); | ||||
|     if (read == -1) { | ||||
|       if (errno == EAGAIN || errno == EWOULDBLOCK) { | ||||
|         App.feed_wdt(); | ||||
|         delay(1); | ||||
|         this->yield_and_feed_watchdog_(); | ||||
|         continue; | ||||
|       } | ||||
|       ESP_LOGW(TAG, "Error receiving data for update, errno %d", errno); | ||||
|       ESP_LOGW(TAG, "Read error, errno %d", errno); | ||||
|       goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|     } else if (read == 0) { | ||||
|       // $ man recv | ||||
|       // "When  a  stream socket peer has performed an orderly shutdown, the return value will | ||||
|       // be 0 (the traditional "end-of-file" return)." | ||||
|       ESP_LOGW(TAG, "Remote end closed connection"); | ||||
|       ESP_LOGW(TAG, "Remote closed connection"); | ||||
|       goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|     } | ||||
|  | ||||
|     error_code = backend->write(buf, read); | ||||
|     if (error_code != ota::OTA_RESPONSE_OK) { | ||||
|       ESP_LOGW(TAG, "Error writing binary data to flash!, error_code: %d", error_code); | ||||
|       ESP_LOGW(TAG, "Flash write error, code: %d", error_code); | ||||
|       goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|     } | ||||
|     total += read; | ||||
| @@ -307,8 +371,7 @@ void ESPHomeOTAComponent::handle_() { | ||||
|       this->state_callback_.call(ota::OTA_IN_PROGRESS, percentage, 0); | ||||
| #endif | ||||
|       // feed watchdog and give other tasks a chance to run | ||||
|       App.feed_wdt(); | ||||
|       yield(); | ||||
|       this->yield_and_feed_watchdog_(); | ||||
|     } | ||||
|   } | ||||
|  | ||||
| @@ -318,7 +381,7 @@ void ESPHomeOTAComponent::handle_() { | ||||
|  | ||||
|   error_code = backend->end(); | ||||
|   if (error_code != ota::OTA_RESPONSE_OK) { | ||||
|     ESP_LOGW(TAG, "Error ending update! error_code: %d", error_code); | ||||
|     ESP_LOGW(TAG, "Error ending update! code: %d", error_code); | ||||
|     goto error;  // NOLINT(cppcoreguidelines-avoid-goto) | ||||
|   } | ||||
|  | ||||
| @@ -328,12 +391,11 @@ void ESPHomeOTAComponent::handle_() { | ||||
|  | ||||
|   // Read ACK | ||||
|   if (!this->readall_(buf, 1) || buf[0] != ota::OTA_RESPONSE_OK) { | ||||
|     ESP_LOGW(TAG, "Reading back acknowledgement failed"); | ||||
|     this->log_read_error_("ack"); | ||||
|     // do not go to error, this is not fatal | ||||
|   } | ||||
|  | ||||
|   this->client_->close(); | ||||
|   this->client_ = nullptr; | ||||
|   this->cleanup_connection_(); | ||||
|   delay(10); | ||||
|   ESP_LOGI(TAG, "Update complete"); | ||||
|   this->status_clear_warning(); | ||||
| @@ -346,8 +408,7 @@ void ESPHomeOTAComponent::handle_() { | ||||
| error: | ||||
|   buf[0] = static_cast<uint8_t>(error_code); | ||||
|   this->writeall_(buf, 1); | ||||
|   this->client_->close(); | ||||
|   this->client_ = nullptr; | ||||
|   this->cleanup_connection_(); | ||||
|  | ||||
|   if (backend != nullptr && update_started) { | ||||
|     backend->abort(); | ||||
| @@ -364,28 +425,24 @@ bool ESPHomeOTAComponent::readall_(uint8_t *buf, size_t len) { | ||||
|   uint32_t at = 0; | ||||
|   while (len - at > 0) { | ||||
|     uint32_t now = millis(); | ||||
|     if (now - start > 1000) { | ||||
|       ESP_LOGW(TAG, "Timed out reading %d bytes of data", len); | ||||
|     if (now - start > OTA_SOCKET_TIMEOUT_DATA) { | ||||
|       ESP_LOGW(TAG, "Timeout reading %d bytes", len); | ||||
|       return false; | ||||
|     } | ||||
|  | ||||
|     ssize_t read = this->client_->read(buf + at, len - at); | ||||
|     if (read == -1) { | ||||
|       if (errno == EAGAIN || errno == EWOULDBLOCK) { | ||||
|         App.feed_wdt(); | ||||
|         delay(1); | ||||
|         continue; | ||||
|       if (errno != EAGAIN && errno != EWOULDBLOCK) { | ||||
|         ESP_LOGW(TAG, "Error reading %d bytes, errno %d", len, errno); | ||||
|         return false; | ||||
|       } | ||||
|       ESP_LOGW(TAG, "Failed to read %d bytes of data, errno %d", len, errno); | ||||
|       return false; | ||||
|     } else if (read == 0) { | ||||
|       ESP_LOGW(TAG, "Remote closed connection"); | ||||
|       return false; | ||||
|     } else { | ||||
|       at += read; | ||||
|     } | ||||
|     App.feed_wdt(); | ||||
|     delay(1); | ||||
|     this->yield_and_feed_watchdog_(); | ||||
|   } | ||||
|  | ||||
|   return true; | ||||
| @@ -395,25 +452,21 @@ bool ESPHomeOTAComponent::writeall_(const uint8_t *buf, size_t len) { | ||||
|   uint32_t at = 0; | ||||
|   while (len - at > 0) { | ||||
|     uint32_t now = millis(); | ||||
|     if (now - start > 1000) { | ||||
|       ESP_LOGW(TAG, "Timed out writing %d bytes of data", len); | ||||
|     if (now - start > OTA_SOCKET_TIMEOUT_DATA) { | ||||
|       ESP_LOGW(TAG, "Timeout writing %d bytes", len); | ||||
|       return false; | ||||
|     } | ||||
|  | ||||
|     ssize_t written = this->client_->write(buf + at, len - at); | ||||
|     if (written == -1) { | ||||
|       if (errno == EAGAIN || errno == EWOULDBLOCK) { | ||||
|         App.feed_wdt(); | ||||
|         delay(1); | ||||
|         continue; | ||||
|       if (errno != EAGAIN && errno != EWOULDBLOCK) { | ||||
|         ESP_LOGW(TAG, "Error writing %d bytes, errno %d", len, errno); | ||||
|         return false; | ||||
|       } | ||||
|       ESP_LOGW(TAG, "Failed to write %d bytes of data, errno %d", len, errno); | ||||
|       return false; | ||||
|     } else { | ||||
|       at += written; | ||||
|     } | ||||
|     App.feed_wdt(); | ||||
|     delay(1); | ||||
|     this->yield_and_feed_watchdog_(); | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
| @@ -421,5 +474,25 @@ bool ESPHomeOTAComponent::writeall_(const uint8_t *buf, size_t len) { | ||||
| float ESPHomeOTAComponent::get_setup_priority() const { return setup_priority::AFTER_WIFI; } | ||||
| uint16_t ESPHomeOTAComponent::get_port() const { return this->port_; } | ||||
| void ESPHomeOTAComponent::set_port(uint16_t port) { this->port_ = port; } | ||||
|  | ||||
| /// Log a warning for a failed socket operation; `msg` names the operation and the current errno is appended. | ||||
| void ESPHomeOTAComponent::log_socket_error_(const char *msg) { | ||||
|   ESP_LOGW(TAG, "Socket %s: errno %d", msg, errno); | ||||
| } | ||||
|  | ||||
| /// Log a warning that reading the protocol element named by `what` failed. | ||||
| void ESPHomeOTAComponent::log_read_error_(const char *what) { | ||||
|   ESP_LOGW(TAG, "Read %s failed", what); | ||||
| } | ||||
|  | ||||
| /// Log at debug level that `phase` is starting, together with the peer name reported by the client socket. | ||||
| /// Dereferences client_, so it must only be called while a client is attached. | ||||
| void ESPHomeOTAComponent::log_start_(const char *phase) { | ||||
|   ESP_LOGD(TAG, "Starting %s from %s", phase, | ||||
|            this->client_->getpeername().c_str()); | ||||
| } | ||||
|  | ||||
| /// Close the client socket, release it, and clear the recorded connect time. | ||||
| /// Dereferences client_, so it must only be called while a client is attached. | ||||
| void ESPHomeOTAComponent::cleanup_connection_() { | ||||
|   this->client_->close(); | ||||
|   this->client_.reset(); | ||||
|   // Zero the timestamp used by the handshake timeout check | ||||
|   this->client_connect_time_ = 0; | ||||
| } | ||||
|  | ||||
| /// Feed the watchdog through App.feed_wdt(), then call delay(1). | ||||
| /// Used inside the blocking OTA read/write loops. | ||||
| void ESPHomeOTAComponent::yield_and_feed_watchdog_() { | ||||
|   App.feed_wdt(); | ||||
|   delay(1);  // presumably lets other tasks run for about 1 ms - confirm against the platform's delay() | ||||
| } | ||||
|  | ||||
| }  // namespace esphome | ||||
| #endif | ||||
|   | ||||
| @@ -27,15 +27,22 @@ class ESPHomeOTAComponent : public ota::OTAComponent { | ||||
|   uint16_t get_port() const; | ||||
|  | ||||
|  protected: | ||||
|   void handle_(); | ||||
|   void handle_handshake_(); | ||||
|   void handle_data_(); | ||||
|   bool readall_(uint8_t *buf, size_t len); | ||||
|   bool writeall_(const uint8_t *buf, size_t len); | ||||
|   void log_socket_error_(const char *msg); | ||||
|   void log_read_error_(const char *what); | ||||
|   void log_start_(const char *phase); | ||||
|   void cleanup_connection_(); | ||||
|   void yield_and_feed_watchdog_(); | ||||
|  | ||||
| #ifdef USE_OTA_PASSWORD | ||||
|   std::string password_; | ||||
| #endif  // USE_OTA_PASSWORD | ||||
|  | ||||
|   uint16_t port_; | ||||
|   uint32_t client_connect_time_{0}; | ||||
|  | ||||
|   std::unique_ptr<socket::Socket> server_; | ||||
|   std::unique_ptr<socket::Socket> client_; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user