1
0
mirror of https://github.com/esphome/esphome.git synced 2025-09-02 19:32:19 +01:00

[esphome] Fix OTA watchdog resets during port scanning and network delays (#10152)

This commit is contained in:
J. Nick Koston
2025-08-11 15:37:01 -05:00
committed by GitHub
parent 45b88f2da9
commit 4c2874a32b
2 changed files with 158 additions and 78 deletions

View File

@@ -19,7 +19,9 @@
namespace esphome {
static const char *const TAG = "esphome.ota";
static constexpr u_int16_t OTA_BLOCK_SIZE = 8192;
static constexpr uint16_t OTA_BLOCK_SIZE = 8192;
static constexpr uint32_t OTA_SOCKET_TIMEOUT_HANDSHAKE = 10000; // milliseconds for initial handshake
static constexpr uint32_t OTA_SOCKET_TIMEOUT_DATA = 90000; // milliseconds for data transfer
void ESPHomeOTAComponent::setup() {
#ifdef USE_OTA_STATE_CALLBACK
@@ -28,19 +30,19 @@ void ESPHomeOTAComponent::setup() {
this->server_ = socket::socket_ip_loop_monitored(SOCK_STREAM, 0); // monitored for incoming connections
if (this->server_ == nullptr) {
ESP_LOGW(TAG, "Could not create socket");
this->log_socket_error_("creation");
this->mark_failed();
return;
}
int enable = 1;
int err = this->server_->setsockopt(SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int));
if (err != 0) {
ESP_LOGW(TAG, "Socket unable to set reuseaddr: errno %d", err);
this->log_socket_error_("reuseaddr");
// we can still continue
}
err = this->server_->setblocking(false);
if (err != 0) {
ESP_LOGW(TAG, "Socket unable to set nonblocking mode: errno %d", err);
this->log_socket_error_("non-blocking");
this->mark_failed();
return;
}
@@ -49,21 +51,21 @@ void ESPHomeOTAComponent::setup() {
socklen_t sl = socket::set_sockaddr_any((struct sockaddr *) &server, sizeof(server), this->port_);
if (sl == 0) {
ESP_LOGW(TAG, "Socket unable to set sockaddr: errno %d", errno);
this->log_socket_error_("set sockaddr");
this->mark_failed();
return;
}
err = this->server_->bind((struct sockaddr *) &server, sizeof(server));
if (err != 0) {
ESP_LOGW(TAG, "Socket unable to bind: errno %d", errno);
this->log_socket_error_("bind");
this->mark_failed();
return;
}
err = this->server_->listen(4);
if (err != 0) {
ESP_LOGW(TAG, "Socket unable to listen: errno %d", errno);
this->log_socket_error_("listen");
this->mark_failed();
return;
}
@@ -83,17 +85,93 @@ void ESPHomeOTAComponent::dump_config() {
}
void ESPHomeOTAComponent::loop() {
// Skip handle_() call if no client connected and no incoming connections
// Skip handle_handshake_() call if no client connected and no incoming connections
// This optimization reduces idle loop overhead when OTA is not active
// Note: No need to check server_ for null as the component is marked failed in setup() if server_ creation fails
// Note: No need to check server_ for null as the component is marked failed in setup()
// if server_ creation fails
if (this->client_ != nullptr || this->server_->ready()) {
this->handle_();
this->handle_handshake_();
}
}
static const uint8_t FEATURE_SUPPORTS_COMPRESSION = 0x01;
void ESPHomeOTAComponent::handle_() {
void ESPHomeOTAComponent::handle_handshake_() {
/// Handle the initial OTA handshake.
///
/// This method is non-blocking and will return immediately if no data is available.
/// It waits for the first magic byte (0x6C) before proceeding to handle_data_().
/// A 10-second timeout is enforced from initial connection.
if (this->client_ == nullptr) {
// We already checked server_->ready() in loop(), so we can accept directly
struct sockaddr_storage source_addr;
socklen_t addr_len = sizeof(source_addr);
int enable = 1;
this->client_ = this->server_->accept_loop_monitored((struct sockaddr *) &source_addr, &addr_len);
if (this->client_ == nullptr)
return;
int err = this->client_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(int));
if (err != 0) {
this->log_socket_error_("nodelay");
this->cleanup_connection_();
return;
}
err = this->client_->setblocking(false);
if (err != 0) {
this->log_socket_error_("non-blocking");
this->cleanup_connection_();
return;
}
this->log_start_("handshake");
this->client_connect_time_ = App.get_loop_component_start_time();
}
// Check for handshake timeout
uint32_t now = App.get_loop_component_start_time();
if (now - this->client_connect_time_ > OTA_SOCKET_TIMEOUT_HANDSHAKE) {
ESP_LOGW(TAG, "Handshake timeout");
this->cleanup_connection_();
return;
}
// Try to read first byte of magic bytes
uint8_t first_byte;
ssize_t read = this->client_->read(&first_byte, 1);
if (read == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
return; // No data yet, try again next loop
}
if (read <= 0) {
// Error or connection closed
if (read == -1) {
this->log_socket_error_("reading first byte");
} else {
ESP_LOGW(TAG, "Remote closed during handshake");
}
this->cleanup_connection_();
return;
}
// Got first byte, check if it's the magic byte
if (first_byte != 0x6C) {
ESP_LOGW(TAG, "Invalid initial byte: 0x%02X", first_byte);
this->cleanup_connection_();
return;
}
// First byte is valid, continue with data handling
this->handle_data_();
}
void ESPHomeOTAComponent::handle_data_() {
/// Handle the OTA data transfer and update process.
///
/// This method is blocking and will not return until the OTA update completes,
/// fails, or times out. It handles authentication, receives the firmware data,
/// writes it to flash, and reboots on success.
ota::OTAResponseTypes error_code = ota::OTA_RESPONSE_ERROR_UNKNOWN;
bool update_started = false;
size_t total = 0;
@@ -108,38 +186,14 @@ void ESPHomeOTAComponent::handle_() {
size_t size_acknowledged = 0;
#endif
if (this->client_ == nullptr) {
// We already checked server_->ready() in loop(), so we can accept directly
struct sockaddr_storage source_addr;
socklen_t addr_len = sizeof(source_addr);
this->client_ = this->server_->accept((struct sockaddr *) &source_addr, &addr_len);
if (this->client_ == nullptr)
return;
}
int enable = 1;
int err = this->client_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(int));
if (err != 0) {
ESP_LOGW(TAG, "Socket could not enable TCP nodelay, errno %d", errno);
this->client_->close();
this->client_ = nullptr;
return;
}
ESP_LOGD(TAG, "Starting update from %s", this->client_->getpeername().c_str());
this->status_set_warning();
#ifdef USE_OTA_STATE_CALLBACK
this->state_callback_.call(ota::OTA_STARTED, 0.0f, 0);
#endif
if (!this->readall_(buf, 5)) {
ESP_LOGW(TAG, "Reading magic bytes failed");
// Read remaining 4 bytes of magic (we already read the first byte 0x6C in handle_handshake_)
if (!this->readall_(buf, 4)) {
this->log_read_error_("magic bytes");
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
// 0x6C, 0x26, 0xF7, 0x5C, 0x45
if (buf[0] != 0x6C || buf[1] != 0x26 || buf[2] != 0xF7 || buf[3] != 0x5C || buf[4] != 0x45) {
ESP_LOGW(TAG, "Magic bytes do not match! 0x%02X-0x%02X-0x%02X-0x%02X-0x%02X", buf[0], buf[1], buf[2], buf[3],
buf[4]);
// Check remaining magic bytes: 0x26, 0xF7, 0x5C, 0x45
if (buf[0] != 0x26 || buf[1] != 0xF7 || buf[2] != 0x5C || buf[3] != 0x45) {
ESP_LOGW(TAG, "Magic bytes mismatch! 0x6C-0x%02X-0x%02X-0x%02X-0x%02X", buf[0], buf[1], buf[2], buf[3]);
error_code = ota::OTA_RESPONSE_ERROR_MAGIC;
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
@@ -153,7 +207,7 @@ void ESPHomeOTAComponent::handle_() {
// Read features - 1 byte
if (!this->readall_(buf, 1)) {
ESP_LOGW(TAG, "Reading features failed");
this->log_read_error_("features");
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
ota_features = buf[0]; // NOLINT
@@ -232,7 +286,7 @@ void ESPHomeOTAComponent::handle_() {
// Read size, 4 bytes MSB first
if (!this->readall_(buf, 4)) {
ESP_LOGW(TAG, "Reading size failed");
this->log_read_error_("size");
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
ota_size = 0;
@@ -242,6 +296,17 @@ void ESPHomeOTAComponent::handle_() {
}
ESP_LOGV(TAG, "Size is %u bytes", ota_size);
// Now that we've passed authentication and are actually
// starting the update, set the warning status and notify
// listeners. This ensures that port scanners do not
// accidentally trigger the update process.
this->log_start_("update");
this->status_set_warning();
#ifdef USE_OTA_STATE_CALLBACK
this->state_callback_.call(ota::OTA_STARTED, 0.0f, 0);
#endif
// This will block for a few seconds as it locks flash
error_code = backend->begin(ota_size);
if (error_code != ota::OTA_RESPONSE_OK)
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
@@ -253,7 +318,7 @@ void ESPHomeOTAComponent::handle_() {
// Read binary MD5, 32 bytes
if (!this->readall_(buf, 32)) {
ESP_LOGW(TAG, "Reading binary MD5 checksum failed");
this->log_read_error_("MD5 checksum");
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
sbuf[32] = '\0';
@@ -270,23 +335,22 @@ void ESPHomeOTAComponent::handle_() {
ssize_t read = this->client_->read(buf, requested);
if (read == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
App.feed_wdt();
delay(1);
this->yield_and_feed_watchdog_();
continue;
}
ESP_LOGW(TAG, "Error receiving data for update, errno %d", errno);
ESP_LOGW(TAG, "Read error, errno %d", errno);
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
} else if (read == 0) {
// $ man recv
// "When a stream socket peer has performed an orderly shutdown, the return value will
// be 0 (the traditional "end-of-file" return)."
ESP_LOGW(TAG, "Remote end closed connection");
ESP_LOGW(TAG, "Remote closed connection");
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
error_code = backend->write(buf, read);
if (error_code != ota::OTA_RESPONSE_OK) {
ESP_LOGW(TAG, "Error writing binary data to flash!, error_code: %d", error_code);
ESP_LOGW(TAG, "Flash write error, code: %d", error_code);
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
total += read;
@@ -307,8 +371,7 @@ void ESPHomeOTAComponent::handle_() {
this->state_callback_.call(ota::OTA_IN_PROGRESS, percentage, 0);
#endif
// feed watchdog and give other tasks a chance to run
App.feed_wdt();
yield();
this->yield_and_feed_watchdog_();
}
}
@@ -318,7 +381,7 @@ void ESPHomeOTAComponent::handle_() {
error_code = backend->end();
if (error_code != ota::OTA_RESPONSE_OK) {
ESP_LOGW(TAG, "Error ending update! error_code: %d", error_code);
ESP_LOGW(TAG, "Error ending update! code: %d", error_code);
goto error; // NOLINT(cppcoreguidelines-avoid-goto)
}
@@ -328,12 +391,11 @@ void ESPHomeOTAComponent::handle_() {
// Read ACK
if (!this->readall_(buf, 1) || buf[0] != ota::OTA_RESPONSE_OK) {
ESP_LOGW(TAG, "Reading back acknowledgement failed");
this->log_read_error_("ack");
// do not go to error, this is not fatal
}
this->client_->close();
this->client_ = nullptr;
this->cleanup_connection_();
delay(10);
ESP_LOGI(TAG, "Update complete");
this->status_clear_warning();
@@ -346,8 +408,7 @@ void ESPHomeOTAComponent::handle_() {
error:
buf[0] = static_cast<uint8_t>(error_code);
this->writeall_(buf, 1);
this->client_->close();
this->client_ = nullptr;
this->cleanup_connection_();
if (backend != nullptr && update_started) {
backend->abort();
@@ -364,28 +425,24 @@ bool ESPHomeOTAComponent::readall_(uint8_t *buf, size_t len) {
uint32_t at = 0;
while (len - at > 0) {
uint32_t now = millis();
if (now - start > 1000) {
ESP_LOGW(TAG, "Timed out reading %d bytes of data", len);
if (now - start > OTA_SOCKET_TIMEOUT_DATA) {
ESP_LOGW(TAG, "Timeout reading %d bytes", len);
return false;
}
ssize_t read = this->client_->read(buf + at, len - at);
if (read == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
App.feed_wdt();
delay(1);
continue;
}
ESP_LOGW(TAG, "Failed to read %d bytes of data, errno %d", len, errno);
if (errno != EAGAIN && errno != EWOULDBLOCK) {
ESP_LOGW(TAG, "Error reading %d bytes, errno %d", len, errno);
return false;
}
} else if (read == 0) {
ESP_LOGW(TAG, "Remote closed connection");
return false;
} else {
at += read;
}
App.feed_wdt();
delay(1);
this->yield_and_feed_watchdog_();
}
return true;
@@ -395,25 +452,21 @@ bool ESPHomeOTAComponent::writeall_(const uint8_t *buf, size_t len) {
uint32_t at = 0;
while (len - at > 0) {
uint32_t now = millis();
if (now - start > 1000) {
ESP_LOGW(TAG, "Timed out writing %d bytes of data", len);
if (now - start > OTA_SOCKET_TIMEOUT_DATA) {
ESP_LOGW(TAG, "Timeout writing %d bytes", len);
return false;
}
ssize_t written = this->client_->write(buf + at, len - at);
if (written == -1) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
App.feed_wdt();
delay(1);
continue;
}
ESP_LOGW(TAG, "Failed to write %d bytes of data, errno %d", len, errno);
if (errno != EAGAIN && errno != EWOULDBLOCK) {
ESP_LOGW(TAG, "Error writing %d bytes, errno %d", len, errno);
return false;
}
} else {
at += written;
}
App.feed_wdt();
delay(1);
this->yield_and_feed_watchdog_();
}
return true;
}
@@ -421,5 +474,25 @@ bool ESPHomeOTAComponent::writeall_(const uint8_t *buf, size_t len) {
/// Setup priority of this component: setup_priority::AFTER_WIFI.
float ESPHomeOTAComponent::get_setup_priority() const {
  return setup_priority::AFTER_WIFI;
}
/// @return the port number currently stored in port_
uint16_t ESPHomeOTAComponent::get_port() const {
  return this->port_;
}
/// Store the port number in port_.
/// @param port port number to store
void ESPHomeOTAComponent::set_port(uint16_t port) {
  this->port_ = port;
}
/// Emit a warning for a failed socket operation, including the current errno.
/// Call it right after the failing call, before anything else can change errno.
/// @param msg short label of the operation that failed (e.g. "bind", "listen")
void ESPHomeOTAComponent::log_socket_error_(const char *msg) {
  ESP_LOGW(TAG, "Socket %s: errno %d", msg, errno);
}
/// Emit a warning that reading one item of the OTA exchange failed.
/// @param what label of the item that could not be read (e.g. "features", "size")
void ESPHomeOTAComponent::log_read_error_(const char *what) {
  ESP_LOGW(TAG, "Read %s failed", what);
}
/// Log at debug level that a phase is starting, together with the client's peer name.
/// Dereferences client_, so a client must be attached when this is called.
/// @param phase label of the phase being started (callers pass "handshake" or "update")
void ESPHomeOTAComponent::log_start_(const char *phase) {
ESP_LOGD(TAG, "Starting %s from %s", phase, this->client_->getpeername().c_str());
}
/// Tear down the current client connection.
/// Dereferences client_, so a client must be attached when this is called.
void ESPHomeOTAComponent::cleanup_connection_() {
  // Close the socket first, then release ownership and clear the connect timestamp
  this->client_->close();
  this->client_.reset();
  this->client_connect_time_ = 0;
}
/// Feed the watchdog, then pause briefly so other tasks get a chance to run.
/// Called from the blocking read/write loops between socket attempts.
void ESPHomeOTAComponent::yield_and_feed_watchdog_() {
App.feed_wdt();
// delay(1) rather than a bare yield()
delay(1);
}
} // namespace esphome
#endif

View File

@@ -27,15 +27,22 @@ class ESPHomeOTAComponent : public ota::OTAComponent {
uint16_t get_port() const;
protected:
void handle_();
void handle_handshake_();
void handle_data_();
bool readall_(uint8_t *buf, size_t len);
bool writeall_(const uint8_t *buf, size_t len);
void log_socket_error_(const char *msg);
void log_read_error_(const char *what);
void log_start_(const char *phase);
void cleanup_connection_();
void yield_and_feed_watchdog_();
#ifdef USE_OTA_PASSWORD
std::string password_;
#endif // USE_OTA_PASSWORD
uint16_t port_;
uint32_t client_connect_time_{0};
std::unique_ptr<socket::Socket> server_;
std::unique_ptr<socket::Socket> client_;