From 8464307a43f07eb1fa5fc95ac01bd8c4fc69e2b1 Mon Sep 17 00:00:00 2001
From: "J. Nick Koston"
Date: Wed, 7 Jan 2026 08:23:50 -1000
Subject: [PATCH] [api] Coalesce log packets to reduce buffer pressure and prevent dropped state updates (#13026)

---
 esphome/components/api/api_connection.cpp | 22 ++++++++++++++++++-
 esphome/components/api/api_frame_helper.h | 26 ++++++++++++++++++++++-
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/esphome/components/api/api_connection.cpp b/esphome/components/api/api_connection.cpp
index 649ed31283..fb3548d117 100644
--- a/esphome/components/api/api_connection.cpp
+++ b/esphome/components/api/api_connection.cpp
@@ -1819,10 +1819,30 @@ bool APIConnection::try_to_clear_buffer(bool log_out_of_space) {
   return false;
 }
 bool APIConnection::send_buffer(ProtoWriteBuffer buffer, uint8_t message_type) {
-  if (!this->try_to_clear_buffer(message_type != SubscribeLogsResponse::MESSAGE_TYPE)) {  // SubscribeLogsResponse
+  const bool is_log_message = (message_type == SubscribeLogsResponse::MESSAGE_TYPE);
+
+  if (!this->try_to_clear_buffer(!is_log_message)) {
     return false;
   }
 
+  // Toggle Nagle's algorithm based on message type to prevent log messages from
+  // filling the TCP send buffer and crowding out important state updates.
+  //
+  // This honors the `no_delay` proto option - SubscribeLogsResponse is the only
+  // message with `option (no_delay) = false;` in api.proto, indicating it should
+  // allow Nagle coalescing. This option existed since 2019 but was never implemented.
+  //
+  // - Log messages: Enable Nagle (NODELAY=false) so small log packets coalesce
+  //   into fewer, larger packets. They flush naturally via TCP delayed ACK timer
+  //   (~200ms), buffer filling, or when a state update triggers a flush.
+  //
+  // - All other messages (state updates, responses): Disable Nagle (NODELAY=true)
+  //   for immediate delivery. These are time-sensitive and should not be delayed.
+  //
+  // This must be done proactively BEFORE the buffer fills up - checking buffer
+  // state here would be too late since we'd already be in a degraded state.
+  this->helper_->set_nodelay(!is_log_message);
+
   APIError err = this->helper_->write_protobuf_packet(message_type, buffer);
   if (err == APIError::WOULD_BLOCK)
     return false;
diff --git a/esphome/components/api/api_frame_helper.h b/esphome/components/api/api_frame_helper.h
index 76a93d094e..27ec1ff915 100644
--- a/esphome/components/api/api_frame_helper.h
+++ b/esphome/components/api/api_frame_helper.h
@@ -120,6 +120,27 @@ class APIFrameHelper {
     }
     return APIError::OK;
   }
+  /// Toggle TCP_NODELAY socket option to control Nagle's algorithm.
+  ///
+  /// This is used to allow log messages to coalesce (Nagle enabled) while keeping
+  /// state updates low-latency (NODELAY enabled). Without this, many small log
+  /// packets fill the TCP send buffer, crowding out important state updates.
+  ///
+  /// State is tracked to minimize setsockopt() overhead - on lwip_raw (ESP8266/RP2040)
+  /// this is just a boolean assignment; on other platforms it's a lightweight syscall.
+  ///
+  /// @param enable true to enable NODELAY (disable Nagle), false to enable Nagle
+  /// @return true if successful or already in desired state
+  bool set_nodelay(bool enable) {
+    if (this->nodelay_enabled_ == enable)
+      return true;
+    int val = enable ? 1 : 0;
+    int err = this->socket_->setsockopt(IPPROTO_TCP, TCP_NODELAY, &val, sizeof(int));
+    if (err == 0) {
+      this->nodelay_enabled_ = enable;
+    }
+    return err == 0;
+  }
   virtual APIError write_protobuf_packet(uint8_t type, ProtoWriteBuffer buffer) = 0;
   // Write multiple protobuf messages in a single operation
   // messages contains (message_type, offset, length) for each message in the buffer
@@ -208,7 +229,10 @@ class APIFrameHelper {
   uint8_t tx_buf_head_{0};
   uint8_t tx_buf_tail_{0};
   uint8_t tx_buf_count_{0};
-  // 8 bytes total, 0 bytes padding
+  // Tracks TCP_NODELAY state to minimize setsockopt() calls. Initialized to true
+  // since init_common_() enables NODELAY. Used by set_nodelay() to allow log
+  // messages to coalesce while keeping state updates low-latency.
+  bool nodelay_enabled_{true};
 
   // Common initialization for both plaintext and noise protocols
   APIError init_common_();