diff --git a/esphome/components/http_request/http_request_arduino.cpp b/esphome/components/http_request/http_request_arduino.cpp
index 2f12b58766..aee1f651bf 100644
--- a/esphome/components/http_request/http_request_arduino.cpp
+++ b/esphome/components/http_request/http_request_arduino.cpp
@@ -133,20 +133,10 @@ std::shared_ptr HttpRequestArduino::perform(const std::string &ur
 
   // HTTPClient::getSize() returns -1 for chunked transfer encoding (no Content-Length).
   // When cast to size_t, -1 becomes SIZE_MAX (4294967295 on 32-bit).
-  // The read() method handles this: bytes_read_ can never reach SIZE_MAX, so the
-  // early return check (bytes_read_ >= content_length) will never trigger.
-  //
-  // TODO: Chunked transfer encoding is NOT properly supported on Arduino.
-  // The implementation in #7884 was incomplete - it only works correctly on ESP-IDF where
-  // esp_http_client_read() decodes chunks internally. On Arduino, using getStreamPtr()
-  // returns raw TCP data with chunk framing (e.g., "12a\r\n{json}\r\n0\r\n\r\n") instead
-  // of decoded content. This wasn't noticed because requests would complete and payloads
-  // were only examined on IDF. The long transfer times were also masked by the misleading
-  // "HTTP on Arduino version >= 3.1 is **very** slow" warning above. This causes two issues:
-  // 1. Response body is corrupted - contains chunk size headers mixed with data
-  // 2. Cannot detect end of transfer - connection stays open (keep-alive), causing timeout
-  // The proper fix would be to use getString() for chunked responses, which decodes chunks
-  // internally, but this buffers the entire response in memory.
+  // The read() method uses a chunked transfer encoding decoder (read_chunked_) to strip
+  // chunk framing and deliver only decoded content. When the final 0-size chunk is received,
+  // is_chunked_ is cleared and content_length is set to the actual decoded size, so
+  // is_read_complete() returns true and callers exit their read loops correctly.
   int content_length = container->client_.getSize();
   ESP_LOGD(TAG, "Content-Length: %d", content_length);
   container->content_length = (size_t) content_length;
@@ -174,6 +164,10 @@ std::shared_ptr HttpRequestArduino::perform(const std::string &ur
 // > 0: bytes read
 // 0: no data yet, retry <-- NOTE: 0 means retry, NOT EOF!
 // < 0: error/connection closed <-- connection closed returns -1, not 0
+//
+// For chunked transfer encoding, read_chunked_() decodes chunk framing and delivers
+// only the payload data. When the final 0-size chunk is received, it clears is_chunked_
+// and sets content_length = bytes_read_ so is_read_complete() returns true.
 int HttpContainerArduino::read(uint8_t *buf, size_t max_len) {
   const uint32_t start = millis();
   watchdog::WatchdogManager wdm(this->parent_->get_watchdog_timeout());
@@ -184,24 +178,42 @@ int HttpContainerArduino::read(uint8_t *buf, size_t max_len) {
     return HTTP_ERROR_CONNECTION_CLOSED;
   }
 
+  if (this->is_chunked_) {
+    int result = this->read_chunked_(buf, max_len, stream_ptr);
+    this->duration_ms += (millis() - start);
+    if (result > 0) {
+      return result;
+    }
+    // result <= 0: check for completion or errors
+    if (this->is_read_complete()) {
+      return 0;  // Chunked transfer complete (final 0-size chunk received)
+    }
+    if (result < 0) {
+      return result;  // Stream error during chunk decoding
+    }
+    // read_chunked_ returned 0: no data was available (available() was 0).
+    // This happens when the TCP buffer is empty - either more data is in flight,
+    // or the connection dropped. Arduino's connected() returns false only when
+    // both the remote has closed AND the receive buffer is empty, so any buffered
+    // data is fully drained before we report the drop.
+    if (!stream_ptr->connected()) {
+      return HTTP_ERROR_CONNECTION_CLOSED;
+    }
+    return 0;  // No data yet, caller should retry
+  }
+
+  // Non-chunked path
   int available_data = stream_ptr->available();
-  // For chunked transfer encoding, HTTPClient::getSize() returns -1, which becomes SIZE_MAX when
-  // cast to size_t. SIZE_MAX - bytes_read_ is still huge, so it won't limit the read.
   size_t remaining = (this->content_length > 0) ? (this->content_length - this->bytes_read_) : max_len;
   int bufsize = std::min(max_len, std::min(remaining, (size_t) available_data));
 
   if (bufsize == 0) {
     this->duration_ms += (millis() - start);
-    // Check if we've read all expected content (non-chunked only)
-    // For chunked encoding (content_length == SIZE_MAX), is_read_complete() returns false
     if (this->is_read_complete()) {
       return 0;  // All content read successfully
     }
-    // No data available - check if connection is still open
-    // For chunked encoding, !connected() after reading means EOF (all chunks received)
-    // For known content_length with bytes_read_ < content_length, it means connection dropped
     if (!stream_ptr->connected()) {
-      return HTTP_ERROR_CONNECTION_CLOSED;  // Connection closed or EOF for chunked
+      return HTTP_ERROR_CONNECTION_CLOSED;
     }
     return 0;  // No data yet, caller should retry
   }
@@ -215,6 +227,143 @@ int HttpContainerArduino::read(uint8_t *buf, size_t max_len) {
   return read_len;
 }
 
+void HttpContainerArduino::chunk_header_complete_() {
+  if (this->chunk_remaining_ == 0) {
+    this->chunk_state_ = ChunkedState::CHUNK_TRAILER;
+    this->chunk_remaining_ = 1;  // repurpose as at-start-of-line flag
+  } else {
+    this->chunk_state_ = ChunkedState::CHUNK_DATA;
+  }
+}
+
+// Chunked transfer encoding decoder
+//
+// On Arduino, getStreamPtr() returns raw TCP data. For chunked responses, this includes
+// chunk framing (size headers, CRLF delimiters) mixed with payload data. This decoder
+// strips the framing and delivers only decoded content to the caller.
+//
+// Chunk format (RFC 9112 Section 7.1):
+//   <hex-size>[;extension]\r\n
+//   <chunk-data>\r\n
+//   ...
+//   0\r\n
+//   [trailer-field\r\n]*
+//   \r\n
+//
+// Non-blocking: only processes bytes already in the TCP receive buffer.
+// State (chunk_state_, chunk_remaining_) is preserved between calls, so partial
+// chunk headers or split \r\n sequences resume correctly on the next call.
+// Framing bytes (hex sizes, \r\n) may be consumed without producing output;
+// the caller sees 0 and retries via the normal read timeout logic.
+//
+// WiFiClient::read() returns -1 on error despite available() > 0 (connection reset
+// between check and read). On any stream error (c < 0 or readBytes <= 0), we return
+// already-decoded data if any; otherwise HTTP_ERROR_CONNECTION_CLOSED. The error
+// will surface again on the next call since the stream stays broken.
+//
+// Returns: > 0 decoded bytes, 0 no data available, < 0 error
+int HttpContainerArduino::read_chunked_(uint8_t *buf, size_t max_len, WiFiClient *stream) {
+  int total_decoded = 0;
+
+  while (total_decoded < (int) max_len && this->chunk_state_ != ChunkedState::COMPLETE) {
+    // Non-blocking: only process what's already buffered
+    if (stream->available() == 0)
+      break;
+
+    // CHUNK_DATA reads multiple bytes; handle before the single-byte switch
+    if (this->chunk_state_ == ChunkedState::CHUNK_DATA) {
+      // Only read what's available, what fits in buf, and what remains in this chunk
+      size_t to_read =
+          std::min({max_len - (size_t) total_decoded, this->chunk_remaining_, (size_t) stream->available()});
+      if (to_read == 0)
+        break;
+      App.feed_wdt();
+      int read_len = stream->readBytes(buf + total_decoded, to_read);
+      if (read_len <= 0)
+        return total_decoded > 0 ? total_decoded : HTTP_ERROR_CONNECTION_CLOSED;
+      total_decoded += read_len;
+      this->chunk_remaining_ -= read_len;
+      this->bytes_read_ += read_len;
+      if (this->chunk_remaining_ == 0)
+        this->chunk_state_ = ChunkedState::CHUNK_DATA_TRAIL;
+      continue;
+    }
+
+    // All other states consume a single byte
+    int c = stream->read();
+    if (c < 0)
+      return total_decoded > 0 ? total_decoded : HTTP_ERROR_CONNECTION_CLOSED;
+
+    switch (this->chunk_state_) {
+      // Parse hex chunk size, one byte at a time: "<hex-size>[;ext]\r\n"
+      // Note: if no hex digits are parsed (e.g., bare \r\n), chunk_remaining_ stays 0
+      // and is treated as the final chunk. This is intentionally lenient — on embedded
+      // devices, rejecting malformed framing is less useful than terminating cleanly.
+      // Overflow of chunk_remaining_ from extremely long hex strings (>8 digits on
+      // 32-bit) is not checked; >4GB chunks are unrealistic on embedded targets and
+      // would simply cause fewer bytes to be read from that chunk.
+      case ChunkedState::CHUNK_HEADER:
+        if (c == '\n') {
+          // \n terminates the size line; chunk_remaining_ == 0 means last chunk
+          this->chunk_header_complete_();
+        } else {
+          uint8_t hex = parse_hex_char(c);
+          if (hex != INVALID_HEX_CHAR) {
+            this->chunk_remaining_ = (this->chunk_remaining_ << 4) | hex;
+          } else if (c != '\r') {
+            this->chunk_state_ = ChunkedState::CHUNK_HEADER_EXT;  // ';' starts extension, skip to \n
+          }
+        }
+        break;
+
+      // Skip chunk extension bytes until \n (e.g., ";name=value\r\n")
+      case ChunkedState::CHUNK_HEADER_EXT:
+        if (c == '\n') {
+          this->chunk_header_complete_();
+        }
+        break;
+
+      // Consume \r\n trailing each chunk's data
+      case ChunkedState::CHUNK_DATA_TRAIL:
+        if (c == '\n') {
+          this->chunk_state_ = ChunkedState::CHUNK_HEADER;
+          this->chunk_remaining_ = 0;  // reset for next chunk's hex accumulation
+        }
+        // else: \r is consumed silently, next iteration gets \n
+        break;
+
+      // Consume optional trailer headers and terminating empty line after final chunk.
+      // Per RFC 9112 Section 7.1: "0\r\n" is followed by optional "field\r\n" lines
+      // and a final "\r\n". chunk_remaining_ is repurposed as a flag: 1 = at start
+      // of line (may be the empty terminator), 0 = mid-line (reading a trailer field).
+      case ChunkedState::CHUNK_TRAILER:
+        if (c == '\n') {
+          if (this->chunk_remaining_ != 0) {
+            this->chunk_state_ = ChunkedState::COMPLETE;  // Empty line terminates trailers
+          } else {
+            this->chunk_remaining_ = 1;  // End of trailer field, at start of next line
+          }
+        } else if (c != '\r') {
+          this->chunk_remaining_ = 0;  // Non-CRLF char: reading a trailer field
+        }
+        // \r doesn't change the flag — it's part of \r\n line endings
+        break;
+
+      default:
+        break;
+    }
+
+    if (this->chunk_state_ == ChunkedState::COMPLETE) {
+      // Clear chunked flag and set content_length to actual decoded size so
+      // is_read_complete() returns true and callers exit their read loops
+      this->is_chunked_ = false;
+      this->content_length = this->bytes_read_;
+    }
+  }
+
+  return total_decoded;
+}
+
 void HttpContainerArduino::end() {
   watchdog::WatchdogManager wdm(this->parent_->get_watchdog_timeout());
   this->client_.end();
diff --git a/esphome/components/http_request/http_request_arduino.h b/esphome/components/http_request/http_request_arduino.h
index d9b5af9d81..a1084b12d5 100644
--- a/esphome/components/http_request/http_request_arduino.h
+++ b/esphome/components/http_request/http_request_arduino.h
@@ -18,6 +18,17 @@
 namespace esphome::http_request {
 
 class HttpRequestArduino;
+
+/// State machine for decoding chunked transfer encoding on Arduino
+enum class ChunkedState : uint8_t {
+  CHUNK_HEADER,      ///< Reading hex digits of chunk size
+  CHUNK_HEADER_EXT,  ///< Skipping chunk extensions until \n
+  CHUNK_DATA,        ///< Reading chunk data bytes
+  CHUNK_DATA_TRAIL,  ///< Skipping \r\n after chunk data
+  CHUNK_TRAILER,     ///< Consuming trailer headers after final 0-size chunk
+  COMPLETE,          ///< Finished: final chunk and trailers consumed
+};
+
 class HttpContainerArduino : public HttpContainer {
  public:
   int read(uint8_t *buf, size_t max_len) override;
@@ -26,6 +37,13 @@ class HttpContainerArduino : public HttpContainer {
  protected:
   friend class HttpRequestArduino;
   HTTPClient client_{};
+
+  /// Decode chunked transfer encoding from the raw stream
+  int read_chunked_(uint8_t *buf, size_t max_len, WiFiClient *stream);
+  /// Transition from chunk header to data or trailer based on parsed size
+  void chunk_header_complete_();
+  ChunkedState chunk_state_{ChunkedState::CHUNK_HEADER};
+  size_t chunk_remaining_{0};  ///< Bytes remaining in current chunk; reused as a start-of-line flag in CHUNK_TRAILER
 };
 
 class HttpRequestArduino : public HttpRequestComponent {
diff --git a/esphome/components/http_request/ota/ota_http_request.cpp b/esphome/components/http_request/ota/ota_http_request.cpp
index 8f4ecfab2d..882def4d7f 100644
--- a/esphome/components/http_request/ota/ota_http_request.cpp
+++ b/esphome/components/http_request/ota/ota_http_request.cpp
@@ -133,8 +133,10 @@ uint8_t OtaHttpRequestComponent::do_ota_() {
     auto result = http_read_loop_result(bufsize_or_error, last_data_time, read_timeout, container->is_read_complete());
     if (result == HttpReadLoopResult::RETRY)
       continue;
-    // Note: COMPLETE is currently unreachable since the loop condition checks bytes_read < content_length,
-    // but this is defensive code in case chunked transfer encoding support is added for OTA in the future.
+    // For non-chunked responses, COMPLETE is unreachable (loop condition checks bytes_read < content_length).
+    // For chunked responses, the decoder sets content_length = bytes_read when the final chunk arrives,
+    // which causes the loop condition to terminate. But COMPLETE can still be returned if the decoder
+    // finishes mid-read, so this is needed for correctness.
     if (result == HttpReadLoopResult::COMPLETE)
       break;
     if (result != HttpReadLoopResult::DATA) {
diff --git a/esphome/core/helpers.cpp b/esphome/core/helpers.cpp
index 1a5d22f8d8..c2f7f67d9a 100644
--- a/esphome/core/helpers.cpp
+++ b/esphome/core/helpers.cpp
@@ -295,7 +295,7 @@ size_t parse_hex(const char *str, size_t length, uint8_t *data, size_t count) {
   size_t chars = std::min(length, 2 * count);
   for (size_t i = 2 * count - chars; i < 2 * count; i++, str++) {
     uint8_t val = parse_hex_char(*str);
-    if (val > 15)
+    if (val == INVALID_HEX_CHAR)
       return 0;
     data[i >> 1] = (i & 1) ? data[i >> 1] | val : val << 4;
   }
diff --git a/esphome/core/helpers.h b/esphome/core/helpers.h
index 9c7060cd1d..f7de34b6d5 100644
--- a/esphome/core/helpers.h
+++ b/esphome/core/helpers.h
@@ -874,6 +874,9 @@ template::value, int> = 0> optional<
 }
 
+/// Sentinel returned by parse_hex_char() for non-hex characters.
+inline constexpr uint8_t INVALID_HEX_CHAR = 255;
+
 /// Parse a hex character to its nibble value (0-15), returns 255 on invalid input
 constexpr uint8_t parse_hex_char(char c) {
   if (c >= '0' && c <= '9')
     return c - '0';
@@ -881,7 +884,7 @@ constexpr uint8_t parse_hex_char(char c) {
     return c - 'A' + 10;
   if (c >= 'a' && c <= 'f')
     return c - 'a' + 10;
-  return 255;
+  return INVALID_HEX_CHAR;
 }
 
 /// Convert a nibble (0-15) to hex char with specified base ('a' for lowercase, 'A' for uppercase)