1
0
mirror of https://github.com/esphome/esphome.git synced 2026-02-08 00:31:58 +00:00

[http_request] Fix chunked transfer encoding on Arduino platforms (#13790)

This commit is contained in:
J. Nick Koston
2026-02-06 15:52:41 +01:00
committed by GitHub
parent e4ad2082bc
commit c3622ef7fb
5 changed files with 198 additions and 26 deletions

View File

@@ -133,20 +133,10 @@ std::shared_ptr<HttpContainer> HttpRequestArduino::perform(const std::string &ur
// HTTPClient::getSize() returns -1 for chunked transfer encoding (no Content-Length).
// When cast to size_t, -1 becomes SIZE_MAX (4294967295 on 32-bit).
// The read() method handles this: bytes_read_ can never reach SIZE_MAX, so the
// early return check (bytes_read_ >= content_length) will never trigger.
//
// TODO: Chunked transfer encoding is NOT properly supported on Arduino.
// The implementation in #7884 was incomplete - it only works correctly on ESP-IDF where
// esp_http_client_read() decodes chunks internally. On Arduino, using getStreamPtr()
// returns raw TCP data with chunk framing (e.g., "12a\r\n{json}\r\n0\r\n\r\n") instead
// of decoded content. This wasn't noticed because requests would complete and payloads
// were only examined on IDF. The long transfer times were also masked by the misleading
// "HTTP on Arduino version >= 3.1 is **very** slow" warning above. This causes two issues:
// 1. Response body is corrupted - contains chunk size headers mixed with data
// 2. Cannot detect end of transfer - connection stays open (keep-alive), causing timeout
// The proper fix would be to use getString() for chunked responses, which decodes chunks
// internally, but this buffers the entire response in memory.
// The read() method uses a chunked transfer encoding decoder (read_chunked_) to strip
// chunk framing and deliver only decoded content. When the final 0-size chunk is received,
// is_chunked_ is cleared and content_length is set to the actual decoded size, so
// is_read_complete() returns true and callers exit their read loops correctly.
int content_length = container->client_.getSize();
ESP_LOGD(TAG, "Content-Length: %d", content_length);
container->content_length = (size_t) content_length;
@@ -174,6 +164,10 @@ std::shared_ptr<HttpContainer> HttpRequestArduino::perform(const std::string &ur
// > 0: bytes read
// 0: no data yet, retry <-- NOTE: 0 means retry, NOT EOF!
// < 0: error/connection closed <-- connection closed returns -1, not 0
//
// For chunked transfer encoding, read_chunked_() decodes chunk framing and delivers
// only the payload data. When the final 0-size chunk is received, it clears is_chunked_
// and sets content_length = bytes_read_ so is_read_complete() returns true.
int HttpContainerArduino::read(uint8_t *buf, size_t max_len) {
const uint32_t start = millis();
watchdog::WatchdogManager wdm(this->parent_->get_watchdog_timeout());
@@ -184,24 +178,42 @@ int HttpContainerArduino::read(uint8_t *buf, size_t max_len) {
return HTTP_ERROR_CONNECTION_CLOSED;
}
if (this->is_chunked_) {
int result = this->read_chunked_(buf, max_len, stream_ptr);
this->duration_ms += (millis() - start);
if (result > 0) {
return result;
}
// result <= 0: check for completion or errors
if (this->is_read_complete()) {
return 0; // Chunked transfer complete (final 0-size chunk received)
}
if (result < 0) {
return result; // Stream error during chunk decoding
}
// read_chunked_ returned 0: no data was available (available() was 0).
// This happens when the TCP buffer is empty - either more data is in flight,
// or the connection dropped. Arduino's connected() returns false only when
// both the remote has closed AND the receive buffer is empty, so any buffered
// data is fully drained before we report the drop.
if (!stream_ptr->connected()) {
return HTTP_ERROR_CONNECTION_CLOSED;
}
return 0; // No data yet, caller should retry
}
// Non-chunked path
int available_data = stream_ptr->available();
// For chunked transfer encoding, HTTPClient::getSize() returns -1, which becomes SIZE_MAX when
// cast to size_t. SIZE_MAX - bytes_read_ is still huge, so it won't limit the read.
size_t remaining = (this->content_length > 0) ? (this->content_length - this->bytes_read_) : max_len;
int bufsize = std::min(max_len, std::min(remaining, (size_t) available_data));
if (bufsize == 0) {
this->duration_ms += (millis() - start);
// Check if we've read all expected content (non-chunked only)
// For chunked encoding (content_length == SIZE_MAX), is_read_complete() returns false
if (this->is_read_complete()) {
return 0; // All content read successfully
}
// No data available - check if connection is still open
// For chunked encoding, !connected() after reading means EOF (all chunks received)
// For known content_length with bytes_read_ < content_length, it means connection dropped
if (!stream_ptr->connected()) {
return HTTP_ERROR_CONNECTION_CLOSED; // Connection closed or EOF for chunked
return HTTP_ERROR_CONNECTION_CLOSED;
}
return 0; // No data yet, caller should retry
}
@@ -215,6 +227,143 @@ int HttpContainerArduino::read(uint8_t *buf, size_t max_len) {
return read_len;
}
// Selects the decoder state that follows a fully parsed chunk-size line.
// Invoked once the '\n' terminating the size line has been consumed; at that
// point chunk_remaining_ holds the parsed chunk size.
void HttpContainerArduino::chunk_header_complete_() {
  // Non-zero size: payload bytes for this chunk come next
  if (this->chunk_remaining_ != 0) {
    this->chunk_state_ = ChunkedState::CHUNK_DATA;
    return;
  }
  // Zero size marks the final chunk; only trailer lines remain. From here on
  // chunk_remaining_ is reused as a flag, where 1 means "at the start of a line".
  this->chunk_remaining_ = 1;
  this->chunk_state_ = ChunkedState::CHUNK_TRAILER;
}
// Streaming decoder for HTTP chunked transfer encoding
//
// On Arduino, getStreamPtr() returns raw TCP data, so a chunked response arrives
// with its framing (hex size lines, CRLF delimiters) interleaved with the payload.
// This function removes that framing and copies only payload bytes into buf.
//
// Wire format (RFC 9112 Section 7.1):
//   <hex-size>[;extension]\r\n
//   <data bytes>\r\n
//   ...
//   0\r\n
//   [trailer-field\r\n]*
//   \r\n
//
// The decoder never waits for data: it only consumes bytes that available()
// reports as already buffered. chunk_state_ and chunk_remaining_ persist across
// calls, so a size line or \r\n pair that is split between TCP segments is
// resumed on the next call. A call may consume nothing but framing bytes; it then
// returns 0 and the caller retries through its usual read-timeout handling.
//
// Stream failures (read() < 0 or readBytes() <= 0, e.g. a connection reset between
// the available() check and the read) return whatever was decoded so far in this
// call, or HTTP_ERROR_CONNECTION_CLOSED when nothing was. A broken stream stays
// broken, so the error is reported again on the following call.
//
// Returns: > 0 decoded bytes, 0 no data available, < 0 error
int HttpContainerArduino::read_chunked_(uint8_t *buf, size_t max_len, WiFiClient *stream) {
  int decoded = 0;

  for (;;) {
    // Stop when the output buffer is full or the terminating chunk was already seen
    if (decoded >= (int) max_len || this->chunk_state_ == ChunkedState::COMPLETE)
      break;
    // Stop when the receive buffer is drained; never block waiting for more
    if (stream->available() == 0)
      break;

    // Payload bytes are copied in bulk; every other state is handled byte by byte below
    if (this->chunk_state_ == ChunkedState::CHUNK_DATA) {
      // Bounded by free space in buf, bytes left in this chunk, and bytes buffered
      const size_t room = max_len - (size_t) decoded;
      const size_t wanted = std::min({room, this->chunk_remaining_, (size_t) stream->available()});
      if (wanted == 0)
        break;

      App.feed_wdt();
      const int got = stream->readBytes(buf + decoded, wanted);
      if (got <= 0) {
        if (decoded > 0)
          return decoded;
        return HTTP_ERROR_CONNECTION_CLOSED;
      }

      decoded += got;
      this->bytes_read_ += got;
      this->chunk_remaining_ -= got;
      if (this->chunk_remaining_ == 0)
        this->chunk_state_ = ChunkedState::CHUNK_DATA_TRAIL;
      continue;
    }

    // Framing states: pull exactly one byte
    const int c = stream->read();
    if (c < 0) {
      if (decoded > 0)
        return decoded;
      return HTTP_ERROR_CONNECTION_CLOSED;
    }

    switch (this->chunk_state_) {
      // Size line "<hex>[;ext]\r\n", accumulated one hex digit at a time.
      // A line without any hex digit (e.g. a bare \r\n) leaves chunk_remaining_ at 0
      // and is therefore handled as the final chunk. This leniency is deliberate: on
      // embedded devices a clean termination is more useful than rejecting bad framing.
      // Overflow from very long hex strings (>8 digits on 32-bit) is not detected;
      // >4GB chunks are unrealistic here and would only result in fewer bytes being
      // read from that chunk.
      case ChunkedState::CHUNK_HEADER: {
        if (c == '\n') {
          // End of size line; a size of 0 means this was the last chunk
          this->chunk_header_complete_();
          break;
        }
        const uint8_t nibble = parse_hex_char(c);
        if (nibble == INVALID_HEX_CHAR) {
          // \r is simply skipped; anything else (';' etc.) begins an extension
          if (c != '\r')
            this->chunk_state_ = ChunkedState::CHUNK_HEADER_EXT;
          break;
        }
        this->chunk_remaining_ = (this->chunk_remaining_ << 4) | nibble;
        break;
      }

      // Chunk extension (e.g. ";name=value\r\n"): discard everything up to \n
      case ChunkedState::CHUNK_HEADER_EXT:
        if (c == '\n')
          this->chunk_header_complete_();
        break;

      // \r\n that follows a chunk's payload; \r is dropped, \n starts the next size line
      case ChunkedState::CHUNK_DATA_TRAIL:
        if (c == '\n') {
          this->chunk_remaining_ = 0;  // fresh accumulator for the next hex size
          this->chunk_state_ = ChunkedState::CHUNK_HEADER;
        }
        break;

      // After the final "0\r\n": optional "field\r\n" trailer lines, then an empty
      // "\r\n" (RFC 9112 Section 7.1). chunk_remaining_ acts as a flag in this state:
      // 1 = at the start of a line (which may be the empty terminator),
      // 0 = inside a trailer field.
      case ChunkedState::CHUNK_TRAILER:
        if (c == '\n') {
          if (this->chunk_remaining_ == 0) {
            // A trailer field just ended; the next byte starts a new line
            this->chunk_remaining_ = 1;
          } else {
            // Empty line: the body is finished. Drop the chunked flag and publish
            // the decoded size as content_length so is_read_complete() returns true
            // and callers leave their read loops.
            this->chunk_state_ = ChunkedState::COMPLETE;
            this->is_chunked_ = false;
            this->content_length = this->bytes_read_;
          }
        } else if (c != '\r') {
          // Any byte other than CR/LF belongs to a trailer field
          this->chunk_remaining_ = 0;
        }
        // \r leaves the flag untouched; it is just part of the \r\n line ending
        break;

      default:
        break;
    }
  }

  return decoded;
}
void HttpContainerArduino::end() {
watchdog::WatchdogManager wdm(this->parent_->get_watchdog_timeout());
this->client_.end();

View File

@@ -18,6 +18,17 @@
namespace esphome::http_request {
class HttpRequestArduino;
/// Decoder states used when stripping chunked transfer encoding framing on Arduino.
/// Enumerator values are spelled out and equal the implicit numbering (0..5).
enum class ChunkedState : uint8_t {
  CHUNK_HEADER = 0,      ///< Accumulating the hex digits of a chunk size
  CHUNK_HEADER_EXT = 1,  ///< Discarding a chunk extension up to the terminating \n
  CHUNK_DATA = 2,        ///< Copying the payload bytes of the current chunk
  CHUNK_DATA_TRAIL = 3,  ///< Discarding the \r\n that follows chunk payload
  CHUNK_TRAILER = 4,     ///< Discarding trailer headers after the final 0-size chunk
  COMPLETE = 5,          ///< Done: final chunk and trailers have been consumed
};
class HttpContainerArduino : public HttpContainer {
public:
int read(uint8_t *buf, size_t max_len) override;
@@ -26,6 +37,13 @@ class HttpContainerArduino : public HttpContainer {
protected:
friend class HttpRequestArduino;
HTTPClient client_{};
/// Decode chunked transfer encoding from the raw stream
int read_chunked_(uint8_t *buf, size_t max_len, WiFiClient *stream);
/// Transition from chunk header to data or trailer based on parsed size
void chunk_header_complete_();
ChunkedState chunk_state_{ChunkedState::CHUNK_HEADER};
size_t chunk_remaining_{0}; ///< Bytes remaining in current chunk
};
class HttpRequestArduino : public HttpRequestComponent {

View File

@@ -133,8 +133,10 @@ uint8_t OtaHttpRequestComponent::do_ota_() {
auto result = http_read_loop_result(bufsize_or_error, last_data_time, read_timeout, container->is_read_complete());
if (result == HttpReadLoopResult::RETRY)
continue;
// Note: COMPLETE is currently unreachable since the loop condition checks bytes_read < content_length,
// but this is defensive code in case chunked transfer encoding support is added for OTA in the future.
// For non-chunked responses, COMPLETE is unreachable (loop condition checks bytes_read < content_length).
// For chunked responses, the decoder sets content_length = bytes_read when the final chunk arrives,
// which causes the loop condition to terminate. But COMPLETE can still be returned if the decoder
// finishes mid-read, so this is needed for correctness.
if (result == HttpReadLoopResult::COMPLETE)
break;
if (result != HttpReadLoopResult::DATA) {

View File

@@ -295,7 +295,7 @@ size_t parse_hex(const char *str, size_t length, uint8_t *data, size_t count) {
size_t chars = std::min(length, 2 * count);
for (size_t i = 2 * count - chars; i < 2 * count; i++, str++) {
uint8_t val = parse_hex_char(*str);
if (val > 15)
if (val == INVALID_HEX_CHAR)
return 0;
data[i >> 1] = (i & 1) ? data[i >> 1] | val : val << 4;
}

View File

@@ -874,6 +874,9 @@ template<typename T, enable_if_t<std::is_unsigned<T>::value, int> = 0> optional<
}
/// Parse a hex character to its nibble value (0-15), returns 255 on invalid input
/// Sentinel returned by parse_hex_char() when the character is not a hexadecimal digit.
/// Valid results are nibble values in the range 0-15, so 255 cannot be mistaken for one.
static constexpr uint8_t INVALID_HEX_CHAR = 255;
constexpr uint8_t parse_hex_char(char c) {
if (c >= '0' && c <= '9')
return c - '0';
@@ -881,7 +884,7 @@ constexpr uint8_t parse_hex_char(char c) {
return c - 'A' + 10;
if (c >= 'a' && c <= 'f')
return c - 'a' + 10;
return 255;
return INVALID_HEX_CHAR;
}
/// Convert a nibble (0-15) to hex char with specified base ('a' for lowercase, 'A' for uppercase)