From 0246a8eb1de7f3bd820d29df75c2ab4dc723193c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Sep 2025 23:48:51 -0500 Subject: [PATCH] [usb_host] Fix double-free crash with lock-free atomic pool allocation (#10926) --- esphome/components/usb_host/usb_host.h | 47 +++++++--- .../components/usb_host/usb_host_client.cpp | 85 ++++++++++++++++--- esphome/components/usb_uart/usb_uart.cpp | 13 ++- 3 files changed, 117 insertions(+), 28 deletions(-) diff --git a/esphome/components/usb_host/usb_host.h b/esphome/components/usb_host/usb_host.h index 8cf313aa9b..4f8d2ec9a8 100644 --- a/esphome/components/usb_host/usb_host.h +++ b/esphome/components/usb_host/usb_host.h @@ -9,11 +9,31 @@ #include #include "esphome/core/lock_free_queue.h" #include "esphome/core/event_pool.h" -#include +#include namespace esphome { namespace usb_host { +// THREADING MODEL: +// This component uses a dedicated USB task for event processing to prevent data loss. +// - USB Task (high priority): Handles USB events, executes transfer callbacks +// - Main Loop Task: Initiates transfers, processes completion events +// +// Thread-safe communication: +// - Lock-free queues for USB task -> main loop events (SPSC pattern) +// - Lock-free TransferRequest pool using atomic bitmask (MCSP pattern) +// +// TransferRequest pool access pattern: +// - get_trq_() [allocate]: Called from BOTH USB task and main loop threads +// * USB task: via USB UART input callbacks that restart transfers immediately +// * Main loop: for output transfers and flow-controlled input restarts +// - release_trq() [deallocate]: Called from main loop thread only +// +// The multi-threaded allocation is intentional for performance: +// - USB task can immediately restart input transfers without context switching +// - Main loop controls backpressure by deciding when to restart after consuming data +// The atomic bitmask ensures thread-safe allocation without mutex blocking. + static const char *const TAG = "usb_host"; // Forward declarations @@ -32,7 +52,8 @@ static const uint8_t USB_DIR_IN = 1 << 7; static const uint8_t USB_DIR_OUT = 0; static const size_t SETUP_PACKET_SIZE = 8; -static const size_t MAX_REQUESTS = 16; // maximum number of outstanding requests possible. +static const size_t MAX_REQUESTS = 16; // maximum number of outstanding requests possible. +static_assert(MAX_REQUESTS <= 16, "MAX_REQUESTS must be <= 16 to fit in uint16_t bitmask"); static constexpr size_t USB_EVENT_QUEUE_SIZE = 32; // Size of event queue between USB task and main loop static constexpr size_t USB_TASK_STACK_SIZE = 4096; // Stack size for USB task (same as ESP-IDF USB examples) static constexpr UBaseType_t USB_TASK_PRIORITY = 5; // Higher priority than main loop (tskIDLE_PRIORITY + 5) @@ -98,13 +119,7 @@ class USBClient : public Component { friend class USBHost; public: - USBClient(uint16_t vid, uint16_t pid) : vid_(vid), pid_(pid) { init_pool(); } - - void init_pool() { - this->trq_pool_.clear(); - for (size_t i = 0; i != MAX_REQUESTS; i++) - this->trq_pool_.push_back(&this->requests_[i]); - } + USBClient(uint16_t vid, uint16_t pid) : vid_(vid), pid_(pid), trq_in_use_(0) {} void setup() override; void loop() override; // setup must happen after the host bus has been setup @@ -126,10 +141,13 @@ class USBClient : public Component { protected: bool register_(); - TransferRequest *get_trq_(); + TransferRequest *get_trq_(); // Lock-free allocation using atomic bitmask (multi-consumer safe) virtual void disconnect(); virtual void on_connected() {} - virtual void on_disconnected() { this->init_pool(); } + virtual void on_disconnected() { + // Reset all requests to available (all bits to 0) + this->trq_in_use_.store(0); + } // USB task management static void usb_task_fn(void *arg); @@ -143,7 +161,12 @@ class USBClient : public Component { int state_{USB_CLIENT_INIT}; uint16_t vid_{}; uint16_t pid_{}; - std::list trq_pool_{}; + // Lock-free pool management using atomic bitmask (no dynamic allocation) + // Bit i = 1: requests_[i] is in use, Bit i = 0: requests_[i] is available + // Supports multiple concurrent consumers (both threads can allocate) + // Single producer for deallocation (main loop only) + // Limited to 16 slots by uint16_t size (enforced by static_assert) + std::atomic trq_in_use_; TransferRequest requests_[MAX_REQUESTS]{}; }; class USBHost : public Component { diff --git a/esphome/components/usb_host/usb_host_client.cpp b/esphome/components/usb_host/usb_host_client.cpp index a9d4d42a8c..b26385a8ef 100644 --- a/esphome/components/usb_host/usb_host_client.cpp +++ b/esphome/components/usb_host/usb_host_client.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace esphome { namespace usb_host { @@ -185,9 +186,11 @@ void USBClient::setup() { this->mark_failed(); return; } - for (auto *trq : this->trq_pool_) { - usb_host_transfer_alloc(64, 0, &trq->transfer); - trq->client = this; + // Pre-allocate USB transfer buffers for all slots at startup + // This avoids any dynamic allocation during runtime + for (size_t i = 0; i < MAX_REQUESTS; i++) { + usb_host_transfer_alloc(64, 0, &this->requests_[i].transfer); + this->requests_[i].client = this; // Set once, never changes } // Create and start USB task @@ -347,17 +350,44 @@ static void control_callback(const usb_transfer_t *xfer) { queue_transfer_cleanup(trq, EVENT_CONTROL_COMPLETE); } +// THREAD CONTEXT: Called from both USB task and main loop threads (multi-consumer) +// - USB task: USB UART input callbacks restart transfers for immediate data reception +// - Main loop: Output transfers and flow-controlled input restarts after consuming data +// +// THREAD SAFETY: Lock-free using atomic compare-and-swap on bitmask +// This multi-threaded access is intentional for performance - USB task can +// immediately restart transfers without waiting for main loop scheduling. TransferRequest *USBClient::get_trq_() { - if (this->trq_pool_.empty()) { - ESP_LOGE(TAG, "Too many requests queued"); - return nullptr; + uint16_t mask = this->trq_in_use_.load(std::memory_order_relaxed); + + // Find first available slot (bit = 0) and try to claim it atomically + // We use a while loop to allow retrying the same slot after CAS failure + size_t i = 0; + while (i != MAX_REQUESTS) { + if (mask & (1U << i)) { + // Slot is in use, move to next slot + i++; + continue; + } + + // Slot i appears available, try to claim it atomically + uint16_t desired = mask | (1U << i); // Set bit i to mark as in-use + + if (this->trq_in_use_.compare_exchange_weak(mask, desired, std::memory_order_acquire, std::memory_order_relaxed)) { + // Successfully claimed slot i - prepare the TransferRequest + auto *trq = &this->requests_[i]; + trq->transfer->context = trq; + trq->transfer->device_handle = this->device_handle_; + return trq; + } + // CAS failed - another thread modified the bitmask + // mask was already updated by compare_exchange_weak with the current value + // No need to reload - the CAS already did that for us + i = 0; } - auto *trq = this->trq_pool_.front(); - this->trq_pool_.pop_front(); - trq->client = this; - trq->transfer->context = trq; - trq->transfer->device_handle = this->device_handle_; - return trq; + + ESP_LOGE(TAG, "All %d transfer slots in use", MAX_REQUESTS); + return nullptr; } void USBClient::disconnect() { this->on_disconnected(); @@ -370,6 +400,8 @@ void USBClient::disconnect() { this->device_addr_ = -1; } +// THREAD CONTEXT: Called from main loop thread only +// - Used for device configuration and control operations bool USBClient::control_transfer(uint8_t type, uint8_t request, uint16_t value, uint16_t index, const transfer_cb_t &callback, const std::vector &data) { auto *trq = this->get_trq_(); @@ -425,6 +457,9 @@ static void transfer_callback(usb_transfer_t *xfer) { } /** * Performs a transfer input operation. + * THREAD CONTEXT: Called from both USB task and main loop threads! + * - USB task: USB UART input callbacks call start_input() which calls this + * - Main loop: Initial setup and other components * * @param ep_address The endpoint address. * @param callback The callback function to be called when the transfer is complete. @@ -451,6 +486,9 @@ void USBClient::transfer_in(uint8_t ep_address, const transfer_cb_t &callback, u /** * Performs an output transfer operation. + * THREAD CONTEXT: Called from main loop thread only + * - USB UART output uses defer() to ensure main loop context + * - Modbus and other components call from loop() * * @param ep_address The endpoint address. * @param callback The callback function to be called when the transfer is complete. @@ -483,7 +521,28 @@ void USBClient::dump_config() { " Product id %04X", this->vid_, this->pid_); } -void USBClient::release_trq(TransferRequest *trq) { this->trq_pool_.push_back(trq); } +// THREAD CONTEXT: Only called from main loop thread (single producer for deallocation) +// - Via event processing when handling EVENT_TRANSFER_COMPLETE/EVENT_CONTROL_COMPLETE +// - Directly when transfer submission fails +// +// THREAD SAFETY: Lock-free using atomic AND to clear bit +// Single-producer pattern makes this simpler than allocation +void USBClient::release_trq(TransferRequest *trq) { + if (trq == nullptr) + return; + + // Calculate index from pointer arithmetic + size_t index = trq - this->requests_; + if (index >= MAX_REQUESTS) { + ESP_LOGE(TAG, "Invalid TransferRequest pointer"); + return; + } + + // Atomically clear bit i to mark slot as available + // fetch_and with inverted bitmask clears the bit atomically + uint16_t bit = 1U << index; + this->trq_in_use_.fetch_and(static_cast(~bit), std::memory_order_release); +} } // namespace usb_host } // namespace esphome diff --git a/esphome/components/usb_uart/usb_uart.cpp b/esphome/components/usb_uart/usb_uart.cpp index a8a8bc231c..29003e071e 100644 --- a/esphome/components/usb_uart/usb_uart.cpp +++ b/esphome/components/usb_uart/usb_uart.cpp @@ -216,9 +216,16 @@ void USBUartComponent::dump_config() { void USBUartComponent::start_input(USBUartChannel *channel) { if (!channel->initialised_.load() || channel->input_started_.load()) return; - // Note: This function is called from both USB task and main loop, so we cannot - // directly check ring buffer space here. Backpressure is handled by the chunk pool: - // when exhausted, USB input stops until chunks are freed by the main loop + // THREAD CONTEXT: Called from both USB task and main loop threads + // - USB task: Immediate restart after successful transfer for continuous data flow + // - Main loop: Controlled restart after consuming data (backpressure mechanism) + // + // This dual-thread access is intentional for performance: + // - USB task restarts avoid context switch delays for high-speed data + // - Main loop restarts provide flow control when buffers are full + // + // The underlying transfer_in() uses lock-free atomic allocation from the + // TransferRequest pool, making this multi-threaded access safe const auto *ep = channel->cdc_dev_.in_ep; // CALLBACK CONTEXT: This lambda is executed in USB task via transfer_callback auto callback = [this, channel](const usb_host::TransferStatus &status) {