mirror of
https://github.com/esphome/esphome.git
synced 2025-09-25 06:32:22 +01:00
Fix defer() thread safety issues on multi-core platforms (#9317)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -73,8 +73,6 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type
|
||||
if (delay == SCHEDULER_DONT_RUN)
|
||||
return;
|
||||
|
||||
const auto now = this->millis_();
|
||||
|
||||
// Create and populate the scheduler item
|
||||
auto item = make_unique<SchedulerItem>();
|
||||
item->component = component;
|
||||
@@ -83,6 +81,19 @@ void HOT Scheduler::set_timer_common_(Component *component, SchedulerItem::Type
|
||||
item->callback = std::move(func);
|
||||
item->remove = false;
|
||||
|
||||
#if !defined(USE_ESP8266) && !defined(USE_RP2040)
|
||||
// Special handling for defer() (delay = 0, type = TIMEOUT)
|
||||
// ESP8266 and RP2040 are excluded because they don't need thread-safe defer handling
|
||||
if (delay == 0 && type == SchedulerItem::TIMEOUT) {
|
||||
// Put in defer queue for guaranteed FIFO execution
|
||||
LockGuard guard{this->lock_};
|
||||
this->defer_queue_.push_back(std::move(item));
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
const auto now = this->millis_();
|
||||
|
||||
// Type-specific setup
|
||||
if (type == SchedulerItem::INTERVAL) {
|
||||
item->interval = delay;
|
||||
@@ -209,6 +220,35 @@ optional<uint32_t> HOT Scheduler::next_schedule_in() {
|
||||
return item->next_execution_ - now;
|
||||
}
|
||||
void HOT Scheduler::call() {
|
||||
#if !defined(USE_ESP8266) && !defined(USE_RP2040)
|
||||
// Process defer queue first to guarantee FIFO execution order for deferred items.
|
||||
// Previously, defer() used the heap which gave undefined order for equal timestamps,
|
||||
// causing race conditions on multi-core systems (ESP32, BK7200).
|
||||
// With the defer queue:
|
||||
// - Deferred items (delay=0) go directly to defer_queue_ in set_timer_common_
|
||||
// - Items execute in exact order they were deferred (FIFO guarantee)
|
||||
// - No deferred items exist in to_add_, so processing order doesn't affect correctness
|
||||
// ESP8266 and RP2040 don't use this queue - they fall back to the heap-based approach
|
||||
// (ESP8266: single-core, RP2040: empty mutex implementation).
|
||||
while (!this->defer_queue_.empty()) {
|
||||
// The outer check is done without a lock for performance. If the queue
|
||||
// appears non-empty, we lock and process an item. We don't need to check
|
||||
// empty() again inside the lock because only this thread can remove items.
|
||||
std::unique_ptr<SchedulerItem> item;
|
||||
{
|
||||
LockGuard lock(this->lock_);
|
||||
item = std::move(this->defer_queue_.front());
|
||||
this->defer_queue_.pop_front();
|
||||
}
|
||||
|
||||
// Execute callback without holding lock to prevent deadlocks
|
||||
// if the callback tries to call defer() again
|
||||
if (!this->should_skip_item_(item.get())) {
|
||||
this->execute_item_(item.get());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
const auto now = this->millis_();
|
||||
this->process_to_add();
|
||||
|
||||
@@ -282,8 +322,6 @@ void HOT Scheduler::call() {
|
||||
this->pop_raw_();
|
||||
continue;
|
||||
}
|
||||
App.set_current_component(item->component);
|
||||
|
||||
#ifdef ESPHOME_DEBUG_SCHEDULER
|
||||
const char *item_name = item->get_name();
|
||||
ESP_LOGV(TAG, "Running %s '%s/%s' with interval=%" PRIu32 " next_execution=%" PRIu64 " (now=%" PRIu64 ")",
|
||||
@@ -294,13 +332,7 @@ void HOT Scheduler::call() {
|
||||
// Warning: During callback(), a lot of stuff can happen, including:
|
||||
// - timeouts/intervals get added, potentially invalidating vector pointers
|
||||
// - timeouts/intervals get cancelled
|
||||
{
|
||||
uint32_t now_ms = millis();
|
||||
WarnIfComponentBlockingGuard guard{item->component, now_ms};
|
||||
item->callback();
|
||||
// Call finish to ensure blocking time is properly calculated and reported
|
||||
guard.finish();
|
||||
}
|
||||
this->execute_item_(item.get());
|
||||
}
|
||||
|
||||
{
|
||||
@@ -364,6 +396,26 @@ void HOT Scheduler::push_(std::unique_ptr<Scheduler::SchedulerItem> item) {
|
||||
LockGuard guard{this->lock_};
|
||||
this->to_add_.push_back(std::move(item));
|
||||
}
|
||||
// Helper function to check if item matches criteria for cancellation
|
||||
bool HOT Scheduler::matches_item_(const std::unique_ptr<SchedulerItem> &item, Component *component,
|
||||
const char *name_cstr, SchedulerItem::Type type) {
|
||||
if (item->component != component || item->type != type || item->remove) {
|
||||
return false;
|
||||
}
|
||||
const char *item_name = item->get_name();
|
||||
return item_name != nullptr && strcmp(name_cstr, item_name) == 0;
|
||||
}
|
||||
|
||||
// Helper to execute a scheduler item
|
||||
void HOT Scheduler::execute_item_(SchedulerItem *item) {
|
||||
App.set_current_component(item->component);
|
||||
|
||||
uint32_t now_ms = millis();
|
||||
WarnIfComponentBlockingGuard guard{item->component, now_ms};
|
||||
item->callback();
|
||||
guard.finish();
|
||||
}
|
||||
|
||||
// Common implementation for cancel operations
|
||||
bool HOT Scheduler::cancel_item_common_(Component *component, bool is_static_string, const void *name_ptr,
|
||||
SchedulerItem::Type type) {
|
||||
@@ -379,19 +431,28 @@ bool HOT Scheduler::cancel_item_common_(Component *component, bool is_static_str
|
||||
LockGuard guard{this->lock_};
|
||||
bool ret = false;
|
||||
|
||||
for (auto &it : this->items_) {
|
||||
const char *item_name = it->get_name();
|
||||
if (it->component == component && item_name != nullptr && strcmp(name_cstr, item_name) == 0 && it->type == type &&
|
||||
!it->remove) {
|
||||
to_remove_++;
|
||||
it->remove = true;
|
||||
// Check all containers for matching items
|
||||
#if !defined(USE_ESP8266) && !defined(USE_RP2040)
|
||||
// Only check defer_queue_ on platforms that have it
|
||||
for (auto &item : this->defer_queue_) {
|
||||
if (this->matches_item_(item, component, name_cstr, type)) {
|
||||
item->remove = true;
|
||||
ret = true;
|
||||
}
|
||||
}
|
||||
for (auto &it : this->to_add_) {
|
||||
const char *item_name = it->get_name();
|
||||
if (it->component == component && item_name != nullptr && strcmp(name_cstr, item_name) == 0 && it->type == type) {
|
||||
it->remove = true;
|
||||
#endif
|
||||
|
||||
for (auto &item : this->items_) {
|
||||
if (this->matches_item_(item, component, name_cstr, type)) {
|
||||
item->remove = true;
|
||||
ret = true;
|
||||
this->to_remove_++; // Only track removals for heap items
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &item : this->to_add_) {
|
||||
if (this->matches_item_(item, component, name_cstr, type)) {
|
||||
item->remove = true;
|
||||
ret = true;
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user