mirror of
https://github.com/esphome/esphome.git
synced 2025-09-16 18:22:22 +01:00
Merge branch 'heap_scheduler_stress_component' into integration
This commit is contained in:
@@ -50,7 +50,8 @@ void HttpRequestUpdate::update_task(void *params) {
|
||||
|
||||
if (container == nullptr || container->status_code != HTTP_STATUS_OK) {
|
||||
std::string msg = str_sprintf("Failed to fetch manifest from %s", this_update->source_url_.c_str());
|
||||
this_update->status_set_error(msg.c_str());
|
||||
// Defer to main loop to avoid race condition on component_state_ read-modify-write
|
||||
this_update->defer([this_update, msg]() { this_update->status_set_error(msg.c_str()); });
|
||||
UPDATE_RETURN;
|
||||
}
|
||||
|
||||
@@ -58,7 +59,8 @@ void HttpRequestUpdate::update_task(void *params) {
|
||||
uint8_t *data = allocator.allocate(container->content_length);
|
||||
if (data == nullptr) {
|
||||
std::string msg = str_sprintf("Failed to allocate %zu bytes for manifest", container->content_length);
|
||||
this_update->status_set_error(msg.c_str());
|
||||
// Defer to main loop to avoid race condition on component_state_ read-modify-write
|
||||
this_update->defer([this_update, msg]() { this_update->status_set_error(msg.c_str()); });
|
||||
container->end();
|
||||
UPDATE_RETURN;
|
||||
}
|
||||
@@ -120,7 +122,8 @@ void HttpRequestUpdate::update_task(void *params) {
|
||||
|
||||
if (!valid) {
|
||||
std::string msg = str_sprintf("Failed to parse JSON from %s", this_update->source_url_.c_str());
|
||||
this_update->status_set_error(msg.c_str());
|
||||
// Defer to main loop to avoid race condition on component_state_ read-modify-write
|
||||
this_update->defer([this_update, msg]() { this_update->status_set_error(msg.c_str()); });
|
||||
UPDATE_RETURN;
|
||||
}
|
||||
|
||||
@@ -147,18 +150,34 @@ void HttpRequestUpdate::update_task(void *params) {
|
||||
this_update->update_info_.current_version = current_version;
|
||||
}
|
||||
|
||||
bool trigger_update_available = false;
|
||||
|
||||
if (this_update->update_info_.latest_version.empty() ||
|
||||
this_update->update_info_.latest_version == this_update->update_info_.current_version) {
|
||||
this_update->state_ = update::UPDATE_STATE_NO_UPDATE;
|
||||
} else {
|
||||
if (this_update->state_ != update::UPDATE_STATE_AVAILABLE) {
|
||||
trigger_update_available = true;
|
||||
}
|
||||
this_update->state_ = update::UPDATE_STATE_AVAILABLE;
|
||||
}
|
||||
|
||||
this_update->update_info_.has_progress = false;
|
||||
this_update->update_info_.progress = 0.0f;
|
||||
// Defer to main loop to ensure thread-safe execution of:
|
||||
// - status_clear_error() performs non-atomic read-modify-write on component_state_
|
||||
// - publish_state() triggers API callbacks that write to the shared protobuf buffer
|
||||
// which can be corrupted if accessed concurrently from task and main loop threads
|
||||
// - update_available trigger to ensure consistent state when the trigger fires
|
||||
this_update->defer([this_update, trigger_update_available]() {
|
||||
this_update->update_info_.has_progress = false;
|
||||
this_update->update_info_.progress = 0.0f;
|
||||
|
||||
this_update->status_clear_error();
|
||||
this_update->publish_state();
|
||||
this_update->status_clear_error();
|
||||
this_update->publish_state();
|
||||
|
||||
if (trigger_update_available) {
|
||||
this_update->get_update_available_trigger()->trigger(this_update->update_info_);
|
||||
}
|
||||
});
|
||||
|
||||
UPDATE_RETURN;
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "esphome/core/automation.h"
|
||||
#include "esphome/core/component.h"
|
||||
#include "esphome/core/entity_base.h"
|
||||
@@ -38,12 +39,19 @@ class UpdateEntity : public EntityBase, public EntityBase_DeviceClass {
|
||||
const UpdateState &state = state_;
|
||||
|
||||
void add_on_state_callback(std::function<void()> &&callback) { this->state_callback_.add(std::move(callback)); }
|
||||
Trigger<const UpdateInfo &> *get_update_available_trigger() {
|
||||
if (!update_available_trigger_) {
|
||||
update_available_trigger_ = std::make_unique<Trigger<const UpdateInfo &>>();
|
||||
}
|
||||
return update_available_trigger_.get();
|
||||
}
|
||||
|
||||
protected:
|
||||
UpdateState state_{UPDATE_STATE_UNKNOWN};
|
||||
UpdateInfo update_info_;
|
||||
|
||||
CallbackManager<void()> state_callback_{};
|
||||
std::unique_ptr<Trigger<const UpdateInfo &>> update_available_trigger_{nullptr};
|
||||
};
|
||||
|
||||
} // namespace update
|
||||
|
@@ -220,6 +220,9 @@ bool HOT Scheduler::cancel_retry(Component *component, const std::string &name)
|
||||
}
|
||||
|
||||
optional<uint32_t> HOT Scheduler::next_schedule_in() {
|
||||
// IMPORTANT: This method should only be called from the main thread (loop task).
|
||||
// It calls empty_() and accesses items_[0] without holding a lock, which is only
|
||||
// safe when called from the main thread. Other threads must not call this method.
|
||||
if (this->empty_())
|
||||
return {};
|
||||
auto &item = this->items_[0];
|
||||
@@ -291,29 +294,27 @@ void HOT Scheduler::call() {
|
||||
}
|
||||
#endif // ESPHOME_DEBUG_SCHEDULER
|
||||
|
||||
auto to_remove_was = this->to_remove_;
|
||||
auto items_was = this->items_.size();
|
||||
// If we have too many items to remove
|
||||
if (this->to_remove_ > MAX_LOGICALLY_DELETED_ITEMS) {
|
||||
// We hold the lock for the entire cleanup operation because:
|
||||
// 1. We're rebuilding the entire items_ list, so we need exclusive access throughout
|
||||
// 2. Other threads must see either the old state or the new state, not intermediate states
|
||||
// 3. The operation is already expensive (O(n)), so lock overhead is negligible
|
||||
// 4. No operations inside can block or take other locks, so no deadlock risk
|
||||
LockGuard guard{this->lock_};
|
||||
|
||||
std::vector<std::unique_ptr<SchedulerItem>> valid_items;
|
||||
while (!this->empty_()) {
|
||||
LockGuard guard{this->lock_};
|
||||
auto item = std::move(this->items_[0]);
|
||||
this->pop_raw_();
|
||||
valid_items.push_back(std::move(item));
|
||||
|
||||
// Move all non-removed items to valid_items
|
||||
for (auto &item : this->items_) {
|
||||
if (!item->remove) {
|
||||
valid_items.push_back(std::move(item));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
LockGuard guard{this->lock_};
|
||||
this->items_ = std::move(valid_items);
|
||||
}
|
||||
|
||||
// The following should not happen unless I'm missing something
|
||||
if (this->to_remove_ != 0) {
|
||||
ESP_LOGW(TAG, "to_remove_ was %" PRIu32 " now: %" PRIu32 " items where %zu now %zu. Please report this",
|
||||
to_remove_was, to_remove_, items_was, items_.size());
|
||||
this->to_remove_ = 0;
|
||||
}
|
||||
// Replace items_ with the filtered list
|
||||
this->items_ = std::move(valid_items);
|
||||
this->to_remove_ = 0;
|
||||
}
|
||||
|
||||
while (!this->empty_()) {
|
||||
@@ -383,17 +384,29 @@ void HOT Scheduler::process_to_add() {
|
||||
this->to_add_.clear();
|
||||
}
|
||||
void HOT Scheduler::cleanup_() {
|
||||
// Fast path: if nothing to remove, just return
|
||||
// Reading to_remove_ without lock is safe because:
|
||||
// 1. We only call this from the main thread during call()
|
||||
// 2. If it's 0, there's definitely nothing to cleanup
|
||||
// 3. If it becomes non-zero after we check, cleanup will happen next time
|
||||
if (this->to_remove_ == 0)
|
||||
return;
|
||||
|
||||
// We must hold the lock for the entire cleanup operation because:
|
||||
// 1. We're modifying items_ (via pop_raw_) which requires exclusive access
|
||||
// 2. We're decrementing to_remove_ which is also modified by other threads
|
||||
// (though all modifications are already under lock)
|
||||
// 3. Other threads read items_ when searching for items to cancel in cancel_item_locked_()
|
||||
// 4. We need a consistent view of items_ and to_remove_ throughout the operation
|
||||
// Without the lock, we could access items_ while another thread is reading it,
|
||||
// leading to race conditions
|
||||
LockGuard guard{this->lock_};
|
||||
while (!this->items_.empty()) {
|
||||
auto &item = this->items_[0];
|
||||
if (!item->remove)
|
||||
return;
|
||||
|
||||
this->to_remove_--;
|
||||
|
||||
{
|
||||
LockGuard guard{this->lock_};
|
||||
this->pop_raw_();
|
||||
}
|
||||
this->pop_raw_();
|
||||
}
|
||||
}
|
||||
void HOT Scheduler::pop_raw_() {
|
||||
|
@@ -99,9 +99,15 @@ class Scheduler {
|
||||
SchedulerItem(const SchedulerItem &) = delete;
|
||||
SchedulerItem &operator=(const SchedulerItem &) = delete;
|
||||
|
||||
// Default move operations
|
||||
SchedulerItem(SchedulerItem &&) = default;
|
||||
SchedulerItem &operator=(SchedulerItem &&) = default;
|
||||
// Delete move operations to prevent accidental moves of SchedulerItem objects.
|
||||
// This is intentional because:
|
||||
// 1. SchedulerItem contains a dynamically allocated name that requires careful ownership management
|
||||
// 2. The scheduler only moves unique_ptr<SchedulerItem>, never SchedulerItem objects directly
|
||||
// 3. Moving unique_ptr only transfers pointer ownership without moving the pointed-to object
|
||||
// 4. Deleting these operations makes it explicit that SchedulerItem objects should not be moved
|
||||
// 5. This prevents potential double-free bugs if the code is refactored to move SchedulerItem objects
|
||||
SchedulerItem(SchedulerItem &&) = delete;
|
||||
SchedulerItem &operator=(SchedulerItem &&) = delete;
|
||||
|
||||
// Helper to get the name regardless of storage type
|
||||
const char *get_name() const { return name_is_dynamic ? name_.dynamic_name : name_.static_name; }
|
||||
@@ -179,6 +185,12 @@ class Scheduler {
|
||||
return item->remove || (item->component != nullptr && item->component->is_failed());
|
||||
}
|
||||
|
||||
// Check if the scheduler has no items.
|
||||
// IMPORTANT: This method should only be called from the main thread (loop task).
|
||||
// It performs cleanup of removed items and checks if the queue is empty.
|
||||
// The items_.empty() check at the end is done without a lock for performance,
|
||||
// which is safe because this is only called from the main thread while other
|
||||
// threads only add items (never remove them).
|
||||
bool empty_() {
|
||||
this->cleanup_();
|
||||
return this->items_.empty();
|
||||
|
Reference in New Issue
Block a user