From 266c2ef337fa5e683c6576180c6f162f743addac Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Wed, 12 Mar 2025 14:18:31 -0500 Subject: [PATCH] [audio, mixer] Memory and CPU performance improvements (#8387) --- esphome/components/audio/__init__.py | 2 +- esphome/components/audio/audio_decoder.cpp | 54 +++++++++++++++---- esphome/components/audio/audio_reader.cpp | 4 +- esphome/components/audio/audio_resampler.cpp | 6 ++- .../audio/audio_transfer_buffer.cpp | 28 ++++++---- .../components/audio/audio_transfer_buffer.h | 9 +++- .../mixer/speaker/mixer_speaker.cpp | 3 +- platformio.ini | 4 +- 8 files changed, 80 insertions(+), 30 deletions(-) diff --git a/esphome/components/audio/__init__.py b/esphome/components/audio/__init__.py index 31d3c39ffa..c5ef781060 100644 --- a/esphome/components/audio/__init__.py +++ b/esphome/components/audio/__init__.py @@ -118,4 +118,4 @@ def final_validate_audio_schema( async def to_code(config): - cg.add_library("esphome/esp-audio-libs", "1.1.1") + cg.add_library("esphome/esp-audio-libs", "1.1.2") diff --git a/esphome/components/audio/audio_decoder.cpp b/esphome/components/audio/audio_decoder.cpp index ab358ad805..60489d7d78 100644 --- a/esphome/components/audio/audio_decoder.cpp +++ b/esphome/components/audio/audio_decoder.cpp @@ -66,19 +66,30 @@ esp_err_t AudioDecoder::start(AudioFileType audio_file_type) { case AudioFileType::FLAC: this->flac_decoder_ = make_unique(); this->free_buffer_required_ = - this->output_transfer_buffer_->capacity(); // We'll revise this after reading the header + this->output_transfer_buffer_->capacity(); // Adjusted and reallocated after reading the header break; #endif #ifdef USE_AUDIO_MP3_SUPPORT case AudioFileType::MP3: this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder(); + + // MP3 always has 1152 samples per chunk this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels + + // Always reallocate the output transfer buffer to the smallest necessary size + this->output_transfer_buffer_->reallocate(this->free_buffer_required_); break; #endif case AudioFileType::WAV: this->wav_decoder_ = make_unique(); this->wav_decoder_->reset(); + + // Processing WAVs doesn't actually require a specific amount of buffer size, as it is already in PCM format. + // Thus, we don't reallocate to a minimum size. this->free_buffer_required_ = 1024; + if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) { + this->output_transfer_buffer_->reallocate(this->free_buffer_required_); + } break; case AudioFileType::NONE: default: @@ -116,10 +127,18 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { uint32_t decoding_start = millis(); + bool first_loop_iteration = true; + + size_t bytes_processed = 0; + size_t bytes_available_before_processing = 0; + while (state == FileDecoderState::MORE_TO_PROCESS) { // Transfer decoded out if (!this->pause_output_) { - size_t bytes_written = this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS)); + // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves + size_t bytes_written = + this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false); + if (this->audio_stream_info_.has_value()) { this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written); this->playback_ms_ += @@ -138,12 +157,24 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { // Decode more audio - size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS)); + // Only shift data on the first loop iteration to avoid unnecessary, slow moves + size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), + first_loop_iteration); - if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) { + if (!first_loop_iteration && (this->input_transfer_buffer_->available() < bytes_processed)) { + // Less data is available than what was processed in last iteration, so don't attempt to decode. + // This attempts to avoid the decoder from consistently trying to decode an incomplete frame. The transfer buffer + // will shift the remaining data to the start and copy more from the source the next time the decode function is + // called + break; + } + + bytes_available_before_processing = this->input_transfer_buffer_->available(); + + if ((this->potentially_failed_count_ > 10) && (bytes_read == 0)) { // Failed to decode in last attempt and there is no new data - if (this->input_transfer_buffer_->free() == 0) { + if ((this->input_transfer_buffer_->free() == 0) && first_loop_iteration) { // The input buffer is full. Since it previously failed on the exact same data, we can never recover state = FileDecoderState::FAILED; } else { @@ -175,6 +206,9 @@ AudioDecoderState AudioDecoder::decode(bool stop_gracefully) { } } + first_loop_iteration = false; + bytes_processed = bytes_available_before_processing - this->input_transfer_buffer_->available(); + if (state == FileDecoderState::POTENTIALLY_FAILED) { ++this->potentially_failed_count_; } else if (state == FileDecoderState::END_OF_FILE) { @@ -207,13 +241,11 @@ FileDecoderState AudioDecoder::decode_flac_() { size_t bytes_consumed = this->flac_decoder_->get_bytes_index(); this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed); + // Reallocate the output transfer buffer to the smallest necessary size this->free_buffer_required_ = flac_decoder_->get_output_buffer_size_bytes(); - if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) { - // Output buffer is not big enough - if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) { - // Couldn't reallocate output buffer - return FileDecoderState::FAILED; - } + if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) { + // Couldn't reallocate output buffer + return FileDecoderState::FAILED; } this->audio_stream_info_ = diff --git a/esphome/components/audio/audio_reader.cpp b/esphome/components/audio/audio_reader.cpp index 90b73a1f46..b82c6db9ee 100644 --- a/esphome/components/audio/audio_reader.cpp +++ b/esphome/components/audio/audio_reader.cpp @@ -259,14 +259,14 @@ AudioReaderState AudioReader::file_read_() { } AudioReaderState AudioReader::http_read_() { - this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS)); + this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false); if (esp_http_client_is_complete_data_received(this->client_)) { if (this->output_transfer_buffer_->available() == 0) { this->cleanup_connection_(); return AudioReaderState::FINISHED; } - } else { + } else if (this->output_transfer_buffer_->free() > 0) { size_t bytes_to_read = this->output_transfer_buffer_->free(); int received_len = esp_http_client_read(this->client_, (char *) this->output_transfer_buffer_->get_buffer_end(), bytes_to_read); diff --git a/esphome/components/audio/audio_resampler.cpp b/esphome/components/audio/audio_resampler.cpp index 05e9ff6ca1..a7621225a1 100644 --- a/esphome/components/audio/audio_resampler.cpp +++ b/esphome/components/audio/audio_resampler.cpp @@ -93,8 +93,9 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_d } if (!this->pause_output_) { - // Move audio data to the sink - this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS)); + // Move audio data to the sink without shifting the data in the output transfer buffer to avoid unnecessary, slow + // data moves + this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS), false); } else { // If paused, block to avoid wasting CPU resources delay(READ_WRITE_TIMEOUT_MS); @@ -115,6 +116,7 @@ AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_d if ((this->input_stream_info_.get_sample_rate() != this->output_stream_info_.get_sample_rate()) || (this->input_stream_info_.get_bits_per_sample() != this->output_stream_info_.get_bits_per_sample())) { + // Adjust gain by -3 dB to avoid clipping due to the resampling process esp_audio_libs::resampler::ResamplerResults results = this->resampler_->resample(this->input_transfer_buffer_->get_buffer_start(), this->output_transfer_buffer_->get_buffer_end(), frames_available, frames_free, -3); diff --git a/esphome/components/audio/audio_transfer_buffer.cpp b/esphome/components/audio/audio_transfer_buffer.cpp index 9b6067aac4..1566884c3d 100644 --- a/esphome/components/audio/audio_transfer_buffer.cpp +++ b/esphome/components/audio/audio_transfer_buffer.cpp @@ -33,12 +33,17 @@ size_t AudioTransferBuffer::free() const { if (this->buffer_size_ == 0) { return 0; } - return this->buffer_size_ - (this->buffer_length_ - (this->data_start_ - this->buffer_)); + return this->buffer_size_ - (this->buffer_length_ + (this->data_start_ - this->buffer_)); } void AudioTransferBuffer::decrease_buffer_length(size_t bytes) { this->buffer_length_ -= bytes; - this->data_start_ += bytes; + if (this->buffer_length_ > 0) { + this->data_start_ += bytes; + } else { + // All the data in the buffer has been consumed, reset the start pointer + this->data_start_ = this->buffer_; + } } void AudioTransferBuffer::increase_buffer_length(size_t bytes) { this->buffer_length_ += bytes; } @@ -71,7 +76,7 @@ bool AudioTransferBuffer::has_buffered_data() const { bool AudioTransferBuffer::reallocate(size_t new_buffer_size) { if (this->buffer_length_ > 0) { - // Already has data in the buffer, fail + // Buffer currently has data, so reallocation is impossible return false; } this->deallocate_buffer_(); @@ -106,12 +111,14 @@ void AudioTransferBuffer::deallocate_buffer_() { this->buffer_length_ = 0; } -size_t AudioSourceTransferBuffer::transfer_data_from_source(TickType_t ticks_to_wait) { - // Shift data in buffer to start - if (this->buffer_length_ > 0) { - memmove(this->buffer_, this->data_start_, this->buffer_length_); +size_t AudioSourceTransferBuffer::transfer_data_from_source(TickType_t ticks_to_wait, bool pre_shift) { + if (pre_shift) { + // Shift data in buffer to start + if (this->buffer_length_ > 0) { + memmove(this->buffer_, this->data_start_, this->buffer_length_); + } + this->data_start_ = this->buffer_; } - this->data_start_ = this->buffer_; size_t bytes_to_read = this->free(); size_t bytes_read = 0; @@ -125,7 +132,7 @@ size_t AudioSourceTransferBuffer::transfer_data_from_source(TickType_t ticks_to_ return bytes_read; } -size_t AudioSinkTransferBuffer::transfer_data_to_sink(TickType_t ticks_to_wait) { +size_t AudioSinkTransferBuffer::transfer_data_to_sink(TickType_t ticks_to_wait, bool post_shift) { size_t bytes_written = 0; if (this->available()) { #ifdef USE_SPEAKER @@ -139,11 +146,14 @@ size_t AudioSinkTransferBuffer::transfer_data_to_sink(TickType_t ticks_to_wait) } this->decrease_buffer_length(bytes_written); + } + if (post_shift) { // Shift unwritten data to the start of the buffer memmove(this->buffer_, this->data_start_, this->buffer_length_); this->data_start_ = this->buffer_; } + return bytes_written; } diff --git a/esphome/components/audio/audio_transfer_buffer.h b/esphome/components/audio/audio_transfer_buffer.h index 4e461db56d..edb484e7d2 100644 --- a/esphome/components/audio/audio_transfer_buffer.h +++ b/esphome/components/audio/audio_transfer_buffer.h @@ -60,6 +60,7 @@ class AudioTransferBuffer { protected: /// @brief Allocates the transfer buffer in external memory, if available. + /// @param buffer_size The number of bytes to allocate /// @return True is successful, false otherwise. bool allocate_buffer_(size_t buffer_size); @@ -89,8 +90,10 @@ class AudioSinkTransferBuffer : public AudioTransferBuffer { /// @brief Writes any available data in the transfer buffer to the sink. /// @param ticks_to_wait FreeRTOS ticks to block while waiting for the sink to have enough space + /// @param post_shift If true, all remaining data is moved to the start of the buffer after transferring to the sink. + /// Defaults to true. /// @return Number of bytes written - size_t transfer_data_to_sink(TickType_t ticks_to_wait); + size_t transfer_data_to_sink(TickType_t ticks_to_wait, bool post_shift = true); /// @brief Adds a ring buffer as the transfer buffer's sink. /// @param ring_buffer weak_ptr to the allocated ring buffer @@ -125,8 +128,10 @@ class AudioSourceTransferBuffer : public AudioTransferBuffer { /// @brief Reads any available data from the sink into the transfer buffer. /// @param ticks_to_wait FreeRTOS ticks to block while waiting for the source to have enough data + /// @param pre_shift If true, any unwritten data is moved to the start of the buffer before transferring from the + /// source. Defaults to true. /// @return Number of bytes read - size_t transfer_data_from_source(TickType_t ticks_to_wait); + size_t transfer_data_from_source(TickType_t ticks_to_wait, bool pre_shift = true); /// @brief Adds a ring buffer as the transfer buffer's source. /// @param ring_buffer weak_ptr to the allocated ring buffer diff --git a/esphome/components/mixer/speaker/mixer_speaker.cpp b/esphome/components/mixer/speaker/mixer_speaker.cpp index 60cff95eb2..d9231154a3 100644 --- a/esphome/components/mixer/speaker/mixer_speaker.cpp +++ b/esphome/components/mixer/speaker/mixer_speaker.cpp @@ -490,7 +490,8 @@ void MixerSpeaker::audio_mixer_task(void *params) { break; } - output_transfer_buffer->transfer_data_to_sink(pdMS_TO_TICKS(TASK_DELAY_MS)); + // Never shift the data in the output transfer buffer to avoid unnecessary, slow data moves + output_transfer_buffer->transfer_data_to_sink(pdMS_TO_TICKS(TASK_DELAY_MS), false); const uint32_t output_frames_free = this_mixer->audio_stream_info_.value().bytes_to_frames(output_transfer_buffer->free()); diff --git a/platformio.ini b/platformio.ini index fab7fda659..a2c2a74ac0 100644 --- a/platformio.ini +++ b/platformio.ini @@ -128,7 +128,7 @@ lib_deps = DNSServer ; captive_portal (Arduino built-in) esphome/ESP32-audioI2S@2.0.7 ; i2s_audio droscy/esp_wireguard@0.4.2 ; wireguard - esphome/esp-audio-libs@1.1.1 ; audio + esphome/esp-audio-libs@1.1.2 ; audio build_flags = ${common:arduino.build_flags} @@ -149,7 +149,7 @@ lib_deps = ${common:idf.lib_deps} droscy/esp_wireguard@0.4.2 ; wireguard kahrendt/ESPMicroSpeechFeatures@1.1.0 ; micro_wake_word - esphome/esp-audio-libs@1.1.1 ; audio + esphome/esp-audio-libs@1.1.2 ; audio build_flags = ${common:idf.build_flags} -Wno-nonnull-compare