mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-30 22:53:59 +00:00 
			
		
		
		
	[i2s_audio, mixer, resampler, speaker] Simplify duration played callback (#8703)
This commit is contained in:
		| @@ -14,6 +14,8 @@ | |||||||
| #include "esphome/core/hal.h" | #include "esphome/core/hal.h" | ||||||
| #include "esphome/core/log.h" | #include "esphome/core/log.h" | ||||||
|  |  | ||||||
|  | #include "esp_timer.h" | ||||||
|  |  | ||||||
| namespace esphome { | namespace esphome { | ||||||
| namespace i2s_audio { | namespace i2s_audio { | ||||||
|  |  | ||||||
| @@ -366,25 +368,15 @@ void I2SAudioSpeaker::speaker_task(void *params) { | |||||||
|                             bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5)); |                             bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5)); | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|           uint32_t write_timestamp = micros(); |           int64_t now = esp_timer_get_time(); | ||||||
|  |  | ||||||
|           if (bytes_written != bytes_to_write) { |           if (bytes_written != bytes_to_write) { | ||||||
|             xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); |             xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); | ||||||
|           } |           } | ||||||
|  |  | ||||||
|           bytes_read -= bytes_written; |           bytes_read -= bytes_written; | ||||||
|  |  | ||||||
|           this_speaker->accumulated_frames_written_ += audio_stream_info.bytes_to_frames(bytes_written); |           this_speaker->audio_output_callback_(audio_stream_info.bytes_to_frames(bytes_written), | ||||||
|           const uint32_t new_playback_ms = |                                                now + dma_buffers_duration_ms * 1000); | ||||||
|               audio_stream_info.frames_to_milliseconds_with_remainder(&this_speaker->accumulated_frames_written_); |  | ||||||
|           const uint32_t remainder_us = |  | ||||||
|               audio_stream_info.frames_to_microseconds(this_speaker->accumulated_frames_written_); |  | ||||||
|  |  | ||||||
|           uint32_t pending_frames = |  | ||||||
|               audio_stream_info.bytes_to_frames(bytes_read + this_speaker->audio_ring_buffer_->available()); |  | ||||||
|           const uint32_t pending_ms = audio_stream_info.frames_to_milliseconds_with_remainder(&pending_frames); |  | ||||||
|  |  | ||||||
|           this_speaker->audio_output_callback_(new_playback_ms, remainder_us, pending_ms, write_timestamp); |  | ||||||
|  |  | ||||||
|           tx_dma_underflow = false; |           tx_dma_underflow = false; | ||||||
|           last_data_received_time = millis(); |           last_data_received_time = millis(); | ||||||
|   | |||||||
| @@ -53,12 +53,13 @@ void SourceSpeaker::dump_config() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void SourceSpeaker::setup() { | void SourceSpeaker::setup() { | ||||||
|   this->parent_->get_output_speaker()->add_audio_output_callback( |   this->parent_->get_output_speaker()->add_audio_output_callback([this](uint32_t new_frames, int64_t write_timestamp) { | ||||||
|       [this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) { |     // The SourceSpeaker may not have included any audio in the mixed output, so verify there were pending frames | ||||||
|         uint32_t personal_playback_ms = std::min(new_playback_ms, this->pending_playback_ms_); |     uint32_t speakers_playback_frames = std::min(new_frames, this->pending_playback_frames_); | ||||||
|         if (personal_playback_ms > 0) { |     this->pending_playback_frames_ -= speakers_playback_frames; | ||||||
|           this->pending_playback_ms_ -= personal_playback_ms; |  | ||||||
|           this->audio_output_callback_(personal_playback_ms, remainder_us, this->pending_playback_ms_, write_timestamp); |     if (speakers_playback_frames > 0) { | ||||||
|  |       this->audio_output_callback_(speakers_playback_frames, write_timestamp); | ||||||
|     } |     } | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
| @@ -153,6 +154,7 @@ esp_err_t SourceSpeaker::start_() { | |||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   this->pending_playback_frames_ = 0;  // reset | ||||||
|   return this->parent_->start(this->audio_stream_info_); |   return this->parent_->start(this->audio_stream_info_); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -542,11 +544,7 @@ void MixerSpeaker::audio_mixer_task(void *params) { | |||||||
|  |  | ||||||
|         // Update source speaker buffer length |         // Update source speaker buffer length | ||||||
|         transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.frames_to_bytes(frames_to_mix)); |         transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.frames_to_bytes(frames_to_mix)); | ||||||
|         speakers_with_data[0]->accumulated_frames_read_ += frames_to_mix; |         speakers_with_data[0]->pending_playback_frames_ += frames_to_mix; | ||||||
|  |  | ||||||
|         // Add new audio duration to the source speaker pending playback |  | ||||||
|         speakers_with_data[0]->pending_playback_ms_ += |  | ||||||
|             active_stream_info.frames_to_milliseconds_with_remainder(&speakers_with_data[0]->accumulated_frames_read_); |  | ||||||
|  |  | ||||||
|         // Update output transfer buffer length |         // Update output transfer buffer length | ||||||
|         output_transfer_buffer->increase_buffer_length( |         output_transfer_buffer->increase_buffer_length( | ||||||
| @@ -586,10 +584,6 @@ void MixerSpeaker::audio_mixer_task(void *params) { | |||||||
|                           reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()), |                           reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()), | ||||||
|                           this_mixer->audio_stream_info_.value(), frames_to_mix); |                           this_mixer->audio_stream_info_.value(), frames_to_mix); | ||||||
|  |  | ||||||
|         speakers_with_data[i]->pending_playback_ms_ += |  | ||||||
|             speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder( |  | ||||||
|                 &speakers_with_data[i]->accumulated_frames_read_); |  | ||||||
|  |  | ||||||
|         if (i != transfer_buffers_with_data.size() - 1) { |         if (i != transfer_buffers_with_data.size() - 1) { | ||||||
|           // Need to mix more streams together, point primary buffer and stream info to the already mixed output |           // Need to mix more streams together, point primary buffer and stream info to the already mixed output | ||||||
|           primary_buffer = reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()); |           primary_buffer = reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()); | ||||||
| @@ -601,11 +595,7 @@ void MixerSpeaker::audio_mixer_task(void *params) { | |||||||
|       for (int i = 0; i < transfer_buffers_with_data.size(); ++i) { |       for (int i = 0; i < transfer_buffers_with_data.size(); ++i) { | ||||||
|         transfer_buffers_with_data[i]->decrease_buffer_length( |         transfer_buffers_with_data[i]->decrease_buffer_length( | ||||||
|             speakers_with_data[i]->get_audio_stream_info().frames_to_bytes(frames_to_mix)); |             speakers_with_data[i]->get_audio_stream_info().frames_to_bytes(frames_to_mix)); | ||||||
|         speakers_with_data[i]->accumulated_frames_read_ += frames_to_mix; |         speakers_with_data[i]->pending_playback_frames_ += frames_to_mix; | ||||||
|  |  | ||||||
|         speakers_with_data[i]->pending_playback_ms_ += |  | ||||||
|             speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder( |  | ||||||
|                 &speakers_with_data[i]->accumulated_frames_read_); |  | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       // Update output transfer buffer length |       // Update output transfer buffer length | ||||||
|   | |||||||
| @@ -114,9 +114,7 @@ class SourceSpeaker : public speaker::Speaker, public Component { | |||||||
|   uint32_t ducking_transition_samples_remaining_{0}; |   uint32_t ducking_transition_samples_remaining_{0}; | ||||||
|   uint32_t samples_per_ducking_step_{0}; |   uint32_t samples_per_ducking_step_{0}; | ||||||
|  |  | ||||||
|   uint32_t accumulated_frames_read_{0}; |   uint32_t pending_playback_frames_{0}; | ||||||
|  |  | ||||||
|   uint32_t pending_playback_ms_{0}; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class MixerSpeaker : public Component { | class MixerSpeaker : public Component { | ||||||
|   | |||||||
| @@ -43,12 +43,17 @@ void ResamplerSpeaker::setup() { | |||||||
|     return; |     return; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   this->output_speaker_->add_audio_output_callback( |   this->output_speaker_->add_audio_output_callback([this](uint32_t new_frames, int64_t write_timestamp) { | ||||||
|       [this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) { |     if (this->audio_stream_info_.get_sample_rate() != this->target_stream_info_.get_sample_rate()) { | ||||||
|         int32_t adjustment = this->playback_differential_ms_; |       // Convert the number of frames from the target sample rate to the source sample rate. Track the remainder to | ||||||
|         this->playback_differential_ms_ -= adjustment; |       // avoid losing frames from integer division truncation. | ||||||
|         int32_t adjusted_playback_ms = static_cast<int32_t>(new_playback_ms) + adjustment; |       const uint64_t numerator = new_frames * this->audio_stream_info_.get_sample_rate() + this->callback_remainder_; | ||||||
|         this->audio_output_callback_(adjusted_playback_ms, remainder_us, pending_ms, write_timestamp); |       const uint64_t denominator = this->target_stream_info_.get_sample_rate(); | ||||||
|  |       this->callback_remainder_ = numerator % denominator; | ||||||
|  |       this->audio_output_callback_(numerator / denominator, write_timestamp); | ||||||
|  |     } else { | ||||||
|  |       this->audio_output_callback_(new_frames, write_timestamp); | ||||||
|  |     } | ||||||
|   }); |   }); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -283,7 +288,6 @@ void ResamplerSpeaker::resample_task(void *params) { | |||||||
|     xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED); |     xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   this_resampler->playback_differential_ms_ = 0; |  | ||||||
|   while (err == ESP_OK) { |   while (err == ESP_OK) { | ||||||
|     uint32_t event_bits = xEventGroupGetBits(this_resampler->event_group_); |     uint32_t event_bits = xEventGroupGetBits(this_resampler->event_group_); | ||||||
|  |  | ||||||
| @@ -295,8 +299,6 @@ void ResamplerSpeaker::resample_task(void *params) { | |||||||
|     int32_t ms_differential = 0; |     int32_t ms_differential = 0; | ||||||
|     audio::AudioResamplerState resampler_state = resampler->resample(false, &ms_differential); |     audio::AudioResamplerState resampler_state = resampler->resample(false, &ms_differential); | ||||||
|  |  | ||||||
|     this_resampler->playback_differential_ms_ += ms_differential; |  | ||||||
|  |  | ||||||
|     if (resampler_state == audio::AudioResamplerState::FINISHED) { |     if (resampler_state == audio::AudioResamplerState::FINISHED) { | ||||||
|       break; |       break; | ||||||
|     } else if (resampler_state == audio::AudioResamplerState::FAILED) { |     } else if (resampler_state == audio::AudioResamplerState::FAILED) { | ||||||
|   | |||||||
| @@ -100,7 +100,7 @@ class ResamplerSpeaker : public Component, public speaker::Speaker { | |||||||
|  |  | ||||||
|   uint32_t buffer_duration_ms_; |   uint32_t buffer_duration_ms_; | ||||||
|  |  | ||||||
|   int32_t playback_differential_ms_{0}; |   uint64_t callback_remainder_{0}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| }  // namespace resampler | }  // namespace resampler | ||||||
|   | |||||||
| @@ -106,16 +106,6 @@ void SpeakerMediaPlayer::setup() { | |||||||
|       ESP_LOGE(TAG, "Failed to create media pipeline"); |       ESP_LOGE(TAG, "Failed to create media pipeline"); | ||||||
|       this->mark_failed(); |       this->mark_failed(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Setup callback to track the duration of audio played by the media pipeline |  | ||||||
|     this->media_speaker_->add_audio_output_callback( |  | ||||||
|         [this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) { |  | ||||||
|           this->playback_ms_ += new_playback_ms; |  | ||||||
|           this->remainder_us_ = remainder_us; |  | ||||||
|           this->pending_ms_ = pending_ms; |  | ||||||
|           this->last_audio_write_timestamp_ = write_timestamp; |  | ||||||
|           this->playback_us_ = this->playback_ms_ * 1000 + this->remainder_us_; |  | ||||||
|         }); |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   ESP_LOGI(TAG, "Set up speaker media player"); |   ESP_LOGI(TAG, "Set up speaker media player"); | ||||||
| @@ -321,7 +311,6 @@ void SpeakerMediaPlayer::loop() { | |||||||
|   AudioPipelineState old_media_pipeline_state = this->media_pipeline_state_; |   AudioPipelineState old_media_pipeline_state = this->media_pipeline_state_; | ||||||
|   if (this->media_pipeline_ != nullptr) { |   if (this->media_pipeline_ != nullptr) { | ||||||
|     this->media_pipeline_state_ = this->media_pipeline_->process_state(); |     this->media_pipeline_state_ = this->media_pipeline_->process_state(); | ||||||
|     this->decoded_playback_ms_ = this->media_pipeline_->get_playback_ms(); |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) { |   if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) { | ||||||
| @@ -379,13 +368,6 @@ void SpeakerMediaPlayer::loop() { | |||||||
|       } else if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) { |       } else if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) { | ||||||
|         this->state = media_player::MEDIA_PLAYER_STATE_PLAYING; |         this->state = media_player::MEDIA_PLAYER_STATE_PLAYING; | ||||||
|       } else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) { |       } else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) { | ||||||
|         // Reset playback durations |  | ||||||
|         this->decoded_playback_ms_ = 0; |  | ||||||
|         this->playback_us_ = 0; |  | ||||||
|         this->playback_ms_ = 0; |  | ||||||
|         this->remainder_us_ = 0; |  | ||||||
|         this->pending_ms_ = 0; |  | ||||||
|  |  | ||||||
|         if (!media_playlist_.empty()) { |         if (!media_playlist_.empty()) { | ||||||
|           uint32_t timeout_ms = 0; |           uint32_t timeout_ms = 0; | ||||||
|           if (old_media_pipeline_state == AudioPipelineState::PLAYING) { |           if (old_media_pipeline_state == AudioPipelineState::PLAYING) { | ||||||
|   | |||||||
| @@ -73,10 +73,6 @@ class SpeakerMediaPlayer : public Component, public media_player::MediaPlayer { | |||||||
|  |  | ||||||
|   void play_file(audio::AudioFile *media_file, bool announcement, bool enqueue); |   void play_file(audio::AudioFile *media_file, bool announcement, bool enqueue); | ||||||
|  |  | ||||||
|   uint32_t get_playback_ms() const { return this->playback_ms_; } |  | ||||||
|   uint32_t get_playback_us() const { return this->playback_us_; } |  | ||||||
|   uint32_t get_decoded_playback_ms() const { return this->decoded_playback_ms_; } |  | ||||||
|  |  | ||||||
|   void set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms); |   void set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms); | ||||||
|  |  | ||||||
|  protected: |  protected: | ||||||
| @@ -141,13 +137,6 @@ class SpeakerMediaPlayer : public Component, public media_player::MediaPlayer { | |||||||
|   Trigger<> *mute_trigger_ = new Trigger<>(); |   Trigger<> *mute_trigger_ = new Trigger<>(); | ||||||
|   Trigger<> *unmute_trigger_ = new Trigger<>(); |   Trigger<> *unmute_trigger_ = new Trigger<>(); | ||||||
|   Trigger<float> *volume_trigger_ = new Trigger<float>(); |   Trigger<float> *volume_trigger_ = new Trigger<float>(); | ||||||
|  |  | ||||||
|   uint32_t decoded_playback_ms_{0}; |  | ||||||
|   uint32_t playback_us_{0}; |  | ||||||
|   uint32_t playback_ms_{0}; |  | ||||||
|   uint32_t remainder_us_{0}; |  | ||||||
|   uint32_t pending_ms_{0}; |  | ||||||
|   uint32_t last_audio_write_timestamp_{0}; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| }  // namespace speaker | }  // namespace speaker | ||||||
|   | |||||||
| @@ -104,12 +104,9 @@ class Speaker { | |||||||
|  |  | ||||||
|   /// Callback function for sending the duration of the audio written to the speaker since the last callback. |   /// Callback function for sending the duration of the audio written to the speaker since the last callback. | ||||||
|   /// Parameters: |   /// Parameters: | ||||||
|   ///   - Duration in milliseconds. Never rounded and should always be less than or equal to the actual duration. |   ///   - Frames played | ||||||
|   ///   - Remainder duration in microseconds. Rounded duration after subtracting the previous parameter from the actual |   ///   - System time in microseconds when the frames were written to the DAC | ||||||
|   ///     duration. |   void add_audio_output_callback(std::function<void(uint32_t, int64_t)> &&callback) { | ||||||
|   ///   - Duration of remaining, unwritten audio buffered in the speaker in milliseconds. |  | ||||||
|   ///   - System time in microseconds when the last write was completed. |  | ||||||
|   void add_audio_output_callback(std::function<void(uint32_t, uint32_t, uint32_t, uint32_t)> &&callback) { |  | ||||||
|     this->audio_output_callback_.add(std::move(callback)); |     this->audio_output_callback_.add(std::move(callback)); | ||||||
|   } |   } | ||||||
|  |  | ||||||
| @@ -123,7 +120,7 @@ class Speaker { | |||||||
|   audio_dac::AudioDac *audio_dac_{nullptr}; |   audio_dac::AudioDac *audio_dac_{nullptr}; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|   CallbackManager<void(uint32_t, uint32_t, uint32_t, uint32_t)> audio_output_callback_{}; |   CallbackManager<void(uint32_t, int64_t)> audio_output_callback_{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| }  // namespace speaker | }  // namespace speaker | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user