mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-30 22:53:59 +00:00 
			
		
		
		
	[audio] Media Player Components PR6 (#8168)
Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
		
							
								
								
									
										159
									
								
								esphome/components/audio/audio_resampler.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										159
									
								
								esphome/components/audio/audio_resampler.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,159 @@ | |||||||
|  | #include "audio_resampler.h" | ||||||
|  |  | ||||||
|  | #ifdef USE_ESP32 | ||||||
|  |  | ||||||
|  | #include "esphome/core/hal.h" | ||||||
|  |  | ||||||
|  | namespace esphome { | ||||||
|  | namespace audio { | ||||||
|  |  | ||||||
|  | static const uint32_t READ_WRITE_TIMEOUT_MS = 20; | ||||||
|  |  | ||||||
|  | // Creates the input and output transfer buffers and remembers their requested sizes in bytes. | ||||||
|  | // Either buffer pointer may end up null; add_source(), add_sink() and start() check for that case. | ||||||
|  | AudioResampler::AudioResampler(size_t input_buffer_size, size_t output_buffer_size) | ||||||
|  |     : input_transfer_buffer_(AudioSourceTransferBuffer::create(input_buffer_size)), | ||||||
|  |       output_transfer_buffer_(AudioSinkTransferBuffer::create(output_buffer_size)), | ||||||
|  |       input_buffer_size_(input_buffer_size), | ||||||
|  |       output_buffer_size_(output_buffer_size) {} | ||||||
|  |  | ||||||
|  | // Hands the source ring buffer to the input transfer buffer. | ||||||
|  | // Returns ESP_ERR_NO_MEM when the input transfer buffer doesn't exist, ESP_OK otherwise. | ||||||
|  | esp_err_t AudioResampler::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) { | ||||||
|  |   if (this->input_transfer_buffer_ == nullptr) { | ||||||
|  |     // Nothing to attach the source to | ||||||
|  |     return ESP_ERR_NO_MEM; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   this->input_transfer_buffer_->set_source(input_ring_buffer); | ||||||
|  |   return ESP_OK; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Hands the sink ring buffer to the output transfer buffer. | ||||||
|  | // Returns ESP_ERR_NO_MEM when the output transfer buffer doesn't exist, ESP_OK otherwise. | ||||||
|  | esp_err_t AudioResampler::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) { | ||||||
|  |   if (this->output_transfer_buffer_ == nullptr) { | ||||||
|  |     // Nothing to attach the sink to | ||||||
|  |     return ESP_ERR_NO_MEM; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   this->output_transfer_buffer_->set_sink(output_ring_buffer); | ||||||
|  |   return ESP_OK; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #ifdef USE_SPEAKER | ||||||
|  | // Hands the speaker component to the output transfer buffer as its sink. | ||||||
|  | // Returns ESP_ERR_NO_MEM when the output transfer buffer doesn't exist, ESP_OK otherwise. | ||||||
|  | esp_err_t AudioResampler::add_sink(speaker::Speaker *speaker) { | ||||||
|  |   if (this->output_transfer_buffer_ == nullptr) { | ||||||
|  |     // Nothing to attach the speaker to | ||||||
|  |     return ESP_ERR_NO_MEM; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   this->output_transfer_buffer_->set_sink(speaker); | ||||||
|  |   return ESP_OK; | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | // Prepares the class for a new pair of stream formats. | ||||||
|  | // Both stream infos are stored first. A Resampler is then created only when the sample rate or the bits per | ||||||
|  | // sample differ; when the formats already match, resample() copies the audio through unchanged. | ||||||
|  | esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info, | ||||||
|  |                                 uint16_t number_of_taps, uint16_t number_of_filters) { | ||||||
|  |   this->input_stream_info_ = input_stream_info; | ||||||
|  |   this->output_stream_info_ = output_stream_info; | ||||||
|  |  | ||||||
|  |   // Both transfer buffers must exist before anything can be converted | ||||||
|  |   if (this->input_transfer_buffer_ == nullptr) { | ||||||
|  |     return ESP_ERR_NO_MEM; | ||||||
|  |   } | ||||||
|  |   if (this->output_transfer_buffer_ == nullptr) { | ||||||
|  |     return ESP_ERR_NO_MEM; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // Samples wider than 32 bits are rejected | ||||||
|  |   const bool bit_depth_too_wide = | ||||||
|  |       (input_stream_info.get_bits_per_sample() > 32) || (output_stream_info.get_bits_per_sample() > 32); | ||||||
|  |   if (bit_depth_too_wide) { | ||||||
|  |     return ESP_ERR_NOT_SUPPORTED; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   // The channel count has to be the same on both sides | ||||||
|  |   if (this->input_stream_info_.get_channels() != output_stream_info.get_channels()) { | ||||||
|  |     return ESP_ERR_NOT_SUPPORTED; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   const bool conversion_needed = | ||||||
|  |       (input_stream_info.get_sample_rate() != output_stream_info.get_sample_rate()) || | ||||||
|  |       (input_stream_info.get_bits_per_sample() != output_stream_info.get_bits_per_sample()); | ||||||
|  |   if (!conversion_needed) { | ||||||
|  |     // Formats already match, so no Resampler is created | ||||||
|  |     return ESP_OK; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   this->resampler_ = make_unique<esp_audio_libs::resampler::Resampler>( | ||||||
|  |       input_stream_info.bytes_to_samples(this->input_buffer_size_), | ||||||
|  |       output_stream_info.bytes_to_samples(this->output_buffer_size_)); | ||||||
|  |  | ||||||
|  |   // Use cascaded biquad filters when downsampling to avoid aliasing | ||||||
|  |   const bool downsampling = output_stream_info.get_sample_rate() < input_stream_info.get_sample_rate(); | ||||||
|  |  | ||||||
|  |   esp_audio_libs::resampler::ResamplerConfiguration resample_config = { | ||||||
|  |       .source_sample_rate = static_cast<float>(input_stream_info.get_sample_rate()), | ||||||
|  |       .target_sample_rate = static_cast<float>(output_stream_info.get_sample_rate()), | ||||||
|  |       .source_bits_per_sample = input_stream_info.get_bits_per_sample(), | ||||||
|  |       .target_bits_per_sample = output_stream_info.get_bits_per_sample(), | ||||||
|  |       .channels = this->input_stream_info_.get_channels(), | ||||||
|  |       .use_pre_or_post_filter = downsampling, | ||||||
|  |       .subsample_interpolate = false,  // Doubles the CPU load. Using more filters is a better alternative | ||||||
|  |       .number_of_taps = number_of_taps, | ||||||
|  |       .number_of_filters = number_of_filters, | ||||||
|  |   }; | ||||||
|  |  | ||||||
|  |   // A false result means the resampler's internal buffers couldn't be allocated | ||||||
|  |   return this->resampler_->initialize(resample_config) ? ESP_OK : ESP_ERR_NO_MEM; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Runs one pass of the resampling pipeline: | ||||||
|  | //   1. pushes already converted audio to the sink (or sleeps while output is paused), | ||||||
|  | //   2. pulls new audio from the source, | ||||||
|  | //   3. converts (or directly copies) as many frames as fit into the output transfer buffer. | ||||||
|  | // | ||||||
|  | // stop_gracefully: when true, FINISHED is returned once the input has no buffered data and the output transfer | ||||||
|  | //                  buffer is empty. | ||||||
|  | // ms_differential: optional out-parameter (may be nullptr). Receives the input duration used minus the output | ||||||
|  | //                  duration generated, in milliseconds, for this call. It is 0 whenever no conversion happened. | ||||||
|  | // | ||||||
|  | // Expects start() to have succeeded beforehand; the transfer buffers and, when the formats differ, the resampler | ||||||
|  | // are used without further checks. | ||||||
|  | AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_differential) { | ||||||
|  |   // Give the out-parameter a defined value on every return path, including the early returns below | ||||||
|  |   if (ms_differential != nullptr) { | ||||||
|  |     *ms_differential = 0; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if (stop_gracefully) { | ||||||
|  |     if (!this->input_transfer_buffer_->has_buffered_data() && (this->output_transfer_buffer_->available() == 0)) { | ||||||
|  |       return AudioResamplerState::FINISHED; | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   if (!this->pause_output_) { | ||||||
|  |     // Move audio data to the sink | ||||||
|  |     this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS)); | ||||||
|  |   } else { | ||||||
|  |     // If paused, block to avoid wasting CPU resources | ||||||
|  |     delay(READ_WRITE_TIMEOUT_MS); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS)); | ||||||
|  |  | ||||||
|  |   if (this->input_transfer_buffer_->available() == 0) { | ||||||
|  |     // No samples available to process | ||||||
|  |     return AudioResamplerState::RESAMPLING; | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   const size_t bytes_free = this->output_transfer_buffer_->free(); | ||||||
|  |   const uint32_t frames_free = this->output_stream_info_.bytes_to_frames(bytes_free); | ||||||
|  |  | ||||||
|  |   const size_t bytes_available = this->input_transfer_buffer_->available(); | ||||||
|  |   const uint32_t frames_available = this->input_stream_info_.bytes_to_frames(bytes_available); | ||||||
|  |  | ||||||
|  |   if ((this->input_stream_info_.get_sample_rate() != this->output_stream_info_.get_sample_rate()) || | ||||||
|  |       (this->input_stream_info_.get_bits_per_sample() != this->output_stream_info_.get_bits_per_sample())) { | ||||||
|  |     // NOTE(review): the trailing -3 is presumably a gain in dB that leaves headroom against clipping - confirm | ||||||
|  |     // against the esp-audio-libs Resampler::resample() signature | ||||||
|  |     esp_audio_libs::resampler::ResamplerResults results = | ||||||
|  |         this->resampler_->resample(this->input_transfer_buffer_->get_buffer_start(), | ||||||
|  |                                    this->output_transfer_buffer_->get_buffer_end(), frames_available, frames_free, -3); | ||||||
|  |  | ||||||
|  |     this->input_transfer_buffer_->decrease_buffer_length(this->input_stream_info_.frames_to_bytes(results.frames_used)); | ||||||
|  |     this->output_transfer_buffer_->increase_buffer_length( | ||||||
|  |         this->output_stream_info_.frames_to_bytes(results.frames_generated)); | ||||||
|  |  | ||||||
|  |     // Resampling causes slight differences in the durations used versus generated. Computes the difference in | ||||||
|  |     // milliseconds. The callback function passing the played audio duration uses the difference to convert from output | ||||||
|  |     // duration to input duration. | ||||||
|  |     this->accumulated_frames_used_ += results.frames_used; | ||||||
|  |     this->accumulated_frames_generated_ += results.frames_generated; | ||||||
|  |  | ||||||
|  |     // Always convert the accumulators, even when the caller doesn't want the result, so they keep being reduced | ||||||
|  |     const int32_t used_ms = | ||||||
|  |         this->input_stream_info_.frames_to_milliseconds_with_remainder(&this->accumulated_frames_used_); | ||||||
|  |     const int32_t generated_ms = | ||||||
|  |         this->output_stream_info_.frames_to_milliseconds_with_remainder(&this->accumulated_frames_generated_); | ||||||
|  |  | ||||||
|  |     if (ms_differential != nullptr) { | ||||||
|  |       *ms_differential = used_ms - generated_ms; | ||||||
|  |     } | ||||||
|  |   } else { | ||||||
|  |     // No resampling required, copy samples directly to the output transfer buffer. The differential stays 0. | ||||||
|  |     const size_t bytes_to_transfer = std::min(this->output_stream_info_.frames_to_bytes(frames_free), | ||||||
|  |                                               this->input_stream_info_.frames_to_bytes(frames_available)); | ||||||
|  |  | ||||||
|  |     std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(), | ||||||
|  |                 bytes_to_transfer); | ||||||
|  |  | ||||||
|  |     this->input_transfer_buffer_->decrease_buffer_length(bytes_to_transfer); | ||||||
|  |     this->output_transfer_buffer_->increase_buffer_length(bytes_to_transfer); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return AudioResamplerState::RESAMPLING; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | }  // namespace audio | ||||||
|  | }  // namespace esphome | ||||||
|  |  | ||||||
|  | #endif | ||||||
							
								
								
									
										100
									
								
								esphome/components/audio/audio_resampler.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								esphome/components/audio/audio_resampler.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | |||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #ifdef USE_ESP32 | ||||||
|  |  | ||||||
|  | #include "audio.h" | ||||||
|  | #include "audio_transfer_buffer.h" | ||||||
|  |  | ||||||
|  | #ifdef USE_SPEAKER | ||||||
|  | #include "esphome/components/speaker/speaker.h" | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  | #include "esphome/core/ring_buffer.h" | ||||||
|  |  | ||||||
|  | #include "esp_err.h" | ||||||
|  |  | ||||||
|  | #include <resampler.h>  // esp-audio-libs | ||||||
|  |  | ||||||
|  | namespace esphome { | ||||||
|  | namespace audio { | ||||||
|  |  | ||||||
|  | enum class AudioResamplerState : uint8_t { | ||||||
|  |   RESAMPLING,  // Not finished: audio was processed, or no input was available on this call | ||||||
|  |   FINISHED,    // Stop was requested, the input has no buffered data, and the output transfer buffer is empty | ||||||
|  |   FAILED,      // Unused state included for consistency among Audio classes | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class AudioResampler { | ||||||
|  |   /* | ||||||
|  |    * @brief Class that facilitates resampling audio. | ||||||
|  |    * The audio data is read from a ring buffer source, resampled, and sent to an audio sink (ring buffer or speaker | ||||||
|  |    * component). Also supports converting bits per sample. | ||||||
|  |    */ | ||||||
|  |  public: | ||||||
|  |   /// @brief Allocates the input and output transfer buffers | ||||||
|  |   /// @param input_buffer_size Size of the input transfer buffer in bytes. | ||||||
|  |   /// @param output_buffer_size Size of the output transfer buffer in bytes. | ||||||
|  |   AudioResampler(size_t input_buffer_size, size_t output_buffer_size); | ||||||
|  |  | ||||||
|  |   /// @brief Adds a source ring buffer for audio data. Takes ownership of the ring buffer in a shared_ptr. | ||||||
|  |   /// @param input_ring_buffer weak_ptr of a shared_ptr of the source ring buffer to transfer ownership | ||||||
|  |   /// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated | ||||||
|  |   esp_err_t add_source(std::weak_ptr<RingBuffer> &input_ring_buffer); | ||||||
|  |  | ||||||
|  |   /// @brief Adds a sink ring buffer for resampled audio. Takes ownership of the ring buffer in a shared_ptr. | ||||||
|  |   /// @param output_ring_buffer weak_ptr of a shared_ptr of the sink ring buffer to transfer ownership | ||||||
|  |   /// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated | ||||||
|  |   esp_err_t add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer); | ||||||
|  |  | ||||||
|  | #ifdef USE_SPEAKER | ||||||
|  |   /// @brief Adds a sink speaker for resampled audio. | ||||||
|  |   /// @param speaker pointer to speaker component | ||||||
|  |   /// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated | ||||||
|  |   esp_err_t add_sink(speaker::Speaker *speaker); | ||||||
|  | #endif | ||||||
|  |  | ||||||
|  |   /// @brief Sets up the class to resample. | ||||||
|  |   /// @param input_stream_info The incoming sample rate, bits per sample, and number of channels | ||||||
|  |   /// @param output_stream_info The desired outgoing sample rate, bits per sample, and number of channels | ||||||
|  |   /// @param number_of_taps Number of taps per FIR filter | ||||||
|  |   /// @param number_of_filters Number of FIR filters | ||||||
|  |   /// @return ESP_OK if it is able to convert the incoming stream, | ||||||
|  |   ///         ESP_ERR_NO_MEM if the transfer buffers or the resampler's internal buffers failed to allocate, | ||||||
|  |   ///         ESP_ERR_NOT_SUPPORTED if either stream exceeds 32 bits per sample or the channel counts differ. | ||||||
|  |   esp_err_t start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info, uint16_t number_of_taps, | ||||||
|  |                   uint16_t number_of_filters); | ||||||
|  |  | ||||||
|  |   /// @brief Resamples audio from the ring buffer source and writes to the sink. | ||||||
|  |   /// @param stop_gracefully If true, it indicates the file decoder is finished. The resampler will resample all the | ||||||
|  |   ///                        remaining audio and then finish. | ||||||
|  |   /// @param ms_differential Pointer to a (int32_t) variable that will store the difference, in milliseconds, between | ||||||
|  |   ///                        the duration of input audio used and the duration of output audio generated. | ||||||
|  |   /// @return AudioResamplerState | ||||||
|  |   AudioResamplerState resample(bool stop_gracefully, int32_t *ms_differential); | ||||||
|  |  | ||||||
|  |   /// @brief Pauses sending resampled audio to the sink. If paused, it will continue to process internal buffers. | ||||||
|  |   /// @param pause_state If true, audio data is not sent to the sink. | ||||||
|  |   void set_pause_output_state(bool pause_state) { this->pause_output_ = pause_state; } | ||||||
|  |  | ||||||
|  |  protected: | ||||||
|  |   std::unique_ptr<AudioSourceTransferBuffer> input_transfer_buffer_; | ||||||
|  |   std::unique_ptr<AudioSinkTransferBuffer> output_transfer_buffer_; | ||||||
|  |  | ||||||
|  |   size_t input_buffer_size_; | ||||||
|  |   size_t output_buffer_size_; | ||||||
|  |  | ||||||
|  |   uint32_t accumulated_frames_used_{0}; | ||||||
|  |   uint32_t accumulated_frames_generated_{0}; | ||||||
|  |  | ||||||
|  |   bool pause_output_{false}; | ||||||
|  |  | ||||||
|  |   AudioStreamInfo input_stream_info_; | ||||||
|  |   AudioStreamInfo output_stream_info_; | ||||||
|  |  | ||||||
|  |   std::unique_ptr<esp_audio_libs::resampler::Resampler> resampler_; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | }  // namespace audio | ||||||
|  | }  // namespace esphome | ||||||
|  |  | ||||||
|  | #endif | ||||||
		Reference in New Issue
	
	Block a user