mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-31 07:03:55 +00:00 
			
		
		
		
	[audio, i2s_audio, speaker] Media Player Components PR2 (#8164)
Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
		| @@ -1,9 +1,121 @@ | |||||||
| import esphome.codegen as cg | import esphome.codegen as cg | ||||||
| import esphome.config_validation as cv | import esphome.config_validation as cv | ||||||
|  | from esphome.const import CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE | ||||||
|  | import esphome.final_validate as fv | ||||||
|  |  | ||||||
# GitHub handles of the maintainers responsible for this component.
CODEOWNERS = ["@kahrendt"]
# Codegen handle for the C++ ``audio`` namespace, created under ``cg.esphome_ns``.
audio_ns = cg.esphome_ns.namespace("audio")

# Codegen handles for the C++ ``AudioFile`` struct and ``AudioFileType`` enum
# (``is_class=True`` matches the ``enum class`` declaration on the C++ side).
AudioFile = audio_ns.struct("AudioFile")
AudioFileType = audio_ns.enum("AudioFileType", is_class=True)
# Maps an upper-case file type name to the corresponding ``AudioFileType`` enumerator.
# NOTE(review): the C++ enum only declares FLAC and MP3 when their USE_AUDIO_*_SUPPORT
# macros are defined, while this mapping lists them unconditionally - confirm callers
# enable the matching define before emitting those values.
AUDIO_FILE_TYPE_ENUM = {
    "NONE": AudioFileType.NONE,
    "WAV": AudioFileType.WAV,
    "MP3": AudioFileType.MP3,
    "FLAC": AudioFileType.FLAC,
}


# Config keys under which ``set_stream_limits`` stores a component's stream limits and
# from which ``final_validate_audio_schema`` reads them back.
CONF_MIN_BITS_PER_SAMPLE = "min_bits_per_sample"
CONF_MAX_BITS_PER_SAMPLE = "max_bits_per_sample"
CONF_MIN_CHANNELS = "min_channels"
CONF_MAX_CHANNELS = "max_channels"
CONF_MIN_SAMPLE_RATE = "min_sample_rate"
CONF_MAX_SAMPLE_RATE = "max_sample_rate"


# The audio component itself declares no configuration options.
CONFIG_SCHEMA = cv.All(
    cv.Schema({}),
)

# Optional stream settings that other audio components can reuse in their own schemas.
# Each option is range-checked with ``cv.int_range`` using the bounds shown below.
AUDIO_COMPONENT_SCHEMA = cv.Schema(
    {
        cv.Optional(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32),
        cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2),
        cv.Optional(CONF_SAMPLE_RATE): cv.int_range(8000, 48000),
    }
)
|  |  | ||||||
|  |  | ||||||
# Sentinel meaning "this limit was not supplied"; lets ``None`` be passed as a real value.
_UNDEF = object()


def set_stream_limits(
    min_bits_per_sample: int = _UNDEF,
    max_bits_per_sample: int = _UNDEF,
    min_channels: int = _UNDEF,
    max_channels: int = _UNDEF,
    min_sample_rate: int = _UNDEF,
    max_sample_rate: int = _UNDEF,
):
    """Build a validator that records audio stream limits in a component's config.

    Only the limits that are explicitly passed are written; omitted limits leave any
    existing entry in the config untouched.

    Args:
        min_bits_per_sample: Value stored under ``CONF_MIN_BITS_PER_SAMPLE``.
        max_bits_per_sample: Value stored under ``CONF_MAX_BITS_PER_SAMPLE``.
        min_channels: Value stored under ``CONF_MIN_CHANNELS``.
        max_channels: Value stored under ``CONF_MAX_CHANNELS``.
        min_sample_rate: Value stored under ``CONF_MIN_SAMPLE_RATE``.
        max_sample_rate: Value stored under ``CONF_MAX_SAMPLE_RATE``.

    Returns:
        A callable that takes the config dict, stores the supplied limits in it in
        place, and returns that same dict so it can be chained like any other
        validator (e.g. inside ``cv.All``).
    """

    def set_limits_in_config(config):
        if min_bits_per_sample is not _UNDEF:
            config[CONF_MIN_BITS_PER_SAMPLE] = min_bits_per_sample
        if max_bits_per_sample is not _UNDEF:
            config[CONF_MAX_BITS_PER_SAMPLE] = max_bits_per_sample
        if min_channels is not _UNDEF:
            config[CONF_MIN_CHANNELS] = min_channels
        if max_channels is not _UNDEF:
            config[CONF_MAX_CHANNELS] = max_channels
        if min_sample_rate is not _UNDEF:
            config[CONF_MIN_SAMPLE_RATE] = min_sample_rate
        if max_sample_rate is not _UNDEF:
            config[CONF_MAX_SAMPLE_RATE] = max_sample_rate

        # Hand the config back: a validator that returns None would replace the
        # config with None when used in a validation chain.
        return config

    return set_limits_in_config
|  |  | ||||||
|  |  | ||||||
def final_validate_audio_schema(
    name: str,
    *,
    audio_device: str,
    bits_per_sample: int,
    channels: int,
    sample_rate: int,
):
    """Build a final-validation schema that checks stream settings against a device's limits.

    The limits are read from the audio device's config under the ``CONF_MIN_*`` /
    ``CONF_MAX_*`` keys (the keys written by ``set_stream_limits``). A limit that is
    absent from the config is passed to ``cv.int_range`` as ``None``.

    Args:
        name: Component name used in the error message.
        audio_device: Config key whose value is matched through
            ``fv.id_declaration_match_schema`` - presumably the ID of the audio
            device being fed; confirm against callers.
        bits_per_sample: Bits per sample the component will use.
        channels: Number of channels the component will use.
        sample_rate: Sample rate the component will use.

    Returns:
        A ``cv.Schema`` requiring ``audio_device`` and allowing any extra keys.

    Raises:
        cv.Invalid: Raised by the returned schema (not by this function) when one of
            the three values falls outside the device's limits.
    """

    # (setting named in the error message, lower-limit key, upper-limit key, value to check)
    checks = (
        (
            CONF_BITS_PER_SAMPLE,
            CONF_MIN_BITS_PER_SAMPLE,
            CONF_MAX_BITS_PER_SAMPLE,
            bits_per_sample,
        ),
        (CONF_NUM_CHANNELS, CONF_MIN_CHANNELS, CONF_MAX_CHANNELS, channels),
        (CONF_SAMPLE_RATE, CONF_MIN_SAMPLE_RATE, CONF_MAX_SAMPLE_RATE, sample_rate),
    )

    def validate_audio_compatibility(audio_config):
        for setting, min_key, max_key, value in checks:
            try:
                cv.int_range(
                    min=audio_config.get(min_key),
                    max=audio_config.get(max_key),
                )(value)
            except cv.Invalid as exc:
                raise cv.Invalid(
                    f"Invalid configuration for the {name} component. The {setting} {exc}"
                ) from exc

        # Kept outside the try blocks so a failure here is never reported as a
        # sample-rate problem. The empty schema with ALLOW_EXTRA passes the device
        # config through unchanged.
        return cv.Schema({}, extra=cv.ALLOW_EXTRA)(audio_config)

    return cv.Schema(
        {
            cv.Required(audio_device): fv.id_declaration_match_schema(
                validate_audio_compatibility
            )
        },
        extra=cv.ALLOW_EXTRA,
    )
|  |  | ||||||
|  |  | ||||||
async def to_code(config):
    """Add the ``esphome/esp-audio-libs`` library, pinned to version 1.1.1, via codegen.

    ``config`` is accepted for the standard ``to_code`` signature but is not read.
    """
    cg.add_library("esphome/esp-audio-libs", "1.1.1")
|   | |||||||
							
								
								
									
										67
									
								
								esphome/components/audio/audio.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								esphome/components/audio/audio.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | |||||||
|  | #include "audio.h" | ||||||
|  |  | ||||||
|  | namespace esphome { | ||||||
|  | namespace audio { | ||||||
|  |  | ||||||
// Greatest common divisor of `a` and `b`, computed with Euclid's algorithm.
// When `b` is 0 the loop never runs and `a` is returned unchanged (so gcd(0, 0) == 0).
static uint32_t gcd(uint32_t a, uint32_t b) {
  for (; b != 0;) {
    const uint32_t remainder = a % b;
    a = b;
    b = remainder;
  }
  return a;
}
|  |  | ||||||
|  | AudioStreamInfo::AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate) | ||||||
|  |     : bits_per_sample_(bits_per_sample), channels_(channels), sample_rate_(sample_rate) { | ||||||
|  |   this->ms_sample_rate_gcd_ = gcd(1000, this->sample_rate_); | ||||||
|  |   this->bytes_per_sample_ = (this->bits_per_sample_ + 7) / 8; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint32_t AudioStreamInfo::frames_to_microseconds(uint32_t frames) const { | ||||||
|  |   return (frames * 1000000 + (this->sample_rate_ >> 1)) / this->sample_rate_; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | uint32_t AudioStreamInfo::frames_to_milliseconds_with_remainder(uint32_t *total_frames) const { | ||||||
|  |   uint32_t unprocessable_frames = *total_frames % (this->sample_rate_ / this->ms_sample_rate_gcd_); | ||||||
|  |   uint32_t frames_for_ms_calculation = *total_frames - unprocessable_frames; | ||||||
|  |  | ||||||
|  |   uint32_t playback_ms = (frames_for_ms_calculation * 1000) / this->sample_rate_; | ||||||
|  |   *total_frames = unprocessable_frames; | ||||||
|  |   return playback_ms; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool AudioStreamInfo::operator==(const AudioStreamInfo &rhs) const { | ||||||
|  |   return (this->bits_per_sample_ == rhs.get_bits_per_sample()) && (this->channels_ == rhs.get_channels()) && | ||||||
|  |          (this->sample_rate_ == rhs.get_sample_rate()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | const char *audio_file_type_to_string(AudioFileType file_type) { | ||||||
|  |   switch (file_type) { | ||||||
|  | #ifdef USE_AUDIO_FLAC_SUPPORT | ||||||
|  |     case AudioFileType::FLAC: | ||||||
|  |       return "FLAC"; | ||||||
|  | #endif | ||||||
|  | #ifdef USE_AUDIO_MP3_SUPPORT | ||||||
|  |     case AudioFileType::MP3: | ||||||
|  |       return "MP3"; | ||||||
|  | #endif | ||||||
|  |     case AudioFileType::WAV: | ||||||
|  |       return "WAV"; | ||||||
|  |     default: | ||||||
|  |       return "unknown"; | ||||||
|  |   } | ||||||
|  | } | ||||||
|  |  | ||||||
// Multiplies each int16 sample by a Q15 fixed-point factor and stores the result in `output_buffer`.
// Each element is read before its result is written, so `output_buffer` may be the same buffer as
// `audio_samples` (in-place scaling).
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
                         size_t samples_to_scale) {
  // Note the assembly dsps_mulc function has audio glitches if the input and output buffers are the same.
  // The index is a size_t to match `samples_to_scale`; a signed int index is a signed/unsigned comparison.
  for (size_t i = 0; i < samples_to_scale; i++) {
    // Widen to 32 bits for the product, then shift right by 15 to drop the Q15 fractional bits
    const int32_t acc = (int32_t) audio_samples[i] * (int32_t) scale_factor;
    output_buffer[i] = (int16_t) (acc >> 15);
  }
}
|  |  | ||||||
|  | }  // namespace audio | ||||||
|  | }  // namespace esphome | ||||||
| @@ -1,21 +1,139 @@ | |||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
|  | #include "esphome/core/defines.h" | ||||||
|  |  | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <cstdint> | #include <cstdint> | ||||||
|  |  | ||||||
| namespace esphome { | namespace esphome { | ||||||
| namespace audio { | namespace audio { | ||||||
|  |  | ||||||
class AudioStreamInfo {
  /* Class to represent important parameters of the audio stream that also provides helper functions to convert
   * between various audio related units.
   *
   *  - An audio sample represents a unit of audio for one channel.
   *  - A frame represents a unit of audio with a sample for every channel.
   *
   * In general, converting between bytes, samples, and frames shouldn't result in rounding errors so long as frames
   * are used as the main unit when transferring audio data. Durations may result in rounding for certain sample rates;
   * e.g., 44.1 kHz. The ``frames_to_milliseconds_with_remainder`` function should be used for accuracy, as it takes
   * into account the remainder rather than just ignoring any rounding.
   *
   * Conversions that multiply a duration or byte count by the sample rate use a 64-bit intermediate so the product
   * cannot wrap in 32 bits; results are identical to plain 32-bit arithmetic whenever that would not have wrapped.
   */
 public:
  AudioStreamInfo()
      : AudioStreamInfo(16, 1, 16000) {}  // Default values represent ESPHome's audio components historical values
  AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate);

  uint8_t get_bits_per_sample() const { return this->bits_per_sample_; }
  uint8_t get_channels() const { return this->channels_; }
  uint32_t get_sample_rate() const { return this->sample_rate_; }

  /// @brief Convert bytes to duration in milliseconds.
  /// @param bytes Number of bytes to convert
  /// @return Duration in milliseconds that will store `bytes` bytes of audio. May round down for certain sample rates
  ///         or values of `bytes`.
  uint32_t bytes_to_ms(size_t bytes) const {
    const uint64_t bytes_per_second = (uint64_t) this->sample_rate_ * this->bytes_per_sample_ * this->channels_;
    return (uint32_t) (((uint64_t) bytes * 1000) / bytes_per_second);
  }

  /// @brief Convert bytes to frames.
  /// @param bytes Number of bytes to convert
  /// @return Audio frames that will store `bytes` bytes.
  uint32_t bytes_to_frames(size_t bytes) const { return (bytes / (this->bytes_per_sample_ * this->channels_)); }

  /// @brief Convert bytes to samples.
  /// @param bytes Number of bytes to convert
  /// @return Audio samples that will store `bytes` bytes.
  uint32_t bytes_to_samples(size_t bytes) const { return (bytes / this->bytes_per_sample_); }

  /// @brief Converts frames to bytes.
  /// @param frames Number of frames to convert.
  /// @return Number of bytes that will store `frames` frames of audio.
  size_t frames_to_bytes(uint32_t frames) const { return frames * this->bytes_per_sample_ * this->channels_; }

  /// @brief Converts samples to bytes.
  /// @param samples Number of samples to convert.
  /// @return Number of bytes that will store `samples` samples of audio.
  size_t samples_to_bytes(uint32_t samples) const { return samples * this->bytes_per_sample_; }

  /// @brief Converts duration to frames.
  /// @param ms Duration in milliseconds
  /// @return Audio frames that will store `ms` milliseconds of audio.  May round down for certain sample rates.
  uint32_t ms_to_frames(uint32_t ms) const { return (uint32_t) (((uint64_t) ms * this->sample_rate_) / 1000); }

  /// @brief Converts duration to samples.
  /// @param ms Duration in milliseconds
  /// @return Audio samples that will store `ms` milliseconds of audio.  May round down for certain sample rates.
  uint32_t ms_to_samples(uint32_t ms) const {
    return (uint32_t) (((uint64_t) ms * this->channels_ * this->sample_rate_) / 1000);
  }

  /// @brief Converts duration to bytes. May round down for certain sample rates.
  /// @param ms Duration in milliseconds
  /// @return Bytes that will store `ms` milliseconds of audio.  May round down for certain sample rates.
  size_t ms_to_bytes(uint32_t ms) const {
    return (size_t) (((uint64_t) ms * this->bytes_per_sample_ * this->channels_ * this->sample_rate_) / 1000);
  }

  /// @brief Computes the duration, in microseconds, the given amount of frames represents.
  /// @param frames Number of audio frames
  /// @return Duration in microseconds `frames` represents. May be slightly inaccurate due to integer division rounding
  ///         for certain sample rates.
  uint32_t frames_to_microseconds(uint32_t frames) const;

  /// @brief Computes the duration, in milliseconds, the given amount of frames represents. Avoids
  /// accumulating rounding errors by updating `frames` with the remainder after converting.
  /// @param frames Pointer to uint32_t with the number of audio frames. Replaced with the remainder.
  /// @return Duration in milliseconds `frames` represents. Always less than or equal to the actual value due to
  ///         rounding.
  uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const;

  // Class comparison operators
  bool operator==(const AudioStreamInfo &rhs) const;
  bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }

 protected:
  uint8_t bits_per_sample_;
  uint8_t channels_;
  uint32_t sample_rate_;

  // The greatest common divisor between 1000 ms = 1 second and the sample rate. Used to avoid accumulating error when
  // converting from frames to duration. Computed at construction.
  uint32_t ms_sample_rate_gcd_;

  // Conversion factor derived from the number of bits per sample. Assumes audio data is aligned to the byte. Computed
  // at construction.
  size_t bytes_per_sample_;
};
|  |  | ||||||
/// Audio file types known to the audio component.
/// FLAC and MP3 are only declared when the matching USE_AUDIO_*_SUPPORT macro is defined, so the numeric values of
/// the enumerators that follow NONE depend on the build configuration.
enum class AudioFileType : uint8_t {
  NONE = 0,
#ifdef USE_AUDIO_FLAC_SUPPORT
  FLAC,
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
  MP3,
#endif
  WAV,
};
|  |  | ||||||
/// Describes an audio file held in memory.
struct AudioFile {
  const uint8_t *data;      // Pointer to the file contents; not modifiable through this struct
  size_t length;            // presumably the size of `data` in bytes - confirm against the code that fills it in
  AudioFileType file_type;  // Type of the file pointed to by `data`
};
|  |  | ||||||
|  | /// @brief Helper function to convert file type to a const char string | ||||||
|  | /// @param file_type | ||||||
|  | /// @return const char pointer to the readable file type | ||||||
|  | const char *audio_file_type_to_string(AudioFileType file_type); | ||||||
|  |  | ||||||
|  | /// @brief Scales Q15 fixed point audio samples. Scales in place if audio_samples == output_buffer. | ||||||
|  | /// @param audio_samples PCM int16 audio samples | ||||||
|  | /// @param output_buffer Buffer to store the scaled samples | ||||||
|  | /// @param scale_factor Q15 fixed point scaling factor | ||||||
|  | /// @param samples_to_scale Number of samples to scale | ||||||
|  | void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, | ||||||
|  |                          size_t samples_to_scale); | ||||||
|  |  | ||||||
| }  // namespace audio | }  // namespace audio | ||||||
| }  // namespace esphome | }  // namespace esphome | ||||||
|   | |||||||
| @@ -1,20 +1,25 @@ | |||||||
| from esphome import pins | from esphome import pins | ||||||
| import esphome.codegen as cg | import esphome.codegen as cg | ||||||
| from esphome.components import esp32, speaker | from esphome.components import audio, esp32, speaker | ||||||
| import esphome.config_validation as cv | import esphome.config_validation as cv | ||||||
| from esphome.const import ( | from esphome.const import ( | ||||||
|  |     CONF_BITS_PER_SAMPLE, | ||||||
|     CONF_BUFFER_DURATION, |     CONF_BUFFER_DURATION, | ||||||
|     CONF_CHANNEL, |     CONF_CHANNEL, | ||||||
|     CONF_ID, |     CONF_ID, | ||||||
|     CONF_MODE, |     CONF_MODE, | ||||||
|     CONF_NEVER, |     CONF_NEVER, | ||||||
|  |     CONF_NUM_CHANNELS, | ||||||
|  |     CONF_SAMPLE_RATE, | ||||||
|     CONF_TIMEOUT, |     CONF_TIMEOUT, | ||||||
| ) | ) | ||||||
|  |  | ||||||
| from .. import ( | from .. import ( | ||||||
|     CONF_I2S_DOUT_PIN, |     CONF_I2S_DOUT_PIN, | ||||||
|  |     CONF_I2S_MODE, | ||||||
|     CONF_LEFT, |     CONF_LEFT, | ||||||
|     CONF_MONO, |     CONF_MONO, | ||||||
|  |     CONF_PRIMARY, | ||||||
|     CONF_RIGHT, |     CONF_RIGHT, | ||||||
|     CONF_STEREO, |     CONF_STEREO, | ||||||
|     I2SAudioOut, |     I2SAudioOut, | ||||||
| @@ -58,7 +63,41 @@ I2C_COMM_FMT_OPTIONS = { | |||||||
| NO_INTERNAL_DAC_VARIANTS = [esp32.const.VARIANT_ESP32S2] | NO_INTERNAL_DAC_VARIANTS = [esp32.const.VARIANT_ESP32S2] | ||||||
|  |  | ||||||
|  |  | ||||||
def _set_num_channels_from_config(config):
    """Derive ``CONF_NUM_CHANNELS`` from the configured channel selection.

    A mono, left-only, or right-only selection is stored as one channel; any other
    selection is stored as two. The config is updated in place and returned.
    """
    single_channel_selections = (CONF_MONO, CONF_LEFT, CONF_RIGHT)
    is_single_channel = config[CONF_CHANNEL] in single_channel_selections
    config[CONF_NUM_CHANNELS] = 1 if is_single_channel else 2
    return config
|  |  | ||||||
|  |  | ||||||
def _set_stream_limits(config):
    """Record in ``config`` the audio stream limits this speaker can accept.

    In primary I2S mode the stream settings are modifiable, so the full supported
    ranges are stored. In any other mode the bus settings are fixed, so the configured
    bits per sample becomes the upper limit and the configured sample rate becomes
    both the lower and the upper limit.

    Returns:
        The same ``config`` dict, updated in place.
    """
    if config[CONF_I2S_MODE] == CONF_PRIMARY:
        # Primary mode has modifiable stream settings
        store_limits = audio.set_stream_limits(
            min_bits_per_sample=8,
            max_bits_per_sample=32,
            min_channels=1,
            max_channels=2,
            min_sample_rate=16000,
            max_sample_rate=48000,
        )
    else:
        # Secondary mode has unmodifiable max bits per sample and min/max sample rates
        # NOTE(review): the configured values are forwarded as-is; confirm they are
        # plain ints, since the limits are later fed to cv.int_range.
        store_limits = audio.set_stream_limits(
            min_bits_per_sample=8,
            max_bits_per_sample=config.get(CONF_BITS_PER_SAMPLE),
            min_channels=1,
            max_channels=2,
            min_sample_rate=config.get(CONF_SAMPLE_RATE),
            max_sample_rate=config.get(CONF_SAMPLE_RATE),
        )

    # set_stream_limits only builds the setter; it has to be called with the config
    # for the limits to actually be stored (in both branches).
    store_limits(config)

    return config
|  |  | ||||||
|  |  | ||||||
|  | def _validate_esp32_variant(config): | ||||||
|     if config[CONF_DAC_TYPE] != "internal": |     if config[CONF_DAC_TYPE] != "internal": | ||||||
|         return config |         return config | ||||||
|     variant = esp32.get_esp32_variant() |     variant = esp32.get_esp32_variant() | ||||||
| @@ -90,6 +129,7 @@ BASE_SCHEMA = ( | |||||||
|     .extend(cv.COMPONENT_SCHEMA) |     .extend(cv.COMPONENT_SCHEMA) | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| CONFIG_SCHEMA = cv.All( | CONFIG_SCHEMA = cv.All( | ||||||
|     cv.typed_schema( |     cv.typed_schema( | ||||||
|         { |         { | ||||||
| @@ -111,7 +151,9 @@ CONFIG_SCHEMA = cv.All( | |||||||
|         }, |         }, | ||||||
|         key=CONF_DAC_TYPE, |         key=CONF_DAC_TYPE, | ||||||
|     ), |     ), | ||||||
|     validate_esp32_variant, |     _validate_esp32_variant, | ||||||
|  |     _set_num_channels_from_config, | ||||||
|  |     _set_stream_limits, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -148,9 +148,11 @@ void I2SAudioSpeaker::loop() { | |||||||
|     this->status_set_error("Failed to adjust I2S bus to match the incoming audio"); |     this->status_set_error("Failed to adjust I2S bus to match the incoming audio"); | ||||||
|     ESP_LOGE(TAG, |     ESP_LOGE(TAG, | ||||||
|              "Incompatible audio format: sample rate = %" PRIu32 ", channels = %" PRIu8 ", bits per sample = %" PRIu8, |              "Incompatible audio format: sample rate = %" PRIu32 ", channels = %" PRIu8 ", bits per sample = %" PRIu8, | ||||||
|              this->audio_stream_info_.sample_rate, this->audio_stream_info_.channels, |              this->audio_stream_info_.get_sample_rate(), this->audio_stream_info_.get_channels(), | ||||||
|              this->audio_stream_info_.bits_per_sample); |              this->audio_stream_info_.get_bits_per_sample()); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   xEventGroupClearBits(this->event_group_, ALL_ERR_ESP_BITS); | ||||||
| } | } | ||||||
|  |  | ||||||
| void I2SAudioSpeaker::set_volume(float volume) { | void I2SAudioSpeaker::set_volume(float volume) { | ||||||
| @@ -201,6 +203,12 @@ size_t I2SAudioSpeaker::play(const uint8_t *data, size_t length, TickType_t tick | |||||||
|     this->start(); |     this->start(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   if ((this->state_ != speaker::STATE_RUNNING) || (this->audio_ring_buffer_.use_count() == 1)) { | ||||||
|  |     // Unable to write data to a running speaker, so delay the max amount of time so it can get ready | ||||||
|  |     vTaskDelay(ticks_to_wait); | ||||||
|  |     ticks_to_wait = 0; | ||||||
|  |   } | ||||||
|  |  | ||||||
|   size_t bytes_written = 0; |   size_t bytes_written = 0; | ||||||
|   if ((this->state_ == speaker::STATE_RUNNING) && (this->audio_ring_buffer_.use_count() == 1)) { |   if ((this->state_ == speaker::STATE_RUNNING) && (this->audio_ring_buffer_.use_count() == 1)) { | ||||||
|     // Only one owner of the ring buffer (the speaker task), so the ring buffer is allocated and no other components are |     // Only one owner of the ring buffer (the speaker task), so the ring buffer is allocated and no other components are | ||||||
| @@ -223,6 +231,8 @@ bool I2SAudioSpeaker::has_buffered_data() const { | |||||||
|  |  | ||||||
| void I2SAudioSpeaker::speaker_task(void *params) { | void I2SAudioSpeaker::speaker_task(void *params) { | ||||||
|   I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params; |   I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params; | ||||||
|  |   this_speaker->task_created_ = true; | ||||||
|  |  | ||||||
|   uint32_t event_group_bits = |   uint32_t event_group_bits = | ||||||
|       xEventGroupWaitBits(this_speaker->event_group_, |       xEventGroupWaitBits(this_speaker->event_group_, | ||||||
|                           SpeakerEventGroupBits::COMMAND_START | SpeakerEventGroupBits::COMMAND_STOP | |                           SpeakerEventGroupBits::COMMAND_START | SpeakerEventGroupBits::COMMAND_STOP | | ||||||
| @@ -240,19 +250,20 @@ void I2SAudioSpeaker::speaker_task(void *params) { | |||||||
|  |  | ||||||
|   audio::AudioStreamInfo audio_stream_info = this_speaker->audio_stream_info_; |   audio::AudioStreamInfo audio_stream_info = this_speaker->audio_stream_info_; | ||||||
|  |  | ||||||
|   const uint32_t bytes_per_ms = |   const uint32_t dma_buffers_duration_ms = DMA_BUFFER_DURATION_MS * DMA_BUFFERS_COUNT; | ||||||
|       audio_stream_info.channels * audio_stream_info.get_bytes_per_sample() * audio_stream_info.sample_rate / 1000; |   // Ensure ring buffer duration is at least the duration of all DMA buffers | ||||||
|  |   const uint32_t ring_buffer_duration = std::max(dma_buffers_duration_ms, this_speaker->buffer_duration_ms_); | ||||||
|  |  | ||||||
|   const size_t dma_buffers_size = DMA_BUFFERS_COUNT * DMA_BUFFER_DURATION_MS * bytes_per_ms; |   // The DMA buffers may have more bits per sample, so calculate buffer sizes based in the input audio stream info | ||||||
|  |   const size_t data_buffer_size = audio_stream_info.ms_to_bytes(dma_buffers_duration_ms); | ||||||
|  |   const size_t ring_buffer_size = audio_stream_info.ms_to_bytes(ring_buffer_duration); | ||||||
|  |  | ||||||
|   // Ensure ring buffer is at least as large as the total size of the DMA buffers |   const size_t single_dma_buffer_input_size = data_buffer_size / DMA_BUFFERS_COUNT; | ||||||
|   const size_t ring_buffer_size = |  | ||||||
|       std::max((uint32_t) dma_buffers_size, this_speaker->buffer_duration_ms_ * bytes_per_ms); |  | ||||||
|  |  | ||||||
|   if (this_speaker->send_esp_err_to_event_group_(this_speaker->allocate_buffers_(dma_buffers_size, ring_buffer_size))) { |   if (this_speaker->send_esp_err_to_event_group_(this_speaker->allocate_buffers_(data_buffer_size, ring_buffer_size))) { | ||||||
|     // Failed to allocate buffers |     // Failed to allocate buffers | ||||||
|     xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); |     xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); | ||||||
|     this_speaker->delete_task_(dma_buffers_size); |     this_speaker->delete_task_(data_buffer_size); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if (!this_speaker->send_esp_err_to_event_group_(this_speaker->start_i2s_driver_(audio_stream_info))) { |   if (!this_speaker->send_esp_err_to_event_group_(this_speaker->start_i2s_driver_(audio_stream_info))) { | ||||||
| @@ -262,20 +273,25 @@ void I2SAudioSpeaker::speaker_task(void *params) { | |||||||
|     uint32_t last_data_received_time = millis(); |     uint32_t last_data_received_time = millis(); | ||||||
|     bool tx_dma_underflow = false; |     bool tx_dma_underflow = false; | ||||||
|  |  | ||||||
|     while (!this_speaker->timeout_.has_value() || |     this_speaker->accumulated_frames_written_ = 0; | ||||||
|  |  | ||||||
|  |     // Keep looping if paused, there is no timeout configured, or data was received more recently than the configured | ||||||
|  |     // timeout | ||||||
|  |     while (this_speaker->pause_state_ || !this_speaker->timeout_.has_value() || | ||||||
|            (millis() - last_data_received_time) <= this_speaker->timeout_.value()) { |            (millis() - last_data_received_time) <= this_speaker->timeout_.value()) { | ||||||
|       event_group_bits = xEventGroupGetBits(this_speaker->event_group_); |       event_group_bits = xEventGroupGetBits(this_speaker->event_group_); | ||||||
|  |  | ||||||
|       if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) { |       if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) { | ||||||
|  |         xEventGroupClearBits(this_speaker->event_group_, SpeakerEventGroupBits::COMMAND_STOP); | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|       if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) { |       if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) { | ||||||
|  |         xEventGroupClearBits(this_speaker->event_group_, SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY); | ||||||
|         stop_gracefully = true; |         stop_gracefully = true; | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       if (this_speaker->audio_stream_info_ != audio_stream_info) { |       if (this_speaker->audio_stream_info_ != audio_stream_info) { | ||||||
|         // Audio stream info has changed, stop the speaker task so it will restart with the proper settings. |         // Audio stream info changed, stop the speaker task so it will restart with the proper settings. | ||||||
|  |  | ||||||
|         break; |         break; | ||||||
|       } |       } | ||||||
|  |  | ||||||
| @@ -286,33 +302,64 @@ void I2SAudioSpeaker::speaker_task(void *params) { | |||||||
|         } |         } | ||||||
|       } |       } | ||||||
|  |  | ||||||
|       size_t bytes_to_read = dma_buffers_size; |       if (this_speaker->pause_state_) { | ||||||
|       size_t bytes_read = this_speaker->audio_ring_buffer_->read((void *) this_speaker->data_buffer_, bytes_to_read, |         // Pause state is accessed atomically, so thread safe | ||||||
|  |         // Delay so the task can yields, then skip transferring audio data | ||||||
|  |         delay(TASK_DELAY_MS); | ||||||
|  |         continue; | ||||||
|  |       } | ||||||
|  |  | ||||||
|  |       size_t bytes_read = this_speaker->audio_ring_buffer_->read((void *) this_speaker->data_buffer_, data_buffer_size, | ||||||
|                                                                  pdMS_TO_TICKS(TASK_DELAY_MS)); |                                                                  pdMS_TO_TICKS(TASK_DELAY_MS)); | ||||||
|  |  | ||||||
|       if (bytes_read > 0) { |       if (bytes_read > 0) { | ||||||
|         size_t bytes_written = 0; |         if ((audio_stream_info.get_bits_per_sample() == 16) && (this_speaker->q15_volume_factor_ < INT16_MAX)) { | ||||||
|  |  | ||||||
|         if ((audio_stream_info.bits_per_sample == 16) && (this_speaker->q15_volume_factor_ < INT16_MAX)) { |  | ||||||
|           // Scale samples by the volume factor in place |           // Scale samples by the volume factor in place | ||||||
|           q15_multiplication((int16_t *) this_speaker->data_buffer_, (int16_t *) this_speaker->data_buffer_, |           q15_multiplication((int16_t *) this_speaker->data_buffer_, (int16_t *) this_speaker->data_buffer_, | ||||||
|                              bytes_read / sizeof(int16_t), this_speaker->q15_volume_factor_); |                              bytes_read / sizeof(int16_t), this_speaker->q15_volume_factor_); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         if (audio_stream_info.bits_per_sample == (uint8_t) this_speaker->bits_per_sample_) { |         // Write the audio data to a single DMA buffer at a time to reduce latency for the audio duration played | ||||||
|           i2s_write(this_speaker->parent_->get_port(), this_speaker->data_buffer_, bytes_read, &bytes_written, |         // callback. | ||||||
|                     portMAX_DELAY); |         const uint32_t batches = (bytes_read + single_dma_buffer_input_size - 1) / single_dma_buffer_input_size; | ||||||
|         } else if (audio_stream_info.bits_per_sample < (uint8_t) this_speaker->bits_per_sample_) { |  | ||||||
|           i2s_write_expand(this_speaker->parent_->get_port(), this_speaker->data_buffer_, bytes_read, |         for (uint32_t i = 0; i < batches; ++i) { | ||||||
|                            audio_stream_info.bits_per_sample, this_speaker->bits_per_sample_, &bytes_written, |           size_t bytes_written = 0; | ||||||
|                            portMAX_DELAY); |           size_t bytes_to_write = std::min(single_dma_buffer_input_size, bytes_read); | ||||||
|  |  | ||||||
|  |           if (audio_stream_info.get_bits_per_sample() == (uint8_t) this_speaker->bits_per_sample_) { | ||||||
|  |             i2s_write(this_speaker->parent_->get_port(), this_speaker->data_buffer_ + i * single_dma_buffer_input_size, | ||||||
|  |                       bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5)); | ||||||
|  |           } else if (audio_stream_info.get_bits_per_sample() < (uint8_t) this_speaker->bits_per_sample_) { | ||||||
|  |             i2s_write_expand(this_speaker->parent_->get_port(), | ||||||
|  |                              this_speaker->data_buffer_ + i * single_dma_buffer_input_size, bytes_to_write, | ||||||
|  |                              audio_stream_info.get_bits_per_sample(), this_speaker->bits_per_sample_, &bytes_written, | ||||||
|  |                              pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5)); | ||||||
|           } |           } | ||||||
|  |  | ||||||
|         if (bytes_written != bytes_read) { |           uint32_t write_timestamp = micros(); | ||||||
|  |  | ||||||
|  |           if (bytes_written != bytes_to_write) { | ||||||
|             xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); |             xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); | ||||||
|           } |           } | ||||||
|  |  | ||||||
|  |           bytes_read -= bytes_written; | ||||||
|  |  | ||||||
|  |           this_speaker->accumulated_frames_written_ += audio_stream_info.bytes_to_frames(bytes_written); | ||||||
|  |           const uint32_t new_playback_ms = | ||||||
|  |               audio_stream_info.frames_to_milliseconds_with_remainder(&this_speaker->accumulated_frames_written_); | ||||||
|  |           const uint32_t remainder_us = | ||||||
|  |               audio_stream_info.frames_to_microseconds(this_speaker->accumulated_frames_written_); | ||||||
|  |  | ||||||
|  |           uint32_t pending_frames = | ||||||
|  |               audio_stream_info.bytes_to_frames(bytes_read + this_speaker->audio_ring_buffer_->available()); | ||||||
|  |           const uint32_t pending_ms = audio_stream_info.frames_to_milliseconds_with_remainder(&pending_frames); | ||||||
|  |  | ||||||
|  |           this_speaker->audio_output_callback_(new_playback_ms, remainder_us, pending_ms, write_timestamp); | ||||||
|  |  | ||||||
|           tx_dma_underflow = false; |           tx_dma_underflow = false; | ||||||
|           last_data_received_time = millis(); |           last_data_received_time = millis(); | ||||||
|  |         } | ||||||
|       } else { |       } else { | ||||||
|         // No data received |         // No data received | ||||||
|         if (stop_gracefully && tx_dma_underflow) { |         if (stop_gracefully && tx_dma_underflow) { | ||||||
| @@ -328,7 +375,7 @@ void I2SAudioSpeaker::speaker_task(void *params) { | |||||||
|     this_speaker->parent_->unlock(); |     this_speaker->parent_->unlock(); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   this_speaker->delete_task_(dma_buffers_size); |   this_speaker->delete_task_(data_buffer_size); | ||||||
| } | } | ||||||
|  |  | ||||||
| void I2SAudioSpeaker::start() { | void I2SAudioSpeaker::start() { | ||||||
| @@ -337,18 +384,17 @@ void I2SAudioSpeaker::start() { | |||||||
|   if ((this->state_ == speaker::STATE_STARTING) || (this->state_ == speaker::STATE_RUNNING)) |   if ((this->state_ == speaker::STATE_STARTING) || (this->state_ == speaker::STATE_RUNNING)) | ||||||
|     return; |     return; | ||||||
|  |  | ||||||
|   if (this->speaker_task_handle_ == nullptr) { |   if (!this->task_created_ && (this->speaker_task_handle_ == nullptr)) { | ||||||
|     xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY, |     xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY, | ||||||
|                 &this->speaker_task_handle_); |                 &this->speaker_task_handle_); | ||||||
|   } |  | ||||||
|  |  | ||||||
|     if (this->speaker_task_handle_ != nullptr) { |     if (this->speaker_task_handle_ != nullptr) { | ||||||
|       xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START); |       xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START); | ||||||
|     this->task_created_ = true; |  | ||||||
|     } else { |     } else { | ||||||
|       xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START); |       xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|  | } | ||||||
|  |  | ||||||
| void I2SAudioSpeaker::stop() { this->stop_(false); } | void I2SAudioSpeaker::stop() { this->stop_(false); } | ||||||
|  |  | ||||||
| @@ -416,12 +462,12 @@ esp_err_t I2SAudioSpeaker::allocate_buffers_(size_t data_buffer_size, size_t rin | |||||||
| } | } | ||||||
|  |  | ||||||
| esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_stream_info) { | esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_stream_info) { | ||||||
|   if ((this->i2s_mode_ & I2S_MODE_SLAVE) && (this->sample_rate_ != audio_stream_info.sample_rate)) {  // NOLINT |   if ((this->i2s_mode_ & I2S_MODE_SLAVE) && (this->sample_rate_ != audio_stream_info.get_sample_rate())) {  // NOLINT | ||||||
|     // Can't reconfigure I2S bus, so the sample rate must match the configured value |     // Can't reconfigure I2S bus, so the sample rate must match the configured value | ||||||
|     return ESP_ERR_NOT_SUPPORTED; |     return ESP_ERR_NOT_SUPPORTED; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   if ((i2s_bits_per_sample_t) audio_stream_info.bits_per_sample > this->bits_per_sample_) { |   if ((i2s_bits_per_sample_t) audio_stream_info.get_bits_per_sample() > this->bits_per_sample_) { | ||||||
|     // Currently can't handle the case when the incoming audio has more bits per sample than the configured value |     // Currently can't handle the case when the incoming audio has more bits per sample than the configured value | ||||||
|     return ESP_ERR_NOT_SUPPORTED; |     return ESP_ERR_NOT_SUPPORTED; | ||||||
|   } |   } | ||||||
| @@ -432,21 +478,21 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea | |||||||
|  |  | ||||||
|   i2s_channel_fmt_t channel = this->channel_; |   i2s_channel_fmt_t channel = this->channel_; | ||||||
|  |  | ||||||
|   if (audio_stream_info.channels == 1) { |   if (audio_stream_info.get_channels() == 1) { | ||||||
|     if (this->channel_ == I2S_CHANNEL_FMT_ONLY_LEFT) { |     if (this->channel_ == I2S_CHANNEL_FMT_ONLY_LEFT) { | ||||||
|       channel = I2S_CHANNEL_FMT_ONLY_LEFT; |       channel = I2S_CHANNEL_FMT_ONLY_LEFT; | ||||||
|     } else { |     } else { | ||||||
|       channel = I2S_CHANNEL_FMT_ONLY_RIGHT; |       channel = I2S_CHANNEL_FMT_ONLY_RIGHT; | ||||||
|     } |     } | ||||||
|   } else if (audio_stream_info.channels == 2) { |   } else if (audio_stream_info.get_channels() == 2) { | ||||||
|     channel = I2S_CHANNEL_FMT_RIGHT_LEFT; |     channel = I2S_CHANNEL_FMT_RIGHT_LEFT; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   int dma_buffer_length = DMA_BUFFER_DURATION_MS * this->sample_rate_ / 1000; |   int dma_buffer_length = audio_stream_info.ms_to_frames(DMA_BUFFER_DURATION_MS); | ||||||
|  |  | ||||||
|   i2s_driver_config_t config = { |   i2s_driver_config_t config = { | ||||||
|     .mode = (i2s_mode_t) (this->i2s_mode_ | I2S_MODE_TX), |     .mode = (i2s_mode_t) (this->i2s_mode_ | I2S_MODE_TX), | ||||||
|     .sample_rate = audio_stream_info.sample_rate, |     .sample_rate = audio_stream_info.get_sample_rate(), | ||||||
|     .bits_per_sample = this->bits_per_sample_, |     .bits_per_sample = this->bits_per_sample_, | ||||||
|     .channel_format = channel, |     .channel_format = channel, | ||||||
|     .communication_format = this->i2s_comm_fmt_, |     .communication_format = this->i2s_comm_fmt_, | ||||||
| @@ -504,7 +550,7 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea | |||||||
| } | } | ||||||
|  |  | ||||||
| void I2SAudioSpeaker::delete_task_(size_t buffer_size) { | void I2SAudioSpeaker::delete_task_(size_t buffer_size) { | ||||||
|   this->audio_ring_buffer_.reset();  // Releases onwership of the shared_ptr |   this->audio_ring_buffer_.reset();  // Releases ownership of the shared_ptr | ||||||
|  |  | ||||||
|   if (this->data_buffer_ != nullptr) { |   if (this->data_buffer_ != nullptr) { | ||||||
|     ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE); |     ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE); | ||||||
|   | |||||||
| @@ -40,6 +40,9 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp | |||||||
|   void stop() override; |   void stop() override; | ||||||
|   void finish() override; |   void finish() override; | ||||||
|  |  | ||||||
|  |   void set_pause_state(bool pause_state) override { this->pause_state_ = pause_state; } | ||||||
|  |   bool get_pause_state() const override { return this->pause_state_; } | ||||||
|  |  | ||||||
|   /// @brief Plays the provided audio data. |   /// @brief Plays the provided audio data. | ||||||
|   /// Starts the speaker task, if necessary. Writes the audio data to the ring buffer. |   /// Starts the speaker task, if necessary. Writes the audio data to the ring buffer. | ||||||
|   /// @param data Audio data in the format set by the parent speaker class's ``set_audio_stream_info`` method. |   /// @param data Audio data in the format set by the parent speaker class's ``set_audio_stream_info`` method. | ||||||
| @@ -121,13 +124,18 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp | |||||||
|   uint8_t dout_pin_; |   uint8_t dout_pin_; | ||||||
|  |  | ||||||
|   bool task_created_{false}; |   bool task_created_{false}; | ||||||
|  |   bool pause_state_{false}; | ||||||
|  |  | ||||||
|   int16_t q15_volume_factor_{INT16_MAX}; |   int16_t q15_volume_factor_{INT16_MAX}; | ||||||
|  |  | ||||||
|  |   size_t bytes_written_{0}; | ||||||
|  |  | ||||||
| #if SOC_I2S_SUPPORTS_DAC | #if SOC_I2S_SUPPORTS_DAC | ||||||
|   i2s_dac_mode_t internal_dac_mode_{I2S_DAC_CHANNEL_DISABLE}; |   i2s_dac_mode_t internal_dac_mode_{I2S_DAC_CHANNEL_DISABLE}; | ||||||
| #endif | #endif | ||||||
|   i2s_comm_format_t i2s_comm_fmt_; |   i2s_comm_format_t i2s_comm_fmt_; | ||||||
|  |  | ||||||
|  |   uint32_t accumulated_frames_written_{0}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| }  // namespace i2s_audio | }  // namespace i2s_audio | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| from esphome import automation | from esphome import automation | ||||||
| from esphome.automation import maybe_simple_id |  | ||||||
| import esphome.codegen as cg | import esphome.codegen as cg | ||||||
| from esphome.components import audio_dac | from esphome.components import audio, audio_dac | ||||||
| import esphome.config_validation as cv | import esphome.config_validation as cv | ||||||
| from esphome.const import CONF_DATA, CONF_ID, CONF_VOLUME | from esphome.const import CONF_DATA, CONF_ID, CONF_VOLUME | ||||||
| from esphome.core import CORE | from esphome.core import CORE | ||||||
| @@ -54,13 +53,15 @@ async def register_speaker(var, config): | |||||||
|     await setup_speaker_core_(var, config) |     await setup_speaker_core_(var, config) | ||||||
|  |  | ||||||
|  |  | ||||||
| SPEAKER_SCHEMA = cv.Schema( | SPEAKER_SCHEMA = cv.Schema.extend(audio.AUDIO_COMPONENT_SCHEMA).extend( | ||||||
|     { |     { | ||||||
|         cv.Optional(CONF_AUDIO_DAC): cv.use_id(audio_dac.AudioDac), |         cv.Optional(CONF_AUDIO_DAC): cv.use_id(audio_dac.AudioDac), | ||||||
|     } |     } | ||||||
| ) | ) | ||||||
|  |  | ||||||
| SPEAKER_AUTOMATION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Speaker)}) | SPEAKER_AUTOMATION_SCHEMA = automation.maybe_simple_id( | ||||||
|  |     {cv.GenerateID(): cv.use_id(Speaker)} | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| async def speaker_action(config, action_id, template_arg, args): | async def speaker_action(config, action_id, template_arg, args): | ||||||
|   | |||||||
| @@ -9,6 +9,7 @@ | |||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #include "esphome/core/defines.h" | #include "esphome/core/defines.h" | ||||||
|  | #include "esphome/core/helpers.h" | ||||||
|  |  | ||||||
| #include "esphome/components/audio/audio.h" | #include "esphome/components/audio/audio.h" | ||||||
| #ifdef USE_AUDIO_DAC | #ifdef USE_AUDIO_DAC | ||||||
| @@ -56,6 +57,10 @@ class Speaker { | |||||||
|   // When finish() is not implemented on the platform component it should just do a normal stop. |   // When finish() is not implemented on the platform component it should just do a normal stop. | ||||||
|   virtual void finish() { this->stop(); } |   virtual void finish() { this->stop(); } | ||||||
|  |  | ||||||
|  |   // Pauses processing incoming audio. Needs to be implemented specifically per speaker component | ||||||
|  |   virtual void set_pause_state(bool pause_state) {} | ||||||
|  |   virtual bool get_pause_state() const { return false; } | ||||||
|  |  | ||||||
|   virtual bool has_buffered_data() const = 0; |   virtual bool has_buffered_data() const = 0; | ||||||
|  |  | ||||||
|   bool is_running() const { return this->state_ == STATE_RUNNING; } |   bool is_running() const { return this->state_ == STATE_RUNNING; } | ||||||
| @@ -95,6 +100,19 @@ class Speaker { | |||||||
|     this->audio_stream_info_ = audio_stream_info; |     this->audio_stream_info_ = audio_stream_info; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|  |   audio::AudioStreamInfo &get_audio_stream_info() { return this->audio_stream_info_; } | ||||||
|  |  | ||||||
|  |   /// Callback function for sending the duration of the audio written to the speaker since the last callback. | ||||||
|  |   /// Parameters: | ||||||
|  |   ///   - Duration in milliseconds. Never rounded and should always be less than or equal to the actual duration. | ||||||
|  |   ///   - Remainder duration in microseconds. Rounded duration after subtracting the previous parameter from the actual | ||||||
|  |   ///     duration. | ||||||
|  |   ///   - Duration of remaining, unwritten audio buffered in the speaker in milliseconds. | ||||||
|  |   ///   - System time in microseconds when the last write was completed. | ||||||
|  |   void add_audio_output_callback(std::function<void(uint32_t, uint32_t, uint32_t, uint32_t)> &&callback) { | ||||||
|  |     this->audio_output_callback_.add(std::move(callback)); | ||||||
|  |   } | ||||||
|  |  | ||||||
|  protected: |  protected: | ||||||
|   State state_{STATE_STOPPED}; |   State state_{STATE_STOPPED}; | ||||||
|   audio::AudioStreamInfo audio_stream_info_; |   audio::AudioStreamInfo audio_stream_info_; | ||||||
| @@ -104,6 +122,8 @@ class Speaker { | |||||||
| #ifdef USE_AUDIO_DAC | #ifdef USE_AUDIO_DAC | ||||||
|   audio_dac::AudioDac *audio_dac_{nullptr}; |   audio_dac::AudioDac *audio_dac_{nullptr}; | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
|  |   CallbackManager<void(uint32_t, uint32_t, uint32_t, uint32_t)> audio_output_callback_{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| }  // namespace speaker | }  // namespace speaker | ||||||
|   | |||||||
| @@ -128,6 +128,7 @@ lib_deps = | |||||||
|     DNSServer                            ; captive_portal (Arduino built-in) |     DNSServer                            ; captive_portal (Arduino built-in) | ||||||
|     esphome/ESP32-audioI2S@2.0.7         ; i2s_audio |     esphome/ESP32-audioI2S@2.0.7         ; i2s_audio | ||||||
|     droscy/esp_wireguard@0.4.2           ; wireguard |     droscy/esp_wireguard@0.4.2           ; wireguard | ||||||
|  |     esphome/esp-audio-libs@1.1.1         ; audio | ||||||
|  |  | ||||||
| build_flags = | build_flags = | ||||||
|     ${common:arduino.build_flags} |     ${common:arduino.build_flags} | ||||||
| @@ -148,6 +149,7 @@ lib_deps = | |||||||
|     ${common:idf.lib_deps} |     ${common:idf.lib_deps} | ||||||
|     droscy/esp_wireguard@0.4.2              ; wireguard |     droscy/esp_wireguard@0.4.2              ; wireguard | ||||||
|     kahrendt/ESPMicroSpeechFeatures@1.1.0   ; micro_wake_word |     kahrendt/ESPMicroSpeechFeatures@1.1.0   ; micro_wake_word | ||||||
|  |     esphome/esp-audio-libs@1.1.1            ; audio | ||||||
| build_flags = | build_flags = | ||||||
|     ${common:idf.build_flags} |     ${common:idf.build_flags} | ||||||
|     -Wno-nonnull-compare |     -Wno-nonnull-compare | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user