
Merge branch 'dev' into vornado-ir

Jordan Zucker 2025-02-04 16:56:37 -08:00
commit d9b8a7bc17
1210 changed files with 12002 additions and 20523 deletions


@@ -37,7 +37,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        arch: [amd64, armv7, aarch64]
+        arch: [amd64, aarch64]
         build_type: ["ha-addon", "docker", "lint"]
     steps:
       - uses: actions/checkout@v4.1.7


@@ -80,7 +80,6 @@ jobs:
       matrix:
         platform:
           - linux/amd64
-          - linux/arm/v7
           - linux/arm64
     steps:
       - uses: actions/checkout@v4.1.7


@@ -277,6 +277,7 @@ esphome/components/mics_4514/* @jesserockz
 esphome/components/midea/* @dudanov
 esphome/components/midea_ir/* @dudanov
 esphome/components/mitsubishi/* @RubyBailey
+esphome/components/mixer/speaker/* @kahrendt
 esphome/components/mlx90393/* @functionpointer
 esphome/components/mlx90614/* @jesserockz
 esphome/components/mmc5603/* @benhoff
@@ -343,6 +344,7 @@ esphome/components/radon_eye_rd200/* @jeffeb3
 esphome/components/rc522/* @glmnet
 esphome/components/rc522_i2c/* @glmnet
 esphome/components/rc522_spi/* @glmnet
+esphome/components/resampler/speaker/* @kahrendt
 esphome/components/restart/* @esphome/core
 esphome/components/rf_bridge/* @jesserockz
 esphome/components/rgbct/* @jesserockz
@@ -499,5 +501,6 @@ esphome/components/xiaomi_mhoc401/* @vevsvevs
 esphome/components/xiaomi_rtcgq02lm/* @jesserockz
 esphome/components/xl9535/* @mreditor97
 esphome/components/xpt2046/touchscreen/* @nielsnl68 @numo68
+esphome/components/xxtea/* @clydebarrow
 esphome/components/zhlt01/* @cfeenstra1024
 esphome/components/zio_ultrasonic/* @kahrendt


@@ -51,19 +51,7 @@ ENV \
     # Store globally installed pio libs in /piolibs
     PLATFORMIO_GLOBALLIB_DIR=/piolibs

-# Support legacy binaries on Debian multiarch system. There is no "correct" way
-# to do this, other than using properly built toolchains...
-# See: https://unix.stackexchange.com/questions/553743/correct-way-to-add-lib-ld-linux-so-3-in-debian
-RUN \
-    if [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]; then \
-        ln -s /lib/arm-linux-gnueabihf/ld-linux-armhf.so.3 /lib/ld-linux.so.3; \
-    fi
-
 RUN \
-    # Ubuntu python3-pip is missing wheel
-    if [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]; then \
-        export PIP_EXTRA_INDEX_URL="https://www.piwheels.org/simple"; \
-    fi; \
     pip3 install \
     --break-system-packages --no-cache-dir \
     # Keep platformio version in sync with requirements.txt

@@ -82,14 +70,6 @@ RUN --mount=type=tmpfs,target=/root/.cargo <<END-OF-RUN
     # Fail on any non-zero status
     set -e

-    if [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]
-    then
-        curl -L https://www.piwheels.org/cp311/cryptography-43.0.0-cp37-abi3-linux_armv7l.whl -o /tmp/cryptography-43.0.0-cp37-abi3-linux_armv7l.whl
-        pip3 install --break-system-packages --no-cache-dir /tmp/cryptography-43.0.0-cp37-abi3-linux_armv7l.whl
-        rm /tmp/cryptography-43.0.0-cp37-abi3-linux_armv7l.whl
-        export PIP_EXTRA_INDEX_URL="https://www.piwheels.org/simple";
-    fi
-
     # install build tools in case wheels are not available
     BUILD_DEPS="
         build-essential=12.9

@@ -106,7 +86,7 @@ LIB_DEPS="
     libtiff6=4.5.0-6+deb12u1
     libopenjp2-7=2.5.0-2
 "
-if [ "$TARGETARCH$TARGETVARIANT" = "arm64" ] || [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]
+if [ "$TARGETARCH$TARGETVARIANT" = "arm64" ]
 then
     apt-get update
     apt-get install -y --no-install-recommends $BUILD_DEPS $LIB_DEPS

@@ -115,7 +95,7 @@ fi
 CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse CARGO_HOME=/root/.cargo
     pip3 install --break-system-packages --no-cache-dir -r /requirements.txt -r /requirements_optional.txt

-if [ "$TARGETARCH$TARGETVARIANT" = "arm64" ] || [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]
+if [ "$TARGETARCH$TARGETVARIANT" = "arm64" ]
 then
     apt-get remove -y --purge --auto-remove $BUILD_DEPS
     rm -rf /tmp/* /var/{cache,log}/* /var/lib/apt/lists/*

@@ -135,11 +115,7 @@ FROM base AS docker

 # Copy esphome and install
 COPY . /esphome
-RUN if [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]; then \
-        export PIP_EXTRA_INDEX_URL="https://www.piwheels.org/simple"; \
-    fi; \
-    pip3 install \
-    --break-system-packages --no-cache-dir -e /esphome
+RUN pip3 install --break-system-packages --no-cache-dir -e /esphome

 # Settings for dashboard
 ENV USERNAME="" PASSWORD=""

@@ -197,11 +173,7 @@ COPY docker/ha-addon-rootfs/ /

 # Copy esphome and install
 COPY . /esphome
-RUN if [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]; then \
-        export PIP_EXTRA_INDEX_URL="https://www.piwheels.org/simple"; \
-    fi; \
-    pip3 install \
-    --break-system-packages --no-cache-dir -e /esphome
+RUN pip3 install --break-system-packages --no-cache-dir -e /esphome

 # Labels
 LABEL \

@@ -232,21 +204,14 @@ RUN \
     nano=7.2-1+deb12u1 \
     build-essential=12.9 \
     python3-dev=3.11.2-1+b1 \
-    && if [ "$TARGETARCH$TARGETVARIANT" != "armv7" ]; then \
-        # move this up after armv7 is retired
-        apt-get install -y --no-install-recommends clang-tidy-18=1:18.1.8~++20240731024826+3b5b5c1ec4a3-1~exp1~20240731144843.145 ; \
-    fi; \
-    rm -rf \
+    clang-tidy-18=1:18.1.8~++20240731024826+3b5b5c1ec4a3-1~exp1~20240731144843.145 \
+    && rm -rf \
         /tmp/* \
         /var/{cache,log}/* \
         /var/lib/apt/lists/*

 COPY requirements_test.txt /
-RUN if [ "$TARGETARCH$TARGETVARIANT" = "armv7" ]; then \
-        export PIP_EXTRA_INDEX_URL="https://www.piwheels.org/simple"; \
-    fi; \
-    pip3 install \
-    --break-system-packages --no-cache-dir -r /requirements_test.txt
+RUN pip3 install --break-system-packages --no-cache-dir -r /requirements_test.txt

 VOLUME ["/esphome"]
 WORKDIR /esphome


@@ -1,22 +1,19 @@
 #!/usr/bin/env python3
-from dataclasses import dataclass
-import subprocess
 import argparse
-from platform import machine
-import shlex
+from dataclasses import dataclass
 import re
+import shlex
+import subprocess
 import sys

 CHANNEL_DEV = "dev"
 CHANNEL_BETA = "beta"
 CHANNEL_RELEASE = "release"
 CHANNELS = [CHANNEL_DEV, CHANNEL_BETA, CHANNEL_RELEASE]

 ARCH_AMD64 = "amd64"
-ARCH_ARMV7 = "armv7"
 ARCH_AARCH64 = "aarch64"
-ARCHS = [ARCH_AMD64, ARCH_ARMV7, ARCH_AARCH64]
+ARCHS = [ARCH_AMD64, ARCH_AARCH64]

 TYPE_DOCKER = "docker"
 TYPE_HA_ADDON = "ha-addon"

@@ -76,7 +73,6 @@ class DockerParams:
         }[build_type]
         platform = {
             ARCH_AMD64: "linux/amd64",
-            ARCH_ARMV7: "linux/arm/v7",
             ARCH_AARCH64: "linux/arm64",
         }[arch]
         target = {


@@ -1,9 +1,121 @@
import esphome.codegen as cg
import esphome.config_validation as cv
from esphome.const import CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE
import esphome.final_validate as fv

CODEOWNERS = ["@kahrendt"]

audio_ns = cg.esphome_ns.namespace("audio")
AudioFile = audio_ns.struct("AudioFile")
AudioFileType = audio_ns.enum("AudioFileType", is_class=True)
AUDIO_FILE_TYPE_ENUM = {
"NONE": AudioFileType.NONE,
"WAV": AudioFileType.WAV,
"MP3": AudioFileType.MP3,
"FLAC": AudioFileType.FLAC,
}
CONF_MIN_BITS_PER_SAMPLE = "min_bits_per_sample"
CONF_MAX_BITS_PER_SAMPLE = "max_bits_per_sample"
CONF_MIN_CHANNELS = "min_channels"
CONF_MAX_CHANNELS = "max_channels"
CONF_MIN_SAMPLE_RATE = "min_sample_rate"
CONF_MAX_SAMPLE_RATE = "max_sample_rate"
CONFIG_SCHEMA = cv.All(
    cv.Schema({}),
)
AUDIO_COMPONENT_SCHEMA = cv.Schema(
{
cv.Optional(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32),
cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2),
cv.Optional(CONF_SAMPLE_RATE): cv.int_range(8000, 48000),
}
)
_UNDEF = object()
def set_stream_limits(
min_bits_per_sample: int = _UNDEF,
max_bits_per_sample: int = _UNDEF,
min_channels: int = _UNDEF,
max_channels: int = _UNDEF,
min_sample_rate: int = _UNDEF,
max_sample_rate: int = _UNDEF,
):
def set_limits_in_config(config):
if min_bits_per_sample is not _UNDEF:
config[CONF_MIN_BITS_PER_SAMPLE] = min_bits_per_sample
if max_bits_per_sample is not _UNDEF:
config[CONF_MAX_BITS_PER_SAMPLE] = max_bits_per_sample
if min_channels is not _UNDEF:
config[CONF_MIN_CHANNELS] = min_channels
if max_channels is not _UNDEF:
config[CONF_MAX_CHANNELS] = max_channels
if min_sample_rate is not _UNDEF:
config[CONF_MIN_SAMPLE_RATE] = min_sample_rate
if max_sample_rate is not _UNDEF:
config[CONF_MAX_SAMPLE_RATE] = max_sample_rate
return set_limits_in_config
def final_validate_audio_schema(
name: str,
*,
audio_device: str,
bits_per_sample: int,
channels: int,
sample_rate: int,
):
def validate_audio_compatiblity(audio_config):
audio_schema = {}
try:
cv.int_range(
min=audio_config.get(CONF_MIN_BITS_PER_SAMPLE),
max=audio_config.get(CONF_MAX_BITS_PER_SAMPLE),
)(bits_per_sample)
except cv.Invalid as exc:
raise cv.Invalid(
f"Invalid configuration for the {name} component. The {CONF_BITS_PER_SAMPLE} {str(exc)}"
) from exc
try:
cv.int_range(
min=audio_config.get(CONF_MIN_CHANNELS),
max=audio_config.get(CONF_MAX_CHANNELS),
)(channels)
except cv.Invalid as exc:
raise cv.Invalid(
f"Invalid configuration for the {name} component. The {CONF_NUM_CHANNELS} {str(exc)}"
) from exc
try:
cv.int_range(
min=audio_config.get(CONF_MIN_SAMPLE_RATE),
max=audio_config.get(CONF_MAX_SAMPLE_RATE),
)(sample_rate)
return cv.Schema(audio_schema, extra=cv.ALLOW_EXTRA)(audio_config)
except cv.Invalid as exc:
raise cv.Invalid(
f"Invalid configuration for the {name} component. The {CONF_SAMPLE_RATE} {str(exc)}"
) from exc
return cv.Schema(
{
cv.Required(audio_device): fv.id_declaration_match_schema(
validate_audio_compatiblity
)
},
extra=cv.ALLOW_EXTRA,
)
async def to_code(config):
cg.add_library("esphome/esp-audio-libs", "1.1.1")


@@ -0,0 +1,67 @@
#include "audio.h"
namespace esphome {
namespace audio {
// Euclid's algorithm for finding the greatest common divisor
static uint32_t gcd(uint32_t a, uint32_t b) {
while (b != 0) {
uint32_t t = b;
b = a % b;
a = t;
}
return a;
}
AudioStreamInfo::AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate)
: bits_per_sample_(bits_per_sample), channels_(channels), sample_rate_(sample_rate) {
this->ms_sample_rate_gcd_ = gcd(1000, this->sample_rate_);
this->bytes_per_sample_ = (this->bits_per_sample_ + 7) / 8;
}
uint32_t AudioStreamInfo::frames_to_microseconds(uint32_t frames) const {
return (frames * 1000000 + (this->sample_rate_ >> 1)) / this->sample_rate_;
}
uint32_t AudioStreamInfo::frames_to_milliseconds_with_remainder(uint32_t *total_frames) const {
uint32_t unprocessable_frames = *total_frames % (this->sample_rate_ / this->ms_sample_rate_gcd_);
uint32_t frames_for_ms_calculation = *total_frames - unprocessable_frames;
uint32_t playback_ms = (frames_for_ms_calculation * 1000) / this->sample_rate_;
*total_frames = unprocessable_frames;
return playback_ms;
}
bool AudioStreamInfo::operator==(const AudioStreamInfo &rhs) const {
return (this->bits_per_sample_ == rhs.get_bits_per_sample()) && (this->channels_ == rhs.get_channels()) &&
(this->sample_rate_ == rhs.get_sample_rate());
}
const char *audio_file_type_to_string(AudioFileType file_type) {
switch (file_type) {
#ifdef USE_AUDIO_FLAC_SUPPORT
case AudioFileType::FLAC:
return "FLAC";
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
case AudioFileType::MP3:
return "MP3";
#endif
case AudioFileType::WAV:
return "WAV";
default:
return "unknown";
}
}
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
size_t samples_to_scale) {
// Note the assembly dsps_mulc function has audio glitches if the input and output buffers are the same.
for (int i = 0; i < samples_to_scale; i++) {
int32_t acc = (int32_t) audio_samples[i] * (int32_t) scale_factor;
output_buffer[i] = (int16_t) (acc >> 15);
}
}
} // namespace audio
} // namespace esphome
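For reference, scale_audio_samples() treats scale_factor as a Q15 fixed-point gain of scale_factor / 32768, so 16384 is roughly half volume and 32767 is roughly unity. A standalone sketch of the same arithmetic (illustration only, not part of this change; sample values are hypothetical):

#include <cstdint>
#include <cstdio>

// Q15 gain: output = (sample * factor) >> 15, so 16384 ~= 0.5x and 32767 ~= 1.0x.
int main() {
  const int16_t samples[3] = {20000, -20000, 512};
  const int16_t scale_factor = 16384;  // ~0.5x gain
  for (int i = 0; i < 3; i++) {
    int32_t acc = (int32_t) samples[i] * (int32_t) scale_factor;
    printf("%d -> %d\n", samples[i], (int16_t) (acc >> 15));  // 10000, -10000, 256 on typical targets
  }
  return 0;
}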


@@ -1,21 +1,139 @@
 #pragma once

+#include "esphome/core/defines.h"
+
 #include <cstddef>
 #include <cstdint>

 namespace esphome {
 namespace audio {

-struct AudioStreamInfo {
-  bool operator==(const AudioStreamInfo &rhs) const {
-    return (channels == rhs.channels) && (bits_per_sample == rhs.bits_per_sample) && (sample_rate == rhs.sample_rate);
-  }
-  bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }
-  size_t get_bytes_per_sample() const { return bits_per_sample / 8; }
-  uint8_t channels = 1;
-  uint8_t bits_per_sample = 16;
-  uint32_t sample_rate = 16000;
-};
+class AudioStreamInfo {
+  /* Class to represent important parameters of the audio stream that also provides helper functions to convert between
+   * various audio related units.
+   *
+   * - An audio sample represents a unit of audio for one channel.
+   * - A frame represents a unit of audio with a sample for every channel.
+   *
+   * In general, converting between bytes, samples, and frames shouldn't result in rounding errors so long as frames
+   * are used as the main unit when transferring audio data. Durations may result in rounding for certain sample rates;
+   * e.g., 44.1 kHz. The ``frames_to_milliseconds_with_remainder`` function should be used for accuracy, as it takes
+   * into account the remainder rather than just ignoring any rounding.
+   */
+ public:
+  AudioStreamInfo()
+      : AudioStreamInfo(16, 1, 16000){};  // Default values represent ESPHome audio components' historical values
+  AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate);
+
+  uint8_t get_bits_per_sample() const { return this->bits_per_sample_; }
+  uint8_t get_channels() const { return this->channels_; }
+  uint32_t get_sample_rate() const { return this->sample_rate_; }
+
+  /// @brief Convert bytes to duration in milliseconds.
+  /// @param bytes Number of bytes to convert
+  /// @return Duration in milliseconds that will store `bytes` bytes of audio. May round down for certain sample rates
+  /// or values of `bytes`.
+  uint32_t bytes_to_ms(size_t bytes) const {
+    return bytes * 1000 / (this->sample_rate_ * this->bytes_per_sample_ * this->channels_);
+  }
+
+  /// @brief Convert bytes to frames.
+  /// @param bytes Number of bytes to convert
+  /// @return Audio frames that will store `bytes` bytes.
+  uint32_t bytes_to_frames(size_t bytes) const { return (bytes / (this->bytes_per_sample_ * this->channels_)); }
+
+  /// @brief Convert bytes to samples.
+  /// @param bytes Number of bytes to convert
+  /// @return Audio samples that will store `bytes` bytes.
+  uint32_t bytes_to_samples(size_t bytes) const { return (bytes / this->bytes_per_sample_); }
+
+  /// @brief Converts frames to bytes.
+  /// @param frames Number of frames to convert.
+  /// @return Number of bytes that will store `frames` frames of audio.
+  size_t frames_to_bytes(uint32_t frames) const { return frames * this->bytes_per_sample_ * this->channels_; }
+
+  /// @brief Converts samples to bytes.
+  /// @param samples Number of samples to convert.
+  /// @return Number of bytes that will store `samples` samples of audio.
+  size_t samples_to_bytes(uint32_t samples) const { return samples * this->bytes_per_sample_; }
+
+  /// @brief Converts duration to frames.
+  /// @param ms Duration in milliseconds
+  /// @return Audio frames that will store `ms` milliseconds of audio. May round down for certain sample rates.
+  uint32_t ms_to_frames(uint32_t ms) const { return (ms * this->sample_rate_) / 1000; }
+
+  /// @brief Converts duration to samples.
+  /// @param ms Duration in milliseconds
+  /// @return Audio samples that will store `ms` milliseconds of audio. May round down for certain sample rates.
+  uint32_t ms_to_samples(uint32_t ms) const { return (ms * this->channels_ * this->sample_rate_) / 1000; }
+
+  /// @brief Converts duration to bytes. May round down for certain sample rates.
+  /// @param ms Duration in milliseconds
+  /// @return Bytes that will store `ms` milliseconds of audio. May round down for certain sample rates.
+  size_t ms_to_bytes(uint32_t ms) const {
+    return (ms * this->bytes_per_sample_ * this->channels_ * this->sample_rate_) / 1000;
+  }
+
+  /// @brief Computes the duration, in microseconds, the given amount of frames represents.
+  /// @param frames Number of audio frames
+  /// @return Duration in microseconds `frames` represents. May be slightly inaccurate due to integer division rounding
+  /// for certain sample rates.
+  uint32_t frames_to_microseconds(uint32_t frames) const;
+
+  /// @brief Computes the duration, in milliseconds, the given amount of frames represents. Avoids
+  /// accumulating rounding errors by updating `frames` with the remainder after converting.
+  /// @param frames Pointer to uint32_t with the number of audio frames. Replaced with the remainder.
+  /// @return Duration in milliseconds `frames` represents. Always less than or equal to the actual value due to
+  /// rounding.
+  uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const;
+
+  // Class comparison operators
+  bool operator==(const AudioStreamInfo &rhs) const;
   bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }
+
+ protected:
+  uint8_t bits_per_sample_;
+  uint8_t channels_;
+  uint32_t sample_rate_;
+
+  // The greatest common divisor between 1000 ms = 1 second and the sample rate. Used to avoid accumulating error when
+  // converting from frames to duration. Computed at construction.
+  uint32_t ms_sample_rate_gcd_;
+
+  // Conversion factor derived from the number of bits per sample. Assumes audio data is aligned to the byte. Computed
+  // at construction.
+  size_t bytes_per_sample_;
 };
+
+enum class AudioFileType : uint8_t {
+  NONE = 0,
+#ifdef USE_AUDIO_FLAC_SUPPORT
+  FLAC,
+#endif
+#ifdef USE_AUDIO_MP3_SUPPORT
+  MP3,
+#endif
+  WAV,
+};
+
+struct AudioFile {
+  const uint8_t *data;
+  size_t length;
+  AudioFileType file_type;
+};
+
+/// @brief Helper function to convert file type to a const char string
+/// @param file_type
+/// @return const char pointer to the readable file type
+const char *audio_file_type_to_string(AudioFileType file_type);
+
+/// @brief Scales Q15 fixed point audio samples. Scales in place if audio_samples == output_buffer.
+/// @param audio_samples PCM int16 audio samples
+/// @param output_buffer Buffer to store the scaled samples
+/// @param scale_factor Q15 fixed point scaling factor
+/// @param samples_to_scale Number of samples to scale
+void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
+                         size_t samples_to_scale);
+
 }  // namespace audio
 }  // namespace esphome
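For orientation, the unit-conversion helpers above behave as follows for a 16-bit, 2-channel, 44100 Hz stream. This is an illustrative sketch only; it assumes the ESPHome build environment and that the header above lives at the include path shown (an assumption):

#include "esphome/components/audio/audio.h"  // assumed location of the header above

using esphome::audio::AudioStreamInfo;

void stream_info_sketch() {
  AudioStreamInfo info(16, 2, 44100);  // 16-bit, stereo, 44.1 kHz

  size_t bytes_per_frame = info.frames_to_bytes(1);  // 2 bytes/sample * 2 channels = 4
  uint32_t frames_100ms = info.ms_to_frames(100);    // 44100 * 100 / 1000 = 4410
  size_t bytes_100ms = info.ms_to_bytes(100);        // 4410 frames * 4 bytes = 17640

  // 44.1 kHz frame counts don't always convert to whole milliseconds, so this helper returns
  // whole milliseconds and leaves the leftover frames in *frames for the next call.
  uint32_t frames = 4500;
  uint32_t ms = info.frames_to_milliseconds_with_remainder(&frames);
  // ms == 100, frames == 90: the remainder is carried forward instead of being dropped.
  (void) bytes_per_frame;
  (void) frames_100ms;
  (void) bytes_100ms;
  (void) ms;
}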


@@ -0,0 +1,361 @@
#include "audio_decoder.h"
#ifdef USE_ESP32
#include "esphome/core/hal.h"
namespace esphome {
namespace audio {
static const uint32_t DECODING_TIMEOUT_MS = 50; // The decode function will yield after this duration
static const uint32_t READ_WRITE_TIMEOUT_MS = 20; // Timeout for transferring audio data
static const uint32_t MAX_POTENTIALLY_FAILED_COUNT = 10;
AudioDecoder::AudioDecoder(size_t input_buffer_size, size_t output_buffer_size) {
this->input_transfer_buffer_ = AudioSourceTransferBuffer::create(input_buffer_size);
this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
}
AudioDecoder::~AudioDecoder() {
#ifdef USE_AUDIO_MP3_SUPPORT
if (this->audio_file_type_ == AudioFileType::MP3) {
esp_audio_libs::helix_decoder::MP3FreeDecoder(this->mp3_decoder_);
}
#endif
}
esp_err_t AudioDecoder::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
if (this->input_transfer_buffer_ != nullptr) {
this->input_transfer_buffer_->set_source(input_ring_buffer);
return ESP_OK;
}
return ESP_ERR_NO_MEM;
}
esp_err_t AudioDecoder::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
if (this->output_transfer_buffer_ != nullptr) {
this->output_transfer_buffer_->set_sink(output_ring_buffer);
return ESP_OK;
}
return ESP_ERR_NO_MEM;
}
#ifdef USE_SPEAKER
esp_err_t AudioDecoder::add_sink(speaker::Speaker *speaker) {
if (this->output_transfer_buffer_ != nullptr) {
this->output_transfer_buffer_->set_sink(speaker);
return ESP_OK;
}
return ESP_ERR_NO_MEM;
}
#endif
esp_err_t AudioDecoder::start(AudioFileType audio_file_type) {
if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
return ESP_ERR_NO_MEM;
}
this->audio_file_type_ = audio_file_type;
this->potentially_failed_count_ = 0;
this->end_of_file_ = false;
switch (this->audio_file_type_) {
#ifdef USE_AUDIO_FLAC_SUPPORT
case AudioFileType::FLAC:
this->flac_decoder_ = make_unique<esp_audio_libs::flac::FLACDecoder>();
this->free_buffer_required_ =
this->output_transfer_buffer_->capacity(); // We'll revise this after reading the header
break;
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
case AudioFileType::MP3:
this->mp3_decoder_ = esp_audio_libs::helix_decoder::MP3InitDecoder();
this->free_buffer_required_ = 1152 * sizeof(int16_t) * 2; // samples * size per sample * channels
break;
#endif
case AudioFileType::WAV:
this->wav_decoder_ = make_unique<esp_audio_libs::wav_decoder::WAVDecoder>();
this->wav_decoder_->reset();
this->free_buffer_required_ = 1024;
break;
case AudioFileType::NONE:
default:
return ESP_ERR_NOT_SUPPORTED;
break;
}
return ESP_OK;
}
AudioDecoderState AudioDecoder::decode(bool stop_gracefully) {
if (stop_gracefully) {
if (this->output_transfer_buffer_->available() == 0) {
if (this->end_of_file_) {
// The file decoder indicates it reached the end of file
return AudioDecoderState::FINISHED;
}
if (!this->input_transfer_buffer_->has_buffered_data()) {
// If all the internal buffers are empty, the decoding is done
return AudioDecoderState::FINISHED;
}
}
}
if (this->potentially_failed_count_ > MAX_POTENTIALLY_FAILED_COUNT) {
if (stop_gracefully) {
// No more new data is going to come in, so decoding is done
return AudioDecoderState::FINISHED;
}
return AudioDecoderState::FAILED;
}
FileDecoderState state = FileDecoderState::MORE_TO_PROCESS;
uint32_t decoding_start = millis();
while (state == FileDecoderState::MORE_TO_PROCESS) {
// Transfer decoded out
if (!this->pause_output_) {
size_t bytes_written = this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
if (this->audio_stream_info_.has_value()) {
this->accumulated_frames_written_ += this->audio_stream_info_.value().bytes_to_frames(bytes_written);
this->playback_ms_ +=
this->audio_stream_info_.value().frames_to_milliseconds_with_remainder(&this->accumulated_frames_written_);
}
} else {
// If paused, block to avoid wasting CPU resources
delay(READ_WRITE_TIMEOUT_MS);
}
// Verify there is enough space to store more decoded audio and that the function hasn't been running too long
if ((this->output_transfer_buffer_->free() < this->free_buffer_required_) ||
(millis() - decoding_start > DECODING_TIMEOUT_MS)) {
return AudioDecoderState::DECODING;
}
// Decode more audio
size_t bytes_read = this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
if ((this->potentially_failed_count_ > 0) && (bytes_read == 0)) {
// Failed to decode in last attempt and there is no new data
if (this->input_transfer_buffer_->free() == 0) {
// The input buffer is full. Since it previously failed on the exact same data, we can never recover
state = FileDecoderState::FAILED;
} else {
// Attempt to get more data next time
state = FileDecoderState::IDLE;
}
} else if (this->input_transfer_buffer_->available() == 0) {
// No data to decode, attempt to get more data next time
state = FileDecoderState::IDLE;
} else {
switch (this->audio_file_type_) {
#ifdef USE_AUDIO_FLAC_SUPPORT
case AudioFileType::FLAC:
state = this->decode_flac_();
break;
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
case AudioFileType::MP3:
state = this->decode_mp3_();
break;
#endif
case AudioFileType::WAV:
state = this->decode_wav_();
break;
case AudioFileType::NONE:
default:
state = FileDecoderState::IDLE;
break;
}
}
if (state == FileDecoderState::POTENTIALLY_FAILED) {
++this->potentially_failed_count_;
} else if (state == FileDecoderState::END_OF_FILE) {
this->end_of_file_ = true;
} else if (state == FileDecoderState::FAILED) {
return AudioDecoderState::FAILED;
} else if (state == FileDecoderState::MORE_TO_PROCESS) {
this->potentially_failed_count_ = 0;
}
}
return AudioDecoderState::DECODING;
}
#ifdef USE_AUDIO_FLAC_SUPPORT
FileDecoderState AudioDecoder::decode_flac_() {
if (!this->audio_stream_info_.has_value()) {
// Header hasn't been read
auto result = this->flac_decoder_->read_header(this->input_transfer_buffer_->get_buffer_start(),
this->input_transfer_buffer_->available());
if (result == esp_audio_libs::flac::FLAC_DECODER_HEADER_OUT_OF_DATA) {
return FileDecoderState::POTENTIALLY_FAILED;
}
if (result != esp_audio_libs::flac::FLAC_DECODER_SUCCESS) {
// Couldn't read FLAC header
return FileDecoderState::FAILED;
}
size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
this->free_buffer_required_ = flac_decoder_->get_output_buffer_size_bytes();
if (this->output_transfer_buffer_->capacity() < this->free_buffer_required_) {
// Output buffer is not big enough
if (!this->output_transfer_buffer_->reallocate(this->free_buffer_required_)) {
// Couldn't reallocate output buffer
return FileDecoderState::FAILED;
}
}
this->audio_stream_info_ =
audio::AudioStreamInfo(this->flac_decoder_->get_sample_depth(), this->flac_decoder_->get_num_channels(),
this->flac_decoder_->get_sample_rate());
return FileDecoderState::MORE_TO_PROCESS;
}
uint32_t output_samples = 0;
auto result = this->flac_decoder_->decode_frame(
this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available(),
reinterpret_cast<int16_t *>(this->output_transfer_buffer_->get_buffer_end()), &output_samples);
if (result == esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
// Not an issue, just needs more data that we'll get next time.
return FileDecoderState::POTENTIALLY_FAILED;
}
size_t bytes_consumed = this->flac_decoder_->get_bytes_index();
this->input_transfer_buffer_->decrease_buffer_length(bytes_consumed);
if (result > esp_audio_libs::flac::FLAC_DECODER_ERROR_OUT_OF_DATA) {
// Corrupted frame, don't retry with current buffer content, wait for new sync
return FileDecoderState::POTENTIALLY_FAILED;
}
// We have successfully decoded some input data and have new output data
this->output_transfer_buffer_->increase_buffer_length(
this->audio_stream_info_.value().samples_to_bytes(output_samples));
if (result == esp_audio_libs::flac::FLAC_DECODER_NO_MORE_FRAMES) {
return FileDecoderState::END_OF_FILE;
}
return FileDecoderState::MORE_TO_PROCESS;
}
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
FileDecoderState AudioDecoder::decode_mp3_() {
// Look for the next sync word
int buffer_length = (int) this->input_transfer_buffer_->available();
int32_t offset =
esp_audio_libs::helix_decoder::MP3FindSyncWord(this->input_transfer_buffer_->get_buffer_start(), buffer_length);
if (offset < 0) {
// New data may have the sync word
this->input_transfer_buffer_->decrease_buffer_length(buffer_length);
return FileDecoderState::POTENTIALLY_FAILED;
}
// Advance read pointer to match the offset for the syncword
this->input_transfer_buffer_->decrease_buffer_length(offset);
uint8_t *buffer_start = this->input_transfer_buffer_->get_buffer_start();
buffer_length = (int) this->input_transfer_buffer_->available();
int err = esp_audio_libs::helix_decoder::MP3Decode(this->mp3_decoder_, &buffer_start, &buffer_length,
(int16_t *) this->output_transfer_buffer_->get_buffer_end(), 0);
size_t consumed = this->input_transfer_buffer_->available() - buffer_length;
this->input_transfer_buffer_->decrease_buffer_length(consumed);
if (err) {
switch (err) {
case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
// Intentional fallthrough
case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
return FileDecoderState::FAILED;
break;
default:
// Most errors are recoverable by moving on to the next frame, so mark as potentially failed
return FileDecoderState::POTENTIALLY_FAILED;
break;
}
} else {
esp_audio_libs::helix_decoder::MP3FrameInfo mp3_frame_info;
esp_audio_libs::helix_decoder::MP3GetLastFrameInfo(this->mp3_decoder_, &mp3_frame_info);
if (mp3_frame_info.outputSamps > 0) {
int bytes_per_sample = (mp3_frame_info.bitsPerSample / 8);
this->output_transfer_buffer_->increase_buffer_length(mp3_frame_info.outputSamps * bytes_per_sample);
if (!this->audio_stream_info_.has_value()) {
this->audio_stream_info_ =
audio::AudioStreamInfo(mp3_frame_info.bitsPerSample, mp3_frame_info.nChans, mp3_frame_info.samprate);
}
}
}
return FileDecoderState::MORE_TO_PROCESS;
}
#endif
FileDecoderState AudioDecoder::decode_wav_() {
if (!this->audio_stream_info_.has_value()) {
// Header hasn't been processed
esp_audio_libs::wav_decoder::WAVDecoderResult result = this->wav_decoder_->decode_header(
this->input_transfer_buffer_->get_buffer_start(), this->input_transfer_buffer_->available());
if (result == esp_audio_libs::wav_decoder::WAV_DECODER_SUCCESS_IN_DATA) {
this->input_transfer_buffer_->decrease_buffer_length(this->wav_decoder_->bytes_processed());
this->audio_stream_info_ = audio::AudioStreamInfo(
this->wav_decoder_->bits_per_sample(), this->wav_decoder_->num_channels(), this->wav_decoder_->sample_rate());
this->wav_bytes_left_ = this->wav_decoder_->chunk_bytes_left();
this->wav_has_known_end_ = (this->wav_bytes_left_ > 0);
return FileDecoderState::MORE_TO_PROCESS;
} else if (result == esp_audio_libs::wav_decoder::WAV_DECODER_WARNING_INCOMPLETE_DATA) {
// Available data didn't have the full header
return FileDecoderState::POTENTIALLY_FAILED;
} else {
return FileDecoderState::FAILED;
}
} else {
if (!this->wav_has_known_end_ || (this->wav_bytes_left_ > 0)) {
size_t bytes_to_copy = this->input_transfer_buffer_->available();
if (this->wav_has_known_end_) {
bytes_to_copy = std::min(bytes_to_copy, this->wav_bytes_left_);
}
bytes_to_copy = std::min(bytes_to_copy, this->output_transfer_buffer_->free());
if (bytes_to_copy > 0) {
std::memcpy(this->output_transfer_buffer_->get_buffer_end(), this->input_transfer_buffer_->get_buffer_start(),
bytes_to_copy);
this->input_transfer_buffer_->decrease_buffer_length(bytes_to_copy);
this->output_transfer_buffer_->increase_buffer_length(bytes_to_copy);
if (this->wav_has_known_end_) {
this->wav_bytes_left_ -= bytes_to_copy;
}
}
return FileDecoderState::IDLE;
}
}
return FileDecoderState::END_OF_FILE;
}
} // namespace audio
} // namespace esphome
#endif


@@ -0,0 +1,135 @@
#pragma once
#ifdef USE_ESP32
#include "audio.h"
#include "audio_transfer_buffer.h"
#include "esphome/core/defines.h"
#include "esphome/core/helpers.h"
#include "esphome/core/ring_buffer.h"
#ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h"
#endif
#include "esp_err.h"
// esp-audio-libs
#ifdef USE_AUDIO_FLAC_SUPPORT
#include <flac_decoder.h>
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
#include <mp3_decoder.h>
#endif
#include <wav_decoder.h>
namespace esphome {
namespace audio {
enum class AudioDecoderState : uint8_t {
DECODING = 0, // More data is available to decode
FINISHED, // All file data has been decoded and transferred
FAILED, // Encountered an error
};
// Only used within the AudioDecoder class; conveys the state of the particular file type decoder
enum class FileDecoderState : uint8_t {
MORE_TO_PROCESS, // Successfully read a file chunk and more data is available to decode
IDLE, // Not enough data to decode, waiting for more to be transferred
POTENTIALLY_FAILED, // Decoder encountered a potentially recoverable error if more file data is available
FAILED, // Decoder encountered an unrecoverable error
END_OF_FILE, // The specific file decoder knows it's the end of the file
};
class AudioDecoder {
/*
* @brief Class that facilitates decoding an audio file.
* The audio file is read from a ring buffer source, decoded, and sent to an audio sink (ring buffer or speaker
* component).
* Supports wav, flac, and mp3 formats.
*/
public:
/// @brief Allocates the input and output transfer buffers
/// @param input_buffer_size Size of the input transfer buffer in bytes.
/// @param output_buffer_size Size of the output transfer buffer in bytes.
AudioDecoder(size_t input_buffer_size, size_t output_buffer_size);
/// @brief Deallocates the MP3 decoder (the flac and wav decoders are deallocated automatically)
~AudioDecoder();
/// @brief Adds a source ring buffer for raw file data. Takes ownership of the ring buffer in a shared_ptr.
/// @param input_ring_buffer weak_ptr of a shared_ptr of the source ring buffer to transfer ownership
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
esp_err_t add_source(std::weak_ptr<RingBuffer> &input_ring_buffer);
/// @brief Adds a sink ring buffer for decoded audio. Takes ownership of the ring buffer in a shared_ptr.
/// @param output_ring_buffer weak_ptr of a shared_ptr of the sink ring buffer to transfer ownership
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
esp_err_t add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer);
#ifdef USE_SPEAKER
/// @brief Adds a sink speaker for decoded audio.
/// @param speaker pointer to speaker component
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
esp_err_t add_sink(speaker::Speaker *speaker);
#endif
/// @brief Sets up decoding the file
/// @param audio_file_type AudioFileType of the file
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffers fail to allocate, or ESP_ERR_NOT_SUPPORTED if
/// the format isn't supported.
esp_err_t start(AudioFileType audio_file_type);
/// @brief Decodes audio from the ring buffer source and writes to the sink.
/// @param stop_gracefully If true, it indicates the file source is finished. The decoder will decode all the
/// remaining data and then finish.
/// @return AudioDecoderState
AudioDecoderState decode(bool stop_gracefully);
/// @brief Gets the audio stream information, if it has been decoded from the file's header
/// @return optional<AudioStreamInfo> with the audio information. If not available yet, returns no value.
const optional<audio::AudioStreamInfo> &get_audio_stream_info() const { return this->audio_stream_info_; }
/// @brief Returns the duration of audio (in milliseconds) decoded and sent to the sink
/// @return Duration of decoded audio in milliseconds
uint32_t get_playback_ms() const { return this->playback_ms_; }
/// @brief Pauses sending decoded audio to the sink. If paused, it will continue to process internal buffers.
/// @param pause_state If true, audio data is not sent to the sink.
void set_pause_output_state(bool pause_state) { this->pause_output_ = pause_state; }
protected:
std::unique_ptr<esp_audio_libs::wav_decoder::WAVDecoder> wav_decoder_;
#ifdef USE_AUDIO_FLAC_SUPPORT
FileDecoderState decode_flac_();
std::unique_ptr<esp_audio_libs::flac::FLACDecoder> flac_decoder_;
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
FileDecoderState decode_mp3_();
esp_audio_libs::helix_decoder::HMP3Decoder mp3_decoder_;
#endif
FileDecoderState decode_wav_();
std::unique_ptr<AudioSourceTransferBuffer> input_transfer_buffer_;
std::unique_ptr<AudioSinkTransferBuffer> output_transfer_buffer_;
AudioFileType audio_file_type_{AudioFileType::NONE};
optional<AudioStreamInfo> audio_stream_info_{};
size_t free_buffer_required_{0};
size_t wav_bytes_left_{0};
uint32_t potentially_failed_count_{0};
bool end_of_file_{false};
bool wav_has_known_end_{false};
bool pause_output_{false};
uint32_t accumulated_frames_written_{0};
uint32_t playback_ms_{0};
};
} // namespace audio
} // namespace esphome
#endif
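The call pattern implied by the API above is roughly: allocate the decoder, attach a source and a sink ring buffer, call start() with the detected file type, then call decode() repeatedly (typically from a dedicated task) until it reports FINISHED or FAILED. A hypothetical sketch; the buffer sizes are arbitrary and MP3 support is assumed to be compiled in:

#include "audio_decoder.h"
#include "esphome/core/ring_buffer.h"

#include <memory>

using namespace esphome;
using namespace esphome::audio;

void decode_task_sketch(std::weak_ptr<RingBuffer> raw_in, std::weak_ptr<RingBuffer> pcm_out) {
  AudioDecoder decoder(/*input_buffer_size=*/8192, /*output_buffer_size=*/8192);  // sizes are arbitrary
  if (decoder.add_source(raw_in) != ESP_OK || decoder.add_sink(pcm_out) != ESP_OK)
    return;  // transfer buffers were not allocated
  if (decoder.start(AudioFileType::MP3) != ESP_OK)  // assumes USE_AUDIO_MP3_SUPPORT is defined
    return;

  bool source_finished = false;  // set to true once the reader has delivered the whole file
  while (true) {
    AudioDecoderState state = decoder.decode(source_finished);
    if (state == AudioDecoderState::FINISHED || state == AudioDecoderState::FAILED)
      break;
    if (decoder.get_audio_stream_info().has_value()) {
      // Header parsed: the stream info could now be used to configure a resampler or speaker.
    }
  }
}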


@@ -0,0 +1,308 @@
#include "audio_reader.h"
#ifdef USE_ESP_IDF
#include "esphome/core/defines.h"
#include "esphome/core/hal.h"
#include "esphome/core/helpers.h"
#if CONFIG_MBEDTLS_CERTIFICATE_BUNDLE
#include "esp_crt_bundle.h"
#endif
namespace esphome {
namespace audio {
static const uint32_t READ_WRITE_TIMEOUT_MS = 20;
// The number of times the http read times out with no data before throwing an error
static const uint32_t ERROR_COUNT_NO_DATA_READ_TIMEOUT = 100;
static const size_t HTTP_STREAM_BUFFER_SIZE = 2048;
static const uint8_t MAX_REDIRECTION = 5;
// Some common HTTP status codes - borrowed from http_request component accessed 20241224
enum HttpStatus {
HTTP_STATUS_OK = 200,
HTTP_STATUS_NO_CONTENT = 204,
HTTP_STATUS_PARTIAL_CONTENT = 206,
/* 3xx - Redirection */
HTTP_STATUS_MULTIPLE_CHOICES = 300,
HTTP_STATUS_MOVED_PERMANENTLY = 301,
HTTP_STATUS_FOUND = 302,
HTTP_STATUS_SEE_OTHER = 303,
HTTP_STATUS_NOT_MODIFIED = 304,
HTTP_STATUS_TEMPORARY_REDIRECT = 307,
HTTP_STATUS_PERMANENT_REDIRECT = 308,
/* 4XX - CLIENT ERROR */
HTTP_STATUS_BAD_REQUEST = 400,
HTTP_STATUS_UNAUTHORIZED = 401,
HTTP_STATUS_FORBIDDEN = 403,
HTTP_STATUS_NOT_FOUND = 404,
HTTP_STATUS_METHOD_NOT_ALLOWED = 405,
HTTP_STATUS_NOT_ACCEPTABLE = 406,
HTTP_STATUS_LENGTH_REQUIRED = 411,
/* 5xx - Server Error */
HTTP_STATUS_INTERNAL_ERROR = 500
};
AudioReader::~AudioReader() { this->cleanup_connection_(); }
esp_err_t AudioReader::add_sink(const std::weak_ptr<RingBuffer> &output_ring_buffer) {
if (current_audio_file_ != nullptr) {
// A transfer buffer isn't necessary for a local file
this->file_ring_buffer_ = output_ring_buffer.lock();
return ESP_OK;
}
if (this->output_transfer_buffer_ != nullptr) {
this->output_transfer_buffer_->set_sink(output_ring_buffer);
return ESP_OK;
}
return ESP_ERR_INVALID_STATE;
}
esp_err_t AudioReader::start(AudioFile *audio_file, AudioFileType &file_type) {
file_type = AudioFileType::NONE;
this->current_audio_file_ = audio_file;
this->file_current_ = audio_file->data;
file_type = audio_file->file_type;
return ESP_OK;
}
esp_err_t AudioReader::start(const std::string &uri, AudioFileType &file_type) {
file_type = AudioFileType::NONE;
this->cleanup_connection_();
if (uri.empty()) {
return ESP_ERR_INVALID_ARG;
}
esp_http_client_config_t client_config = {};
client_config.url = uri.c_str();
client_config.cert_pem = nullptr;
client_config.disable_auto_redirect = false;
client_config.max_redirection_count = 10;
client_config.event_handler = http_event_handler;
client_config.user_data = this;
client_config.buffer_size = HTTP_STREAM_BUFFER_SIZE;
client_config.keep_alive_enable = true;
client_config.timeout_ms = 5000; // Shouldn't trigger watchdog resets if caller runs in a task
#if CONFIG_MBEDTLS_CERTIFICATE_BUNDLE
if (uri.find("https:") != std::string::npos) {
client_config.crt_bundle_attach = esp_crt_bundle_attach;
}
#endif
this->client_ = esp_http_client_init(&client_config);
if (this->client_ == nullptr) {
return ESP_FAIL;
}
esp_err_t err = esp_http_client_open(this->client_, 0);
if (err != ESP_OK) {
this->cleanup_connection_();
return err;
}
int64_t header_length = esp_http_client_fetch_headers(this->client_);
if (header_length < 0) {
this->cleanup_connection_();
return ESP_FAIL;
}
int status_code = esp_http_client_get_status_code(this->client_);
if ((status_code < HTTP_STATUS_OK) || (status_code > HTTP_STATUS_PERMANENT_REDIRECT)) {
this->cleanup_connection_();
return ESP_FAIL;
}
ssize_t redirect_count = 0;
while ((esp_http_client_set_redirection(this->client_) == ESP_OK) && (redirect_count < MAX_REDIRECTION)) {
err = esp_http_client_open(this->client_, 0);
if (err != ESP_OK) {
this->cleanup_connection_();
return ESP_FAIL;
}
header_length = esp_http_client_fetch_headers(this->client_);
if (header_length < 0) {
this->cleanup_connection_();
return ESP_FAIL;
}
status_code = esp_http_client_get_status_code(this->client_);
if ((status_code < HTTP_STATUS_OK) || (status_code > HTTP_STATUS_PERMANENT_REDIRECT)) {
this->cleanup_connection_();
return ESP_FAIL;
}
++redirect_count;
}
if (this->audio_file_type_ == AudioFileType::NONE) {
// Failed to determine the file type from the header, fallback to using the url
char url[500];
err = esp_http_client_get_url(this->client_, url, 500);
if (err != ESP_OK) {
this->cleanup_connection_();
return err;
}
std::string url_string = str_lower_case(url);
if (str_endswith(url_string, ".wav")) {
file_type = AudioFileType::WAV;
}
#ifdef USE_AUDIO_MP3_SUPPORT
else if (str_endswith(url_string, ".mp3")) {
file_type = AudioFileType::MP3;
}
#endif
#ifdef USE_AUDIO_FLAC_SUPPORT
else if (str_endswith(url_string, ".flac")) {
file_type = AudioFileType::FLAC;
}
#endif
else {
file_type = AudioFileType::NONE;
this->cleanup_connection_();
return ESP_ERR_NOT_SUPPORTED;
}
} else {
file_type = this->audio_file_type_;
}
this->no_data_read_count_ = 0;
this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(this->buffer_size_);
if (this->output_transfer_buffer_ == nullptr) {
return ESP_ERR_NO_MEM;
}
return ESP_OK;
}
AudioReaderState AudioReader::read() {
if (this->client_ != nullptr) {
return this->http_read_();
} else if (this->current_audio_file_ != nullptr) {
return this->file_read_();
}
return AudioReaderState::FAILED;
}
AudioFileType AudioReader::get_audio_type(const char *content_type) {
#ifdef USE_AUDIO_MP3_SUPPORT
if (strcasecmp(content_type, "mp3") == 0 || strcasecmp(content_type, "audio/mp3") == 0 ||
strcasecmp(content_type, "audio/mpeg") == 0) {
return AudioFileType::MP3;
}
#endif
if (strcasecmp(content_type, "audio/wav") == 0) {
return AudioFileType::WAV;
}
#ifdef USE_AUDIO_FLAC_SUPPORT
if (strcasecmp(content_type, "audio/flac") == 0 || strcasecmp(content_type, "audio/x-flac") == 0) {
return AudioFileType::FLAC;
}
#endif
return AudioFileType::NONE;
}
esp_err_t AudioReader::http_event_handler(esp_http_client_event_t *evt) {
// Based on https://github.com/maroc81/WeatherLily/tree/main/main/net accessed 20241224
AudioReader *this_reader = (AudioReader *) evt->user_data;
switch (evt->event_id) {
case HTTP_EVENT_ON_HEADER:
if (strcasecmp(evt->header_key, "Content-Type") == 0) {
this_reader->audio_file_type_ = get_audio_type(evt->header_value);
}
break;
default:
break;
}
return ESP_OK;
}
AudioReaderState AudioReader::file_read_() {
size_t remaining_bytes = this->current_audio_file_->length - (this->file_current_ - this->current_audio_file_->data);
if (remaining_bytes > 0) {
size_t bytes_written = this->file_ring_buffer_->write_without_replacement(this->file_current_, remaining_bytes,
pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
this->file_current_ += bytes_written;
return AudioReaderState::READING;
}
return AudioReaderState::FINISHED;
}
AudioReaderState AudioReader::http_read_() {
this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
if (esp_http_client_is_complete_data_received(this->client_)) {
if (this->output_transfer_buffer_->available() == 0) {
this->cleanup_connection_();
return AudioReaderState::FINISHED;
}
} else {
size_t bytes_to_read = this->output_transfer_buffer_->free();
int received_len =
esp_http_client_read(this->client_, (char *) this->output_transfer_buffer_->get_buffer_end(), bytes_to_read);
if (received_len > 0) {
this->output_transfer_buffer_->increase_buffer_length(received_len);
this->no_data_read_count_ = 0;
} else if (received_len < 0) {
// HTTP read error
this->cleanup_connection_();
return AudioReaderState::FAILED;
} else {
if (bytes_to_read > 0) {
// Read timed out
++this->no_data_read_count_;
if (this->no_data_read_count_ >= ERROR_COUNT_NO_DATA_READ_TIMEOUT) {
// Timed out with no data read too many times, so the http read has failed
this->cleanup_connection_();
return AudioReaderState::FAILED;
}
delay(READ_WRITE_TIMEOUT_MS);
}
}
}
return AudioReaderState::READING;
}
void AudioReader::cleanup_connection_() {
if (this->client_ != nullptr) {
esp_http_client_close(this->client_);
esp_http_client_cleanup(this->client_);
this->client_ = nullptr;
}
}
} // namespace audio
} // namespace esphome
#endif


@@ -0,0 +1,85 @@
#pragma once
#ifdef USE_ESP_IDF
#include "audio.h"
#include "audio_transfer_buffer.h"
#include "esphome/core/ring_buffer.h"
#include "esp_err.h"
#include <esp_http_client.h>
namespace esphome {
namespace audio {
enum class AudioReaderState : uint8_t {
READING = 0, // More data is available to read
FINISHED, // All data has been read and transferred
FAILED, // Encountered an error
};
class AudioReader {
/*
* @brief Class that facilitates reading a raw audio file.
* Files can be read from flash (stored in an AudioFile struct) or from an http source.
* The file data is sent to a ring buffer sink.
*/
public:
/// @brief Constructs an AudioReader object.
/// The transfer buffer isn't allocated here, but only if necessary (an http source) in the start function.
/// @param buffer_size Transfer buffer size in bytes.
AudioReader(size_t buffer_size) : buffer_size_(buffer_size) {}
~AudioReader();
/// @brief Adds a sink ring buffer for audio data. Takes ownership of the ring buffer in a shared_ptr
/// @param output_ring_buffer weak_ptr of a shared_ptr of the sink ring buffer to transfer ownership
/// @return ESP_OK if successful, ESP_ERR_INVALID_STATE otherwise
esp_err_t add_sink(const std::weak_ptr<RingBuffer> &output_ring_buffer);
/// @brief Starts reading an audio file from an http source. The transfer buffer is allocated here.
/// @param uri Web url to the http file.
/// @param file_type AudioFileType variable passed-by-reference indicating the type of file being read.
/// @return ESP_OK if successful, an ESP_ERR* code otherwise.
esp_err_t start(const std::string &uri, AudioFileType &file_type);
/// @brief Starts reading an audio file from flash. No transfer buffer is allocated.
/// @param audio_file AudioFile struct containing the file.
/// @param file_type AudioFileType variable passed-by-reference indicating the type of file being read.
/// @return ESP_OK
esp_err_t start(AudioFile *audio_file, AudioFileType &file_type);
/// @brief Reads new file data from the source and sends to the ring buffer sink.
/// @return AudioReaderState
AudioReaderState read();
protected:
/// @brief Monitors the http client events to attempt determining the file type from the Content-Type header
static esp_err_t http_event_handler(esp_http_client_event_t *evt);
/// @brief Determines the audio file type from the http header's Content-Type key
/// @param content_type string with the Content-Type key
/// @return AudioFileType of the url, if it can be determined. If not, return AudioFileType::NONE.
static AudioFileType get_audio_type(const char *content_type);
AudioReaderState file_read_();
AudioReaderState http_read_();
std::shared_ptr<RingBuffer> file_ring_buffer_;
std::unique_ptr<AudioSinkTransferBuffer> output_transfer_buffer_;
void cleanup_connection_();
size_t buffer_size_;
uint32_t no_data_read_count_;
esp_http_client_handle_t client_{nullptr};
AudioFile *current_audio_file_{nullptr};
AudioFileType audio_file_type_{AudioFileType::NONE};
const uint8_t *file_current_{nullptr};
};
} // namespace audio
} // namespace esphome
#endif
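A hypothetical sketch of reading an HTTP stream with this class; the URL and buffer size are placeholders and an ESP-IDF build is assumed. Note that for HTTP sources the sink is attached after start(), since that is where the transfer buffer is allocated:

#include "audio_reader.h"

#include <memory>
#include <string>

using namespace esphome;
using namespace esphome::audio;

void read_url_sketch(const std::weak_ptr<RingBuffer> &raw_out) {
  AudioReader reader(/*buffer_size=*/8192);  // size is arbitrary

  AudioFileType type = AudioFileType::NONE;
  if (reader.start(std::string("https://example.com/stream.mp3"), type) != ESP_OK)
    return;  // connection failed or the transfer buffer could not be allocated
  if (reader.add_sink(raw_out) != ESP_OK)
    return;

  AudioReaderState state = AudioReaderState::READING;
  while (state == AudioReaderState::READING) {
    state = reader.read();  // moves HTTP data into the sink ring buffer, waiting briefly if idle
  }
  // state == FINISHED when the complete file was transferred, FAILED otherwise.
}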


@@ -0,0 +1,159 @@
#include "audio_resampler.h"
#ifdef USE_ESP32
#include "esphome/core/hal.h"
namespace esphome {
namespace audio {
static const uint32_t READ_WRITE_TIMEOUT_MS = 20;
AudioResampler::AudioResampler(size_t input_buffer_size, size_t output_buffer_size)
: input_buffer_size_(input_buffer_size), output_buffer_size_(output_buffer_size) {
this->input_transfer_buffer_ = AudioSourceTransferBuffer::create(input_buffer_size);
this->output_transfer_buffer_ = AudioSinkTransferBuffer::create(output_buffer_size);
}
esp_err_t AudioResampler::add_source(std::weak_ptr<RingBuffer> &input_ring_buffer) {
if (this->input_transfer_buffer_ != nullptr) {
this->input_transfer_buffer_->set_source(input_ring_buffer);
return ESP_OK;
}
return ESP_ERR_NO_MEM;
}
esp_err_t AudioResampler::add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer) {
if (this->output_transfer_buffer_ != nullptr) {
this->output_transfer_buffer_->set_sink(output_ring_buffer);
return ESP_OK;
}
return ESP_ERR_NO_MEM;
}
#ifdef USE_SPEAKER
esp_err_t AudioResampler::add_sink(speaker::Speaker *speaker) {
if (this->output_transfer_buffer_ != nullptr) {
this->output_transfer_buffer_->set_sink(speaker);
return ESP_OK;
}
return ESP_ERR_NO_MEM;
}
#endif
esp_err_t AudioResampler::start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info,
uint16_t number_of_taps, uint16_t number_of_filters) {
this->input_stream_info_ = input_stream_info;
this->output_stream_info_ = output_stream_info;
if ((this->input_transfer_buffer_ == nullptr) || (this->output_transfer_buffer_ == nullptr)) {
return ESP_ERR_NO_MEM;
}
if ((input_stream_info.get_bits_per_sample() > 32) || (output_stream_info.get_bits_per_sample() > 32) ||
(input_stream_info_.get_channels() != output_stream_info.get_channels())) {
return ESP_ERR_NOT_SUPPORTED;
}
if ((input_stream_info.get_sample_rate() != output_stream_info.get_sample_rate()) ||
(input_stream_info.get_bits_per_sample() != output_stream_info.get_bits_per_sample())) {
this->resampler_ = make_unique<esp_audio_libs::resampler::Resampler>(
input_stream_info.bytes_to_samples(this->input_buffer_size_),
output_stream_info.bytes_to_samples(this->output_buffer_size_));
// Use cascaded biquad filters when downsampling to avoid aliasing
bool use_pre_filter = output_stream_info.get_sample_rate() < input_stream_info.get_sample_rate();
esp_audio_libs::resampler::ResamplerConfiguration resample_config = {
.source_sample_rate = static_cast<float>(input_stream_info.get_sample_rate()),
.target_sample_rate = static_cast<float>(output_stream_info.get_sample_rate()),
.source_bits_per_sample = input_stream_info.get_bits_per_sample(),
.target_bits_per_sample = output_stream_info.get_bits_per_sample(),
.channels = input_stream_info_.get_channels(),
.use_pre_or_post_filter = use_pre_filter,
.subsample_interpolate = false, // Doubles the CPU load. Using more filters is a better alternative
.number_of_taps = number_of_taps,
.number_of_filters = number_of_filters,
};
if (!this->resampler_->initialize(resample_config)) {
// Failed to allocate the resampler's internal buffers
return ESP_ERR_NO_MEM;
}
}
return ESP_OK;
}
AudioResamplerState AudioResampler::resample(bool stop_gracefully, int32_t *ms_differential) {
if (stop_gracefully) {
if (!this->input_transfer_buffer_->has_buffered_data() && (this->output_transfer_buffer_->available() == 0)) {
return AudioResamplerState::FINISHED;
}
}
if (!this->pause_output_) {
// Move audio data to the sink
this->output_transfer_buffer_->transfer_data_to_sink(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
} else {
// If paused, block to avoid wasting CPU resources
delay(READ_WRITE_TIMEOUT_MS);
}
this->input_transfer_buffer_->transfer_data_from_source(pdMS_TO_TICKS(READ_WRITE_TIMEOUT_MS));
if (this->input_transfer_buffer_->available() == 0) {
// No samples available to process
return AudioResamplerState::RESAMPLING;
}
const size_t bytes_free = this->output_transfer_buffer_->free();
const uint32_t frames_free = this->output_stream_info_.bytes_to_frames(bytes_free);
const size_t bytes_available = this->input_transfer_buffer_->available();
const uint32_t frames_available = this->input_stream_info_.bytes_to_frames(bytes_available);
if ((this->input_stream_info_.get_sample_rate() != this->output_stream_info_.get_sample_rate()) ||
(this->input_stream_info_.get_bits_per_sample() != this->output_stream_info_.get_bits_per_sample())) {
esp_audio_libs::resampler::ResamplerResults results =
this->resampler_->resample(this->input_transfer_buffer_->get_buffer_start(),
this->output_transfer_buffer_->get_buffer_end(), frames_available, frames_free, -3);
this->input_transfer_buffer_->decrease_buffer_length(this->input_stream_info_.frames_to_bytes(results.frames_used));
this->output_transfer_buffer_->increase_buffer_length(
this->output_stream_info_.frames_to_bytes(results.frames_generated));
// Resampling causes slight differences in the durations used versus generated. Computes the difference in
// milliseconds. The callback function passing the played audio duration uses the difference to convert from output
// duration to input duration.
this->accumulated_frames_used_ += results.frames_used;
this->accumulated_frames_generated_ += results.frames_generated;
const int32_t used_ms =
this->input_stream_info_.frames_to_milliseconds_with_remainder(&this->accumulated_frames_used_);
const int32_t generated_ms =
this->output_stream_info_.frames_to_milliseconds_with_remainder(&this->accumulated_frames_generated_);
*ms_differential = used_ms - generated_ms;
} else {
// No resampling required, copy samples directly to the output transfer buffer
*ms_differential = 0;
const size_t bytes_to_transfer = std::min(this->output_stream_info_.frames_to_bytes(frames_free),
this->input_stream_info_.frames_to_bytes(frames_available));
std::memcpy((void *) this->output_transfer_buffer_->get_buffer_end(),
(void *) this->input_transfer_buffer_->get_buffer_start(), bytes_to_transfer);
this->input_transfer_buffer_->decrease_buffer_length(bytes_to_transfer);
this->output_transfer_buffer_->increase_buffer_length(bytes_to_transfer);
}
return AudioResamplerState::RESAMPLING;
}
} // namespace audio
} // namespace esphome
#endif


@@ -0,0 +1,101 @@
#pragma once
#ifdef USE_ESP32
#include "audio.h"
#include "audio_transfer_buffer.h"
#include "esphome/core/defines.h"
#include "esphome/core/ring_buffer.h"
#ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h"
#endif
#include "esp_err.h"
#include <resampler.h> // esp-audio-libs
namespace esphome {
namespace audio {
enum class AudioResamplerState : uint8_t {
RESAMPLING, // More data is available to resample
FINISHED, // All file data has been resampled and transferred
FAILED, // Unused state included for consistency among Audio classes
};
class AudioResampler {
/*
* @brief Class that facilitates resampling audio.
* The audio data is read from a ring buffer source, resampled, and sent to an audio sink (ring buffer or speaker
* component). Also supports converting bits per sample.
*/
public:
/// @brief Allocates the input and output transfer buffers
/// @param input_buffer_size Size of the input transfer buffer in bytes.
/// @param output_buffer_size Size of the output transfer buffer in bytes.
AudioResampler(size_t input_buffer_size, size_t output_buffer_size);
/// @brief Adds a source ring buffer for audio data. Takes ownership of the ring buffer in a shared_ptr.
/// @param input_ring_buffer weak_ptr of a shared_ptr of the source ring buffer to transfer ownership
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
esp_err_t add_source(std::weak_ptr<RingBuffer> &input_ring_buffer);
/// @brief Adds a sink ring buffer for resampled audio. Takes ownership of the ring buffer in a shared_ptr.
/// @param output_ring_buffer weak_ptr of a shared_ptr of the sink ring buffer to transfer ownership
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
esp_err_t add_sink(std::weak_ptr<RingBuffer> &output_ring_buffer);
#ifdef USE_SPEAKER
/// @brief Adds a sink speaker for resampled audio.
/// @param speaker pointer to speaker component
/// @return ESP_OK if successful, ESP_ERR_NO_MEM if the transfer buffer wasn't allocated
esp_err_t add_sink(speaker::Speaker *speaker);
#endif
/// @brief Sets up the class to resample.
/// @param input_stream_info The incoming sample rate, bits per sample, and number of channels
/// @param output_stream_info The desired outgoing sample rate, bits per sample, and number of channels
/// @param number_of_taps Number of taps per FIR filter
/// @param number_of_filters Number of FIR filters
/// @return ESP_OK if it is able to convert the incoming stream,
/// ESP_ERR_NO_MEM if the transfer buffers failed to allocate,
/// ESP_ERR_NOT_SUPPORTED if the stream can't be converted.
esp_err_t start(AudioStreamInfo &input_stream_info, AudioStreamInfo &output_stream_info, uint16_t number_of_taps,
uint16_t number_of_filters);
/// @brief Resamples audio from the ring buffer source and writes to the sink.
/// @param stop_gracefully If true, it indicates the file decoder is finished. The resampler will resample all the
/// remaining audio and then finish.
/// @param ms_differential Pointer to a (int32_t) variable that will store the difference, in milliseconds, between
/// the duration of input audio used and the duration of output audio generated.
/// @return AudioResamplerState
AudioResamplerState resample(bool stop_gracefully, int32_t *ms_differential);
/// @brief Pauses sending resampled audio to the sink. If paused, it will continue to process internal buffers.
/// @param pause_state If true, audio data is not sent to the sink.
void set_pause_output_state(bool pause_state) { this->pause_output_ = pause_state; }
protected:
std::unique_ptr<AudioSourceTransferBuffer> input_transfer_buffer_;
std::unique_ptr<AudioSinkTransferBuffer> output_transfer_buffer_;
size_t input_buffer_size_;
size_t output_buffer_size_;
uint32_t accumulated_frames_used_{0};
uint32_t accumulated_frames_generated_{0};
bool pause_output_{false};
AudioStreamInfo input_stream_info_;
AudioStreamInfo output_stream_info_;
std::unique_ptr<esp_audio_libs::resampler::Resampler> resampler_;
};
} // namespace audio
} // namespace esphome
#endif
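For orientation, here is a minimal usage sketch of the AudioResampler API declared above. This is not code from this commit: the ring buffer and transfer buffer sizes, the 16-tap/2-filter parameters, and the variable names are illustrative assumptions, and the AudioStreamInfo constructor order (bits per sample, channels, sample rate) follows its use elsewhere in this changeset.
// Sketch only: resample 44.1 kHz stereo to 48 kHz stereo between two ring buffers.
std::shared_ptr<RingBuffer> input_rb = RingBuffer::create(8192);
std::shared_ptr<RingBuffer> output_rb = RingBuffer::create(8192);
std::weak_ptr<RingBuffer> input_weak = input_rb;
std::weak_ptr<RingBuffer> output_weak = output_rb;

audio::AudioResampler resampler(4096, 4096);  // input/output transfer buffer sizes in bytes
resampler.add_source(input_weak);
resampler.add_sink(output_weak);

audio::AudioStreamInfo input_info(16, 2, 44100);
audio::AudioStreamInfo output_info(16, 2, 48000);
if (resampler.start(input_info, output_info, 16, 2) == ESP_OK) {
  int32_t ms_differential = 0;
  // Call repeatedly (e.g. from a task loop); pass true once the upstream source has finished.
  while (resampler.resample(false, &ms_differential) == audio::AudioResamplerState::RESAMPLING) {
    // ms_differential reports the input-vs-output duration drift caused by resampling
  }
}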

View File

@ -0,0 +1,165 @@
#include "audio_transfer_buffer.h"
#ifdef USE_ESP32
#include "esphome/core/helpers.h"
namespace esphome {
namespace audio {
AudioTransferBuffer::~AudioTransferBuffer() { this->deallocate_buffer_(); };
std::unique_ptr<AudioSinkTransferBuffer> AudioSinkTransferBuffer::create(size_t buffer_size) {
std::unique_ptr<AudioSinkTransferBuffer> sink_buffer = make_unique<AudioSinkTransferBuffer>();
if (!sink_buffer->allocate_buffer_(buffer_size)) {
return nullptr;
}
return sink_buffer;
}
std::unique_ptr<AudioSourceTransferBuffer> AudioSourceTransferBuffer::create(size_t buffer_size) {
std::unique_ptr<AudioSourceTransferBuffer> source_buffer = make_unique<AudioSourceTransferBuffer>();
if (!source_buffer->allocate_buffer_(buffer_size)) {
return nullptr;
}
return source_buffer;
}
size_t AudioTransferBuffer::free() const {
if (this->buffer_size_ == 0) {
return 0;
}
return this->buffer_size_ - (this->buffer_length_ + (this->data_start_ - this->buffer_));
}
void AudioTransferBuffer::decrease_buffer_length(size_t bytes) {
this->buffer_length_ -= bytes;
this->data_start_ += bytes;
}
void AudioTransferBuffer::increase_buffer_length(size_t bytes) { this->buffer_length_ += bytes; }
void AudioTransferBuffer::clear_buffered_data() {
this->buffer_length_ = 0;
if (this->ring_buffer_.use_count() > 0) {
this->ring_buffer_->reset();
}
}
void AudioSinkTransferBuffer::clear_buffered_data() {
this->buffer_length_ = 0;
if (this->ring_buffer_.use_count() > 0) {
this->ring_buffer_->reset();
}
#ifdef USE_SPEAKER
if (this->speaker_ != nullptr) {
this->speaker_->stop();
}
#endif
}
bool AudioTransferBuffer::has_buffered_data() const {
if (this->ring_buffer_.use_count() > 0) {
return ((this->ring_buffer_->available() > 0) || (this->available() > 0));
}
return (this->available() > 0);
}
bool AudioTransferBuffer::reallocate(size_t new_buffer_size) {
if (this->buffer_length_ > 0) {
// Already has data in the buffer, fail
return false;
}
this->deallocate_buffer_();
return this->allocate_buffer_(new_buffer_size);
}
bool AudioTransferBuffer::allocate_buffer_(size_t buffer_size) {
this->buffer_size_ = buffer_size;
RAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
this->buffer_ = allocator.allocate(this->buffer_size_);
if (this->buffer_ == nullptr) {
return false;
}
this->data_start_ = this->buffer_;
this->buffer_length_ = 0;
return true;
}
void AudioTransferBuffer::deallocate_buffer_() {
if (this->buffer_ != nullptr) {
RAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);
allocator.deallocate(this->buffer_, this->buffer_size_);
this->buffer_ = nullptr;
this->data_start_ = nullptr;
}
this->buffer_size_ = 0;
this->buffer_length_ = 0;
}
size_t AudioSourceTransferBuffer::transfer_data_from_source(TickType_t ticks_to_wait) {
// Shift data in buffer to start
if (this->buffer_length_ > 0) {
memmove(this->buffer_, this->data_start_, this->buffer_length_);
}
this->data_start_ = this->buffer_;
size_t bytes_to_read = this->free();
size_t bytes_read = 0;
if (bytes_to_read > 0) {
if (this->ring_buffer_.use_count() > 0) {
bytes_read = this->ring_buffer_->read((void *) this->get_buffer_end(), bytes_to_read, ticks_to_wait);
}
this->increase_buffer_length(bytes_read);
}
return bytes_read;
}
size_t AudioSinkTransferBuffer::transfer_data_to_sink(TickType_t ticks_to_wait) {
size_t bytes_written = 0;
if (this->available()) {
#ifdef USE_SPEAKER
if (this->speaker_ != nullptr) {
bytes_written = this->speaker_->play(this->data_start_, this->available(), ticks_to_wait);
} else
#endif
if (this->ring_buffer_.use_count() > 0) {
bytes_written =
this->ring_buffer_->write_without_replacement((void *) this->data_start_, this->available(), ticks_to_wait);
}
this->decrease_buffer_length(bytes_written);
// Shift unwritten data to the start of the buffer
memmove(this->buffer_, this->data_start_, this->buffer_length_);
this->data_start_ = this->buffer_;
}
return bytes_written;
}
bool AudioSinkTransferBuffer::has_buffered_data() const {
#ifdef USE_SPEAKER
if (this->speaker_ != nullptr) {
return (this->speaker_->has_buffered_data() || (this->available() > 0));
}
#endif
if (this->ring_buffer_.use_count() > 0) {
return ((this->ring_buffer_->available() > 0) || (this->available() > 0));
}
return (this->available() > 0);
}
} // namespace audio
} // namespace esphome
#endif

View File

@ -0,0 +1,139 @@
#pragma once
#ifdef USE_ESP32
#include "esphome/core/defines.h"
#include "esphome/core/ring_buffer.h"
#ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h"
#endif
#include "esp_err.h"
#include <freertos/FreeRTOS.h>
namespace esphome {
namespace audio {
class AudioTransferBuffer {
/*
* @brief Class that facilitates transferring data between a buffer and an audio source or sink.
* The transfer buffer is a typical C array that temporarily holds data for processing in other audio components.
* Both sink and source transfer buffers can use a ring buffer as the sink/source.
* - The ring buffer is stored in a shared_ptr, so destroying the transfer buffer object will release ownership.
*/
public:
/// @brief Destructor that deallocates the transfer buffer
~AudioTransferBuffer();
/// @brief Returns a pointer to the start of the transfer buffer where available() bytes of existing data can be read
uint8_t *get_buffer_start() const { return this->data_start_; }
/// @brief Returns a pointer to the end of the transfer buffer where free() bytes of new data can be written
uint8_t *get_buffer_end() const { return this->data_start_ + this->buffer_length_; }
/// @brief Updates the internal state of the transfer buffer. This should be called after reading data
/// @param bytes The number of bytes consumed/read
void decrease_buffer_length(size_t bytes);
/// @brief Updates the internal state of the transfer buffer. This should be called after writing data
/// @param bytes The number of bytes written
void increase_buffer_length(size_t bytes);
/// @brief Returns the transfer buffer's currently available bytes to read
size_t available() const { return this->buffer_length_; }
/// @brief Returns the transfer buffer's allocated bytes
size_t capacity() const { return this->buffer_size_; }
/// @brief Returns the transfer buffer's currently free bytes available to write
size_t free() const;
/// @brief Clears data in the transfer buffer and, if possible, the source/sink.
virtual void clear_buffered_data();
/// @brief Tests if there is any data in the transfer buffer or the source/sink.
/// @return True if there is data, false otherwise.
virtual bool has_buffered_data() const;
/// @brief Reallocates the transfer buffer to a new size. Fails if the buffer currently holds data.
/// @param new_buffer_size Size of the new transfer buffer in bytes.
/// @return True if successful, false otherwise.
bool reallocate(size_t new_buffer_size);
protected:
/// @brief Allocates the transfer buffer in external memory, if available.
/// @return True if successful, false otherwise.
bool allocate_buffer_(size_t buffer_size);
/// @brief Deallocates the buffer and resets the class variables.
void deallocate_buffer_();
// A possible source or sink for the transfer buffer
std::shared_ptr<RingBuffer> ring_buffer_;
uint8_t *buffer_{nullptr};
uint8_t *data_start_{nullptr};
size_t buffer_size_{0};
size_t buffer_length_{0};
};
class AudioSinkTransferBuffer : public AudioTransferBuffer {
/*
* @brief A class that implements a transfer buffer for audio sinks.
* Supports writing processed data in the transfer buffer to a ring buffer or a speaker component.
*/
public:
/// @brief Creates a new sink transfer buffer.
/// @param buffer_size Size of the transfer buffer in bytes.
/// @return unique_ptr if successfully allocated, nullptr otherwise
static std::unique_ptr<AudioSinkTransferBuffer> create(size_t buffer_size);
/// @brief Writes any available data in the transfer buffer to the sink.
/// @param ticks_to_wait FreeRTOS ticks to block while waiting for the sink to have enough space
/// @return Number of bytes written
size_t transfer_data_to_sink(TickType_t ticks_to_wait);
/// @brief Adds a ring buffer as the transfer buffer's sink.
/// @param ring_buffer weak_ptr to the allocated ring buffer
void set_sink(const std::weak_ptr<RingBuffer> &ring_buffer) { this->ring_buffer_ = ring_buffer.lock(); }
#ifdef USE_SPEAKER
/// @brief Adds a speaker as the transfer buffer's sink.
/// @param speaker Pointer to the speaker component
void set_sink(speaker::Speaker *speaker) { this->speaker_ = speaker; }
#endif
void clear_buffered_data() override;
bool has_buffered_data() const override;
protected:
#ifdef USE_SPEAKER
speaker::Speaker *speaker_{nullptr};
#endif
};
class AudioSourceTransferBuffer : public AudioTransferBuffer {
/*
* @brief A class that implements a transfer buffer for audio sources.
* Supports reading audio data from a ring buffer into the transfer buffer for processing.
*/
public:
/// @brief Creates a new source transfer buffer.
/// @param buffer_size Size of the transfer buffer in bytes.
/// @return unique_ptr if successfully allocated, nullptr otherwise
static std::unique_ptr<AudioSourceTransferBuffer> create(size_t buffer_size);
/// @brief Reads any available data from the source into the transfer buffer.
/// @param ticks_to_wait FreeRTOS ticks to block while waiting for the source to have enough data
/// @return Number of bytes read
size_t transfer_data_from_source(TickType_t ticks_to_wait);
/// @brief Adds a ring buffer as the transfer buffer's source.
/// @param ring_buffer weak_ptr to the allocated ring buffer
void set_source(const std::weak_ptr<RingBuffer> &ring_buffer) { this->ring_buffer_ = ring_buffer.lock(); };
};
} // namespace audio
} // namespace esphome
#endif
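As a rough sketch of the intended data flow through the transfer buffers declared above (not code from this commit; the buffer sizes and tick counts are arbitrary, and the pass-through copy stands in for real decoding, resampling, or mixing work):
// Sketch only: move audio from a source ring buffer, process it, and hand it to a sink ring buffer.
// Requires <algorithm> and <cstring> in addition to the headers above.
std::shared_ptr<RingBuffer> source_rb = RingBuffer::create(4096);
std::shared_ptr<RingBuffer> sink_rb = RingBuffer::create(4096);

auto source_buffer = audio::AudioSourceTransferBuffer::create(4096);
auto sink_buffer = audio::AudioSinkTransferBuffer::create(4096);
if ((source_buffer != nullptr) && (sink_buffer != nullptr)) {
  source_buffer->set_source(source_rb);
  sink_buffer->set_sink(sink_rb);

  size_t bytes_read = source_buffer->transfer_data_from_source(pdMS_TO_TICKS(10));
  // Pass-through example: copy the newly read data straight into the sink transfer buffer.
  size_t bytes_to_copy = std::min(bytes_read, sink_buffer->free());
  std::memcpy(sink_buffer->get_buffer_end(), source_buffer->get_buffer_start(), bytes_to_copy);
  source_buffer->decrease_buffer_length(bytes_to_copy);
  sink_buffer->increase_buffer_length(bytes_to_copy);
  sink_buffer->transfer_data_to_sink(pdMS_TO_TICKS(10));
}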

View File

@ -57,6 +57,8 @@ class CH422GGPIOPin : public GPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_flags(gpio::Flags flags); void set_flags(gpio::Flags flags);
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
CH422GComponent *parent_{}; CH422GComponent *parent_{};
uint8_t pin_{}; uint8_t pin_{};

View File

@ -13,6 +13,7 @@ class ESP32InternalGPIOPin : public InternalGPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_drive_strength(gpio_drive_cap_t drive_strength) { drive_strength_ = drive_strength; } void set_drive_strength(gpio_drive_cap_t drive_strength) { drive_strength_ = drive_strength; }
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
void setup() override; void setup() override;
void pin_mode(gpio::Flags flags) override; void pin_mode(gpio::Flags flags) override;
bool digital_read() override; bool digital_read() override;
@ -21,6 +22,7 @@ class ESP32InternalGPIOPin : public InternalGPIOPin {
void detach_interrupt() const override; void detach_interrupt() const override;
ISRInternalGPIOPin to_isr() const override; ISRInternalGPIOPin to_isr() const override;
uint8_t get_pin() const override { return (uint8_t) pin_; } uint8_t get_pin() const override { return (uint8_t) pin_; }
gpio::Flags get_flags() const override { return flags_; }
bool is_inverted() const override { return inverted_; } bool is_inverted() const override { return inverted_; }
protected: protected:

View File

@ -22,6 +22,7 @@ class ESP8266GPIOPin : public InternalGPIOPin {
void detach_interrupt() const override; void detach_interrupt() const override;
ISRInternalGPIOPin to_isr() const override; ISRInternalGPIOPin to_isr() const override;
uint8_t get_pin() const override { return pin_; } uint8_t get_pin() const override { return pin_; }
gpio::Flags get_flags() const override { return flags_; }
bool is_inverted() const override { return inverted_; } bool is_inverted() const override { return inverted_; }
protected: protected:

View File

@ -21,6 +21,7 @@ class HostGPIOPin : public InternalGPIOPin {
void detach_interrupt() const override; void detach_interrupt() const override;
ISRInternalGPIOPin to_isr() const override; ISRInternalGPIOPin to_isr() const override;
uint8_t get_pin() const override { return pin_; } uint8_t get_pin() const override { return pin_; }
gpio::Flags get_flags() const override { return flags_; }
bool is_inverted() const override { return inverted_; } bool is_inverted() const override { return inverted_; }
protected: protected:

View File

@ -39,6 +39,10 @@ void IDFI2CBus::setup() {
conf.scl_io_num = scl_pin_; conf.scl_io_num = scl_pin_;
conf.scl_pullup_en = scl_pullup_enabled_; conf.scl_pullup_en = scl_pullup_enabled_;
conf.master.clk_speed = frequency_; conf.master.clk_speed = frequency_;
#ifdef USE_ESP32_VARIANT_ESP32S2
// workaround for https://github.com/esphome/issues/issues/6718
conf.clk_flags = I2C_SCLK_SRC_FLAG_AWARE_DFS;
#endif
esp_err_t err = i2c_param_config(port_, &conf); esp_err_t err = i2c_param_config(port_, &conf);
if (err != ESP_OK) { if (err != ESP_OK) {
ESP_LOGW(TAG, "i2c_param_config failed: %s", esp_err_to_name(err)); ESP_LOGW(TAG, "i2c_param_config failed: %s", esp_err_to_name(err));

View File

@ -1,13 +1,25 @@
from esphome import pins from esphome import pins
import esphome.codegen as cg import esphome.codegen as cg
from esphome.components import esp32, speaker from esphome.components import audio, esp32, speaker
import esphome.config_validation as cv import esphome.config_validation as cv
from esphome.const import CONF_CHANNEL, CONF_ID, CONF_MODE, CONF_TIMEOUT from esphome.const import (
CONF_BITS_PER_SAMPLE,
CONF_BUFFER_DURATION,
CONF_CHANNEL,
CONF_ID,
CONF_MODE,
CONF_NEVER,
CONF_NUM_CHANNELS,
CONF_SAMPLE_RATE,
CONF_TIMEOUT,
)
from .. import ( from .. import (
CONF_I2S_DOUT_PIN, CONF_I2S_DOUT_PIN,
CONF_I2S_MODE,
CONF_LEFT, CONF_LEFT,
CONF_MONO, CONF_MONO,
CONF_PRIMARY,
CONF_RIGHT, CONF_RIGHT,
CONF_STEREO, CONF_STEREO,
I2SAudioOut, I2SAudioOut,
@ -24,10 +36,8 @@ I2SAudioSpeaker = i2s_audio_ns.class_(
"I2SAudioSpeaker", cg.Component, speaker.Speaker, I2SAudioOut "I2SAudioSpeaker", cg.Component, speaker.Speaker, I2SAudioOut
) )
CONF_BUFFER_DURATION = "buffer_duration"
CONF_DAC_TYPE = "dac_type" CONF_DAC_TYPE = "dac_type"
CONF_I2S_COMM_FMT = "i2s_comm_fmt" CONF_I2S_COMM_FMT = "i2s_comm_fmt"
CONF_NEVER = "never"
i2s_dac_mode_t = cg.global_ns.enum("i2s_dac_mode_t") i2s_dac_mode_t = cg.global_ns.enum("i2s_dac_mode_t")
INTERNAL_DAC_OPTIONS = { INTERNAL_DAC_OPTIONS = {
@ -53,7 +63,41 @@ I2C_COMM_FMT_OPTIONS = {
NO_INTERNAL_DAC_VARIANTS = [esp32.const.VARIANT_ESP32S2] NO_INTERNAL_DAC_VARIANTS = [esp32.const.VARIANT_ESP32S2]
def validate_esp32_variant(config): def _set_num_channels_from_config(config):
if config[CONF_CHANNEL] in (CONF_MONO, CONF_LEFT, CONF_RIGHT):
config[CONF_NUM_CHANNELS] = 1
else:
config[CONF_NUM_CHANNELS] = 2
return config
def _set_stream_limits(config):
if config[CONF_I2S_MODE] == CONF_PRIMARY:
# Primary mode has modifiable stream settings
audio.set_stream_limits(
min_bits_per_sample=8,
max_bits_per_sample=32,
min_channels=1,
max_channels=2,
min_sample_rate=16000,
max_sample_rate=48000,
)(config)
else:
# Secondary mode has unmodifiable max bits per sample and min/max sample rates
audio.set_stream_limits(
min_bits_per_sample=8,
max_bits_per_sample=config.get(CONF_BITS_PER_SAMPLE),
min_channels=1,
max_channels=2,
min_sample_rate=config.get(CONF_SAMPLE_RATE),
max_sample_rate=config.get(CONF_SAMPLE_RATE),
)(config)
return config
def _validate_esp32_variant(config):
if config[CONF_DAC_TYPE] != "internal": if config[CONF_DAC_TYPE] != "internal":
return config return config
variant = esp32.get_esp32_variant() variant = esp32.get_esp32_variant()
@ -85,6 +129,7 @@ BASE_SCHEMA = (
.extend(cv.COMPONENT_SCHEMA) .extend(cv.COMPONENT_SCHEMA)
) )
CONFIG_SCHEMA = cv.All( CONFIG_SCHEMA = cv.All(
cv.typed_schema( cv.typed_schema(
{ {
@ -106,7 +151,9 @@ CONFIG_SCHEMA = cv.All(
}, },
key=CONF_DAC_TYPE, key=CONF_DAC_TYPE,
), ),
validate_esp32_variant, _validate_esp32_variant,
_set_num_channels_from_config,
_set_stream_limits,
) )

View File

@ -148,9 +148,11 @@ void I2SAudioSpeaker::loop() {
this->status_set_error("Failed to adjust I2S bus to match the incoming audio"); this->status_set_error("Failed to adjust I2S bus to match the incoming audio");
ESP_LOGE(TAG, ESP_LOGE(TAG,
"Incompatible audio format: sample rate = %" PRIu32 ", channels = %" PRIu8 ", bits per sample = %" PRIu8, "Incompatible audio format: sample rate = %" PRIu32 ", channels = %" PRIu8 ", bits per sample = %" PRIu8,
this->audio_stream_info_.sample_rate, this->audio_stream_info_.channels, this->audio_stream_info_.get_sample_rate(), this->audio_stream_info_.get_channels(),
this->audio_stream_info_.bits_per_sample); this->audio_stream_info_.get_bits_per_sample());
} }
xEventGroupClearBits(this->event_group_, ALL_ERR_ESP_BITS);
} }
void I2SAudioSpeaker::set_volume(float volume) { void I2SAudioSpeaker::set_volume(float volume) {
@ -201,6 +203,12 @@ size_t I2SAudioSpeaker::play(const uint8_t *data, size_t length, TickType_t tick
this->start(); this->start();
} }
if ((this->state_ != speaker::STATE_RUNNING) || (this->audio_ring_buffer_.use_count() == 1)) {
// Unable to write data to a running speaker, so delay the max amount of time so it can get ready
vTaskDelay(ticks_to_wait);
ticks_to_wait = 0;
}
size_t bytes_written = 0; size_t bytes_written = 0;
if ((this->state_ == speaker::STATE_RUNNING) && (this->audio_ring_buffer_.use_count() == 1)) { if ((this->state_ == speaker::STATE_RUNNING) && (this->audio_ring_buffer_.use_count() == 1)) {
// Only one owner of the ring buffer (the speaker task), so the ring buffer is allocated and no other components are // Only one owner of the ring buffer (the speaker task), so the ring buffer is allocated and no other components are
@ -223,6 +231,8 @@ bool I2SAudioSpeaker::has_buffered_data() const {
void I2SAudioSpeaker::speaker_task(void *params) { void I2SAudioSpeaker::speaker_task(void *params) {
I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params; I2SAudioSpeaker *this_speaker = (I2SAudioSpeaker *) params;
this_speaker->task_created_ = true;
uint32_t event_group_bits = uint32_t event_group_bits =
xEventGroupWaitBits(this_speaker->event_group_, xEventGroupWaitBits(this_speaker->event_group_,
SpeakerEventGroupBits::COMMAND_START | SpeakerEventGroupBits::COMMAND_STOP | SpeakerEventGroupBits::COMMAND_START | SpeakerEventGroupBits::COMMAND_STOP |
@ -240,19 +250,20 @@ void I2SAudioSpeaker::speaker_task(void *params) {
audio::AudioStreamInfo audio_stream_info = this_speaker->audio_stream_info_; audio::AudioStreamInfo audio_stream_info = this_speaker->audio_stream_info_;
const uint32_t bytes_per_ms = const uint32_t dma_buffers_duration_ms = DMA_BUFFER_DURATION_MS * DMA_BUFFERS_COUNT;
audio_stream_info.channels * audio_stream_info.get_bytes_per_sample() * audio_stream_info.sample_rate / 1000; // Ensure ring buffer duration is at least the duration of all DMA buffers
const uint32_t ring_buffer_duration = std::max(dma_buffers_duration_ms, this_speaker->buffer_duration_ms_);
const size_t dma_buffers_size = DMA_BUFFERS_COUNT * DMA_BUFFER_DURATION_MS * bytes_per_ms; // The DMA buffers may have more bits per sample, so calculate buffer sizes based on the input audio stream info
const size_t data_buffer_size = audio_stream_info.ms_to_bytes(dma_buffers_duration_ms);
const size_t ring_buffer_size = audio_stream_info.ms_to_bytes(ring_buffer_duration);
// Ensure ring buffer is at least as large as the total size of the DMA buffers const size_t single_dma_buffer_input_size = data_buffer_size / DMA_BUFFERS_COUNT;
const size_t ring_buffer_size =
std::max((uint32_t) dma_buffers_size, this_speaker->buffer_duration_ms_ * bytes_per_ms);
if (this_speaker->send_esp_err_to_event_group_(this_speaker->allocate_buffers_(dma_buffers_size, ring_buffer_size))) { if (this_speaker->send_esp_err_to_event_group_(this_speaker->allocate_buffers_(data_buffer_size, ring_buffer_size))) {
// Failed to allocate buffers // Failed to allocate buffers
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM); xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_NO_MEM);
this_speaker->delete_task_(dma_buffers_size); this_speaker->delete_task_(data_buffer_size);
} }
if (!this_speaker->send_esp_err_to_event_group_(this_speaker->start_i2s_driver_(audio_stream_info))) { if (!this_speaker->send_esp_err_to_event_group_(this_speaker->start_i2s_driver_(audio_stream_info))) {
@ -262,20 +273,25 @@ void I2SAudioSpeaker::speaker_task(void *params) {
uint32_t last_data_received_time = millis(); uint32_t last_data_received_time = millis();
bool tx_dma_underflow = false; bool tx_dma_underflow = false;
while (!this_speaker->timeout_.has_value() || this_speaker->accumulated_frames_written_ = 0;
// Keep looping if paused, there is no timeout configured, or data was received more recently than the configured
// timeout
while (this_speaker->pause_state_ || !this_speaker->timeout_.has_value() ||
(millis() - last_data_received_time) <= this_speaker->timeout_.value()) { (millis() - last_data_received_time) <= this_speaker->timeout_.value()) {
event_group_bits = xEventGroupGetBits(this_speaker->event_group_); event_group_bits = xEventGroupGetBits(this_speaker->event_group_);
if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) { if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP) {
xEventGroupClearBits(this_speaker->event_group_, SpeakerEventGroupBits::COMMAND_STOP);
break; break;
} }
if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) { if (event_group_bits & SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY) {
xEventGroupClearBits(this_speaker->event_group_, SpeakerEventGroupBits::COMMAND_STOP_GRACEFULLY);
stop_gracefully = true; stop_gracefully = true;
} }
if (this_speaker->audio_stream_info_ != audio_stream_info) { if (this_speaker->audio_stream_info_ != audio_stream_info) {
// Audio stream info has changed, stop the speaker task so it will restart with the proper settings. // Audio stream info changed, stop the speaker task so it will restart with the proper settings.
break; break;
} }
@ -286,33 +302,64 @@ void I2SAudioSpeaker::speaker_task(void *params) {
} }
} }
size_t bytes_to_read = dma_buffers_size; if (this_speaker->pause_state_) {
size_t bytes_read = this_speaker->audio_ring_buffer_->read((void *) this_speaker->data_buffer_, bytes_to_read, // Pause state is accessed atomically, so thread safe
// Delay so the task can yields, then skip transferring audio data
delay(TASK_DELAY_MS);
continue;
}
size_t bytes_read = this_speaker->audio_ring_buffer_->read((void *) this_speaker->data_buffer_, data_buffer_size,
pdMS_TO_TICKS(TASK_DELAY_MS)); pdMS_TO_TICKS(TASK_DELAY_MS));
if (bytes_read > 0) { if (bytes_read > 0) {
size_t bytes_written = 0; if ((audio_stream_info.get_bits_per_sample() == 16) && (this_speaker->q15_volume_factor_ < INT16_MAX)) {
if ((audio_stream_info.bits_per_sample == 16) && (this_speaker->q15_volume_factor_ < INT16_MAX)) {
// Scale samples by the volume factor in place // Scale samples by the volume factor in place
q15_multiplication((int16_t *) this_speaker->data_buffer_, (int16_t *) this_speaker->data_buffer_, q15_multiplication((int16_t *) this_speaker->data_buffer_, (int16_t *) this_speaker->data_buffer_,
bytes_read / sizeof(int16_t), this_speaker->q15_volume_factor_); bytes_read / sizeof(int16_t), this_speaker->q15_volume_factor_);
} }
if (audio_stream_info.bits_per_sample == (uint8_t) this_speaker->bits_per_sample_) { // Write the audio data to a single DMA buffer at a time to reduce latency for the audio duration played
i2s_write(this_speaker->parent_->get_port(), this_speaker->data_buffer_, bytes_read, &bytes_written, // callback.
portMAX_DELAY); const uint32_t batches = (bytes_read + single_dma_buffer_input_size - 1) / single_dma_buffer_input_size;
} else if (audio_stream_info.bits_per_sample < (uint8_t) this_speaker->bits_per_sample_) {
i2s_write_expand(this_speaker->parent_->get_port(), this_speaker->data_buffer_, bytes_read,
audio_stream_info.bits_per_sample, this_speaker->bits_per_sample_, &bytes_written,
portMAX_DELAY);
}
if (bytes_written != bytes_read) { for (uint32_t i = 0; i < batches; ++i) {
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE); size_t bytes_written = 0;
size_t bytes_to_write = std::min(single_dma_buffer_input_size, bytes_read);
if (audio_stream_info.get_bits_per_sample() == (uint8_t) this_speaker->bits_per_sample_) {
i2s_write(this_speaker->parent_->get_port(), this_speaker->data_buffer_ + i * single_dma_buffer_input_size,
bytes_to_write, &bytes_written, pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5));
} else if (audio_stream_info.get_bits_per_sample() < (uint8_t) this_speaker->bits_per_sample_) {
i2s_write_expand(this_speaker->parent_->get_port(),
this_speaker->data_buffer_ + i * single_dma_buffer_input_size, bytes_to_write,
audio_stream_info.get_bits_per_sample(), this_speaker->bits_per_sample_, &bytes_written,
pdMS_TO_TICKS(DMA_BUFFER_DURATION_MS * 5));
}
uint32_t write_timestamp = micros();
if (bytes_written != bytes_to_write) {
xEventGroupSetBits(this_speaker->event_group_, SpeakerEventGroupBits::ERR_ESP_INVALID_SIZE);
}
bytes_read -= bytes_written;
this_speaker->accumulated_frames_written_ += audio_stream_info.bytes_to_frames(bytes_written);
const uint32_t new_playback_ms =
audio_stream_info.frames_to_milliseconds_with_remainder(&this_speaker->accumulated_frames_written_);
const uint32_t remainder_us =
audio_stream_info.frames_to_microseconds(this_speaker->accumulated_frames_written_);
uint32_t pending_frames =
audio_stream_info.bytes_to_frames(bytes_read + this_speaker->audio_ring_buffer_->available());
const uint32_t pending_ms = audio_stream_info.frames_to_milliseconds_with_remainder(&pending_frames);
this_speaker->audio_output_callback_(new_playback_ms, remainder_us, pending_ms, write_timestamp);
tx_dma_underflow = false;
last_data_received_time = millis();
} }
tx_dma_underflow = false;
last_data_received_time = millis();
} else { } else {
// No data received // No data received
if (stop_gracefully && tx_dma_underflow) { if (stop_gracefully && tx_dma_underflow) {
@ -328,7 +375,7 @@ void I2SAudioSpeaker::speaker_task(void *params) {
this_speaker->parent_->unlock(); this_speaker->parent_->unlock();
} }
this_speaker->delete_task_(dma_buffers_size); this_speaker->delete_task_(data_buffer_size);
} }
void I2SAudioSpeaker::start() { void I2SAudioSpeaker::start() {
@ -337,16 +384,15 @@ void I2SAudioSpeaker::start() {
if ((this->state_ == speaker::STATE_STARTING) || (this->state_ == speaker::STATE_RUNNING)) if ((this->state_ == speaker::STATE_STARTING) || (this->state_ == speaker::STATE_RUNNING))
return; return;
if (this->speaker_task_handle_ == nullptr) { if (!this->task_created_ && (this->speaker_task_handle_ == nullptr)) {
xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY, xTaskCreate(I2SAudioSpeaker::speaker_task, "speaker_task", TASK_STACK_SIZE, (void *) this, TASK_PRIORITY,
&this->speaker_task_handle_); &this->speaker_task_handle_);
}
if (this->speaker_task_handle_ != nullptr) { if (this->speaker_task_handle_ != nullptr) {
xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START); xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::COMMAND_START);
this->task_created_ = true; } else {
} else { xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START);
xEventGroupSetBits(this->event_group_, SpeakerEventGroupBits::ERR_TASK_FAILED_TO_START); }
} }
} }
@ -416,12 +462,12 @@ esp_err_t I2SAudioSpeaker::allocate_buffers_(size_t data_buffer_size, size_t rin
} }
esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_stream_info) { esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_stream_info) {
if ((this->i2s_mode_ & I2S_MODE_SLAVE) && (this->sample_rate_ != audio_stream_info.sample_rate)) { // NOLINT if ((this->i2s_mode_ & I2S_MODE_SLAVE) && (this->sample_rate_ != audio_stream_info.get_sample_rate())) { // NOLINT
// Can't reconfigure I2S bus, so the sample rate must match the configured value // Can't reconfigure I2S bus, so the sample rate must match the configured value
return ESP_ERR_NOT_SUPPORTED; return ESP_ERR_NOT_SUPPORTED;
} }
if ((i2s_bits_per_sample_t) audio_stream_info.bits_per_sample > this->bits_per_sample_) { if ((i2s_bits_per_sample_t) audio_stream_info.get_bits_per_sample() > this->bits_per_sample_) {
// Currently can't handle the case when the incoming audio has more bits per sample than the configured value // Currently can't handle the case when the incoming audio has more bits per sample than the configured value
return ESP_ERR_NOT_SUPPORTED; return ESP_ERR_NOT_SUPPORTED;
} }
@ -432,21 +478,21 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
i2s_channel_fmt_t channel = this->channel_; i2s_channel_fmt_t channel = this->channel_;
if (audio_stream_info.channels == 1) { if (audio_stream_info.get_channels() == 1) {
if (this->channel_ == I2S_CHANNEL_FMT_ONLY_LEFT) { if (this->channel_ == I2S_CHANNEL_FMT_ONLY_LEFT) {
channel = I2S_CHANNEL_FMT_ONLY_LEFT; channel = I2S_CHANNEL_FMT_ONLY_LEFT;
} else { } else {
channel = I2S_CHANNEL_FMT_ONLY_RIGHT; channel = I2S_CHANNEL_FMT_ONLY_RIGHT;
} }
} else if (audio_stream_info.channels == 2) { } else if (audio_stream_info.get_channels() == 2) {
channel = I2S_CHANNEL_FMT_RIGHT_LEFT; channel = I2S_CHANNEL_FMT_RIGHT_LEFT;
} }
int dma_buffer_length = DMA_BUFFER_DURATION_MS * this->sample_rate_ / 1000; int dma_buffer_length = audio_stream_info.ms_to_frames(DMA_BUFFER_DURATION_MS);
i2s_driver_config_t config = { i2s_driver_config_t config = {
.mode = (i2s_mode_t) (this->i2s_mode_ | I2S_MODE_TX), .mode = (i2s_mode_t) (this->i2s_mode_ | I2S_MODE_TX),
.sample_rate = audio_stream_info.sample_rate, .sample_rate = audio_stream_info.get_sample_rate(),
.bits_per_sample = this->bits_per_sample_, .bits_per_sample = this->bits_per_sample_,
.channel_format = channel, .channel_format = channel,
.communication_format = this->i2s_comm_fmt_, .communication_format = this->i2s_comm_fmt_,
@ -504,7 +550,7 @@ esp_err_t I2SAudioSpeaker::start_i2s_driver_(audio::AudioStreamInfo &audio_strea
} }
void I2SAudioSpeaker::delete_task_(size_t buffer_size) { void I2SAudioSpeaker::delete_task_(size_t buffer_size) {
this->audio_ring_buffer_.reset(); // Releases onwership of the shared_ptr this->audio_ring_buffer_.reset(); // Releases ownership of the shared_ptr
if (this->data_buffer_ != nullptr) { if (this->data_buffer_ != nullptr) {
ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE); ExternalRAMAllocator<uint8_t> allocator(ExternalRAMAllocator<uint8_t>::ALLOW_FAILURE);

View File

@ -40,6 +40,9 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp
void stop() override; void stop() override;
void finish() override; void finish() override;
void set_pause_state(bool pause_state) override { this->pause_state_ = pause_state; }
bool get_pause_state() const override { return this->pause_state_; }
/// @brief Plays the provided audio data. /// @brief Plays the provided audio data.
/// Starts the speaker task, if necessary. Writes the audio data to the ring buffer. /// Starts the speaker task, if necessary. Writes the audio data to the ring buffer.
/// @param data Audio data in the format set by the parent speaker classes ``set_audio_stream_info`` method. /// @param data Audio data in the format set by the parent speaker classes ``set_audio_stream_info`` method.
@ -121,13 +124,18 @@ class I2SAudioSpeaker : public I2SAudioOut, public speaker::Speaker, public Comp
uint8_t dout_pin_; uint8_t dout_pin_;
bool task_created_{false}; bool task_created_{false};
bool pause_state_{false};
int16_t q15_volume_factor_{INT16_MAX}; int16_t q15_volume_factor_{INT16_MAX};
size_t bytes_written_{0};
#if SOC_I2S_SUPPORTS_DAC #if SOC_I2S_SUPPORTS_DAC
i2s_dac_mode_t internal_dac_mode_{I2S_DAC_CHANNEL_DISABLE}; i2s_dac_mode_t internal_dac_mode_{I2S_DAC_CHANNEL_DISABLE};
#endif #endif
i2s_comm_format_t i2s_comm_fmt_; i2s_comm_format_t i2s_comm_fmt_;
uint32_t accumulated_frames_written_{0};
}; };
} // namespace i2s_audio } // namespace i2s_audio

View File

@ -20,6 +20,7 @@ class ArduinoInternalGPIOPin : public InternalGPIOPin {
void detach_interrupt() const override; void detach_interrupt() const override;
ISRInternalGPIOPin to_isr() const override; ISRInternalGPIOPin to_isr() const override;
uint8_t get_pin() const override { return pin_; } uint8_t get_pin() const override { return pin_; }
gpio::Flags get_flags() const override { return flags_; }
bool is_inverted() const override { return inverted_; } bool is_inverted() const override { return inverted_; }
protected: protected:

View File

@ -61,7 +61,14 @@ from .types import (
lv_style_t, lv_style_t,
lvgl_ns, lvgl_ns,
) )
from .widgets import Widget, add_widgets, get_scr_act, set_obj_properties, styles_used from .widgets import (
LvScrActType,
Widget,
add_widgets,
get_scr_act,
set_obj_properties,
styles_used,
)
from .widgets.animimg import animimg_spec from .widgets.animimg import animimg_spec
from .widgets.arc import arc_spec from .widgets.arc import arc_spec
from .widgets.button import button_spec from .widgets.button import button_spec
@ -318,7 +325,7 @@ async def to_code(configs):
config[df.CONF_RESUME_ON_INPUT], config[df.CONF_RESUME_ON_INPUT],
) )
await cg.register_component(lv_component, config) await cg.register_component(lv_component, config)
Widget.create(config[CONF_ID], lv_component, obj_spec, config) Widget.create(config[CONF_ID], lv_component, LvScrActType(), config)
lv_scr_act = get_scr_act(lv_component) lv_scr_act = get_scr_act(lv_component)
async with LvContext(): async with LvContext():
@ -391,7 +398,7 @@ FINAL_VALIDATE_SCHEMA = final_validation
LVGL_SCHEMA = ( LVGL_SCHEMA = (
cv.polling_component_schema("1s") cv.polling_component_schema("1s")
.extend(obj_schema(obj_spec)) .extend(obj_schema(LvScrActType()))
.extend( .extend(
{ {
cv.GenerateID(CONF_ID): cv.declare_id(LvglComponent), cv.GenerateID(CONF_ID): cv.declare_id(LvglComponent),

View File

@ -146,6 +146,8 @@ TYPE_FLEX = "flex"
TYPE_GRID = "grid" TYPE_GRID = "grid"
TYPE_NONE = "none" TYPE_NONE = "none"
DIRECTIONS = LvConstant("LV_DIR_", "LEFT", "RIGHT", "BOTTOM", "TOP")
LV_FONTS = list(f"montserrat_{s}" for s in range(8, 50, 2)) + [ LV_FONTS = list(f"montserrat_{s}" for s in range(8, 50, 2)) + [
"dejavu_16_persian_hebrew", "dejavu_16_persian_hebrew",
"simsun_16_cjk", "simsun_16_cjk",
@ -169,9 +171,13 @@ LV_EVENT_MAP = {
"CANCEL": "CANCEL", "CANCEL": "CANCEL",
"ALL_EVENTS": "ALL", "ALL_EVENTS": "ALL",
"CHANGE": "VALUE_CHANGED", "CHANGE": "VALUE_CHANGED",
"GESTURE": "GESTURE",
} }
LV_EVENT_TRIGGERS = tuple(f"on_{x.lower()}" for x in LV_EVENT_MAP) LV_EVENT_TRIGGERS = tuple(f"on_{x.lower()}" for x in LV_EVENT_MAP)
SWIPE_TRIGGERS = tuple(
f"on_swipe_{x.lower()}" for x in DIRECTIONS.choices + ("up", "down")
)
LV_ANIM = LvConstant( LV_ANIM = LvConstant(
@ -250,7 +256,6 @@ KEYBOARD_MODES = LvConstant(
"NUMBER", "NUMBER",
) )
ROLLER_MODES = LvConstant("LV_ROLLER_MODE_", "NORMAL", "INFINITE") ROLLER_MODES = LvConstant("LV_ROLLER_MODE_", "NORMAL", "INFINITE")
DIRECTIONS = LvConstant("LV_DIR_", "LEFT", "RIGHT", "BOTTOM", "TOP")
TILE_DIRECTIONS = DIRECTIONS.extend("HOR", "VER", "ALL") TILE_DIRECTIONS = DIRECTIONS.extend("HOR", "VER", "ALL")
CHILD_ALIGNMENTS = LvConstant( CHILD_ALIGNMENTS = LvConstant(
"LV_ALIGN_", "LV_ALIGN_",

View File

@ -211,10 +211,9 @@ def part_schema(parts):
def automation_schema(typ: LvType): def automation_schema(typ: LvType):
events = df.LV_EVENT_TRIGGERS + df.SWIPE_TRIGGERS
if typ.has_on_value: if typ.has_on_value:
events = df.LV_EVENT_TRIGGERS + (CONF_ON_VALUE,) events = events + (CONF_ON_VALUE,)
else:
events = df.LV_EVENT_TRIGGERS
args = typ.get_arg_type() if isinstance(typ, LvType) else [] args = typ.get_arg_type() if isinstance(typ, LvType) else []
args.append(lv_event_t_ptr) args.append(lv_event_t_ptr)
return { return {

View File

@ -7,8 +7,10 @@ from .defines import (
CONF_ALIGN_TO, CONF_ALIGN_TO,
CONF_X, CONF_X,
CONF_Y, CONF_Y,
DIRECTIONS,
LV_EVENT_MAP, LV_EVENT_MAP,
LV_EVENT_TRIGGERS, LV_EVENT_TRIGGERS,
SWIPE_TRIGGERS,
literal, literal,
) )
from .lvcode import ( from .lvcode import (
@ -23,7 +25,7 @@ from .lvcode import (
lvgl_static, lvgl_static,
) )
from .types import LV_EVENT from .types import LV_EVENT
from .widgets import widget_map from .widgets import LvScrActType, get_scr_act, widget_map
async def generate_triggers(): async def generate_triggers():
@ -33,6 +35,9 @@ async def generate_triggers():
""" """
for w in widget_map.values(): for w in widget_map.values():
if isinstance(w.type, LvScrActType):
w = get_scr_act(w.var)
if w.config: if w.config:
for event, conf in { for event, conf in {
event: conf event: conf
@ -43,6 +48,24 @@ async def generate_triggers():
w.add_flag("LV_OBJ_FLAG_CLICKABLE") w.add_flag("LV_OBJ_FLAG_CLICKABLE")
event = literal("LV_EVENT_" + LV_EVENT_MAP[event[3:].upper()]) event = literal("LV_EVENT_" + LV_EVENT_MAP[event[3:].upper()])
await add_trigger(conf, w, event) await add_trigger(conf, w, event)
for event, conf in {
event: conf
for event, conf in w.config.items()
if event in SWIPE_TRIGGERS
}.items():
conf = conf[0]
dir = event[9:].upper()
dir = {"UP": "TOP", "DOWN": "BOTTOM"}.get(dir, dir)
dir = DIRECTIONS.mapper(dir)
w.clear_flag("LV_OBJ_FLAG_SCROLLABLE")
selected = literal(
f"lv_indev_get_gesture_dir(lv_indev_get_act()) == {dir}"
)
await add_trigger(
conf, w, literal("LV_EVENT_GESTURE"), is_selected=selected
)
for conf in w.config.get(CONF_ON_VALUE, ()): for conf in w.config.get(CONF_ON_VALUE, ()):
await add_trigger( await add_trigger(
conf, conf,
@ -61,13 +84,14 @@ async def generate_triggers():
lv.obj_align_to(w.obj, target, align, x, y) lv.obj_align_to(w.obj, target, align, x, y)
async def add_trigger(conf, w, *events): async def add_trigger(conf, w, *events, is_selected=None):
is_selected = is_selected or w.is_selected()
tid = conf[CONF_TRIGGER_ID] tid = conf[CONF_TRIGGER_ID]
trigger = cg.new_Pvariable(tid) trigger = cg.new_Pvariable(tid)
args = w.get_args() + [(lv_event_t_ptr, "event")] args = w.get_args() + [(lv_event_t_ptr, "event")]
value = w.get_values() value = w.get_values()
await automation.build_automation(trigger, args, conf) await automation.build_automation(trigger, args, conf)
async with LambdaContext(EVENT_ARG, where=tid) as context: async with LambdaContext(EVENT_ARG, where=tid) as context:
with LvConditional(w.is_selected()): with LvConditional(is_selected):
lv_add(trigger.trigger(*value, literal("event"))) lv_add(trigger.trigger(*value, literal("event")))
lv_add(lvgl_static.add_event_cb(w.obj, await context.get_lambda(), *events)) lv_add(lvgl_static.add_event_cb(w.obj, await context.get_lambda(), *events))

View File

@ -83,6 +83,8 @@ class MAX6956GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
MAX6956 *parent_; MAX6956 *parent_;
uint8_t pin_; uint8_t pin_;

View File

@ -61,6 +61,8 @@ class MCP23016GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
MCP23016 *parent_; MCP23016 *parent_;
uint8_t pin_; uint8_t pin_;

View File

@ -43,6 +43,8 @@ class MCP23XXXGPIOPin : public GPIOPin {
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
void set_interrupt_mode(MCP23XXXInterruptMode interrupt_mode) { interrupt_mode_ = interrupt_mode; } void set_interrupt_mode(MCP23XXXInterruptMode interrupt_mode) { interrupt_mode_ = interrupt_mode; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
MCP23XXXBase *parent_; MCP23XXXBase *parent_;
uint8_t pin_; uint8_t pin_;

View File

View File

@ -0,0 +1,172 @@
from esphome import automation
import esphome.codegen as cg
from esphome.components import audio, esp32, speaker
import esphome.config_validation as cv
from esphome.const import (
CONF_BITS_PER_SAMPLE,
CONF_BUFFER_DURATION,
CONF_DURATION,
CONF_ID,
CONF_NEVER,
CONF_NUM_CHANNELS,
CONF_OUTPUT_SPEAKER,
CONF_SAMPLE_RATE,
CONF_TASK_STACK_IN_PSRAM,
CONF_TIMEOUT,
PLATFORM_ESP32,
)
from esphome.core.entity_helpers import inherit_property_from
import esphome.final_validate as fv
AUTO_LOAD = ["audio"]
CODEOWNERS = ["@kahrendt"]
mixer_speaker_ns = cg.esphome_ns.namespace("mixer_speaker")
MixerSpeaker = mixer_speaker_ns.class_("MixerSpeaker", cg.Component)
SourceSpeaker = mixer_speaker_ns.class_("SourceSpeaker", cg.Component, speaker.Speaker)
CONF_DECIBEL_REDUCTION = "decibel_reduction"
CONF_QUEUE_MODE = "queue_mode"
CONF_SOURCE_SPEAKERS = "source_speakers"
DuckingApplyAction = mixer_speaker_ns.class_(
"DuckingApplyAction", automation.Action, cg.Parented.template(SourceSpeaker)
)
SOURCE_SPEAKER_SCHEMA = speaker.SPEAKER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(SourceSpeaker),
cv.Optional(
CONF_BUFFER_DURATION, default="100ms"
): cv.positive_time_period_milliseconds,
cv.Optional(CONF_TIMEOUT, default="500ms"): cv.Any(
cv.positive_time_period_milliseconds,
cv.one_of(CONF_NEVER, lower=True),
),
cv.Optional(CONF_BITS_PER_SAMPLE, default=16): cv.int_range(16, 16),
}
)
def _set_stream_limits(config):
audio.set_stream_limits(
min_bits_per_sample=16,
max_bits_per_sample=16,
)(config)
return config
def _validate_source_speaker(config):
fconf = fv.full_config.get()
# Get ID for the output speaker and add it to the source speakers config to easily inherit properties
path = fconf.get_path_for_id(config[CONF_ID])[:-3]
path.append(CONF_OUTPUT_SPEAKER)
output_speaker_id = fconf.get_config_for_path(path)
config[CONF_OUTPUT_SPEAKER] = output_speaker_id
inherit_property_from(CONF_NUM_CHANNELS, CONF_OUTPUT_SPEAKER)(config)
inherit_property_from(CONF_SAMPLE_RATE, CONF_OUTPUT_SPEAKER)(config)
audio.final_validate_audio_schema(
"mixer",
audio_device=CONF_OUTPUT_SPEAKER,
bits_per_sample=config.get(CONF_BITS_PER_SAMPLE),
channels=config.get(CONF_NUM_CHANNELS),
sample_rate=config.get(CONF_SAMPLE_RATE),
)(config)
return config
CONFIG_SCHEMA = cv.All(
cv.Schema(
{
cv.GenerateID(): cv.declare_id(MixerSpeaker),
cv.Required(CONF_OUTPUT_SPEAKER): cv.use_id(speaker.Speaker),
cv.Required(CONF_SOURCE_SPEAKERS): cv.All(
cv.ensure_list(SOURCE_SPEAKER_SCHEMA),
cv.Length(min=2, max=8),
[_set_stream_limits],
),
cv.Optional(CONF_NUM_CHANNELS): cv.int_range(min=1, max=2),
cv.Optional(CONF_QUEUE_MODE, default=False): cv.boolean,
cv.SplitDefault(CONF_TASK_STACK_IN_PSRAM, esp32_idf=False): cv.All(
cv.boolean, cv.only_with_esp_idf
),
}
),
cv.only_on([PLATFORM_ESP32]),
)
FINAL_VALIDATE_SCHEMA = cv.All(
cv.Schema(
{
cv.Optional(CONF_SOURCE_SPEAKERS): [_validate_source_speaker],
},
extra=cv.ALLOW_EXTRA,
),
inherit_property_from(CONF_NUM_CHANNELS, CONF_OUTPUT_SPEAKER),
)
async def to_code(config):
var = cg.new_Pvariable(config[CONF_ID])
await cg.register_component(var, config)
spkr = await cg.get_variable(config[CONF_OUTPUT_SPEAKER])
cg.add(var.set_output_channels(config[CONF_NUM_CHANNELS]))
cg.add(var.set_output_speaker(spkr))
cg.add(var.set_queue_mode(config[CONF_QUEUE_MODE]))
if task_stack_in_psram := config.get(CONF_TASK_STACK_IN_PSRAM):
cg.add(var.set_task_stack_in_psram(task_stack_in_psram))
if task_stack_in_psram:
if config[CONF_TASK_STACK_IN_PSRAM]:
esp32.add_idf_sdkconfig_option(
"CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True
)
for speaker_config in config[CONF_SOURCE_SPEAKERS]:
source_speaker = cg.new_Pvariable(speaker_config[CONF_ID])
cg.add(source_speaker.set_buffer_duration(speaker_config[CONF_BUFFER_DURATION]))
if speaker_config[CONF_TIMEOUT] != CONF_NEVER:
cg.add(source_speaker.set_timeout(speaker_config[CONF_TIMEOUT]))
await cg.register_component(source_speaker, speaker_config)
await cg.register_parented(source_speaker, config[CONF_ID])
await speaker.register_speaker(source_speaker, speaker_config)
cg.add(var.add_source_speaker(source_speaker))
@automation.register_action(
"mixer_speaker.apply_ducking",
DuckingApplyAction,
cv.Schema(
{
cv.GenerateID(): cv.use_id(SourceSpeaker),
cv.Required(CONF_DECIBEL_REDUCTION): cv.templatable(
cv.int_range(min=0, max=51)
),
cv.Optional(CONF_DURATION, default="0.0s"): cv.templatable(
cv.positive_time_period_milliseconds
),
}
),
)
async def ducking_set_to_code(config, action_id, template_arg, args):
var = cg.new_Pvariable(action_id, template_arg)
await cg.register_parented(var, config[CONF_ID])
decibel_reduction = await cg.templatable(
config[CONF_DECIBEL_REDUCTION], args, cg.uint8
)
cg.add(var.set_decibel_reduction(decibel_reduction))
duration = await cg.templatable(config[CONF_DURATION], args, cg.uint32)
cg.add(var.set_duration(duration))
return var

View File

@ -0,0 +1,19 @@
#pragma once
#include "mixer_speaker.h"
#ifdef USE_ESP32
namespace esphome {
namespace mixer_speaker {
template<typename... Ts> class DuckingApplyAction : public Action<Ts...>, public Parented<SourceSpeaker> {
TEMPLATABLE_VALUE(uint8_t, decibel_reduction)
TEMPLATABLE_VALUE(uint32_t, duration)
void play(Ts... x) override {
this->parent_->apply_ducking(this->decibel_reduction_.value(x...), this->duration_.value(x...));
}
};
} // namespace mixer_speaker
} // namespace esphome
#endif

View File

@ -0,0 +1,624 @@
#include "mixer_speaker.h"
#ifdef USE_ESP32
#include "esphome/core/hal.h"
#include "esphome/core/helpers.h"
#include "esphome/core/log.h"
#include <algorithm>
#include <cstring>
namespace esphome {
namespace mixer_speaker {
static const UBaseType_t MIXER_TASK_PRIORITY = 10;
static const uint32_t TRANSFER_BUFFER_DURATION_MS = 50;
static const uint32_t TASK_DELAY_MS = 25;
static const size_t TASK_STACK_SIZE = 4096;
static const int16_t MAX_AUDIO_SAMPLE_VALUE = INT16_MAX;
static const int16_t MIN_AUDIO_SAMPLE_VALUE = INT16_MIN;
static const char *const TAG = "speaker_mixer";
// Gives the Q15 fixed point scaling factor to reduce by 0 dB, 1dB, ..., 50 dB
// dB to PCM scaling factor formula: floating_point_scale_factor = 2^(-db/6.014)
// float to Q15 fixed point formula: q15_scale_factor = floating_point_scale_factor * 2^(15)
static const std::vector<int16_t> DECIBEL_REDUCTION_TABLE = {
32767, 29201, 26022, 23189, 20665, 18415, 16410, 14624, 13032, 11613, 10349, 9222, 8218, 7324, 6527, 5816, 5183,
4619, 4116, 3668, 3269, 2913, 2596, 2313, 2061, 1837, 1637, 1459, 1300, 1158, 1032, 920, 820, 731,
651, 580, 517, 461, 411, 366, 326, 291, 259, 231, 206, 183, 163, 146, 130, 116, 103};
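// Illustrative check of the formula above (added for clarity, not part of the original derivation): for a
// 6 dB reduction, 2^(-6 / 6.014) ~ 0.5008 and 0.5008 * 2^15 ~ 16410, which matches DECIBEL_REDUCTION_TABLE[6].
// Index 0 is capped at 32767 (INT16_MAX) so the Q15 scaling factor stays within a signed 16-bit value.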
enum MixerEventGroupBits : uint32_t {
COMMAND_STOP = (1 << 0), // stops the mixer task
STATE_STARTING = (1 << 10),
STATE_RUNNING = (1 << 11),
STATE_STOPPING = (1 << 12),
STATE_STOPPED = (1 << 13),
ERR_ESP_NO_MEM = (1 << 19),
ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
};
void SourceSpeaker::dump_config() {
ESP_LOGCONFIG(TAG, "Mixer Source Speaker");
ESP_LOGCONFIG(TAG, " Buffer Duration: %" PRIu32 " ms", this->buffer_duration_ms_);
if (this->timeout_ms_.has_value()) {
ESP_LOGCONFIG(TAG, " Timeout: %" PRIu32 " ms", this->timeout_ms_.value());
} else {
ESP_LOGCONFIG(TAG, " Timeout: never");
}
}
void SourceSpeaker::setup() {
this->parent_->get_output_speaker()->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
uint32_t personal_playback_ms = std::min(new_playback_ms, this->pending_playback_ms_);
if (personal_playback_ms > 0) {
this->pending_playback_ms_ -= personal_playback_ms;
this->audio_output_callback_(personal_playback_ms, remainder_us, this->pending_playback_ms_, write_timestamp);
}
});
}
void SourceSpeaker::loop() {
switch (this->state_) {
case speaker::STATE_STARTING: {
esp_err_t err = this->start_();
if (err == ESP_OK) {
this->state_ = speaker::STATE_RUNNING;
this->stop_gracefully_ = false;
this->last_seen_data_ms_ = millis();
this->status_clear_error();
} else {
switch (err) {
case ESP_ERR_NO_MEM:
this->status_set_error("Failed to start mixer: not enough memory");
break;
case ESP_ERR_NOT_SUPPORTED:
this->status_set_error("Failed to start mixer: unsupported bits per sample");
break;
case ESP_ERR_INVALID_ARG:
this->status_set_error("Failed to start mixer: audio stream isn't compatible with the other audio stream.");
break;
case ESP_ERR_INVALID_STATE:
this->status_set_error("Failed to start mixer: mixer task failed to start");
break;
default:
this->status_set_error("Failed to start mixer");
break;
}
this->state_ = speaker::STATE_STOPPING;
}
break;
}
case speaker::STATE_RUNNING:
if (!this->transfer_buffer_->has_buffered_data()) {
if ((this->timeout_ms_.has_value() && ((millis() - this->last_seen_data_ms_) > this->timeout_ms_.value())) ||
this->stop_gracefully_) {
this->state_ = speaker::STATE_STOPPING;
}
}
break;
case speaker::STATE_STOPPING:
this->stop_();
this->stop_gracefully_ = false;
this->state_ = speaker::STATE_STOPPED;
break;
case speaker::STATE_STOPPED:
break;
}
}
size_t SourceSpeaker::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
if (this->is_stopped()) {
this->start();
}
size_t bytes_written = 0;
if (this->ring_buffer_.use_count() == 1) {
std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_.lock();
bytes_written = temp_ring_buffer->write_without_replacement(data, length, ticks_to_wait);
if (bytes_written > 0) {
this->last_seen_data_ms_ = millis();
}
}
return bytes_written;
}
void SourceSpeaker::start() { this->state_ = speaker::STATE_STARTING; }
esp_err_t SourceSpeaker::start_() {
const size_t ring_buffer_size = this->audio_stream_info_.ms_to_bytes(this->buffer_duration_ms_);
if (this->transfer_buffer_.use_count() == 0) {
this->transfer_buffer_ =
audio::AudioSourceTransferBuffer::create(this->audio_stream_info_.ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
if (this->transfer_buffer_ == nullptr) {
return ESP_ERR_NO_MEM;
}
std::shared_ptr<RingBuffer> temp_ring_buffer;
if (!this->ring_buffer_.use_count()) {
temp_ring_buffer = RingBuffer::create(ring_buffer_size);
this->ring_buffer_ = temp_ring_buffer;
}
if (!this->ring_buffer_.use_count()) {
return ESP_ERR_NO_MEM;
} else {
this->transfer_buffer_->set_source(temp_ring_buffer);
}
}
return this->parent_->start(this->audio_stream_info_);
}
void SourceSpeaker::stop() {
if (this->state_ != speaker::STATE_STOPPED) {
this->state_ = speaker::STATE_STOPPING;
}
}
void SourceSpeaker::stop_() {
this->transfer_buffer_.reset(); // deallocates the transfer buffer
}
void SourceSpeaker::finish() { this->stop_gracefully_ = true; }
bool SourceSpeaker::has_buffered_data() const {
return ((this->transfer_buffer_.use_count() > 0) && this->transfer_buffer_->has_buffered_data());
}
void SourceSpeaker::set_mute_state(bool mute_state) {
this->mute_state_ = mute_state;
this->parent_->get_output_speaker()->set_mute_state(mute_state);
}
void SourceSpeaker::set_volume(float volume) {
this->volume_ = volume;
this->parent_->get_output_speaker()->set_volume(volume);
}
size_t SourceSpeaker::process_data_from_source(TickType_t ticks_to_wait) {
if (!this->transfer_buffer_.use_count()) {
return 0;
}
// Store current offset, as these samples are already ducked
const size_t current_length = this->transfer_buffer_->available();
size_t bytes_read = this->transfer_buffer_->transfer_data_from_source(ticks_to_wait);
uint32_t samples_to_duck = this->audio_stream_info_.bytes_to_samples(bytes_read);
if (samples_to_duck > 0) {
int16_t *current_buffer = reinterpret_cast<int16_t *>(this->transfer_buffer_->get_buffer_start() + current_length);
duck_samples(current_buffer, samples_to_duck, &this->current_ducking_db_reduction_,
&this->ducking_transition_samples_remaining_, this->samples_per_ducking_step_,
this->db_change_per_ducking_step_);
}
return bytes_read;
}
void SourceSpeaker::apply_ducking(uint8_t decibel_reduction, uint32_t duration) {
if (this->target_ducking_db_reduction_ != decibel_reduction) {
this->current_ducking_db_reduction_ = this->target_ducking_db_reduction_;
this->target_ducking_db_reduction_ = decibel_reduction;
uint8_t total_ducking_steps = 0;
if (this->target_ducking_db_reduction_ > this->current_ducking_db_reduction_) {
// The dB reduction level is increasing (which results in quieter audio)
total_ducking_steps = this->target_ducking_db_reduction_ - this->current_ducking_db_reduction_ - 1;
this->db_change_per_ducking_step_ = 1;
} else {
// The dB reduction level is decreasing (which results in louder audio)
total_ducking_steps = this->current_ducking_db_reduction_ - this->target_ducking_db_reduction_ - 1;
this->db_change_per_ducking_step_ = -1;
}
if ((duration > 0) && (total_ducking_steps > 0)) {
this->ducking_transition_samples_remaining_ = this->audio_stream_info_.ms_to_samples(duration);
this->samples_per_ducking_step_ = this->ducking_transition_samples_remaining_ / total_ducking_steps;
this->ducking_transition_samples_remaining_ =
this->samples_per_ducking_step_ * total_ducking_steps; // Adjust for integer division rounding
this->current_ducking_db_reduction_ += this->db_change_per_ducking_step_;
} else {
this->ducking_transition_samples_remaining_ = 0;
this->current_ducking_db_reduction_ = this->target_ducking_db_reduction_;
}
}
}
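// Worked example for apply_ducking() above (illustrative, assuming a 48 kHz mono stream starting at 0 dB):
// apply_ducking(10, 100) sets current = 0, target = 10, total_ducking_steps = 10 - 0 - 1 = 9, and
// db_change_per_ducking_step_ = +1. ms_to_samples(100) = 4800, so samples_per_ducking_step_ = 4800 / 9 = 533,
// ducking_transition_samples_remaining_ = 533 * 9 = 4797, and current_ducking_db_reduction_ becomes 1.
// duck_samples() below then applies one additional dB of reduction every 533 samples until 10 dB is reached.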
void SourceSpeaker::duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck,
int8_t *current_ducking_db_reduction, uint32_t *ducking_transition_samples_remaining,
uint32_t samples_per_ducking_step, int8_t db_change_per_ducking_step) {
if (*ducking_transition_samples_remaining > 0) {
// Ducking level is still transitioning
// Takes the ceiling of input_samples_to_duck/samples_per_ducking_step
uint32_t ducking_steps_in_batch =
input_samples_to_duck / samples_per_ducking_step + (input_samples_to_duck % samples_per_ducking_step != 0);
for (uint32_t i = 0; i < ducking_steps_in_batch; ++i) {
uint32_t samples_left_in_step = *ducking_transition_samples_remaining % samples_per_ducking_step;
if (samples_left_in_step == 0) {
samples_left_in_step = samples_per_ducking_step;
}
uint32_t samples_to_duck = std::min(input_samples_to_duck, samples_left_in_step);
samples_to_duck = std::min(samples_to_duck, *ducking_transition_samples_remaining);
// Ensure we only point to valid index in the Q15 scaling factor table
uint8_t safe_db_reduction_index =
clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
audio::scale_audio_samples(input_buffer, input_buffer, q15_scale_factor, samples_to_duck);
if (samples_left_in_step - samples_to_duck == 0) {
// After scaling the current samples, we are ready to transition to the next step
*current_ducking_db_reduction += db_change_per_ducking_step;
}
input_buffer += samples_to_duck;
*ducking_transition_samples_remaining -= samples_to_duck;
input_samples_to_duck -= samples_to_duck;
}
}
if ((*current_ducking_db_reduction > 0) && (input_samples_to_duck > 0)) {
// Audio is ducked, but it's not in the middle of a transition step
uint8_t safe_db_reduction_index =
clamp<uint8_t>(*current_ducking_db_reduction, 0, DECIBEL_REDUCTION_TABLE.size() - 1);
int16_t q15_scale_factor = DECIBEL_REDUCTION_TABLE[safe_db_reduction_index];
audio::scale_audio_samples(input_buffer, input_buffer, q15_scale_factor, input_samples_to_duck);
}
}
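// For reference, the Q15 factors looked up above correspond to 10^(-dB/20) scaled to Q15 (a sketch of
// the relationship; the exact values live in DECIBEL_REDUCTION_TABLE defined earlier in this file):
//   10 dB reduction -> 10^(-10/20) ~= 0.3162 -> 0.3162 * 32767 ~= 10362
//    6 dB reduction -> 10^(-6/20)  ~= 0.5012 -> 0.5012 * 32767 ~= 16422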
void MixerSpeaker::dump_config() {
ESP_LOGCONFIG(TAG, "Speaker Mixer:");
ESP_LOGCONFIG(TAG, " Number of output channels: %u", this->output_channels_);
}
void MixerSpeaker::setup() {
this->event_group_ = xEventGroupCreate();
if (this->event_group_ == nullptr) {
ESP_LOGE(TAG, "Failed to create event group");
this->mark_failed();
return;
}
}
void MixerSpeaker::loop() {
uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
if (event_group_bits & MixerEventGroupBits::STATE_STARTING) {
ESP_LOGD(TAG, "Starting speaker mixer");
xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_STARTING);
}
if (event_group_bits & MixerEventGroupBits::ERR_ESP_NO_MEM) {
this->status_set_error("Failed to allocate the mixer's internal buffer");
xEventGroupClearBits(this->event_group_, MixerEventGroupBits::ERR_ESP_NO_MEM);
}
if (event_group_bits & MixerEventGroupBits::STATE_RUNNING) {
ESP_LOGD(TAG, "Started speaker mixer");
this->status_clear_error();
xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_RUNNING);
}
if (event_group_bits & MixerEventGroupBits::STATE_STOPPING) {
ESP_LOGD(TAG, "Stopping speaker mixer");
xEventGroupClearBits(this->event_group_, MixerEventGroupBits::STATE_STOPPING);
}
if (event_group_bits & MixerEventGroupBits::STATE_STOPPED) {
if (this->delete_task_() == ESP_OK) {
xEventGroupClearBits(this->event_group_, MixerEventGroupBits::ALL_BITS);
}
}
if (this->task_handle_ != nullptr) {
bool all_stopped = true;
for (auto &speaker : this->source_speakers_) {
all_stopped &= speaker->is_stopped();
}
if (all_stopped) {
this->stop();
}
}
}
esp_err_t MixerSpeaker::start(audio::AudioStreamInfo &stream_info) {
if (!this->audio_stream_info_.has_value()) {
if (stream_info.get_bits_per_sample() != 16) {
// Audio streams that don't have 16 bits per sample are not supported
return ESP_ERR_NOT_SUPPORTED;
}
this->audio_stream_info_ = audio::AudioStreamInfo(stream_info.get_bits_per_sample(), this->output_channels_,
stream_info.get_sample_rate());
this->output_speaker_->set_audio_stream_info(this->audio_stream_info_.value());
} else {
if (!this->queue_mode_ && (stream_info.get_sample_rate() != this->audio_stream_info_.value().get_sample_rate())) {
// The two audio streams must have the same sample rate to mix properly if not in queue mode
return ESP_ERR_INVALID_ARG;
}
}
return this->start_task_();
}
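// Example of the compatibility rules above (hypothetical streams): the first source speaker to call
// start() must supply 16-bit audio (anything else returns ESP_ERR_NOT_SUPPORTED) and fixes the output
// stream, e.g. at 48 kHz with the configured channel count; a second source at 44.1 kHz then receives
// ESP_ERR_INVALID_ARG unless queue mode is enabled.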
esp_err_t MixerSpeaker::start_task_() {
if (this->task_stack_buffer_ == nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
}
}
if (this->task_stack_buffer_ == nullptr) {
return ESP_ERR_NO_MEM;
}
if (this->task_handle_ == nullptr) {
this->task_handle_ = xTaskCreateStatic(audio_mixer_task, "mixer", TASK_STACK_SIZE, (void *) this,
MIXER_TASK_PRIORITY, this->task_stack_buffer_, &this->task_stack_);
}
if (this->task_handle_ == nullptr) {
return ESP_ERR_INVALID_STATE;
}
return ESP_OK;
}
esp_err_t MixerSpeaker::delete_task_() {
if (!this->task_created_) {
this->task_handle_ = nullptr;
if (this->task_stack_buffer_ != nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
}
this->task_stack_buffer_ = nullptr;
}
return ESP_OK;
}
return ESP_ERR_INVALID_STATE;
}
void MixerSpeaker::stop() { xEventGroupSetBits(this->event_group_, MixerEventGroupBits::COMMAND_STOP); }
void MixerSpeaker::copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info,
int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
uint32_t frames_to_transfer) {
uint8_t input_channels = input_stream_info.get_channels();
uint8_t output_channels = output_stream_info.get_channels();
const uint8_t max_input_channel_index = input_channels - 1;
if (input_channels == output_channels) {
size_t bytes_to_copy = input_stream_info.frames_to_bytes(frames_to_transfer);
memcpy(output_buffer, input_buffer, bytes_to_copy);
return;
}
for (uint32_t frame_index = 0; frame_index < frames_to_transfer; ++frame_index) {
for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
uint8_t input_channel_index = std::min(output_channel_index, max_input_channel_index);
output_buffer[output_channels * frame_index + output_channel_index] =
input_buffer[input_channels * frame_index + input_channel_index];
}
}
}
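// Channel-mapping sketch for copy_frames (hypothetical one-frame buffers):
//   mono [L] copied to a stereo output becomes [L, L] (the single channel is duplicated)
//   stereo [L, R] copied to a mono output becomes [L] (the extra input channel is dropped)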
void MixerSpeaker::mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info,
const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info,
int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
uint32_t frames_to_mix) {
const uint8_t primary_channels = primary_stream_info.get_channels();
const uint8_t secondary_channels = secondary_stream_info.get_channels();
const uint8_t output_channels = output_stream_info.get_channels();
const uint8_t max_primary_channel_index = primary_channels - 1;
const uint8_t max_secondary_channel_index = secondary_channels - 1;
for (uint32_t frames_index = 0; frames_index < frames_to_mix; ++frames_index) {
for (uint8_t output_channel_index = 0; output_channel_index < output_channels; ++output_channel_index) {
const uint32_t secondary_channel_index = std::min(output_channel_index, max_secondary_channel_index);
const int32_t secondary_sample = secondary_buffer[frames_index * secondary_channels + secondary_channel_index];
const uint32_t primary_channel_index = std::min(output_channel_index, max_primary_channel_index);
const int32_t primary_sample =
static_cast<int32_t>(primary_buffer[frames_index * primary_channels + primary_channel_index]);
const int32_t added_sample = secondary_sample + primary_sample;
output_buffer[frames_index * output_channels + output_channel_index] =
static_cast<int16_t>(clamp<int32_t>(added_sample, MIN_AUDIO_SAMPLE_VALUE, MAX_AUDIO_SAMPLE_VALUE));
}
}
}
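// Worked example of the clamping above (hypothetical samples): mixing 20000 + 15000 = 35000 exceeds
// MAX_AUDIO_SAMPLE_VALUE (the int16 maximum, 32767), so the output sample is clamped to 32767;
// likewise -30000 + -10000 = -40000 is clamped to the int16 minimum, -32768.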
void MixerSpeaker::audio_mixer_task(void *params) {
MixerSpeaker *this_mixer = (MixerSpeaker *) params;
xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_STARTING);
this_mixer->task_created_ = true;
std::unique_ptr<audio::AudioSinkTransferBuffer> output_transfer_buffer = audio::AudioSinkTransferBuffer::create(
this_mixer->audio_stream_info_.value().ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
if (output_transfer_buffer == nullptr) {
xEventGroupSetBits(this_mixer->event_group_,
MixerEventGroupBits::STATE_STOPPED | MixerEventGroupBits::ERR_ESP_NO_MEM);
this_mixer->task_created_ = false;
vTaskDelete(nullptr);
}
output_transfer_buffer->set_sink(this_mixer->output_speaker_);
xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_RUNNING);
bool sent_finished = false;
while (true) {
uint32_t event_group_bits = xEventGroupGetBits(this_mixer->event_group_);
if (event_group_bits & MixerEventGroupBits::COMMAND_STOP) {
break;
}
output_transfer_buffer->transfer_data_to_sink(pdMS_TO_TICKS(TASK_DELAY_MS));
const uint32_t output_frames_free =
this_mixer->audio_stream_info_.value().bytes_to_frames(output_transfer_buffer->free());
std::vector<SourceSpeaker *> speakers_with_data;
std::vector<std::shared_ptr<audio::AudioSourceTransferBuffer>> transfer_buffers_with_data;
for (auto &speaker : this_mixer->source_speakers_) {
if (speaker->get_transfer_buffer().use_count() > 0) {
std::shared_ptr<audio::AudioSourceTransferBuffer> transfer_buffer = speaker->get_transfer_buffer().lock();
speaker->process_data_from_source(0); // Transfers and ducks audio from source ring buffers
if ((transfer_buffer->available() > 0) && !speaker->get_pause_state()) {
// Store the locked transfer buffers in their own vector to avoid releasing ownership until after the loop
transfer_buffers_with_data.push_back(transfer_buffer);
speakers_with_data.push_back(speaker);
}
}
}
if (transfer_buffers_with_data.empty()) {
// No audio available for transferring, block task temporarily
delay(TASK_DELAY_MS);
continue;
}
uint32_t frames_to_mix = output_frames_free;
if ((transfer_buffers_with_data.size() == 1) || this_mixer->queue_mode_) {
// Only one speaker has audio data, or queue mode is enabled; copy samples from the first speaker with data
audio::AudioStreamInfo active_stream_info = speakers_with_data[0]->get_audio_stream_info();
if (active_stream_info.get_sample_rate() ==
this_mixer->output_speaker_->get_audio_stream_info().get_sample_rate()) {
// Speaker's sample rate matches the output speaker's, copy directly
const uint32_t frames_available_in_buffer =
active_stream_info.bytes_to_frames(transfer_buffers_with_data[0]->available());
frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
copy_frames(reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start()), active_stream_info,
reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
this_mixer->audio_stream_info_.value(), frames_to_mix);
// Update source speaker buffer length
transfer_buffers_with_data[0]->decrease_buffer_length(active_stream_info.frames_to_bytes(frames_to_mix));
speakers_with_data[0]->accumulated_frames_read_ += frames_to_mix;
// Add new audio duration to the source speaker pending playback
speakers_with_data[0]->pending_playback_ms_ +=
active_stream_info.frames_to_milliseconds_with_remainder(&speakers_with_data[0]->accumulated_frames_read_);
// Update output transfer buffer length
output_transfer_buffer->increase_buffer_length(
this_mixer->audio_stream_info_.value().frames_to_bytes(frames_to_mix));
} else {
// Speaker's stream info doesn't match the output speaker's, so it's a new source speaker
if (!this_mixer->output_speaker_->is_stopped()) {
if (!sent_finished) {
this_mixer->output_speaker_->finish();
sent_finished = true; // Avoid repeatedly sending the finish command
}
} else {
// Speaker has finished writing the current audio, update the stream information and restart the speaker
this_mixer->audio_stream_info_ =
audio::AudioStreamInfo(active_stream_info.get_bits_per_sample(), this_mixer->output_channels_,
active_stream_info.get_sample_rate());
this_mixer->output_speaker_->set_audio_stream_info(this_mixer->audio_stream_info_.value());
this_mixer->output_speaker_->start();
sent_finished = false;
}
}
} else {
// Determine how many frames to mix
for (int i = 0; i < transfer_buffers_with_data.size(); ++i) {
const uint32_t frames_available_in_buffer =
speakers_with_data[i]->get_audio_stream_info().bytes_to_frames(transfer_buffers_with_data[i]->available());
frames_to_mix = std::min(frames_to_mix, frames_available_in_buffer);
}
int16_t *primary_buffer = reinterpret_cast<int16_t *>(transfer_buffers_with_data[0]->get_buffer_start());
audio::AudioStreamInfo primary_stream_info = speakers_with_data[0]->get_audio_stream_info();
// Mix two streams together
for (int i = 1; i < transfer_buffers_with_data.size(); ++i) {
mix_audio_samples(primary_buffer, primary_stream_info,
reinterpret_cast<int16_t *>(transfer_buffers_with_data[i]->get_buffer_start()),
speakers_with_data[i]->get_audio_stream_info(),
reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end()),
this_mixer->audio_stream_info_.value(), frames_to_mix);
speakers_with_data[i]->pending_playback_ms_ +=
speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
&speakers_with_data[i]->accumulated_frames_read_);
if (i != transfer_buffers_with_data.size() - 1) {
// Need to mix more streams together, point primary buffer and stream info to the already mixed output
primary_buffer = reinterpret_cast<int16_t *>(output_transfer_buffer->get_buffer_end());
primary_stream_info = this_mixer->audio_stream_info_.value();
}
}
// Update source transfer buffer lengths and add new audio durations to the source speaker pending playbacks
for (int i = 0; i < transfer_buffers_with_data.size(); ++i) {
transfer_buffers_with_data[i]->decrease_buffer_length(
speakers_with_data[i]->get_audio_stream_info().frames_to_bytes(frames_to_mix));
speakers_with_data[i]->accumulated_frames_read_ += frames_to_mix;
speakers_with_data[i]->pending_playback_ms_ +=
speakers_with_data[i]->get_audio_stream_info().frames_to_milliseconds_with_remainder(
&speakers_with_data[i]->accumulated_frames_read_);
}
// Update output transfer buffer length
output_transfer_buffer->increase_buffer_length(
this_mixer->audio_stream_info_.value().frames_to_bytes(frames_to_mix));
}
}
xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_STOPPING);
output_transfer_buffer.reset();
xEventGroupSetBits(this_mixer->event_group_, MixerEventGroupBits::STATE_STOPPED);
this_mixer->task_created_ = false;
vTaskDelete(nullptr);
}
} // namespace mixer_speaker
} // namespace esphome
#endif

View File

@ -0,0 +1,207 @@
#pragma once
#ifdef USE_ESP32
#include "esphome/components/audio/audio.h"
#include "esphome/components/audio/audio_transfer_buffer.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/component.h"
#include <freertos/event_groups.h>
#include <freertos/FreeRTOS.h>
namespace esphome {
namespace mixer_speaker {
/* Classes for mixing several source speaker audio streams and writing the result to another speaker component.
* - Volume controls are passed through to the output speaker
* - Directly handles pausing at the SourceSpeaker level; pause state is not passed through to the output speaker.
 * - Audio sent to the SourceSpeakers must have 16 bits per sample.
* - Audio sent to the SourceSpeaker can have any number of channels. They are duplicated or ignored as needed to match
* the number of channels required for the output speaker.
 * - In queue mode, the audio sent to the SourceSpeakers can have different sample rates.
 * - In non-queue mode, the audio sent to the SourceSpeakers must have the same sample rate.
 * - SourceSpeaker has an internal ring buffer. It also allocates a shared_ptr for an AudioTransferBuffer object.
* - Audio Data Flow:
* - Audio data played on a SourceSpeaker first writes to its internal ring buffer.
* - MixerSpeaker task temporarily takes shared ownership of each SourceSpeaker's AudioTransferBuffer.
 *     - MixerSpeaker calls SourceSpeaker's `process_data_from_source`, which transfers audio from the SourceSpeaker's
* ring buffer to its AudioTransferBuffer. Audio ducking is applied at this step.
* - In queue mode, MixerSpeaker prioritizes the earliest configured SourceSpeaker with audio data. Audio data is
* sent to the output speaker.
* - In non-queue mode, MixerSpeaker adds all the audio data in each SourceSpeaker into one stream that is written
* to the output speaker.
*/
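// Illustrative usage from a consumer's point of view (names are hypothetical): a component holding a
// pointer to one of the SourceSpeakers plays PCM through the standard speaker API, e.g.
//   int16_t samples[256];  // 16-bit PCM in the stream's negotiated format
//   media_source_speaker->play(reinterpret_cast<const uint8_t *>(samples), sizeof(samples));
// and the mixer task handles ducking, channel mapping, and writing to the output speaker.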
class MixerSpeaker;
class SourceSpeaker : public speaker::Speaker, public Component {
public:
void dump_config() override;
void setup() override;
void loop() override;
size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;
size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); }
void start() override;
void stop() override;
void finish() override;
bool has_buffered_data() const override;
/// @brief Mute state changes are passed to the parent's output speaker
void set_mute_state(bool mute_state) override;
/// @brief Volume state changes are passed to the parent's output speaker
void set_volume(float volume) override;
void set_pause_state(bool pause_state) override { this->pause_state_ = pause_state; }
bool get_pause_state() const override { return this->pause_state_; }
/// @brief Transfers audio from the ring buffer into the transfer buffer. Ducks audio while transferring.
/// @param ticks_to_wait FreeRTOS ticks to wait while waiting to read from the ring buffer.
/// @return Number of bytes transferred from the ring buffer.
size_t process_data_from_source(TickType_t ticks_to_wait);
/// @brief Sets the ducking level for the source speaker.
/// @param decibel_reduction (uint8_t) The dB reduction level. For example, 0 is no change, 10 is a reduction by 10 dB
/// @param duration (uint32_t) The number of milliseconds to transition from the current level to the new level
void apply_ducking(uint8_t decibel_reduction, uint32_t duration);
void set_buffer_duration(uint32_t buffer_duration_ms) { this->buffer_duration_ms_ = buffer_duration_ms; }
void set_parent(MixerSpeaker *parent) { this->parent_ = parent; }
void set_timeout(uint32_t ms) { this->timeout_ms_ = ms; }
std::weak_ptr<audio::AudioSourceTransferBuffer> get_transfer_buffer() { return this->transfer_buffer_; }
protected:
friend class MixerSpeaker;
esp_err_t start_();
void stop_();
/// @brief Ducks audio samples by a specified amount. When changing the ducking amount, it can transition gradually
/// over a specified number of samples.
/// @param input_buffer buffer with audio samples to be ducked in place
/// @param input_samples_to_duck number of samples to process in ``input_buffer``
/// @param current_ducking_db_reduction pointer to the current dB reduction
/// @param ducking_transition_samples_remaining pointer to the total number of samples left before the
/// transition is finished
/// @param samples_per_ducking_step total number of samples per ducking step for the transition
/// @param db_change_per_ducking_step the change in dB reduction per step
static void duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck, int8_t *current_ducking_db_reduction,
uint32_t *ducking_transition_samples_remaining, uint32_t samples_per_ducking_step,
int8_t db_change_per_ducking_step);
MixerSpeaker *parent_;
std::shared_ptr<audio::AudioSourceTransferBuffer> transfer_buffer_;
std::weak_ptr<RingBuffer> ring_buffer_;
uint32_t buffer_duration_ms_;
uint32_t last_seen_data_ms_{0};
optional<uint32_t> timeout_ms_;
bool stop_gracefully_{false};
bool pause_state_{false};
int8_t target_ducking_db_reduction_{0};
int8_t current_ducking_db_reduction_{0};
int8_t db_change_per_ducking_step_{1};
uint32_t ducking_transition_samples_remaining_{0};
uint32_t samples_per_ducking_step_{0};
uint32_t accumulated_frames_read_{0};
uint32_t pending_playback_ms_{0};
};
class MixerSpeaker : public Component {
public:
void dump_config() override;
void setup() override;
void loop() override;
void add_source_speaker(SourceSpeaker *source_speaker) { this->source_speakers_.push_back(source_speaker); }
/// @brief Starts the mixer task. Called by a source speaker giving the current audio stream information
/// @param stream_info The calling source speaker's audio stream information
/// @return ESP_ERR_NOT_SUPPORTED if the incoming stream is incompatible due to unsupported bits per sample
/// ESP_ERR_INVALID_ARG if the incoming stream cannot be mixed with the other input audio streams
/// ESP_ERR_NO_MEM if there isn't enough memory for the task's stack
/// ESP_ERR_INVALID_STATE if the task fails to start
/// ESP_OK if the incoming stream is compatible and the mixer task starts
esp_err_t start(audio::AudioStreamInfo &stream_info);
void stop();
void set_output_channels(uint8_t output_channels) { this->output_channels_ = output_channels; }
void set_output_speaker(speaker::Speaker *speaker) { this->output_speaker_ = speaker; }
void set_queue_mode(bool queue_mode) { this->queue_mode_ = queue_mode; }
void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
speaker::Speaker *get_output_speaker() const { return this->output_speaker_; }
protected:
/// @brief Copies audio frames from the input buffer to the output buffer taking into account the number of channels
/// in each stream. If the output stream has more channels, the input samples are duplicated. If the output stream has
/// fewer channels, the samples from the extra input channels are dropped.
/// @param input_buffer
/// @param input_stream_info
/// @param output_buffer
/// @param output_stream_info
/// @param frames_to_transfer number of frames (consisting of a sample for each channel) to copy from the input buffer
static void copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info, int16_t *output_buffer,
audio::AudioStreamInfo output_stream_info, uint32_t frames_to_transfer);
/// @brief Mixes the primary and secondary streams taking into account the number of channels in each stream. Primary
/// and secondary samples are duplicated or dropped as necessary to ensure the output stream has the configured number
/// of channels. Output samples are clamped to the corresponding int16 min or max values if the mixed sample
/// overflows.
/// @param primary_buffer (int16_t *) samples buffer for the primary stream
/// @param primary_stream_info stream info for the primary stream
/// @param secondary_buffer (int16_t *) samples buffer for secondary stream
/// @param secondary_stream_info stream info for the secondary stream
/// @param output_buffer (int16_t *) buffer for the mixed samples
/// @param output_stream_info stream info for the output buffer
/// @param frames_to_mix number of frames in the primary and secondary buffers to mix together
static void mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info,
const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info,
int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
uint32_t frames_to_mix);
static void audio_mixer_task(void *params);
/// @brief Starts the mixer task after allocating memory for the task stack.
/// @return ESP_ERR_NO_MEM if there isn't enough memory for the task's stack
/// ESP_ERR_INVALID_STATE if the task didn't start
/// ESP_OK if successful
esp_err_t start_task_();
/// @brief If the task is stopped, it sets the task handle to nullptr and deallocates its stack
/// @return ESP_OK if the task was stopped, ESP_ERR_INVALID_STATE otherwise.
esp_err_t delete_task_();
EventGroupHandle_t event_group_{nullptr};
std::vector<SourceSpeaker *> source_speakers_;
speaker::Speaker *output_speaker_{nullptr};
uint8_t output_channels_;
bool queue_mode_;
bool task_stack_in_psram_{false};
bool task_created_{false};
TaskHandle_t task_handle_{nullptr};
StaticTask_t task_stack_;
StackType_t *task_stack_buffer_{nullptr};
optional<audio::AudioStreamInfo> audio_stream_info_;
};
} // namespace mixer_speaker
} // namespace esphome
#endif

View File

@ -117,6 +117,8 @@ class MPR121GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { this->inverted_ = inverted; } void set_inverted(bool inverted) { this->inverted_ = inverted; }
void set_flags(gpio::Flags flags) { this->flags_ = flags; } void set_flags(gpio::Flags flags) { this->flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
MPR121Component *parent_; MPR121Component *parent_;
uint8_t pin_; uint8_t pin_;

View File

@ -52,6 +52,8 @@ class PCA6416AGPIOPin : public GPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
PCA6416AComponent *parent_; PCA6416AComponent *parent_;
uint8_t pin_; uint8_t pin_;

View File

@ -65,6 +65,8 @@ class PCA9554GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
PCA9554Component *parent_; PCA9554Component *parent_;
uint8_t pin_; uint8_t pin_;

View File

@ -54,6 +54,8 @@ class PCF8574GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
void set_flags(gpio::Flags flags) { flags_ = flags; } void set_flags(gpio::Flags flags) { flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
PCF8574Component *parent_; PCF8574Component *parent_;
uint8_t pin_; uint8_t pin_;

View File

View File

@ -0,0 +1,103 @@
import esphome.codegen as cg
from esphome.components import audio, esp32, speaker
import esphome.config_validation as cv
from esphome.const import (
CONF_BITS_PER_SAMPLE,
CONF_BUFFER_DURATION,
CONF_FILTERS,
CONF_ID,
CONF_NUM_CHANNELS,
CONF_OUTPUT_SPEAKER,
CONF_SAMPLE_RATE,
CONF_TASK_STACK_IN_PSRAM,
PLATFORM_ESP32,
)
from esphome.core.entity_helpers import inherit_property_from
AUTO_LOAD = ["audio"]
CODEOWNERS = ["@kahrendt"]
resampler_ns = cg.esphome_ns.namespace("resampler")
ResamplerSpeaker = resampler_ns.class_(
"ResamplerSpeaker", cg.Component, speaker.Speaker
)
CONF_TAPS = "taps"
def _set_stream_limits(config):
audio.set_stream_limits(
min_bits_per_sample=16,
max_bits_per_sample=32,
)(config)
return config
def _validate_audio_compatability(config):
inherit_property_from(CONF_BITS_PER_SAMPLE, CONF_OUTPUT_SPEAKER)(config)
inherit_property_from(CONF_NUM_CHANNELS, CONF_OUTPUT_SPEAKER)(config)
inherit_property_from(CONF_SAMPLE_RATE, CONF_OUTPUT_SPEAKER)(config)
audio.final_validate_audio_schema(
"source_speaker",
audio_device=CONF_OUTPUT_SPEAKER,
bits_per_sample=config.get(CONF_BITS_PER_SAMPLE),
channels=config.get(CONF_NUM_CHANNELS),
sample_rate=config.get(CONF_SAMPLE_RATE),
)(config)
def _validate_taps(taps):
value = cv.int_range(min=16, max=128)(taps)
if value % 4 != 0:
raise cv.Invalid("Number of taps must be divisible by 4")
return value
CONFIG_SCHEMA = cv.All(
speaker.SPEAKER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(ResamplerSpeaker),
cv.Required(CONF_OUTPUT_SPEAKER): cv.use_id(speaker.Speaker),
cv.Optional(
CONF_BUFFER_DURATION, default="100ms"
): cv.positive_time_period_milliseconds,
cv.SplitDefault(CONF_TASK_STACK_IN_PSRAM, esp32_idf=False): cv.All(
cv.boolean, cv.only_with_esp_idf
),
cv.Optional(CONF_FILTERS, default=16): cv.int_range(min=2, max=1024),
cv.Optional(CONF_TAPS, default=16): _validate_taps,
}
).extend(cv.COMPONENT_SCHEMA),
cv.only_on([PLATFORM_ESP32]),
_set_stream_limits,
)
FINAL_VALIDATE_SCHEMA = _validate_audio_compatability
async def to_code(config):
var = cg.new_Pvariable(config[CONF_ID])
await cg.register_component(var, config)
await speaker.register_speaker(var, config)
output_spkr = await cg.get_variable(config[CONF_OUTPUT_SPEAKER])
cg.add(var.set_output_speaker(output_spkr))
cg.add(var.set_buffer_duration(config[CONF_BUFFER_DURATION]))
if task_stack_in_psram := config.get(CONF_TASK_STACK_IN_PSRAM):
cg.add(var.set_task_stack_in_psram(task_stack_in_psram))
esp32.add_idf_sdkconfig_option(
"CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True
)
cg.add(var.set_target_bits_per_sample(config[CONF_BITS_PER_SAMPLE]))
cg.add(var.set_target_sample_rate(config[CONF_SAMPLE_RATE]))
cg.add(var.set_filters(config[CONF_FILTERS]))
cg.add(var.set_taps(config[CONF_TAPS]))

View File

@ -0,0 +1,318 @@
#include "resampler_speaker.h"
#ifdef USE_ESP32
#include "esphome/components/audio/audio_resampler.h"
#include "esphome/core/helpers.h"
#include "esphome/core/log.h"
#include <algorithm>
#include <cstring>
namespace esphome {
namespace resampler {
static const UBaseType_t RESAMPLER_TASK_PRIORITY = 1;
static const uint32_t TRANSFER_BUFFER_DURATION_MS = 50;
static const uint32_t TASK_DELAY_MS = 20;
static const uint32_t TASK_STACK_SIZE = 3072;
static const char *const TAG = "resampler_speaker";
enum ResamplingEventGroupBits : uint32_t {
COMMAND_STOP = (1 << 0), // stops the resampler task
STATE_STARTING = (1 << 10),
STATE_RUNNING = (1 << 11),
STATE_STOPPING = (1 << 12),
STATE_STOPPED = (1 << 13),
ERR_ESP_NO_MEM = (1 << 19),
ERR_ESP_NOT_SUPPORTED = (1 << 20),
ERR_ESP_FAIL = (1 << 21),
ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
};
void ResamplerSpeaker::setup() {
this->event_group_ = xEventGroupCreate();
if (this->event_group_ == nullptr) {
ESP_LOGE(TAG, "Failed to create event group");
this->mark_failed();
return;
}
this->output_speaker_->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
int32_t adjustment = this->playback_differential_ms_;
this->playback_differential_ms_ -= adjustment;
int32_t adjusted_playback_ms = static_cast<int32_t>(new_playback_ms) + adjustment;
this->audio_output_callback_(adjusted_playback_ms, remainder_us, pending_ms, write_timestamp);
});
}
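// Sketch of the adjustment above (hypothetical numbers): if the resampler task has accumulated a
// playback_differential_ms_ of +2 ms and the output speaker reports new_playback_ms = 100, the
// callback forwards 102 ms and the stored differential is reset to 0 until more error accumulates.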
void ResamplerSpeaker::loop() {
uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);
if (event_group_bits & ResamplingEventGroupBits::STATE_STARTING) {
ESP_LOGD(TAG, "Starting resampler task");
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::STATE_STARTING);
}
if (event_group_bits & ResamplingEventGroupBits::ERR_ESP_NO_MEM) {
this->status_set_error("Resampler task failed to allocate the internal buffers");
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ERR_ESP_NO_MEM);
this->state_ = speaker::STATE_STOPPING;
}
if (event_group_bits & ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED) {
this->status_set_error("Cannot resample due to an unsupported audio stream");
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED);
this->state_ = speaker::STATE_STOPPING;
}
if (event_group_bits & ResamplingEventGroupBits::ERR_ESP_FAIL) {
this->status_set_error("Resampler task failed");
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ERR_ESP_FAIL);
this->state_ = speaker::STATE_STOPPING;
}
if (event_group_bits & ResamplingEventGroupBits::STATE_RUNNING) {
ESP_LOGD(TAG, "Started resampler task");
this->status_clear_error();
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::STATE_RUNNING);
}
if (event_group_bits & ResamplingEventGroupBits::STATE_STOPPING) {
ESP_LOGD(TAG, "Stopping resampler task");
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::STATE_STOPPING);
}
if (event_group_bits & ResamplingEventGroupBits::STATE_STOPPED) {
if (this->delete_task_() == ESP_OK) {
ESP_LOGD(TAG, "Stopped resampler task");
xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ALL_BITS);
}
}
switch (this->state_) {
case speaker::STATE_STARTING: {
esp_err_t err = this->start_();
if (err == ESP_OK) {
this->status_clear_error();
this->state_ = speaker::STATE_RUNNING;
} else {
switch (err) {
case ESP_ERR_INVALID_STATE:
this->status_set_error("Failed to start resampler: resampler task failed to start");
break;
case ESP_ERR_NO_MEM:
this->status_set_error("Failed to start resampler: not enough memory for task stack");
break;
default:
this->status_set_error("Failed to start resampler");
break;
}
this->state_ = speaker::STATE_STOPPING;
}
break;
}
case speaker::STATE_RUNNING:
if (this->output_speaker_->is_stopped()) {
this->state_ = speaker::STATE_STOPPING;
}
break;
case speaker::STATE_STOPPING:
this->stop_();
this->state_ = speaker::STATE_STOPPED;
break;
case speaker::STATE_STOPPED:
break;
}
}
size_t ResamplerSpeaker::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
if (this->is_stopped()) {
this->start();
}
size_t bytes_written = 0;
if ((this->output_speaker_->is_running()) && (!this->requires_resampling_())) {
bytes_written = this->output_speaker_->play(data, length, ticks_to_wait);
} else {
if (this->ring_buffer_.use_count() == 1) {
std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_.lock();
bytes_written = temp_ring_buffer->write_without_replacement(data, length, ticks_to_wait);
}
}
return bytes_written;
}
void ResamplerSpeaker::start() { this->state_ = speaker::STATE_STARTING; }
esp_err_t ResamplerSpeaker::start_() {
this->target_stream_info_ = audio::AudioStreamInfo(
this->target_bits_per_sample_, this->audio_stream_info_.get_channels(), this->target_sample_rate_);
this->output_speaker_->set_audio_stream_info(this->target_stream_info_);
this->output_speaker_->start();
if (this->requires_resampling_()) {
// Start the resampler task to handle converting sample rates
return this->start_task_();
}
return ESP_OK;
}
esp_err_t ResamplerSpeaker::start_task_() {
if (this->task_stack_buffer_ == nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);
}
}
if (this->task_stack_buffer_ == nullptr) {
return ESP_ERR_NO_MEM;
}
if (this->task_handle_ == nullptr) {
this->task_handle_ = xTaskCreateStatic(resample_task, "sample", TASK_STACK_SIZE, (void *) this,
RESAMPLER_TASK_PRIORITY, this->task_stack_buffer_, &this->task_stack_);
}
if (this->task_handle_ == nullptr) {
return ESP_ERR_INVALID_STATE;
}
return ESP_OK;
}
void ResamplerSpeaker::stop() { this->state_ = speaker::STATE_STOPPING; }
void ResamplerSpeaker::stop_() {
if (this->task_handle_ != nullptr) {
xEventGroupSetBits(this->event_group_, ResamplingEventGroupBits::COMMAND_STOP);
}
this->output_speaker_->stop();
}
esp_err_t ResamplerSpeaker::delete_task_() {
if (!this->task_created_) {
this->task_handle_ = nullptr;
if (this->task_stack_buffer_ != nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
}
this->task_stack_buffer_ = nullptr;
}
return ESP_OK;
}
return ESP_ERR_INVALID_STATE;
}
void ResamplerSpeaker::finish() { this->output_speaker_->finish(); }
bool ResamplerSpeaker::has_buffered_data() const {
bool has_ring_buffer_data = false;
if (this->requires_resampling_() && (this->ring_buffer_.use_count() > 0)) {
has_ring_buffer_data = (this->ring_buffer_.lock()->available() > 0);
}
return (has_ring_buffer_data || this->output_speaker_->has_buffered_data());
}
void ResamplerSpeaker::set_mute_state(bool mute_state) {
this->mute_state_ = mute_state;
this->output_speaker_->set_mute_state(mute_state);
}
void ResamplerSpeaker::set_volume(float volume) {
this->volume_ = volume;
this->output_speaker_->set_volume(volume);
}
bool ResamplerSpeaker::requires_resampling_() const {
return (this->audio_stream_info_.get_sample_rate() != this->target_sample_rate_) ||
(this->audio_stream_info_.get_bits_per_sample() != this->target_bits_per_sample_);
}
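// Examples of the check above (hypothetical streams): a 44.1 kHz source into a 48 kHz target, or a
// 32-bit source into a 16-bit target, both require the resampler task; a source that already matches
// the target sample rate and bit depth is passed straight through to the output speaker.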
void ResamplerSpeaker::resample_task(void *params) {
ResamplerSpeaker *this_resampler = (ResamplerSpeaker *) params;
this_resampler->task_created_ = true;
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_STARTING);
std::unique_ptr<audio::AudioResampler> resampler =
make_unique<audio::AudioResampler>(this_resampler->audio_stream_info_.ms_to_bytes(TRANSFER_BUFFER_DURATION_MS),
this_resampler->target_stream_info_.ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
esp_err_t err = resampler->start(this_resampler->audio_stream_info_, this_resampler->target_stream_info_,
this_resampler->taps_, this_resampler->filters_);
if (err == ESP_OK) {
std::shared_ptr<RingBuffer> temp_ring_buffer =
RingBuffer::create(this_resampler->audio_stream_info_.ms_to_bytes(this_resampler->buffer_duration_ms_));
if (temp_ring_buffer.use_count() == 0) {
err = ESP_ERR_NO_MEM;
} else {
this_resampler->ring_buffer_ = temp_ring_buffer;
resampler->add_source(this_resampler->ring_buffer_);
this_resampler->output_speaker_->set_audio_stream_info(this_resampler->target_stream_info_);
resampler->add_sink(this_resampler->output_speaker_);
}
}
if (err == ESP_OK) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_RUNNING);
} else if (err == ESP_ERR_NO_MEM) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NO_MEM);
} else if (err == ESP_ERR_NOT_SUPPORTED) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED);
}
this_resampler->playback_differential_ms_ = 0;
while (err == ESP_OK) {
uint32_t event_bits = xEventGroupGetBits(this_resampler->event_group_);
if (event_bits & ResamplingEventGroupBits::COMMAND_STOP) {
break;
}
// Stop gracefully if the decoder is done
int32_t ms_differential = 0;
audio::AudioResamplerState resampler_state = resampler->resample(false, &ms_differential);
this_resampler->playback_differential_ms_ += ms_differential;
if (resampler_state == audio::AudioResamplerState::FINISHED) {
break;
} else if (resampler_state == audio::AudioResamplerState::FAILED) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_FAIL);
break;
}
}
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_STOPPING);
resampler.reset();
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_STOPPED);
this_resampler->task_created_ = false;
vTaskDelete(nullptr);
}
} // namespace resampler
} // namespace esphome
#endif

View File

@ -0,0 +1,107 @@
#pragma once
#ifdef USE_ESP32
#include "esphome/components/audio/audio.h"
#include "esphome/components/audio/audio_transfer_buffer.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/component.h"
#include <freertos/event_groups.h>
#include <freertos/FreeRTOS.h>
namespace esphome {
namespace resampler {
class ResamplerSpeaker : public Component, public speaker::Speaker {
public:
float get_setup_priority() const override { return esphome::setup_priority::DATA; }
void setup() override;
void loop() override;
size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;
size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); }
void start() override;
void stop() override;
void finish() override;
void set_pause_state(bool pause_state) override { this->output_speaker_->set_pause_state(pause_state); }
bool get_pause_state() const override { return this->output_speaker_->get_pause_state(); }
bool has_buffered_data() const override;
/// @brief Mute state changes are passed to the parent's output speaker
void set_mute_state(bool mute_state) override;
/// @brief Volume state changes are passed to the parent's output speaker
void set_volume(float volume) override;
void set_output_speaker(speaker::Speaker *speaker) { this->output_speaker_ = speaker; }
void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
void set_target_bits_per_sample(uint8_t target_bits_per_sample) {
this->target_bits_per_sample_ = target_bits_per_sample;
}
void set_target_sample_rate(uint32_t target_sample_rate) { this->target_sample_rate_ = target_sample_rate; }
void set_filters(uint16_t filters) { this->filters_ = filters; }
void set_taps(uint16_t taps) { this->taps_ = taps; }
void set_buffer_duration(uint32_t buffer_duration_ms) { this->buffer_duration_ms_ = buffer_duration_ms; }
protected:
/// @brief Starts the output speaker after setting the resampled stream info. If resampling is required, it starts the
/// task.
/// @return ESP_OK if resampling is not required
/// return value of start_task_() if resampling is required
esp_err_t start_();
/// @brief Starts the resampler task after allocating the task stack
/// @return ESP_OK if successful,
/// ESP_ERR_NO_MEM if the task stack couldn't be allocated
/// ESP_ERR_INVALID_STATE if the task wasn't created
esp_err_t start_task_();
/// @brief Stops the output speaker. If the resampling task is running, it sends the stop command.
void stop_();
/// @brief Deallocates the task stack and resets the pointers.
/// @return ESP_OK if successful
/// ESP_ERR_INVALID_STATE if the task hasn't stopped itself
esp_err_t delete_task_();
inline bool requires_resampling_() const;
static void resample_task(void *params);
EventGroupHandle_t event_group_{nullptr};
std::weak_ptr<RingBuffer> ring_buffer_;
speaker::Speaker *output_speaker_{nullptr};
bool task_stack_in_psram_{false};
bool task_created_{false};
TaskHandle_t task_handle_{nullptr};
StaticTask_t task_stack_;
StackType_t *task_stack_buffer_{nullptr};
audio::AudioStreamInfo target_stream_info_;
uint16_t taps_;
uint16_t filters_;
uint8_t target_bits_per_sample_;
uint32_t target_sample_rate_;
uint32_t buffer_duration_ms_;
int32_t playback_differential_ms_{0};
};
} // namespace resampler
} // namespace esphome
#endif

View File

@ -22,6 +22,7 @@ class RP2040GPIOPin : public InternalGPIOPin {
void detach_interrupt() const override; void detach_interrupt() const override;
ISRInternalGPIOPin to_isr() const override; ISRInternalGPIOPin to_isr() const override;
uint8_t get_pin() const override { return pin_; } uint8_t get_pin() const override { return pin_; }
gpio::Flags get_flags() const override { return flags_; }
bool is_inverted() const override { return inverted_; } bool is_inverted() const override { return inverted_; }
protected: protected:

View File

@ -52,6 +52,9 @@ class SN74HC165GPIOPin : public GPIOPin, public Parented<SN74HC165Component> {
void set_pin(uint16_t pin) { pin_ = pin; } void set_pin(uint16_t pin) { pin_ = pin; }
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
/// Always returns `gpio::Flags::FLAG_INPUT`.
gpio::Flags get_flags() const override { return gpio::Flags::FLAG_INPUT; }
protected: protected:
uint16_t pin_; uint16_t pin_;
bool inverted_; bool inverted_;

View File

@ -59,6 +59,9 @@ class SN74HC595GPIOPin : public GPIOPin, public Parented<SN74HC595Component> {
void set_pin(uint16_t pin) { pin_ = pin; } void set_pin(uint16_t pin) { pin_ = pin; }
void set_inverted(bool inverted) { inverted_ = inverted; } void set_inverted(bool inverted) { inverted_ = inverted; }
/// Always returns `gpio::Flags::FLAG_OUTPUT`.
gpio::Flags get_flags() const override { return gpio::Flags::FLAG_OUTPUT; }
protected: protected:
uint16_t pin_; uint16_t pin_;
bool inverted_; bool inverted_;

View File

@ -1,7 +1,6 @@
from esphome import automation from esphome import automation
from esphome.automation import maybe_simple_id
import esphome.codegen as cg import esphome.codegen as cg
from esphome.components import audio_dac from esphome.components import audio, audio_dac
import esphome.config_validation as cv import esphome.config_validation as cv
from esphome.const import CONF_DATA, CONF_ID, CONF_VOLUME from esphome.const import CONF_DATA, CONF_ID, CONF_VOLUME
from esphome.core import CORE from esphome.core import CORE
@ -54,13 +53,15 @@ async def register_speaker(var, config):
await setup_speaker_core_(var, config) await setup_speaker_core_(var, config)
SPEAKER_SCHEMA = cv.Schema( SPEAKER_SCHEMA = cv.Schema.extend(audio.AUDIO_COMPONENT_SCHEMA).extend(
{ {
cv.Optional(CONF_AUDIO_DAC): cv.use_id(audio_dac.AudioDac), cv.Optional(CONF_AUDIO_DAC): cv.use_id(audio_dac.AudioDac),
} }
) )
SPEAKER_AUTOMATION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Speaker)}) SPEAKER_AUTOMATION_SCHEMA = automation.maybe_simple_id(
{cv.GenerateID(): cv.use_id(Speaker)}
)
async def speaker_action(config, action_id, template_arg, args): async def speaker_action(config, action_id, template_arg, args):

View File

@ -9,6 +9,7 @@
#endif #endif
#include "esphome/core/defines.h" #include "esphome/core/defines.h"
#include "esphome/core/helpers.h"
#include "esphome/components/audio/audio.h" #include "esphome/components/audio/audio.h"
#ifdef USE_AUDIO_DAC #ifdef USE_AUDIO_DAC
@ -56,6 +57,10 @@ class Speaker {
// When finish() is not implemented on the platform component it should just do a normal stop. // When finish() is not implemented on the platform component it should just do a normal stop.
virtual void finish() { this->stop(); } virtual void finish() { this->stop(); }
// Pauses processing incoming audio. Needs to be implemented specifically per speaker component
virtual void set_pause_state(bool pause_state) {}
virtual bool get_pause_state() const { return false; }
virtual bool has_buffered_data() const = 0; virtual bool has_buffered_data() const = 0;
bool is_running() const { return this->state_ == STATE_RUNNING; } bool is_running() const { return this->state_ == STATE_RUNNING; }
@ -95,6 +100,19 @@ class Speaker {
this->audio_stream_info_ = audio_stream_info; this->audio_stream_info_ = audio_stream_info;
} }
audio::AudioStreamInfo &get_audio_stream_info() { return this->audio_stream_info_; }
/// Callback function for sending the duration of the audio written to the speaker since the last callback.
/// Parameters:
/// - Duration in milliseconds. Never rounded up, so it is always less than or equal to the actual duration.
/// - Remainder duration in microseconds. Rounded duration after subtracting the previous parameter from the actual
/// duration.
/// - Duration of remaining, unwritten audio buffered in the speaker in milliseconds.
/// - System time in microseconds when the last write was completed.
void add_audio_output_callback(std::function<void(uint32_t, uint32_t, uint32_t, uint32_t)> &&callback) {
this->audio_output_callback_.add(std::move(callback));
}
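// Hypothetical registration of the callback above (the resampler speaker in this commit registers one
// in its setup()):
//   speaker->add_audio_output_callback([](uint32_t played_ms, uint32_t remainder_us,
//                                          uint32_t pending_ms, uint32_t write_time_us) {
//     // e.g. accumulate played_ms to track how much audio has actually been written out
//   });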
protected: protected:
State state_{STATE_STOPPED}; State state_{STATE_STOPPED};
audio::AudioStreamInfo audio_stream_info_; audio::AudioStreamInfo audio_stream_info_;
@ -104,6 +122,8 @@ class Speaker {
#ifdef USE_AUDIO_DAC #ifdef USE_AUDIO_DAC
audio_dac::AudioDac *audio_dac_{nullptr}; audio_dac::AudioDac *audio_dac_{nullptr};
#endif #endif
CallbackManager<void(uint32_t, uint32_t, uint32_t, uint32_t)> audio_output_callback_{};
}; };
} // namespace speaker } // namespace speaker

View File

@ -114,6 +114,8 @@ class NullPin : public GPIOPin {
void pin_mode(gpio::Flags flags) override {} void pin_mode(gpio::Flags flags) override {}
gpio::Flags get_flags() const override { return gpio::Flags::FLAG_NONE; }
bool digital_read() override { return false; } bool digital_read() override { return false; }
void digital_write(bool value) override {} void digital_write(bool value) override {}

View File

@ -20,6 +20,8 @@ class SX1509GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { this->inverted_ = inverted; } void set_inverted(bool inverted) { this->inverted_ = inverted; }
void set_flags(gpio::Flags flags) { this->flags_ = flags; } void set_flags(gpio::Flags flags) { this->flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
SX1509Component *parent_; SX1509Component *parent_;
uint8_t pin_; uint8_t pin_;

View File

@ -54,6 +54,8 @@ class TCA9555GPIOPin : public GPIOPin, public Parented<TCA9555Component> {
void set_inverted(bool inverted) { this->inverted_ = inverted; } void set_inverted(bool inverted) { this->inverted_ = inverted; }
void set_flags(gpio::Flags flags) { this->flags_ = flags; } void set_flags(gpio::Flags flags) { this->flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
protected: protected:
uint8_t pin_; uint8_t pin_;
bool inverted_; bool inverted_;

View File

@ -18,7 +18,7 @@ from esphome.cpp_generator import MockObjClass
CODEOWNERS = ["@clydebarrow"] CODEOWNERS = ["@clydebarrow"]
DEPENDENCIES = ["network"] DEPENDENCIES = ["network"]
AUTO_LOAD = ["socket"] AUTO_LOAD = ["socket", "xxtea"]
MULTI_CONF = True MULTI_CONF = True
udp_ns = cg.esphome_ns.namespace("udp") udp_ns = cg.esphome_ns.namespace("udp")

View File

@ -3,6 +3,8 @@
#include "esphome/components/network/util.h" #include "esphome/components/network/util.h"
#include "udp_component.h" #include "udp_component.h"
#include "esphome/components/xxtea/xxtea.h"
namespace esphome { namespace esphome {
namespace udp { namespace udp {
@ -47,54 +49,7 @@ namespace udp {
*/ */
static const char *const TAG = "udp"; static const char *const TAG = "udp";
/** static size_t round4(size_t value) { return (value + 3) & ~3; }
* XXTEA implementation, using 256 bit key.
*/
static const uint32_t DELTA = 0x9e3779b9;
#define MX ((((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4))) ^ ((sum ^ y) + (k[(p ^ e) & 7] ^ z)))
/**
* Encrypt a block of data in-place
*/
static void xxtea_encrypt(uint32_t *v, size_t n, const uint32_t *k) {
uint32_t z, y, sum, e;
size_t p;
size_t q = 6 + 52 / n;
sum = 0;
z = v[n - 1];
while (q-- != 0) {
sum += DELTA;
e = (sum >> 2);
for (p = 0; p != n - 1; p++) {
y = v[p + 1];
z = v[p] += MX;
}
y = v[0];
z = v[n - 1] += MX;
}
}
static void xxtea_decrypt(uint32_t *v, size_t n, const uint32_t *k) {
uint32_t z, y, sum, e;
size_t p;
size_t q = 6 + 52 / n;
sum = q * DELTA;
y = v[0];
while (q-- != 0) {
e = (sum >> 2);
for (p = n - 1; p != 0; p--) {
z = v[p - 1];
y = v[p] -= MX;
}
z = v[n - 1];
y = v[0] -= MX;
sum -= DELTA;
}
}
inline static size_t round4(size_t value) { return (value + 3) & ~3; }
union FuData { union FuData {
uint32_t u32; uint32_t u32;
@ -312,7 +267,7 @@ void UDPComponent::flush_() {
memcpy(buffer, this->header_.data(), this->header_.size()); memcpy(buffer, this->header_.data(), this->header_.size());
memcpy(buffer + header_len, this->data_.data(), this->data_.size()); memcpy(buffer + header_len, this->data_.data(), this->data_.size());
if (this->is_encrypted_()) { if (this->is_encrypted_()) {
xxtea_encrypt(buffer + header_len, len, (uint32_t *) this->encryption_key_.data()); xxtea::encrypt(buffer + header_len, len, (uint32_t *) this->encryption_key_.data());
} }
auto total_len = (header_len + len) * 4; auto total_len = (header_len + len) * 4;
this->send_packet_(buffer, total_len); this->send_packet_(buffer, total_len);
@ -503,7 +458,7 @@ void UDPComponent::process_(uint8_t *buf, const size_t len) {
#endif #endif
if (!provider.encryption_key.empty()) { if (!provider.encryption_key.empty()) {
xxtea_decrypt((uint32_t *) buf, (end - buf) / 4, (uint32_t *) provider.encryption_key.data()); xxtea::decrypt((uint32_t *) buf, (end - buf) / 4, (uint32_t *) provider.encryption_key.data());
} }
byte = *buf++; byte = *buf++;
if (byte == ROLLING_CODE_KEY) { if (byte == ROLLING_CODE_KEY) {

View File

@ -275,6 +275,8 @@ class WeikaiGPIOPin : public GPIOPin {
void set_inverted(bool inverted) { this->inverted_ = inverted; } void set_inverted(bool inverted) { this->inverted_ = inverted; }
void set_flags(gpio::Flags flags) { this->flags_ = flags; } void set_flags(gpio::Flags flags) { this->flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
void setup() override; void setup() override;
std::string dump_summary() const override; std::string dump_summary() const override;
void pin_mode(gpio::Flags flags) override { this->parent_->set_pin_direction_(this->pin_, flags); } void pin_mode(gpio::Flags flags) override { this->parent_->set_pin_direction_(this->pin_, flags); }

View File

@ -36,6 +36,8 @@ class XL9535GPIOPin : public GPIOPin {
void set_inverted(bool inverted) { this->inverted_ = inverted; } void set_inverted(bool inverted) { this->inverted_ = inverted; }
void set_flags(gpio::Flags flags) { this->flags_ = flags; } void set_flags(gpio::Flags flags) { this->flags_ = flags; }
gpio::Flags get_flags() const override { return this->flags_; }
void setup() override; void setup() override;
std::string dump_summary() const override; std::string dump_summary() const override;
void pin_mode(gpio::Flags flags) override; void pin_mode(gpio::Flags flags) override;

View File

@ -0,0 +1,3 @@
"""ESPHome XXTEA encryption component."""
CODEOWNERS = ["@clydebarrow"]

View File

@ -0,0 +1,46 @@
#include "xxtea.h"
namespace esphome {
namespace xxtea {
static const uint32_t DELTA = 0x9e3779b9;
#define MX ((((z >> 5) ^ (y << 2)) + ((y >> 3) ^ (z << 4))) ^ ((sum ^ y) + (k[(p ^ e) & 7] ^ z)))
void encrypt(uint32_t *v, size_t n, const uint32_t *k) {
uint32_t z, y, sum, e;
size_t p;
size_t q = 6 + 52 / n;
sum = 0;
z = v[n - 1];
while (q-- != 0) {
sum += DELTA;
e = (sum >> 2);
for (p = 0; p != n - 1; p++) {
y = v[p + 1];
z = v[p] += MX;
}
y = v[0];
z = v[n - 1] += MX;
}
}
void decrypt(uint32_t *v, size_t n, const uint32_t *k) {
uint32_t z, y, sum, e;
size_t p;
size_t q = 6 + 52 / n;
sum = q * DELTA;
y = v[0];
while (q-- != 0) {
e = (sum >> 2);
for (p = n - 1; p != 0; p--) {
z = v[p - 1];
y = v[p] -= MX;
}
z = v[n - 1];
y = v[0] -= MX;
sum -= DELTA;
}
}
} // namespace xxtea
} // namespace esphome

View File

@ -0,0 +1,26 @@
#pragma once
#include <cstdint>
#include <cstddef>
namespace esphome {
namespace xxtea {
/**
* Encrypt a block of data in-place using XXTEA algorithm with 256-bit key
* @param v Data to encrypt (as array of 32-bit words)
* @param n Number of 32-bit words in data
* @param k Key (array of 8 32-bit words)
*/
void encrypt(uint32_t *v, size_t n, const uint32_t *k);
/**
* Decrypt a block of data in-place using XXTEA algorithm with 256-bit key
* @param v Data to decrypt (as array of 32-bit words)
* @param n Number of 32-bit words in data
* @param k Key (array of 8 32-bit words)
*/
void decrypt(uint32_t *v, size_t n, const uint32_t *k);
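/**
 * Minimal usage sketch (hypothetical buffers; both calls operate in-place on 32-bit words, and XXTEA
 * is normally used with at least two words):
 *   uint32_t key[8] = {0};               // 256-bit key as 8 words
 *   uint32_t payload[4] = {1, 2, 3, 4};  // data to protect
 *   esphome::xxtea::encrypt(payload, 4, key);
 *   esphome::xxtea::decrypt(payload, 4, key);  // restores the original words
 */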
} // namespace xxtea
} // namespace esphome

View File

@ -94,6 +94,7 @@ CONF_BRIGHTNESS = "brightness"
CONF_BRIGHTNESS_LIMITS = "brightness_limits" CONF_BRIGHTNESS_LIMITS = "brightness_limits"
CONF_BROKER = "broker" CONF_BROKER = "broker"
CONF_BSSID = "bssid" CONF_BSSID = "bssid"
CONF_BUFFER_DURATION = "buffer_duration"
CONF_BUFFER_SIZE = "buffer_size" CONF_BUFFER_SIZE = "buffer_size"
CONF_BUILD_PATH = "build_path" CONF_BUILD_PATH = "build_path"
CONF_BUS_VOLTAGE = "bus_voltage" CONF_BUS_VOLTAGE = "bus_voltage"
@ -527,6 +528,7 @@ CONF_NAME_FONT = "name_font"
CONF_NBITS = "nbits" CONF_NBITS = "nbits"
CONF_NEC = "nec" CONF_NEC = "nec"
CONF_NETWORKS = "networks" CONF_NETWORKS = "networks"
CONF_NEVER = "never"
CONF_NEW_PASSWORD = "new_password" CONF_NEW_PASSWORD = "new_password"
CONF_NITROGEN_DIOXIDE = "nitrogen_dioxide" CONF_NITROGEN_DIOXIDE = "nitrogen_dioxide"
CONF_NOISE_LEVEL = "noise_level" CONF_NOISE_LEVEL = "noise_level"
@ -615,6 +617,7 @@ CONF_OTA = "ota"
CONF_OUTDOOR_TEMPERATURE = "outdoor_temperature" CONF_OUTDOOR_TEMPERATURE = "outdoor_temperature"
CONF_OUTPUT = "output" CONF_OUTPUT = "output"
CONF_OUTPUT_ID = "output_id" CONF_OUTPUT_ID = "output_id"
CONF_OUTPUT_SPEAKER = "output_speaker"
CONF_OUTPUTS = "outputs" CONF_OUTPUTS = "outputs"
CONF_OVERSAMPLING = "oversampling" CONF_OVERSAMPLING = "oversampling"
CONF_PACKAGES = "packages" CONF_PACKAGES = "packages"
@ -859,6 +862,7 @@ CONF_TARGET_TEMPERATURE_LOW = "target_temperature_low"
CONF_TARGET_TEMPERATURE_LOW_COMMAND_TOPIC = "target_temperature_low_command_topic" CONF_TARGET_TEMPERATURE_LOW_COMMAND_TOPIC = "target_temperature_low_command_topic"
CONF_TARGET_TEMPERATURE_LOW_STATE_TOPIC = "target_temperature_low_state_topic" CONF_TARGET_TEMPERATURE_LOW_STATE_TOPIC = "target_temperature_low_state_topic"
CONF_TARGET_TEMPERATURE_STATE_TOPIC = "target_temperature_state_topic" CONF_TARGET_TEMPERATURE_STATE_TOPIC = "target_temperature_state_topic"
CONF_TASK_STACK_IN_PSRAM = "task_stack_in_psram"
CONF_TEMPERATURE = "temperature" CONF_TEMPERATURE = "temperature"
CONF_TEMPERATURE_COMPENSATION = "temperature_compensation" CONF_TEMPERATURE_COMPENSATION = "temperature_compensation"
CONF_TEMPERATURE_OFFSET = "temperature_offset" CONF_TEMPERATURE_OFFSET = "temperature_offset"

View File

@ -689,7 +689,7 @@ class EsphomeCore:
_LOGGER.debug("Adding: %s", expression) _LOGGER.debug("Adding: %s", expression)
return expression return expression
def add_global(self, expression): def add_global(self, expression, prepend=False):
from esphome.cpp_generator import Expression, Statement, statement from esphome.cpp_generator import Expression, Statement, statement
if isinstance(expression, Expression): if isinstance(expression, Expression):
@ -698,7 +698,10 @@ class EsphomeCore:
raise ValueError( raise ValueError(
f"Add '{expression}' must be expression or statement, not {type(expression)}" f"Add '{expression}' must be expression or statement, not {type(expression)}"
) )
self.global_statements.append(expression) if prepend:
self.global_statements.insert(0, expression)
else:
self.global_statements.append(expression)
_LOGGER.debug("Adding global: %s", expression) _LOGGER.debug("Adding global: %s", expression)
return expression return expression

View File

@ -72,6 +72,9 @@ def validate_hostname(config):
def valid_include(value): def valid_include(value):
# Look for "<...>" includes
if value.startswith("<") and value.endswith(">"):
return value
try: try:
return cv.directory(value) return cv.directory(value)
except cv.Invalid: except cv.Invalid:
@ -360,7 +363,19 @@ async def to_code(config):
CORE.add_job(add_arduino_global_workaround) CORE.add_job(add_arduino_global_workaround)
if config[CONF_INCLUDES]: if config[CONF_INCLUDES]:
CORE.add_job(add_includes, config[CONF_INCLUDES]) # Get the <...> includes
system_includes = []
other_includes = []
for include in config[CONF_INCLUDES]:
if include.startswith("<") and include.endswith(">"):
system_includes.append(include)
else:
other_includes.append(include)
# <...> includes should be at the start
for include in system_includes:
cg.add_global(cg.RawStatement(f"#include {include}"), prepend=True)
# Other includes should be at the end
CORE.add_job(add_includes, other_includes)
if project_conf := config.get(CONF_PROJECT): if project_conf := config.get(CONF_PROJECT):
cg.add_define("ESPHOME_PROJECT_NAME", project_conf[CONF_NAME]) cg.add_define("ESPHOME_PROJECT_NAME", project_conf[CONF_NAME])
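The same split can be exercised on its own; the helper below mirrors the loop above as a standalone sketch (the include names are made up for illustration): "<...>" entries are treated as system includes and prepended, everything else is added at the end as before.

```python
# Standalone sketch mirroring the include partitioning above.
def partition_includes(includes):
    system_includes, other_includes = [], []
    for include in includes:
        if include.startswith("<") and include.endswith(">"):
            system_includes.append(include)
        else:
            other_includes.append(include)
    return system_includes, other_includes

system, other = partition_includes(["my_header.h", "<driver/gpio.h>", "<cmath>"])
print(system)  # ['<driver/gpio.h>', '<cmath>']
print(other)   # ['my_header.h']
```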

View File

@ -16,6 +16,8 @@
// Feature flags // Feature flags
#define USE_ALARM_CONTROL_PANEL #define USE_ALARM_CONTROL_PANEL
#define USE_AUDIO_FLAC_SUPPORT
#define USE_AUDIO_MP3_SUPPORT
#define USE_API #define USE_API
#define USE_API_NOISE #define USE_API_NOISE
#define USE_API_PLAINTEXT #define USE_API_PLAINTEXT

View File

@ -53,6 +53,13 @@ class GPIOPin {
virtual void pin_mode(gpio::Flags flags) = 0; virtual void pin_mode(gpio::Flags flags) = 0;
/**
* @brief Retrieve GPIO pin flags.
*
* @return The GPIO flags describing the pin mode and properties.
*/
virtual gpio::Flags get_flags() const = 0;
virtual bool digital_read() = 0; virtual bool digital_read() = 0;
virtual void digital_write(bool value) = 0; virtual void digital_write(bool value) = 0;

View File

@ -588,9 +588,9 @@ def add(expression: Union[Expression, Statement]):
CORE.add(expression) CORE.add(expression)
def add_global(expression: Union[SafeExpType, Statement]): def add_global(expression: Union[SafeExpType, Statement], prepend: bool = False):
"""Add an expression to the codegen global storage (above setup()).""" """Add an expression to the codegen global storage (above setup())."""
CORE.add_global(expression) CORE.add_global(expression, prepend)
def add_library(name: str, version: Optional[str], repository: Optional[str] = None): def add_library(name: str, version: Optional[str], repository: Optional[str] = None):
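In a component's to_code() the new flag would typically be used as in the hedged sketch below; the header name and define are hypothetical, while cg.add_global and cg.RawStatement are the existing codegen helpers already used for this purpose above.

```python
# Hypothetical component to_code() using the new prepend flag; only the
# header name and define are made up.
import esphome.codegen as cg

async def to_code(config):
    # Emit a system header before all other global statements.
    cg.add_global(cg.RawStatement("#include <sys/types.h>"), prepend=True)
    # Regular globals keep appending to the end as before.
    cg.add_global(cg.RawStatement("#define MY_COMPONENT_READY"))
```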

View File

@ -127,7 +127,8 @@ lib_deps =
ESPmDNS ; mdns (Arduino built-in) ESPmDNS ; mdns (Arduino built-in)
DNSServer ; captive_portal (Arduino built-in) DNSServer ; captive_portal (Arduino built-in)
esphome/ESP32-audioI2S@2.0.7 ; i2s_audio esphome/ESP32-audioI2S@2.0.7 ; i2s_audio
droscy/esp_wireguard@0.4.2 ; wireguard droscy/esp_wireguard@0.4.2 ; wireguard
esphome/esp-audio-libs@1.1.1 ; audio
build_flags = build_flags =
${common:arduino.build_flags} ${common:arduino.build_flags}
@ -148,6 +149,7 @@ lib_deps =
${common:idf.lib_deps} ${common:idf.lib_deps}
droscy/esp_wireguard@0.4.2 ; wireguard droscy/esp_wireguard@0.4.2 ; wireguard
kahrendt/ESPMicroSpeechFeatures@1.1.0 ; micro_wake_word kahrendt/ESPMicroSpeechFeatures@1.1.0 ; micro_wake_word
esphome/esp-audio-libs@1.1.1 ; audio
build_flags = build_flags =
${common:idf.build_flags} ${common:idf.build_flags}
-Wno-nonnull-compare -Wno-nonnull-compare

View File

@ -14,7 +14,7 @@ esptool==4.7.0
click==8.1.7 click==8.1.7
esphome-dashboard==20241217.1 esphome-dashboard==20241217.1
aioesphomeapi==24.6.2 aioesphomeapi==24.6.2
zeroconf==0.132.2 zeroconf==0.143.0
puremagic==1.27 puremagic==1.27
ruamel.yaml==0.18.6 # dashboard_import ruamel.yaml==0.18.6 # dashboard_import
glyphsets==1.0.0 glyphsets==1.0.0

View File

@ -0,0 +1,11 @@
uart:
- id: uart_a02yyuw
tx_pin: ${tx_pin}
rx_pin: ${rx_pin}
baud_rate: 9600
sensor:
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -1,13 +1,5 @@
uart: substitutions:
- id: uart_a02yyuw tx_pin: GPIO17
tx_pin: rx_pin: GPIO16
number: 17
rx_pin:
number: 16
baud_rate: 9600
sensor: <<: !include common.yaml
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -1,13 +1,5 @@
uart: substitutions:
- id: uart_a02yyuw tx_pin: GPIO4
tx_pin: rx_pin: GPIO5
number: 4
rx_pin:
number: 5
baud_rate: 9600
sensor: <<: !include common.yaml
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -1,13 +1,5 @@
uart: substitutions:
- id: uart_a02yyuw tx_pin: GPIO4
tx_pin: rx_pin: GPIO5
number: 4
rx_pin:
number: 5
baud_rate: 9600
sensor: <<: !include common.yaml
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -1,13 +1,5 @@
uart: substitutions:
- id: uart_a02yyuw tx_pin: GPIO17
tx_pin: rx_pin: GPIO16
number: 17
rx_pin:
number: 16
baud_rate: 9600
sensor: <<: !include common.yaml
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -1,13 +1,5 @@
uart: substitutions:
- id: uart_a02yyuw tx_pin: GPIO4
tx_pin: rx_pin: GPIO5
number: 4
rx_pin:
number: 5
baud_rate: 9600
sensor: <<: !include common.yaml
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -1,13 +1,5 @@
uart: substitutions:
- id: uart_a02yyuw tx_pin: GPIO4
tx_pin: rx_pin: GPIO5
number: 4
rx_pin:
number: 5
baud_rate: 9600
sensor: <<: !include common.yaml
- platform: a02yyuw
id: a02yyuw_sensor
name: a02yyuw Distance
uart_id: uart_a02yyuw

View File

@ -0,0 +1,9 @@
stepper:
- platform: a4988
id: a4988_stepper
step_pin: ${step_pin}
dir_pin: ${dir_pin}
sleep_pin: ${sleep_pin}
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -1,12 +1,6 @@
stepper: substitutions:
- platform: a4988 step_pin: GPIO22
id: a4988_stepper dir_pin: GPIO23
step_pin: sleep_pin: GPIO25
number: 22
dir_pin: <<: !include common.yaml
number: 23
sleep_pin:
number: 25
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -1,12 +1,6 @@
stepper: substitutions:
- platform: a4988 step_pin: GPIO2
id: a4988_stepper dir_pin: GPIO3
step_pin: sleep_pin: GPIO5
number: 2
dir_pin: <<: !include common.yaml
number: 3
sleep_pin:
number: 5
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -1,12 +1,6 @@
stepper: substitutions:
- platform: a4988 step_pin: GPIO2
id: a4988_stepper dir_pin: GPIO3
step_pin: sleep_pin: GPIO5
number: 2
dir_pin: <<: !include common.yaml
number: 3
sleep_pin:
number: 5
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -1,12 +1,6 @@
stepper: substitutions:
- platform: a4988 step_pin: GPIO22
id: a4988_stepper dir_pin: GPIO23
step_pin: sleep_pin: GPIO25
number: 22
dir_pin: <<: !include common.yaml
number: 23
sleep_pin:
number: 25
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -1,12 +1,6 @@
stepper: substitutions:
- platform: a4988 step_pin: GPIO1
id: a4988_stepper dir_pin: GPIO2
step_pin: sleep_pin: GPIO5
number: 1
dir_pin: <<: !include common.yaml
number: 2
sleep_pin:
number: 5
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -1,12 +1,6 @@
stepper: substitutions:
- platform: a4988 step_pin: GPIO2
id: a4988_stepper dir_pin: GPIO3
step_pin: sleep_pin: GPIO5
number: 2
dir_pin: <<: !include common.yaml
number: 3
sleep_pin:
number: 5
max_speed: 250 steps/s
acceleration: 100 steps/s^2
deceleration: 200 steps/s^2

View File

@ -0,0 +1,5 @@
output:
- platform: ac_dimmer
id: ac_dimmer_1
gate_pin: ${gate_pin}
zero_cross_pin: ${zero_cross_pin}

View File

@ -1,7 +1,5 @@
output: substitutions:
- platform: ac_dimmer gate_pin: GPIO18
id: ac_dimmer_1 zero_cross_pin: GPIO19
gate_pin:
number: 12 <<: !include common.yaml
zero_cross_pin:
number: 13

View File

@ -1,7 +1,5 @@
output: substitutions:
- platform: ac_dimmer gate_pin: GPIO5
id: ac_dimmer_1 zero_cross_pin: GPIO4
gate_pin:
number: 5 <<: !include common.yaml
zero_cross_pin:
number: 6

View File

@ -1,7 +1,5 @@
output: substitutions:
- platform: ac_dimmer gate_pin: GPIO5
id: ac_dimmer_1 zero_cross_pin: GPIO4
gate_pin:
number: 5 <<: !include common.yaml
zero_cross_pin:
number: 4

View File

@ -1,7 +1,5 @@
output: substitutions:
- platform: ac_dimmer gate_pin: GPIO5
id: ac_dimmer_1 zero_cross_pin: GPIO4
gate_pin:
number: 5 <<: !include common.yaml
zero_cross_pin:
number: 6

View File

@ -2,4 +2,4 @@ sensor:
- platform: adc - platform: adc
id: my_sensor id: my_sensor
pin: 4 pin: 4
attenuation: 11db attenuation: 12db

View File

@ -2,4 +2,4 @@ sensor:
- platform: adc - platform: adc
id: my_sensor id: my_sensor
pin: 1 pin: 1
attenuation: 11db attenuation: 12db

View File

@ -2,4 +2,4 @@ sensor:
- platform: adc - platform: adc
id: my_sensor id: my_sensor
pin: 1 pin: 1
attenuation: 11db attenuation: 12db

View File

@ -0,0 +1,14 @@
spi:
- id: spi_adc128s102
clk_pin: ${clk_pin}
mosi_pin: ${mosi_pin}
miso_pin: ${miso_pin}
adc128s102:
cs_pin: ${cs_pin}
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -1,14 +1,7 @@
spi: substitutions:
- id: spi_adc128s102 clk_pin: GPIO16
clk_pin: 16 mosi_pin: GPIO17
mosi_pin: 17 miso_pin: GPIO15
miso_pin: 15 cs_pin: GPIO12
adc128s102: <<: !include common.yaml
cs_pin: 12
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -1,14 +1,7 @@
spi: substitutions:
- id: spi_adc128s102 clk_pin: GPIO6
clk_pin: 6 mosi_pin: GPIO7
mosi_pin: 7 miso_pin: GPIO5
miso_pin: 5 cs_pin: GPIO2
adc128s102: <<: !include common.yaml
cs_pin: 8
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -1,14 +1,7 @@
spi: substitutions:
- id: spi_adc128s102 clk_pin: GPIO6
clk_pin: 6 mosi_pin: GPIO7
mosi_pin: 7 miso_pin: GPIO5
miso_pin: 5 cs_pin: GPIO2
adc128s102: <<: !include common.yaml
cs_pin: 8
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -1,14 +1,7 @@
spi: substitutions:
- id: spi_adc128s102 clk_pin: GPIO16
clk_pin: 16 mosi_pin: GPIO17
mosi_pin: 17 miso_pin: GPIO15
miso_pin: 15 cs_pin: GPIO12
adc128s102: <<: !include common.yaml
cs_pin: 12
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -1,14 +1,7 @@
spi: substitutions:
- id: spi_adc128s102 clk_pin: GPIO14
clk_pin: 14 mosi_pin: GPIO13
mosi_pin: 13 miso_pin: GPIO12
miso_pin: 12 cs_pin: GPIO15
adc128s102: <<: !include common.yaml
cs_pin: 15
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -1,14 +1,7 @@
spi: substitutions:
- id: spi_adc128s102 clk_pin: GPIO2
clk_pin: 2 mosi_pin: GPIO3
mosi_pin: 3 miso_pin: GPIO4
miso_pin: 4 cs_pin: GPIO5
adc128s102: <<: !include common.yaml
cs_pin: 5
id: adc128s102_adc
sensor:
- platform: adc128s102
id: adc128s102_channel_0
channel: 0

View File

@ -5,7 +5,7 @@ light:
chipset: ws2812 chipset: ws2812
rgb_order: GRB rgb_order: GRB
num_leds: 256 num_leds: 256
pin: 2 pin: ${pin}
rmt_channel: 0 rmt_channel: 0
display: display:

View File

@ -3,7 +3,7 @@ light:
id: led_matrix_32x8 id: led_matrix_32x8
name: led_matrix_32x8 name: led_matrix_32x8
chipset: WS2812B chipset: WS2812B
pin: 2 pin: ${pin}
num_leds: 256 num_leds: 256
rgb_order: GRB rgb_order: GRB
default_transition_length: 0s default_transition_length: 0s

View File

@ -5,7 +5,7 @@ light:
chipset: ws2812 chipset: ws2812
rgb_order: GRB rgb_order: GRB
num_leds: 256 num_leds: 256
pin: 2 pin: ${pin}
display: display:
- platform: addressable_light - platform: addressable_light

Some files were not shown because too many files have changed in this diff Show More