[i2s_audio, microphone, micro_wake_word, voice_assistant] Use microphone source to process incoming audio (#8645)

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
2025-09-28 16:12:24 +01:00 · 2025-04-29 17:27:03 -05:00
parent 0fe6c65ba3
commit 9f629dcaa2
15 changed files with 166 additions and 98 deletions
--- a/esphome/components/voice_assistant/__init__.py
+++ b/esphome/components/voice_assistant/__init__.py
@@ -88,7 +88,14 @@ CONFIG_SCHEMA = cv.All(
    cv.Schema(
        {
            cv.GenerateID(): cv.declare_id(VoiceAssistant),
-            cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
+            cv.Optional(
+                CONF_MICROPHONE, default={}
+            ): microphone.microphone_source_schema(
+                min_bits_per_sample=16,
+                max_bits_per_sample=16,
+                min_channels=1,
+                max_channels=1,
+            ),
            cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
            cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
                media_player.MediaPlayer
@@ -163,13 +170,26 @@ CONFIG_SCHEMA = cv.All(
    tts_stream_validate,
 )

+FINAL_VALIDATE_SCHEMA = cv.All(
+    cv.Schema(
+        {
+            cv.Optional(
+                CONF_MICROPHONE
+            ): microphone.final_validate_microphone_source_schema(
+                "voice_assistant", sample_rate=16000
+            ),
+        },
+        extra=cv.ALLOW_EXTRA,
+    ),
+)
+

 async def to_code(config):
    var = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(var, config)

-    mic = await cg.get_variable(config[CONF_MICROPHONE])
-    cg.add(var.set_microphone(mic))
+    mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
+    cg.add(var.set_microphone_source(mic_source))

    if CONF_SPEAKER in config:
        spkr = await cg.get_variable(config[CONF_SPEAKER])
--- a/esphome/components/voice_assistant/voice_assistant.cpp
+++ b/esphome/components/voice_assistant/voice_assistant.cpp
@@ -29,10 +29,10 @@ static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;
 VoiceAssistant::VoiceAssistant() { global_voice_assistant = this; }

 void VoiceAssistant::setup() {
-  this->mic_->add_data_callback([this](const std::vector<int16_t> &data) {
+  this->mic_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
    std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_;
    if (this->ring_buffer_.use_count() > 1) {
-      temp_ring_buffer->write((void *) data.data(), data.size() * sizeof(int16_t));
+      temp_ring_buffer->write((void *) data.data(), data.size());
    }
  });
 }
@@ -162,7 +162,7 @@ void VoiceAssistant::reset_conversation_id() {
 void VoiceAssistant::loop() {
  if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
      this->state_ != State::STOPPING_MICROPHONE) {
-    if (this->mic_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
+    if (this->mic_source_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
      this->set_state_(State::STOP_MICROPHONE, State::IDLE);
    } else {
      this->set_state_(State::IDLE, State::IDLE);
@@ -193,12 +193,12 @@ void VoiceAssistant::loop() {
      }
      this->clear_buffers_();

-      this->mic_->start();
+      this->mic_source_->start();
      this->set_state_(State::STARTING_MICROPHONE);
      break;
    }
    case State::STARTING_MICROPHONE: {
-      if (this->mic_->is_running()) {
+      if (this->mic_source_->is_running()) {
        this->set_state_(this->desired_state_);
      }
      break;
@@ -262,8 +262,8 @@ void VoiceAssistant::loop() {
      break;
    }
    case State::STOP_MICROPHONE: {
-      if (this->mic_->is_running()) {
-        this->mic_->stop();
+      if (this->mic_source_->is_running()) {
+        this->mic_source_->stop();
        this->set_state_(State::STOPPING_MICROPHONE);
      } else {
        this->set_state_(this->desired_state_);
@@ -271,7 +271,7 @@ void VoiceAssistant::loop() {
      break;
    }
    case State::STOPPING_MICROPHONE: {
-      if (this->mic_->is_stopped()) {
+      if (this->mic_source_->is_stopped()) {
        this->set_state_(this->desired_state_);
      }
      break;
@@ -478,7 +478,7 @@ void VoiceAssistant::start_streaming() {
  ESP_LOGD(TAG, "Client started, streaming microphone");
  this->audio_mode_ = AUDIO_MODE_API;

-  if (this->mic_->is_running()) {
+  if (this->mic_source_->is_running()) {
    this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
  } else {
    this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
@@ -508,7 +508,7 @@ void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t por
    return;
  }

-  if (this->mic_->is_running()) {
+  if (this->mic_source_->is_running()) {
    this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
  } else {
    this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
--- a/esphome/components/voice_assistant/voice_assistant.h
+++ b/esphome/components/voice_assistant/voice_assistant.h
@@ -11,7 +11,7 @@

 #include "esphome/components/api/api_connection.h"
 #include "esphome/components/api/api_pb2.h"
-#include "esphome/components/microphone/microphone.h"
+#include "esphome/components/microphone/microphone_source.h"
 #ifdef USE_SPEAKER
 #include "esphome/components/speaker/speaker.h"
 #endif
@@ -98,7 +98,7 @@ class VoiceAssistant : public Component {
  void start_streaming(struct sockaddr_storage *addr, uint16_t port);
  void failed_to_start();

-  void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; }
+  void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
 #ifdef USE_SPEAKER
  void set_speaker(speaker::Speaker *speaker) {
    this->speaker_ = speaker;
@@ -249,7 +249,7 @@ class VoiceAssistant : public Component {
  bool has_timers_{false};
  bool timer_tick_running_{false};

-  microphone::Microphone *mic_{nullptr};
+  microphone::MicrophoneSource *mic_source_{nullptr};
 #ifdef USE_SPEAKER
  void write_speaker_();
  speaker::Speaker *speaker_{nullptr};