1
0
mirror of https://github.com/esphome/esphome.git synced 2025-09-28 16:12:24 +01:00

[i2s_audio, microphone, micro_wake_word, voice_assistant] Use microphone source to process incoming audio (#8645)

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
Kevin Ahrendt
2025-04-29 17:27:03 -05:00
committed by GitHub
parent 0fe6c65ba3
commit 9f629dcaa2
15 changed files with 166 additions and 98 deletions

View File

@@ -88,7 +88,14 @@ CONFIG_SCHEMA = cv.All(
cv.Schema(
{
cv.GenerateID(): cv.declare_id(VoiceAssistant),
cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone),
cv.Optional(
CONF_MICROPHONE, default={}
): microphone.microphone_source_schema(
min_bits_per_sample=16,
max_bits_per_sample=16,
min_channels=1,
max_channels=1,
),
cv.Exclusive(CONF_SPEAKER, "output"): cv.use_id(speaker.Speaker),
cv.Exclusive(CONF_MEDIA_PLAYER, "output"): cv.use_id(
media_player.MediaPlayer
@@ -163,13 +170,26 @@ CONFIG_SCHEMA = cv.All(
tts_stream_validate,
)
# Final-validation schema for this component. When a microphone entry is
# present, it is validated by microphone.final_validate_microphone_source_schema,
# which is called here with the component name "voice_assistant" and
# sample_rate=16000. extra=cv.ALLOW_EXTRA is passed so that keys not listed in
# this schema are not rejected.
FINAL_VALIDATE_SCHEMA = cv.All(
cv.Schema(
{
cv.Optional(
CONF_MICROPHONE
): microphone.final_validate_microphone_source_schema(
"voice_assistant", sample_rate=16000
),
},
extra=cv.ALLOW_EXTRA,
),
)
async def to_code(config):
var = cg.new_Pvariable(config[CONF_ID])
await cg.register_component(var, config)
mic = await cg.get_variable(config[CONF_MICROPHONE])
cg.add(var.set_microphone(mic))
mic_source = await microphone.microphone_source_to_code(config[CONF_MICROPHONE])
cg.add(var.set_microphone_source(mic_source))
if CONF_SPEAKER in config:
spkr = await cg.get_variable(config[CONF_SPEAKER])

View File

@@ -29,10 +29,10 @@ static const size_t SPEAKER_BUFFER_SIZE = 16 * RECEIVE_SIZE;
// Constructor: stores this instance in global_voice_assistant.
VoiceAssistant::VoiceAssistant() { global_voice_assistant = this; }
void VoiceAssistant::setup() {
this->mic_->add_data_callback([this](const std::vector<int16_t> &data) {
this->mic_source_->add_data_callback([this](const std::vector<uint8_t> &data) {
std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_;
if (this->ring_buffer_.use_count() > 1) {
temp_ring_buffer->write((void *) data.data(), data.size() * sizeof(int16_t));
temp_ring_buffer->write((void *) data.data(), data.size());
}
});
}
@@ -162,7 +162,7 @@ void VoiceAssistant::reset_conversation_id() {
void VoiceAssistant::loop() {
if (this->api_client_ == nullptr && this->state_ != State::IDLE && this->state_ != State::STOP_MICROPHONE &&
this->state_ != State::STOPPING_MICROPHONE) {
if (this->mic_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
if (this->mic_source_->is_running() || this->state_ == State::STARTING_MICROPHONE) {
this->set_state_(State::STOP_MICROPHONE, State::IDLE);
} else {
this->set_state_(State::IDLE, State::IDLE);
@@ -193,12 +193,12 @@ void VoiceAssistant::loop() {
}
this->clear_buffers_();
this->mic_->start();
this->mic_source_->start();
this->set_state_(State::STARTING_MICROPHONE);
break;
}
case State::STARTING_MICROPHONE: {
if (this->mic_->is_running()) {
if (this->mic_source_->is_running()) {
this->set_state_(this->desired_state_);
}
break;
@@ -262,8 +262,8 @@ void VoiceAssistant::loop() {
break;
}
case State::STOP_MICROPHONE: {
if (this->mic_->is_running()) {
this->mic_->stop();
if (this->mic_source_->is_running()) {
this->mic_source_->stop();
this->set_state_(State::STOPPING_MICROPHONE);
} else {
this->set_state_(this->desired_state_);
@@ -271,7 +271,7 @@ void VoiceAssistant::loop() {
break;
}
case State::STOPPING_MICROPHONE: {
if (this->mic_->is_stopped()) {
if (this->mic_source_->is_stopped()) {
this->set_state_(this->desired_state_);
}
break;
@@ -478,7 +478,7 @@ void VoiceAssistant::start_streaming() {
ESP_LOGD(TAG, "Client started, streaming microphone");
this->audio_mode_ = AUDIO_MODE_API;
if (this->mic_->is_running()) {
if (this->mic_source_->is_running()) {
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
} else {
this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);
@@ -508,7 +508,7 @@ void VoiceAssistant::start_streaming(struct sockaddr_storage *addr, uint16_t por
return;
}
if (this->mic_->is_running()) {
if (this->mic_source_->is_running()) {
this->set_state_(State::STREAMING_MICROPHONE, State::STREAMING_MICROPHONE);
} else {
this->set_state_(State::START_MICROPHONE, State::STREAMING_MICROPHONE);

View File

@@ -11,7 +11,7 @@
#include "esphome/components/api/api_connection.h"
#include "esphome/components/api/api_pb2.h"
#include "esphome/components/microphone/microphone.h"
#include "esphome/components/microphone/microphone_source.h"
#ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h"
#endif
@@ -98,7 +98,7 @@ class VoiceAssistant : public Component {
void start_streaming(struct sockaddr_storage *addr, uint16_t port);
void failed_to_start();
/// Stores the given Microphone pointer in mic_.
void set_microphone(microphone::Microphone *mic) { this->mic_ = mic; }
/// Stores the given MicrophoneSource pointer in mic_source_.
void set_microphone_source(microphone::MicrophoneSource *mic_source) { this->mic_source_ = mic_source; }
#ifdef USE_SPEAKER
void set_speaker(speaker::Speaker *speaker) {
this->speaker_ = speaker;
@@ -249,7 +249,7 @@ class VoiceAssistant : public Component {
bool has_timers_{false};
bool timer_tick_running_{false};
microphone::Microphone *mic_{nullptr};
microphone::MicrophoneSource *mic_source_{nullptr};
#ifdef USE_SPEAKER
void write_speaker_();
speaker::Speaker *speaker_{nullptr};