From 2fc4e8827131f3199a2e15c64201eed1312d0688 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Wed, 11 Oct 2023 20:51:29 -0500 Subject: [PATCH] Small fixes for voice assistant (#5513) Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com> --- .../i2s_audio/speaker/i2s_audio_speaker.cpp | 26 +++++++++----- .../voice_assistant/voice_assistant.cpp | 36 ++++++++++++++----- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp index a0934e3844..592a27b739 100644 --- a/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp +++ b/esphome/components/i2s_audio/speaker/i2s_audio_speaker.cpp @@ -11,7 +11,7 @@ namespace esphome { namespace i2s_audio { -static const size_t BUFFER_COUNT = 10; +static const size_t BUFFER_COUNT = 20; static const char *const TAG = "i2s_audio.speaker"; @@ -19,7 +19,7 @@ void I2SAudioSpeaker::setup() { ESP_LOGCONFIG(TAG, "Setting up I2S Audio Speaker..."); this->buffer_queue_ = xQueueCreate(BUFFER_COUNT, sizeof(DataEvent)); - this->event_queue_ = xQueueCreate(20, sizeof(TaskEvent)); + this->event_queue_ = xQueueCreate(BUFFER_COUNT, sizeof(TaskEvent)); } void I2SAudioSpeaker::start() { this->state_ = speaker::STATE_STARTING; } @@ -47,7 +47,7 @@ void I2SAudioSpeaker::player_task(void *params) { .communication_format = I2S_COMM_FORMAT_STAND_I2S, .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, .dma_buf_count = 8, - .dma_buf_len = 1024, + .dma_buf_len = 128, .use_apll = false, .tx_desc_auto_clear = true, .fixed_mclk = I2S_PIN_NO_CHANGE, @@ -60,7 +60,17 @@ void I2SAudioSpeaker::player_task(void *params) { } #endif - i2s_driver_install(this_speaker->parent_->get_port(), &config, 0, nullptr); + esp_err_t err = i2s_driver_install(this_speaker->parent_->get_port(), &config, 0, nullptr); + if (err != ESP_OK) { + event.type = TaskEventType::WARNING; + event.err = err; + xQueueSend(this_speaker->event_queue_, &event, 0); + event.type = TaskEventType::STOPPED; + xQueueSend(this_speaker->event_queue_, &event, 0); + while (true) { + delay(10); + } + } #if SOC_I2S_SUPPORTS_DAC if (this_speaker->internal_dac_mode_ == I2S_DAC_CHANNEL_DISABLE) { @@ -88,9 +98,7 @@ void I2SAudioSpeaker::player_task(void *params) { } if (data_event.stop) { // Stop signal from main thread - while (xQueueReceive(this_speaker->buffer_queue_, &data_event, 0) == pdTRUE) { - // Flush queue - } + xQueueReset(this_speaker->buffer_queue_); // Flush queue break; } size_t bytes_written; @@ -103,7 +111,7 @@ void I2SAudioSpeaker::player_task(void *params) { uint32_t sample = (buffer[current] << 16) | (buffer[current] & 0xFFFF); esp_err_t err = i2s_write(this_speaker->parent_->get_port(), &sample, sizeof(sample), &bytes_written, - (100 / portTICK_PERIOD_MS)); + (10 / portTICK_PERIOD_MS)); if (err != ESP_OK) { event = {.type = TaskEventType::WARNING, .err = err}; xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); @@ -122,7 +130,6 @@ void I2SAudioSpeaker::player_task(void *params) { event.type = TaskEventType::STOPPING; xQueueSend(this_speaker->event_queue_, &event, portMAX_DELAY); - i2s_stop(this_speaker->parent_->get_port()); i2s_driver_uninstall(this_speaker->parent_->get_port()); event.type = TaskEventType::STOPPED; @@ -162,6 +169,7 @@ void I2SAudioSpeaker::watch_() { vTaskDelete(this->player_task_handle_); this->player_task_handle_ = nullptr; this->parent_->unlock(); + xQueueReset(this->buffer_queue_); break; case TaskEventType::WARNING: ESP_LOGW(TAG, "Error writing to I2S: %s", esp_err_to_name(event.err)); diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 802ae508ff..448df61d80 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -188,6 +188,9 @@ void VoiceAssistant::loop() { } else { ESP_LOGD(TAG, "VAD detected speech"); this->set_state_(State::START_PIPELINE, State::STREAMING_MICROPHONE); + + // Reset for next time + this->vad_counter_ = 0; } } else { if (this->vad_counter_ > 0) { @@ -270,13 +273,18 @@ void VoiceAssistant::loop() { this->speaker_buffer_size_ += len; } } else { - ESP_LOGW(TAG, "Speaker buffer full."); + ESP_LOGW(TAG, "Receive buffer full."); } if (this->speaker_buffer_size_ > 0) { size_t written = this->speaker_->play(this->speaker_buffer_, this->speaker_buffer_size_); - memmove(this->speaker_buffer_, this->speaker_buffer_ + written, this->speaker_buffer_size_ - written); - this->speaker_buffer_size_ -= written; - this->speaker_buffer_index_ -= written; + if (written > 0) { + memmove(this->speaker_buffer_, this->speaker_buffer_ + written, this->speaker_buffer_size_ - written); + this->speaker_buffer_size_ -= written; + this->speaker_buffer_index_ -= written; + this->set_timeout("speaker-timeout", 1000, [this]() { this->speaker_->stop(); }); + } else { + ESP_LOGW(TAG, "Speaker buffer full."); + } } playing = this->speaker_->is_running(); } @@ -287,7 +295,10 @@ void VoiceAssistant::loop() { } #endif if (playing) { - this->set_timeout("playing", 100, [this]() { this->set_state_(State::IDLE, State::IDLE); }); + this->set_timeout("playing", 100, [this]() { + this->cancel_timeout("speaker-timeout"); + this->set_state_(State::IDLE, State::IDLE); + }); } break; } @@ -483,8 +494,17 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { } case api::enums::VOICE_ASSISTANT_RUN_END: { ESP_LOGD(TAG, "Assist Pipeline ended"); - if (this->state_ != State::STREAMING_RESPONSE && this->state_ != State::IDLE) { - this->set_state_(State::IDLE, State::IDLE); + if (this->state_ == State::STREAMING_MICROPHONE) { +#ifdef USE_ESP_ADF + if (this->use_wake_word_) { + rb_reset(this->ring_buffer_); + // No need to stop the microphone since we didn't use the speaker + this->set_state_(State::WAIT_FOR_VAD, State::WAITING_FOR_VAD); + } else +#endif + { + this->set_state_(State::IDLE, State::IDLE); + } } this->end_trigger_->trigger(); break; @@ -500,7 +520,7 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) { } } if (code == "wake-word-timeout" || code == "wake_word_detection_aborted") { - this->set_state_(State::STOP_MICROPHONE, State::IDLE); + // Don't change state here since either the "tts-end" or "run-end" events will do it. return; } ESP_LOGE(TAG, "Error: %s - %s", code.c_str(), message.c_str());