diff --git a/esphome/components/api/api.proto b/esphome/components/api/api.proto index ad6fc79cf3..1c40e8014e 100644 --- a/esphome/components/api/api.proto +++ b/esphome/components/api/api.proto @@ -1553,6 +1553,23 @@ message VoiceAssistantTimerEventResponse { bool is_active = 6; } +message VoiceAssistantAnnounceRequest { + option (id) = 119; + option (source) = SOURCE_CLIENT; + option (ifdef) = "USE_VOICE_ASSISTANT"; + + string media_id = 1; + string text = 2; +} + +message VoiceAssistantAnnounceFinished { + option (id) = 120; + option (source) = SOURCE_SERVER; + option (ifdef) = "USE_VOICE_ASSISTANT"; + + bool success = 1; +} + // ==================== ALARM CONTROL PANEL ==================== enum AlarmControlPanelState { ALARM_STATE_DISARMED = 0; diff --git a/esphome/components/api/api_connection.cpp b/esphome/components/api/api_connection.cpp index a655d06e66..6b7051a704 100644 --- a/esphome/components/api/api_connection.cpp +++ b/esphome/components/api/api_connection.cpp @@ -1213,6 +1213,16 @@ void APIConnection::on_voice_assistant_timer_event_response(const VoiceAssistant } }; +void APIConnection::on_voice_assistant_announce_request(const VoiceAssistantAnnounceRequest &msg) { + if (voice_assistant::global_voice_assistant != nullptr) { + if (voice_assistant::global_voice_assistant->get_api_connection() != this) { + return; + } + + voice_assistant::global_voice_assistant->on_announce(msg); + } +} + #endif #ifdef USE_ALARM_CONTROL_PANEL diff --git a/esphome/components/api/api_connection.h b/esphome/components/api/api_connection.h index 714e806470..e8d66a5e07 100644 --- a/esphome/components/api/api_connection.h +++ b/esphome/components/api/api_connection.h @@ -151,6 +151,7 @@ class APIConnection : public APIServerConnection { void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override; void on_voice_assistant_audio(const VoiceAssistantAudio &msg) override; void on_voice_assistant_timer_event_response(const VoiceAssistantTimerEventResponse &msg) override; + void on_voice_assistant_announce_request(const VoiceAssistantAnnounceRequest &msg) override; #endif #ifdef USE_ALARM_CONTROL_PANEL diff --git a/esphome/components/api/api_pb2.cpp b/esphome/components/api/api_pb2.cpp index c944d0dae8..2a1552d6fc 100644 --- a/esphome/components/api/api_pb2.cpp +++ b/esphome/components/api/api_pb2.cpp @@ -7061,6 +7061,59 @@ void VoiceAssistantTimerEventResponse::dump_to(std::string &out) const { out.append("}"); } #endif +bool VoiceAssistantAnnounceRequest::decode_length(uint32_t field_id, ProtoLengthDelimited value) { + switch (field_id) { + case 1: { + this->media_id = value.as_string(); + return true; + } + case 2: { + this->text = value.as_string(); + return true; + } + default: + return false; + } +} +void VoiceAssistantAnnounceRequest::encode(ProtoWriteBuffer buffer) const { + buffer.encode_string(1, this->media_id); + buffer.encode_string(2, this->text); +} +#ifdef HAS_PROTO_MESSAGE_DUMP +void VoiceAssistantAnnounceRequest::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("VoiceAssistantAnnounceRequest {\n"); + out.append(" media_id: "); + out.append("'").append(this->media_id).append("'"); + out.append("\n"); + + out.append(" text: "); + out.append("'").append(this->text).append("'"); + out.append("\n"); + out.append("}"); +} +#endif +bool VoiceAssistantAnnounceFinished::decode_varint(uint32_t field_id, ProtoVarInt value) { + switch (field_id) { + case 1: { + this->success = value.as_bool(); + return true; + } + default: + return false; + } +} +void VoiceAssistantAnnounceFinished::encode(ProtoWriteBuffer buffer) const { buffer.encode_bool(1, this->success); } +#ifdef HAS_PROTO_MESSAGE_DUMP +void VoiceAssistantAnnounceFinished::dump_to(std::string &out) const { + __attribute__((unused)) char buffer[64]; + out.append("VoiceAssistantAnnounceFinished {\n"); + out.append(" success: "); + out.append(YESNO(this->success)); + out.append("\n"); + out.append("}"); +} +#endif bool ListEntitiesAlarmControlPanelResponse::decode_varint(uint32_t field_id, ProtoVarInt value) { switch (field_id) { case 6: { diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h index 3f609c793c..6fab1f57e0 100644 --- a/esphome/components/api/api_pb2.h +++ b/esphome/components/api/api_pb2.h @@ -1825,6 +1825,29 @@ class VoiceAssistantTimerEventResponse : public ProtoMessage { bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override; bool decode_varint(uint32_t field_id, ProtoVarInt value) override; }; +class VoiceAssistantAnnounceRequest : public ProtoMessage { + public: + std::string media_id{}; + std::string text{}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override; +}; +class VoiceAssistantAnnounceFinished : public ProtoMessage { + public: + bool success{false}; + void encode(ProtoWriteBuffer buffer) const override; +#ifdef HAS_PROTO_MESSAGE_DUMP + void dump_to(std::string &out) const override; +#endif + + protected: + bool decode_varint(uint32_t field_id, ProtoVarInt value) override; +}; class ListEntitiesAlarmControlPanelResponse : public ProtoMessage { public: std::string object_id{}; diff --git a/esphome/components/api/api_pb2_service.cpp b/esphome/components/api/api_pb2_service.cpp index 269a755e9e..faa977389a 100644 --- a/esphome/components/api/api_pb2_service.cpp +++ b/esphome/components/api/api_pb2_service.cpp @@ -486,6 +486,16 @@ bool APIServerConnectionBase::send_voice_assistant_audio(const VoiceAssistantAud #endif #ifdef USE_VOICE_ASSISTANT #endif +#ifdef USE_VOICE_ASSISTANT +#endif +#ifdef USE_VOICE_ASSISTANT +bool APIServerConnectionBase::send_voice_assistant_announce_finished(const VoiceAssistantAnnounceFinished &msg) { +#ifdef HAS_PROTO_MESSAGE_DUMP + ESP_LOGVV(TAG, "send_voice_assistant_announce_finished: %s", msg.dump().c_str()); +#endif + return this->send_message_(msg, 120); +} +#endif #ifdef USE_ALARM_CONTROL_PANEL bool APIServerConnectionBase::send_list_entities_alarm_control_panel_response( const ListEntitiesAlarmControlPanelResponse &msg) { @@ -1135,6 +1145,17 @@ bool APIServerConnectionBase::read_message(uint32_t msg_size, uint32_t msg_type, ESP_LOGVV(TAG, "on_update_command_request: %s", msg.dump().c_str()); #endif this->on_update_command_request(msg); +#endif + break; + } + case 119: { +#ifdef USE_VOICE_ASSISTANT + VoiceAssistantAnnounceRequest msg; + msg.decode(msg_data, msg_size); +#ifdef HAS_PROTO_MESSAGE_DUMP + ESP_LOGVV(TAG, "on_voice_assistant_announce_request: %s", msg.dump().c_str()); +#endif + this->on_voice_assistant_announce_request(msg); #endif break; } diff --git a/esphome/components/api/api_pb2_service.h b/esphome/components/api/api_pb2_service.h index 83bfc2ed98..f3803ad628 100644 --- a/esphome/components/api/api_pb2_service.h +++ b/esphome/components/api/api_pb2_service.h @@ -247,6 +247,12 @@ class APIServerConnectionBase : public ProtoService { #ifdef USE_VOICE_ASSISTANT virtual void on_voice_assistant_timer_event_response(const VoiceAssistantTimerEventResponse &value){}; #endif +#ifdef USE_VOICE_ASSISTANT + virtual void on_voice_assistant_announce_request(const VoiceAssistantAnnounceRequest &value){}; +#endif +#ifdef USE_VOICE_ASSISTANT + bool send_voice_assistant_announce_finished(const VoiceAssistantAnnounceFinished &msg); +#endif #ifdef USE_ALARM_CONTROL_PANEL bool send_list_entities_alarm_control_panel_response(const ListEntitiesAlarmControlPanelResponse &msg); #endif diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp index 43c7428858..577de630fb 100644 --- a/esphome/components/voice_assistant/voice_assistant.cpp +++ b/esphome/components/voice_assistant/voice_assistant.cpp @@ -396,6 +396,10 @@ void VoiceAssistant::loop() { this->set_timeout("playing", 2000, [this]() { this->cancel_timeout("speaker-timeout"); this->set_state_(State::IDLE, State::IDLE); + + api::VoiceAssistantAnnounceFinished msg; + msg.success = true; + this->api_client_->send_voice_assistant_announce_finished(msg); }); } break; @@ -866,6 +870,18 @@ void VoiceAssistant::timer_tick_() { this->timer_tick_trigger_->trigger(res); } +void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg) { +#ifdef USE_MEDIA_PLAYER + if (this->media_player_ != nullptr) { + this->tts_start_trigger_->trigger(msg.text); + this->media_player_->make_call().set_media_url(msg.media_id).set_announcement(true).perform(); + this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE); + this->tts_end_trigger_->trigger(msg.media_id); + this->end_trigger_->trigger(); + } +#endif +} + VoiceAssistant *global_voice_assistant = nullptr; // NOLINT(cppcoreguidelines-avoid-non-const-global-variables) } // namespace voice_assistant diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h index 88cb0dd413..b0a172332f 100644 --- a/esphome/components/voice_assistant/voice_assistant.h +++ b/esphome/components/voice_assistant/voice_assistant.h @@ -132,6 +132,7 @@ class VoiceAssistant : public Component { void on_event(const api::VoiceAssistantEventResponse &msg); void on_audio(const api::VoiceAssistantAudio &msg); void on_timer_event(const api::VoiceAssistantTimerEventResponse &msg); + void on_announce(const api::VoiceAssistantAnnounceRequest &msg); bool is_running() const { return this->state_ != State::IDLE; } void set_continuous(bool continuous) { this->continuous_ = continuous; }