From 746488cabf2d4ac8db4b0d7883dc72dd8c5aa73c Mon Sep 17 00:00:00 2001
From: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
Date: Wed, 19 Jul 2023 11:38:47 +1200
Subject: [PATCH] Fix silence detection flag on voice assistant (#5120)

---
 esphome/components/api/api.proto                       | 1 +
 esphome/components/api/api_connection.cpp              | 3 ++-
 esphome/components/api/api_connection.h                | 2 +-
 esphome/components/api/api_pb2.cpp                     | 9 +++++++++
 esphome/components/api/api_pb2.h                       | 1 +
 esphome/components/api/api_server.cpp                  | 6 +++---
 esphome/components/api/api_server.h                    | 2 +-
 esphome/components/voice_assistant/voice_assistant.cpp | 2 +-
 esphome/components/voice_assistant/voice_assistant.h   | 6 +-----
 9 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/esphome/components/api/api.proto b/esphome/components/api/api.proto
index 0d68d9fe55..86685aa5e6 100644
--- a/esphome/components/api/api.proto
+++ b/esphome/components/api/api.proto
@@ -1420,6 +1420,7 @@ message VoiceAssistantRequest {
 
   bool start = 1;
   string conversation_id = 2;
+  bool use_vad = 3;
 }
 
 message VoiceAssistantResponse {
diff --git a/esphome/components/api/api_connection.cpp b/esphome/components/api/api_connection.cpp
index 858ff0e525..a46efd80e5 100644
--- a/esphome/components/api/api_connection.cpp
+++ b/esphome/components/api/api_connection.cpp
@@ -907,12 +907,13 @@ BluetoothConnectionsFreeResponse APIConnection::subscribe_bluetooth_connections_
 #endif
 
 #ifdef USE_VOICE_ASSISTANT
-bool APIConnection::request_voice_assistant(bool start, const std::string &conversation_id) {
+bool APIConnection::request_voice_assistant(bool start, const std::string &conversation_id, bool use_vad) {
   if (!this->voice_assistant_subscription_)
     return false;
   VoiceAssistantRequest msg;
   msg.start = start;
   msg.conversation_id = conversation_id;
+  msg.use_vad = use_vad;
   return this->send_voice_assistant_request(msg);
 }
 void APIConnection::on_voice_assistant_response(const VoiceAssistantResponse &msg) {
diff --git a/esphome/components/api/api_connection.h b/esphome/components/api/api_connection.h
index c146adff02..acc4578661 100644
--- a/esphome/components/api/api_connection.h
+++ b/esphome/components/api/api_connection.h
@@ -124,7 +124,7 @@ class APIConnection : public APIServerConnection {
   void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) override {
     this->voice_assistant_subscription_ = msg.subscribe;
   }
-  bool request_voice_assistant(bool start, const std::string &conversation_id);
+  bool request_voice_assistant(bool start, const std::string &conversation_id, bool use_vad);
   void on_voice_assistant_response(const VoiceAssistantResponse &msg) override;
   void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override;
 #endif
diff --git a/esphome/components/api/api_pb2.cpp b/esphome/components/api/api_pb2.cpp
index 8c7f6d0c4a..3a2d980e57 100644
--- a/esphome/components/api/api_pb2.cpp
+++ b/esphome/components/api/api_pb2.cpp
@@ -6348,6 +6348,10 @@ bool VoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarInt value)
       this->start = value.as_bool();
       return true;
     }
+    case 3: {
+      this->use_vad = value.as_bool();
+      return true;
+    }
     default:
       return false;
   }
@@ -6365,6 +6369,7 @@ bool VoiceAssistantRequest::decode_length(uint32_t field_id, ProtoLengthDelimite
 void VoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const {
   buffer.encode_bool(1, this->start);
   buffer.encode_string(2, this->conversation_id);
+  buffer.encode_bool(3, this->use_vad);
 }
 #ifdef HAS_PROTO_MESSAGE_DUMP
 void VoiceAssistantRequest::dump_to(std::string &out) const {
@@ -6377,6 +6382,10 @@ void VoiceAssistantRequest::dump_to(std::string &out) const {
   out.append("  conversation_id: ");
   out.append("'").append(this->conversation_id).append("'");
   out.append("\n");
+
+  out.append("  use_vad: ");
+  out.append(YESNO(this->use_vad));
+  out.append("\n");
   out.append("}");
 }
 #endif
diff --git a/esphome/components/api/api_pb2.h b/esphome/components/api/api_pb2.h
index 769f7aaff5..627165953d 100644
--- a/esphome/components/api/api_pb2.h
+++ b/esphome/components/api/api_pb2.h
@@ -1655,6 +1655,7 @@ class VoiceAssistantRequest : public ProtoMessage {
  public:
   bool start{false};
   std::string conversation_id{};
+  bool use_vad{false};
   void encode(ProtoWriteBuffer buffer) const override;
 #ifdef HAS_PROTO_MESSAGE_DUMP
   void dump_to(std::string &out) const override;
diff --git a/esphome/components/api/api_server.cpp b/esphome/components/api/api_server.cpp
index 87b5f9e63f..f70d45ecd0 100644
--- a/esphome/components/api/api_server.cpp
+++ b/esphome/components/api/api_server.cpp
@@ -323,16 +323,16 @@ void APIServer::on_shutdown() {
 }
 
 #ifdef USE_VOICE_ASSISTANT
-bool APIServer::start_voice_assistant(const std::string &conversation_id) {
+bool APIServer::start_voice_assistant(const std::string &conversation_id, bool use_vad) {
   for (auto &c : this->clients_) {
-    if (c->request_voice_assistant(true, conversation_id))
+    if (c->request_voice_assistant(true, conversation_id, use_vad))
       return true;
   }
   return false;
 }
 void APIServer::stop_voice_assistant() {
   for (auto &c : this->clients_) {
-    if (c->request_voice_assistant(false, ""))
+    if (c->request_voice_assistant(false, "", false))
       return;
   }
 }
diff --git a/esphome/components/api/api_server.h b/esphome/components/api/api_server.h
index be124f42ff..9b40a5ef02 100644
--- a/esphome/components/api/api_server.h
+++ b/esphome/components/api/api_server.h
@@ -81,7 +81,7 @@ class APIServer : public Component, public Controller {
 #endif
 
 #ifdef USE_VOICE_ASSISTANT
-  bool start_voice_assistant(const std::string &conversation_id);
+  bool start_voice_assistant(const std::string &conversation_id, bool use_vad);
   void stop_voice_assistant();
 #endif
 
diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp
index 44d640ff39..217ddb6354 100644
--- a/esphome/components/voice_assistant/voice_assistant.cpp
+++ b/esphome/components/voice_assistant/voice_assistant.cpp
@@ -130,7 +130,7 @@ void VoiceAssistant::start(struct sockaddr_storage *addr, uint16_t port) {
 
 void VoiceAssistant::request_start(bool continuous) {
   ESP_LOGD(TAG, "Requesting start...");
-  if (!api::global_api_server->start_voice_assistant(this->conversation_id_)) {
+  if (!api::global_api_server->start_voice_assistant(this->conversation_id_, this->silence_detection_)) {
     ESP_LOGW(TAG, "Could not request start.");
     this->error_trigger_->trigger("not-connected", "Could not request start.");
     this->continuous_ = false;
diff --git a/esphome/components/voice_assistant/voice_assistant.h b/esphome/components/voice_assistant/voice_assistant.h
index b103584509..e67baaee65 100644
--- a/esphome/components/voice_assistant/voice_assistant.h
+++ b/esphome/components/voice_assistant/voice_assistant.h
@@ -25,10 +25,9 @@ namespace voice_assistant {
 
 // Version 1: Initial version
 // Version 2: Adds raw speaker support
-// Version 3: Adds continuous support
+// Version 3: Unused/skipped (formerly signalled silence detection support)
 static const uint32_t INITIAL_VERSION = 1;
 static const uint32_t SPEAKER_SUPPORT = 2;
-static const uint32_t SILENCE_DETECTION_SUPPORT = 3;
 
 class VoiceAssistant : public Component {
  public:
@@ -48,9 +47,6 @@ class VoiceAssistant : public Component {
   uint32_t get_version() const {
 #ifdef USE_SPEAKER
     if (this->speaker_ != nullptr) {
-      if (this->silence_detection_) {
-        return SILENCE_DETECTION_SUPPORT;
-      }
       return SPEAKER_SUPPORT;
     }
 #endif