mirror of
https://github.com/esphome/esphome.git
synced 2025-10-29 06:04:01 +00:00
Send/Receive Voice Assistant audio via API (#6471)
Co-authored-by: Michael Hansen <mike@rhasspy.org>
This commit is contained in:
@@ -217,7 +217,8 @@ message DeviceInfoResponse {
|
||||
|
||||
string friendly_name = 13;
|
||||
|
||||
uint32 voice_assistant_version = 14;
|
||||
uint32 legacy_voice_assistant_version = 14;
|
||||
uint32 voice_assistant_feature_flags = 17;
|
||||
|
||||
string suggested_area = 16;
|
||||
}
|
||||
@@ -1422,12 +1423,18 @@ message BluetoothDeviceClearCacheResponse {
|
||||
}
|
||||
|
||||
// ==================== PUSH TO TALK ====================
|
||||
enum VoiceAssistantSubscribeFlag {
|
||||
VOICE_ASSISTANT_SUBSCRIBE_NONE = 0;
|
||||
VOICE_ASSISTANT_SUBSCRIBE_API_AUDIO = 1;
|
||||
}
|
||||
|
||||
message SubscribeVoiceAssistantRequest {
|
||||
option (id) = 89;
|
||||
option (source) = SOURCE_CLIENT;
|
||||
option (ifdef) = "USE_VOICE_ASSISTANT";
|
||||
|
||||
bool subscribe = 1;
|
||||
uint32 flags = 2;
|
||||
}
|
||||
|
||||
enum VoiceAssistantRequestFlag {
|
||||
@@ -1495,6 +1502,16 @@ message VoiceAssistantEventResponse {
|
||||
repeated VoiceAssistantEventData data = 2;
|
||||
}
|
||||
|
||||
message VoiceAssistantAudio {
|
||||
option (id) = 106;
|
||||
option (source) = SOURCE_BOTH;
|
||||
option (ifdef) = "USE_VOICE_ASSISTANT";
|
||||
|
||||
bytes data = 1;
|
||||
bool end = 2;
|
||||
}
|
||||
|
||||
|
||||
// ==================== ALARM CONTROL PANEL ====================
|
||||
enum AlarmControlPanelState {
|
||||
ALARM_STATE_DISARMED = 0;
|
||||
|
||||
@@ -1040,10 +1040,15 @@ void APIConnection::on_voice_assistant_response(const VoiceAssistantResponse &ms
|
||||
voice_assistant::global_voice_assistant->failed_to_start();
|
||||
return;
|
||||
}
|
||||
struct sockaddr_storage storage;
|
||||
socklen_t len = sizeof(storage);
|
||||
this->helper_->getpeername((struct sockaddr *) &storage, &len);
|
||||
voice_assistant::global_voice_assistant->start_streaming(&storage, msg.port);
|
||||
if (msg.port == 0) {
|
||||
// Use API Audio
|
||||
voice_assistant::global_voice_assistant->start_streaming();
|
||||
} else {
|
||||
struct sockaddr_storage storage;
|
||||
socklen_t len = sizeof(storage);
|
||||
this->helper_->getpeername((struct sockaddr *) &storage, &len);
|
||||
voice_assistant::global_voice_assistant->start_streaming(&storage, msg.port);
|
||||
}
|
||||
}
|
||||
};
|
||||
void APIConnection::on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) {
|
||||
@@ -1055,6 +1060,15 @@ void APIConnection::on_voice_assistant_event_response(const VoiceAssistantEventR
|
||||
voice_assistant::global_voice_assistant->on_event(msg);
|
||||
}
|
||||
}
|
||||
void APIConnection::on_voice_assistant_audio(const VoiceAssistantAudio &msg) {
|
||||
if (voice_assistant::global_voice_assistant != nullptr) {
|
||||
if (voice_assistant::global_voice_assistant->get_api_connection() != this) {
|
||||
return;
|
||||
}
|
||||
|
||||
voice_assistant::global_voice_assistant->on_audio(msg);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1142,7 +1156,7 @@ HelloResponse APIConnection::hello(const HelloRequest &msg) {
|
||||
|
||||
HelloResponse resp;
|
||||
resp.api_version_major = 1;
|
||||
resp.api_version_minor = 9;
|
||||
resp.api_version_minor = 10;
|
||||
resp.server_info = App.get_name() + " (esphome v" ESPHOME_VERSION ")";
|
||||
resp.name = App.get_name();
|
||||
|
||||
@@ -1203,7 +1217,8 @@ DeviceInfoResponse APIConnection::device_info(const DeviceInfoRequest &msg) {
|
||||
resp.bluetooth_proxy_feature_flags = bluetooth_proxy::global_bluetooth_proxy->get_feature_flags();
|
||||
#endif
|
||||
#ifdef USE_VOICE_ASSISTANT
|
||||
resp.voice_assistant_version = voice_assistant::global_voice_assistant->get_version();
|
||||
resp.legacy_voice_assistant_version = voice_assistant::global_voice_assistant->get_legacy_version();
|
||||
resp.voice_assistant_feature_flags = voice_assistant::global_voice_assistant->get_feature_flags();
|
||||
#endif
|
||||
return resp;
|
||||
}
|
||||
|
||||
@@ -134,6 +134,7 @@ class APIConnection : public APIServerConnection {
|
||||
void subscribe_voice_assistant(const SubscribeVoiceAssistantRequest &msg) override;
|
||||
void on_voice_assistant_response(const VoiceAssistantResponse &msg) override;
|
||||
void on_voice_assistant_event_response(const VoiceAssistantEventResponse &msg) override;
|
||||
void on_voice_assistant_audio(const VoiceAssistantAudio &msg) override;
|
||||
#endif
|
||||
|
||||
#ifdef USE_ALARM_CONTROL_PANEL
|
||||
|
||||
@@ -410,6 +410,19 @@ const char *proto_enum_to_string<enums::BluetoothDeviceRequestType>(enums::Bluet
|
||||
}
|
||||
#endif
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
template<>
|
||||
const char *proto_enum_to_string<enums::VoiceAssistantSubscribeFlag>(enums::VoiceAssistantSubscribeFlag value) {
|
||||
switch (value) {
|
||||
case enums::VOICE_ASSISTANT_SUBSCRIBE_NONE:
|
||||
return "VOICE_ASSISTANT_SUBSCRIBE_NONE";
|
||||
case enums::VOICE_ASSISTANT_SUBSCRIBE_API_AUDIO:
|
||||
return "VOICE_ASSISTANT_SUBSCRIBE_API_AUDIO";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
template<> const char *proto_enum_to_string<enums::VoiceAssistantRequestFlag>(enums::VoiceAssistantRequestFlag value) {
|
||||
switch (value) {
|
||||
case enums::VOICE_ASSISTANT_REQUEST_NONE:
|
||||
@@ -716,7 +729,11 @@ bool DeviceInfoResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {
|
||||
return true;
|
||||
}
|
||||
case 14: {
|
||||
this->voice_assistant_version = value.as_uint32();
|
||||
this->legacy_voice_assistant_version = value.as_uint32();
|
||||
return true;
|
||||
}
|
||||
case 17: {
|
||||
this->voice_assistant_feature_flags = value.as_uint32();
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
@@ -784,7 +801,8 @@ void DeviceInfoResponse::encode(ProtoWriteBuffer buffer) const {
|
||||
buffer.encode_uint32(15, this->bluetooth_proxy_feature_flags);
|
||||
buffer.encode_string(12, this->manufacturer);
|
||||
buffer.encode_string(13, this->friendly_name);
|
||||
buffer.encode_uint32(14, this->voice_assistant_version);
|
||||
buffer.encode_uint32(14, this->legacy_voice_assistant_version);
|
||||
buffer.encode_uint32(17, this->voice_assistant_feature_flags);
|
||||
buffer.encode_string(16, this->suggested_area);
|
||||
}
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
@@ -850,8 +868,13 @@ void DeviceInfoResponse::dump_to(std::string &out) const {
|
||||
out.append("'").append(this->friendly_name).append("'");
|
||||
out.append("\n");
|
||||
|
||||
out.append(" voice_assistant_version: ");
|
||||
sprintf(buffer, "%" PRIu32, this->voice_assistant_version);
|
||||
out.append(" legacy_voice_assistant_version: ");
|
||||
sprintf(buffer, "%" PRIu32, this->legacy_voice_assistant_version);
|
||||
out.append(buffer);
|
||||
out.append("\n");
|
||||
|
||||
out.append(" voice_assistant_feature_flags: ");
|
||||
sprintf(buffer, "%" PRIu32, this->voice_assistant_feature_flags);
|
||||
out.append(buffer);
|
||||
out.append("\n");
|
||||
|
||||
@@ -6514,11 +6537,18 @@ bool SubscribeVoiceAssistantRequest::decode_varint(uint32_t field_id, ProtoVarIn
|
||||
this->subscribe = value.as_bool();
|
||||
return true;
|
||||
}
|
||||
case 2: {
|
||||
this->flags = value.as_uint32();
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
void SubscribeVoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const { buffer.encode_bool(1, this->subscribe); }
|
||||
void SubscribeVoiceAssistantRequest::encode(ProtoWriteBuffer buffer) const {
|
||||
buffer.encode_bool(1, this->subscribe);
|
||||
buffer.encode_uint32(2, this->flags);
|
||||
}
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
void SubscribeVoiceAssistantRequest::dump_to(std::string &out) const {
|
||||
__attribute__((unused)) char buffer[64];
|
||||
@@ -6526,6 +6556,11 @@ void SubscribeVoiceAssistantRequest::dump_to(std::string &out) const {
|
||||
out.append(" subscribe: ");
|
||||
out.append(YESNO(this->subscribe));
|
||||
out.append("\n");
|
||||
|
||||
out.append(" flags: ");
|
||||
sprintf(buffer, "%" PRIu32, this->flags);
|
||||
out.append(buffer);
|
||||
out.append("\n");
|
||||
out.append("}");
|
||||
}
|
||||
#endif
|
||||
@@ -6752,6 +6787,44 @@ void VoiceAssistantEventResponse::dump_to(std::string &out) const {
|
||||
out.append("}");
|
||||
}
|
||||
#endif
|
||||
bool VoiceAssistantAudio::decode_varint(uint32_t field_id, ProtoVarInt value) {
|
||||
switch (field_id) {
|
||||
case 2: {
|
||||
this->end = value.as_bool();
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
bool VoiceAssistantAudio::decode_length(uint32_t field_id, ProtoLengthDelimited value) {
|
||||
switch (field_id) {
|
||||
case 1: {
|
||||
this->data = value.as_string();
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
void VoiceAssistantAudio::encode(ProtoWriteBuffer buffer) const {
|
||||
buffer.encode_string(1, this->data);
|
||||
buffer.encode_bool(2, this->end);
|
||||
}
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
void VoiceAssistantAudio::dump_to(std::string &out) const {
|
||||
__attribute__((unused)) char buffer[64];
|
||||
out.append("VoiceAssistantAudio {\n");
|
||||
out.append(" data: ");
|
||||
out.append("'").append(this->data).append("'");
|
||||
out.append("\n");
|
||||
|
||||
out.append(" end: ");
|
||||
out.append(YESNO(this->end));
|
||||
out.append("\n");
|
||||
out.append("}");
|
||||
}
|
||||
#endif
|
||||
bool ListEntitiesAlarmControlPanelResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {
|
||||
switch (field_id) {
|
||||
case 6: {
|
||||
|
||||
@@ -165,6 +165,10 @@ enum BluetoothDeviceRequestType : uint32_t {
|
||||
BLUETOOTH_DEVICE_REQUEST_TYPE_CONNECT_V3_WITHOUT_CACHE = 5,
|
||||
BLUETOOTH_DEVICE_REQUEST_TYPE_CLEAR_CACHE = 6,
|
||||
};
|
||||
enum VoiceAssistantSubscribeFlag : uint32_t {
|
||||
VOICE_ASSISTANT_SUBSCRIBE_NONE = 0,
|
||||
VOICE_ASSISTANT_SUBSCRIBE_API_AUDIO = 1,
|
||||
};
|
||||
enum VoiceAssistantRequestFlag : uint32_t {
|
||||
VOICE_ASSISTANT_REQUEST_NONE = 0,
|
||||
VOICE_ASSISTANT_REQUEST_USE_VAD = 1,
|
||||
@@ -327,7 +331,8 @@ class DeviceInfoResponse : public ProtoMessage {
|
||||
uint32_t bluetooth_proxy_feature_flags{0};
|
||||
std::string manufacturer{};
|
||||
std::string friendly_name{};
|
||||
uint32_t voice_assistant_version{0};
|
||||
uint32_t legacy_voice_assistant_version{0};
|
||||
uint32_t voice_assistant_feature_flags{0};
|
||||
std::string suggested_area{};
|
||||
void encode(ProtoWriteBuffer buffer) const override;
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
@@ -1674,6 +1679,7 @@ class BluetoothDeviceClearCacheResponse : public ProtoMessage {
|
||||
class SubscribeVoiceAssistantRequest : public ProtoMessage {
|
||||
public:
|
||||
bool subscribe{false};
|
||||
uint32_t flags{0};
|
||||
void encode(ProtoWriteBuffer buffer) const override;
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
void dump_to(std::string &out) const override;
|
||||
@@ -1749,6 +1755,19 @@ class VoiceAssistantEventResponse : public ProtoMessage {
|
||||
bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override;
|
||||
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
|
||||
};
|
||||
class VoiceAssistantAudio : public ProtoMessage {
|
||||
public:
|
||||
std::string data{};
|
||||
bool end{false};
|
||||
void encode(ProtoWriteBuffer buffer) const override;
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
void dump_to(std::string &out) const override;
|
||||
#endif
|
||||
|
||||
protected:
|
||||
bool decode_length(uint32_t field_id, ProtoLengthDelimited value) override;
|
||||
bool decode_varint(uint32_t field_id, ProtoVarInt value) override;
|
||||
};
|
||||
class ListEntitiesAlarmControlPanelResponse : public ProtoMessage {
|
||||
public:
|
||||
std::string object_id{};
|
||||
|
||||
@@ -476,6 +476,14 @@ bool APIServerConnectionBase::send_voice_assistant_request(const VoiceAssistantR
|
||||
#endif
|
||||
#ifdef USE_VOICE_ASSISTANT
|
||||
#endif
|
||||
#ifdef USE_VOICE_ASSISTANT
|
||||
bool APIServerConnectionBase::send_voice_assistant_audio(const VoiceAssistantAudio &msg) {
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
ESP_LOGVV(TAG, "send_voice_assistant_audio: %s", msg.dump().c_str());
|
||||
#endif
|
||||
return this->send_message_<VoiceAssistantAudio>(msg, 106);
|
||||
}
|
||||
#endif
|
||||
#ifdef USE_ALARM_CONTROL_PANEL
|
||||
bool APIServerConnectionBase::send_list_entities_alarm_control_panel_response(
|
||||
const ListEntitiesAlarmControlPanelResponse &msg) {
|
||||
@@ -971,6 +979,17 @@ bool APIServerConnectionBase::read_message(uint32_t msg_size, uint32_t msg_type,
|
||||
ESP_LOGVV(TAG, "on_date_command_request: %s", msg.dump().c_str());
|
||||
#endif
|
||||
this->on_date_command_request(msg);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case 106: {
|
||||
#ifdef USE_VOICE_ASSISTANT
|
||||
VoiceAssistantAudio msg;
|
||||
msg.decode(msg_data, msg_size);
|
||||
#ifdef HAS_PROTO_MESSAGE_DUMP
|
||||
ESP_LOGVV(TAG, "on_voice_assistant_audio: %s", msg.dump().c_str());
|
||||
#endif
|
||||
this->on_voice_assistant_audio(msg);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -240,6 +240,10 @@ class APIServerConnectionBase : public ProtoService {
|
||||
#ifdef USE_VOICE_ASSISTANT
|
||||
virtual void on_voice_assistant_event_response(const VoiceAssistantEventResponse &value){};
|
||||
#endif
|
||||
#ifdef USE_VOICE_ASSISTANT
|
||||
bool send_voice_assistant_audio(const VoiceAssistantAudio &msg);
|
||||
virtual void on_voice_assistant_audio(const VoiceAssistantAudio &value){};
|
||||
#endif
|
||||
#ifdef USE_ALARM_CONTROL_PANEL
|
||||
bool send_list_entities_alarm_control_panel_response(const ListEntitiesAlarmControlPanelResponse &msg);
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user