1
0
mirror of https://github.com/esphome/esphome.git synced 2026-02-08 08:41:59 +00:00

[voice_assistant] Use zero-copy buffer access for audio data (#12656)

This commit is contained in:
J. Nick Koston
2026-01-02 14:10:21 -10:00
committed by GitHub
parent 0b7ff09657
commit 51259888bf
5 changed files with 19 additions and 25 deletions

View File

@@ -1937,7 +1937,7 @@ message VoiceAssistantAudio {
option (source) = SOURCE_BOTH;
option (ifdef) = "USE_VOICE_ASSISTANT";
bytes data = 1;
bytes data = 1 [(pointer_to_buffer) = true];
bool end = 2;
}

View File

@@ -2527,20 +2527,22 @@ bool VoiceAssistantAudio::decode_varint(uint32_t field_id, ProtoVarInt value) {
}
bool VoiceAssistantAudio::decode_length(uint32_t field_id, ProtoLengthDelimited value) {
switch (field_id) {
case 1:
this->data = value.as_string();
case 1: {
this->data = value.data();
this->data_len = value.size();
break;
}
default:
return false;
}
return true;
}
void VoiceAssistantAudio::encode(ProtoWriteBuffer buffer) const {
buffer.encode_bytes(1, this->data_ptr_, this->data_len_);
buffer.encode_bytes(1, this->data, this->data_len);
buffer.encode_bool(2, this->end);
}
void VoiceAssistantAudio::calculate_size(ProtoSize &size) const {
size.add_length(1, this->data_len_);
size.add_length(1, this->data_len);
size.add_bool(1, this->end);
}
bool VoiceAssistantTimerEventResponse::decode_varint(uint32_t field_id, ProtoVarInt value) {

View File

@@ -2521,17 +2521,12 @@ class VoiceAssistantEventResponse final : public ProtoDecodableMessage {
class VoiceAssistantAudio final : public ProtoDecodableMessage {
public:
static constexpr uint8_t MESSAGE_TYPE = 106;
static constexpr uint8_t ESTIMATED_SIZE = 11;
static constexpr uint8_t ESTIMATED_SIZE = 21;
#ifdef HAS_PROTO_MESSAGE_DUMP
const char *message_name() const override { return "voice_assistant_audio"; }
#endif
std::string data{};
const uint8_t *data_ptr_{nullptr};
size_t data_len_{0};
void set_data(const uint8_t *data, size_t len) {
this->data_ptr_ = data;
this->data_len_ = len;
}
const uint8_t *data{nullptr};
uint16_t data_len{0};
bool end{false};
void encode(ProtoWriteBuffer buffer) const override;
void calculate_size(ProtoSize &size) const override;

View File

@@ -1978,11 +1978,7 @@ void VoiceAssistantEventResponse::dump_to(std::string &out) const {
void VoiceAssistantAudio::dump_to(std::string &out) const {
MessageDumpHelper helper(out, "VoiceAssistantAudio");
out.append(" data: ");
if (this->data_ptr_ != nullptr) {
out.append(format_hex_pretty(this->data_ptr_, this->data_len_));
} else {
out.append(format_hex_pretty(reinterpret_cast<const uint8_t *>(this->data.data()), this->data.size()));
}
out.append(format_hex_pretty(this->data, this->data_len));
out.append("\n");
dump_field(out, "end", this->end);
}

View File

@@ -272,7 +272,8 @@ void VoiceAssistant::loop() {
size_t read_bytes = this->ring_buffer_->read((void *) this->send_buffer_, SEND_BUFFER_SIZE, 0);
if (this->audio_mode_ == AUDIO_MODE_API) {
api::VoiceAssistantAudio msg;
msg.set_data(this->send_buffer_, read_bytes);
msg.data = this->send_buffer_;
msg.data_len = read_bytes;
this->api_client_->send_message(msg, api::VoiceAssistantAudio::MESSAGE_TYPE);
} else {
if (!this->udp_socket_running_) {
@@ -841,12 +842,12 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
void VoiceAssistant::on_audio(const api::VoiceAssistantAudio &msg) {
#ifdef USE_SPEAKER // We should never get to this function if there is no speaker anyway
if ((this->speaker_ != nullptr) && (this->speaker_buffer_ != nullptr)) {
if (this->speaker_buffer_index_ + msg.data.length() < SPEAKER_BUFFER_SIZE) {
memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data.data(), msg.data.length());
this->speaker_buffer_index_ += msg.data.length();
this->speaker_buffer_size_ += msg.data.length();
this->speaker_bytes_received_ += msg.data.length();
ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data.length());
if (this->speaker_buffer_index_ + msg.data_len < SPEAKER_BUFFER_SIZE) {
memcpy(this->speaker_buffer_ + this->speaker_buffer_index_, msg.data, msg.data_len);
this->speaker_buffer_index_ += msg.data_len;
this->speaker_buffer_size_ += msg.data_len;
this->speaker_bytes_received_ += msg.data_len;
ESP_LOGV(TAG, "Received audio: %u bytes from API", msg.data_len);
} else {
ESP_LOGE(TAG, "Cannot receive audio, buffer is full");
}