1
0
mirror of https://github.com/esphome/esphome.git synced 2025-09-02 11:22:24 +01:00
Files
esphome/esphome/components/microphone/microphone_source.cpp
2025-04-29 00:05:07 +00:00

97 lines
4.1 KiB
C++

#include "microphone_source.h"
namespace esphome {
namespace microphone {
// Builds a wrapper around ``data_callback`` that only forwards audio while this source is enabled, and that passes
// the audio through ``process_audio_`` before handing it on.
//
// NOTE: the wrapper is not registered with the microphone yet (the registration call below is still commented out),
// so calling this function currently has no observable effect.
void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) {
  using DataCallback = std::function<void(const std::vector<uint8_t> &)>;

  DataCallback filtered_callback = [this, data_callback](const std::vector<uint8_t> &data) {
    if (!this->enabled_) {
      return;  // Source is not enabled: ignore this buffer.
    }
    data_callback(this->process_audio_(data));
  };

  // A future PR will uncomment the registration below. It requires changing the callback vector to a uint8_t in
  // every component using a mic callback.
  // this->mic_->add_data_callback(std::move(filtered_callback));
}
// Marks this source as enabled (the flag checked by the filtered callback built in ``add_data_callback``) and then
// starts the underlying microphone.
void MicrophoneSource::start() {
  // Set the flag before starting the microphone; the order of these two steps is kept as-is.
  this->enabled_ = true;

  auto &mic = *this->mic_;
  mic.start();
}
// Marks this source as disabled (the flag checked by the filtered callback built in ``add_data_callback``) and then
// stops the underlying microphone.
void MicrophoneSource::stop() {
  // Clear the flag before stopping the microphone; the order of these two steps is kept as-is.
  this->enabled_ = false;

  auto &mic = *this->mic_;
  mic.stop();
}
// Converts one buffer of raw microphone audio into this source's output format.
//
// For every complete frame in ``data``, the samples of the channels selected in ``channels_`` are extracted,
// multiplied by ``gain_factor_``, converted to ``bits_per_sample_`` (rounded up to whole bytes), clamped to the
// target range and appended to the result. Samples are read and written least significant byte first, and the
// channels of a frame are read back-to-back (interleaved).
//
// Bit depth conversions are obtained by truncating bits or padding with zeros - no dithering is applied.
//
// @param data raw audio bytes, laid out as described by ``mic_->get_audio_stream_info()``
// @return the converted audio bytes; empty if the source or target sample width is not between 1 and 4 bytes
std::vector<uint8_t> MicrophoneSource::process_audio_(const std::vector<uint8_t> &data) {
  // Samples are assembled in a 32-bit integer, so at most 4 bytes per sample can be handled.
  constexpr size_t MAX_BYTES_PER_SAMPLE = sizeof(int32_t);

  // Query the stream info once instead of once per derived value.
  auto &&stream_info = this->mic_->get_audio_stream_info();
  const size_t source_bytes_per_sample = stream_info.samples_to_bytes(1);
  const size_t source_channels = stream_info.get_channels();
  const size_t source_bytes_per_frame = stream_info.frames_to_bytes(1);
  const uint32_t total_frames = stream_info.bytes_to_frames(data.size());

  const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8;
  const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count();

  std::vector<uint8_t> filtered_data;

  // Sample widths outside 1..4 bytes would produce out-of-range shift counts below; refuse to convert them.
  if (source_bytes_per_sample == 0 || source_bytes_per_sample > MAX_BYTES_PER_SAMPLE ||
      target_bytes_per_sample == 0 || target_bytes_per_sample > MAX_BYTES_PER_SAMPLE) {
    return filtered_data;
  }

  filtered_data.reserve(target_bytes_per_frame * total_frames);

  // The limits are computed in 64 bits so that a 4 byte target does not overflow while building them.
  const int64_t target_max_value = (int64_t{1} << (8 * target_bytes_per_sample - 1)) - 1;
  const int64_t target_min_value = -target_max_value - 1;

  // Loop-invariant shift amounts.
  // - ``load_shift``: places the source bytes in the most significant bits so the sign bit is correct.
  // - ``align_shift``: moves the sample back down. If the source is at least as wide as the target, the source width
  //   is kept so the gain multiplication uses all significant bits; otherwise the padded zeros are kept so the
  //   sample already has the target width.
  // - ``reduce_shift``: after the gain is applied, drops the extra least significant bytes of a wider source.
  const size_t working_bytes_per_sample =
      (source_bytes_per_sample >= target_bytes_per_sample) ? source_bytes_per_sample : target_bytes_per_sample;
  const size_t load_shift = 8 * (MAX_BYTES_PER_SAMPLE - source_bytes_per_sample);
  const size_t align_shift = 8 * (MAX_BYTES_PER_SAMPLE - working_bytes_per_sample);
  const size_t reduce_shift = 8 * (working_bytes_per_sample - target_bytes_per_sample);

  for (size_t frame_index = 0; frame_index < total_frames; ++frame_index) {
    const size_t frame_offset = frame_index * source_bytes_per_frame;

    for (size_t channel_index = 0; channel_index < source_channels; ++channel_index) {
      if (!this->channels_.test(channel_index)) {
        continue;  // Channel is not part of the target mask.
      }

      const size_t sample_index = frame_offset + channel_index * source_bytes_per_sample;

      // Assemble the bytes in an unsigned accumulator to avoid shifting bits into the sign bit of a signed value.
      uint32_t raw = 0;
      size_t bit_offset = load_shift;
      for (size_t i = 0; i < source_bytes_per_sample; ++i) {
        raw |= static_cast<uint32_t>(data[sample_index + i]) << bit_offset;
        bit_offset += 8;
      }

      // Reinterpret as signed and shift back down (sign-extending) to ``working_bytes_per_sample`` bytes of data.
      // The sample is widened to 64 bits so that applying the gain to a full-scale 32-bit sample cannot overflow
      // before it is clamped.
      int64_t sample = static_cast<int32_t>(raw) >> align_shift;

      // Apply gain using multiplication
      sample *= this->gain_factor_;

      // Match target output bytes by shifting out the least significant bits (no-op unless the source is wider)
      sample >>= reduce_shift;

      // Clamp ``sample`` to the target bytes per sample range in case the gain pushed it out of range
      sample = clamp<int64_t>(sample, target_min_value, target_max_value);

      // Copy ``target_bytes_per_sample`` bytes to the output buffer, least significant byte first.
      for (size_t i = 0; i < target_bytes_per_sample; ++i) {
        filtered_data.push_back(static_cast<uint8_t>(sample));
        sample >>= 8;
      }
    }
  }

  return filtered_data;
}
} // namespace microphone
} // namespace esphome