mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-31 15:12:06 +00:00 
			
		
		
		
	[audio, microphone] - Allow MicrophoneSource to passively capture/optimization (#8732)
This commit is contained in:
		| @@ -135,7 +135,7 @@ const char *audio_file_type_to_string(AudioFileType file_type); | ||||
| void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor, | ||||
|                          size_t samples_to_scale); | ||||
|  | ||||
| /// @brief Unpacks a quantized audio sample into a Q31 fixed point number. | ||||
| /// @brief Unpacks a quantized audio sample into a Q31 fixed-point number. | ||||
| /// @param data Pointer to uint8_t array containing the audio sample | ||||
| /// @param bytes_per_sample The number of bytes per sample | ||||
| /// @return Q31 sample | ||||
| @@ -160,5 +160,28 @@ inline int32_t unpack_audio_sample_to_q31(const uint8_t *data, size_t bytes_per_ | ||||
|   return sample; | ||||
| } | ||||
|  | ||||
/// @brief Packs a Q31 fixed-point number as an audio sample with the specified number of bytes per sample.
/// Only the ``bytes_per_sample`` most significant bytes of ``sample`` are stored, least significant byte first
/// (little-endian). The remaining low-order bits are truncated - no dithering is applied.
/// @param sample Q31 fixed-point number to pack
/// @param data Pointer to the output buffer; it must have room for at least ``bytes_per_sample`` bytes
/// @param bytes_per_sample The audio data's bytes per sample (1 to 4). Any other value leaves ``data`` untouched.
inline void pack_q31_as_audio_sample(int32_t sample, uint8_t *data, size_t bytes_per_sample) {
  if (bytes_per_sample == 0 || bytes_per_sample > sizeof(sample)) {
    return;  // Unsupported sample width: write nothing
  }

  // Work on the unsigned bit pattern: right-shifting a negative signed value is implementation-defined before
  // C++20, whereas the conversion to uint32_t and the unsigned shifts below are fully defined.
  const uint32_t bits = static_cast<uint32_t>(sample);

  // Drop the low-order bytes that do not fit, then emit the remaining bytes starting with the lowest one.
  const size_t dropped_bytes = sizeof(sample) - bytes_per_sample;
  for (size_t i = 0; i < bytes_per_sample; ++i) {
    data[i] = static_cast<uint8_t>(bits >> (8 * (dropped_bytes + i)));
  }
}
|  | ||||
| }  // namespace audio | ||||
| }  // namespace esphome | ||||
|   | ||||
| @@ -162,13 +162,22 @@ def final_validate_microphone_source_schema( | ||||
|     return _validate_audio_compatability | ||||
|  | ||||
|  | ||||
| async def microphone_source_to_code(config): | ||||
| async def microphone_source_to_code(config, passive=False): | ||||
|     """Creates a MicrophoneSource variable for codegen. | ||||
|  | ||||
|     Setting passive to true makes the MicrophoneSource never start/stop the microphone itself; it only receives audio when another component has actively started the microphone. If false, then the microphone needs to be explicitly started/stopped. | ||||
|  | ||||
|     Args: | ||||
|         config (Schema): Created with `microphone_source_schema` specifying bits per sample, channels, and gain factor | ||||
|         passive (bool): Enable passive mode for the MicrophoneSource | ||||
|     """ | ||||
|     mic = await cg.get_variable(config[CONF_MICROPHONE]) | ||||
|     mic_source = cg.new_Pvariable( | ||||
|         config[CONF_ID], | ||||
|         mic, | ||||
|         config[CONF_BITS_PER_SAMPLE], | ||||
|         config[CONF_GAIN_FACTOR], | ||||
|         passive, | ||||
|     ) | ||||
|     for channel in config[CONF_CHANNELS]: | ||||
|         cg.add(mic_source.add_channel(channel)) | ||||
|   | ||||
| @@ -6,12 +6,10 @@ namespace microphone { | ||||
| static const int32_t Q25_MAX_VALUE = (1 << 25) - 1; | ||||
| static const int32_t Q25_MIN_VALUE = ~Q25_MAX_VALUE; | ||||
|  | ||||
| static const uint32_t HISTORY_VALUES = 32; | ||||
|  | ||||
| void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) { | ||||
|   std::function<void(const std::vector<uint8_t> &)> filtered_callback = | ||||
|       [this, data_callback](const std::vector<uint8_t> &data) { | ||||
|         if (this->enabled_) { | ||||
|         if (this->enabled_ || this->passive_) { | ||||
|           if (this->processed_samples_.use_count() == 0) { | ||||
|             // Create vector if it's unused | ||||
|             this->processed_samples_ = std::make_shared<std::vector<uint8_t>>(); | ||||
| @@ -32,13 +30,14 @@ audio::AudioStreamInfo MicrophoneSource::get_audio_stream_info() { | ||||
| } | ||||
|  | ||||
| void MicrophoneSource::start() { | ||||
|   if (!this->enabled_) { | ||||
|   if (!this->enabled_ && !this->passive_) { | ||||
|     this->enabled_ = true; | ||||
|     this->mic_->start(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| void MicrophoneSource::stop() { | ||||
|   if (this->enabled_) { | ||||
|   if (this->enabled_ && !this->passive_) { | ||||
|     this->enabled_ = false; | ||||
|     this->mic_->stop(); | ||||
|     this->processed_samples_.reset(); | ||||
| @@ -63,8 +62,9 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec | ||||
|   const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8; | ||||
|   const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count(); | ||||
|  | ||||
|   filtered_data.reserve(target_bytes_per_frame * total_frames); | ||||
|   filtered_data.resize(0); | ||||
|   filtered_data.resize(target_bytes_per_frame * total_frames); | ||||
|  | ||||
|   uint8_t *current_data = filtered_data.data(); | ||||
|  | ||||
|   for (uint32_t frame_index = 0; frame_index < total_frames; ++frame_index) { | ||||
|     for (uint32_t channel_index = 0; channel_index < source_channels; ++channel_index) { | ||||
| @@ -82,26 +82,10 @@ void MicrophoneSource::process_audio_(const std::vector<uint8_t> &data, std::vec | ||||
|         // Clamp ``sample`` in case gain multiplication overflows 25 bits | ||||
|         sample = clamp<int32_t>(sample, Q25_MIN_VALUE, Q25_MAX_VALUE);  // Q25 | ||||
|  | ||||
|         // Copy ``target_bytes_per_sample`` bytes to the output buffer. | ||||
|         if (target_bytes_per_sample == 1) { | ||||
|           sample >>= 18;  // Q25 -> Q7 | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample)); | ||||
|         } else if (target_bytes_per_sample == 2) { | ||||
|           sample >>= 10;  // Q25 -> Q15 | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample)); | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample >> 8)); | ||||
|         } else if (target_bytes_per_sample == 3) { | ||||
|           sample >>= 2;  // Q25 -> Q23 | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample)); | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample >> 8)); | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample >> 16)); | ||||
|         } else { | ||||
|         sample *= (1 << 6);  // Q25 -> Q31 | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample)); | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample >> 8)); | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample >> 16)); | ||||
|           filtered_data.push_back(static_cast<uint8_t>(sample >> 24)); | ||||
|         } | ||||
|  | ||||
|         audio::pack_q31_as_audio_sample(sample, current_data, target_bytes_per_sample); | ||||
|         current_data = current_data + target_bytes_per_sample; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   | ||||
| @@ -35,8 +35,8 @@ class MicrophoneSource { | ||||
|    * Note that this class cannot convert sample rates! | ||||
|    */ | ||||
|  public: | ||||
|   MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor) | ||||
|       : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor) {} | ||||
|   MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor, bool passive) | ||||
|       : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor), passive_(passive) {} | ||||
|  | ||||
|   /// @brief Enables a channel to be processed through the callback. | ||||
|   /// | ||||
| @@ -59,8 +59,8 @@ class MicrophoneSource { | ||||
|  | ||||
|   void start(); | ||||
|   void stop(); | ||||
|   bool is_running() const { return (this->mic_->is_running() && this->enabled_); } | ||||
|   bool is_stopped() const { return !this->enabled_; } | ||||
|   bool is_running() const { return (this->mic_->is_running() && (this->enabled_ || this->passive_)); } | ||||
|   bool is_stopped() const { return !this->is_running(); }; | ||||
|  | ||||
|  protected: | ||||
|   void process_audio_(const std::vector<uint8_t> &data, std::vector<uint8_t> &filtered_data); | ||||
| @@ -72,6 +72,7 @@ class MicrophoneSource { | ||||
|   std::bitset<8> channels_; | ||||
|   int32_t gain_factor_; | ||||
|   bool enabled_{false}; | ||||
|   bool passive_{false}; | ||||
| }; | ||||
|  | ||||
| }  // namespace microphone | ||||
|   | ||||
		Reference in New Issue
	
	Block a user