mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-30 22:53:59 +00:00 
			
		
		
		
	[audio, microphone] Add MicrophoneSource helper class (#8641)
Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
		| @@ -278,7 +278,7 @@ esphome/components/mdns/* @esphome/core | |||||||
| esphome/components/media_player/* @jesserockz | esphome/components/media_player/* @jesserockz | ||||||
| esphome/components/micro_wake_word/* @jesserockz @kahrendt | esphome/components/micro_wake_word/* @jesserockz @kahrendt | ||||||
| esphome/components/micronova/* @jorre05 | esphome/components/micronova/* @jorre05 | ||||||
| esphome/components/microphone/* @jesserockz | esphome/components/microphone/* @jesserockz @kahrendt | ||||||
| esphome/components/mics_4514/* @jesserockz | esphome/components/mics_4514/* @jesserockz | ||||||
| esphome/components/midea/* @dudanov | esphome/components/midea/* @dudanov | ||||||
| esphome/components/midea_ir/* @dudanov | esphome/components/midea_ir/* @dudanov | ||||||
|   | |||||||
| @@ -48,6 +48,12 @@ def set_stream_limits( | |||||||
|     min_sample_rate: int = _UNDEF, |     min_sample_rate: int = _UNDEF, | ||||||
|     max_sample_rate: int = _UNDEF, |     max_sample_rate: int = _UNDEF, | ||||||
| ): | ): | ||||||
|  |     """Sets the limits for the audio stream that audio component can handle | ||||||
|  |  | ||||||
|  |     When the component sinks audio (e.g., a speaker), these indicate the limits to the audio it can receive. | ||||||
|  |     When the component sources audio (e.g., a microphone), these indicate the limits to the audio it can send. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|     def set_limits_in_config(config): |     def set_limits_in_config(config): | ||||||
|         if min_bits_per_sample is not _UNDEF: |         if min_bits_per_sample is not _UNDEF: | ||||||
|             config[CONF_MIN_BITS_PER_SAMPLE] = min_bits_per_sample |             config[CONF_MIN_BITS_PER_SAMPLE] = min_bits_per_sample | ||||||
| @@ -69,43 +75,87 @@ def final_validate_audio_schema( | |||||||
|     name: str, |     name: str, | ||||||
|     *, |     *, | ||||||
|     audio_device: str, |     audio_device: str, | ||||||
|     bits_per_sample: int, |     bits_per_sample: int = _UNDEF, | ||||||
|     channels: int, |     channels: int = _UNDEF, | ||||||
|     sample_rate: int, |     sample_rate: int = _UNDEF, | ||||||
|  |     enabled_channels: list[int] = _UNDEF, | ||||||
|  |     audio_device_issue: bool = False, | ||||||
| ): | ): | ||||||
|  |     """Validates audio compatibility when passed between different components. | ||||||
|  |  | ||||||
|  |     The component derived from ``AUDIO_COMPONENT_SCHEMA`` should call ``set_stream_limits`` in a validator to specify its compatible settings | ||||||
|  |  | ||||||
|  |       - If audio_device_issue is True, then the error message indicates the user should adjust the AUDIO_COMPONENT_SCHEMA derived component's configuration to match the values passed to this function | ||||||
|  |       - If audio_device_issue is False, then the error message indicates the user should adjust the configuration of the component calling this function, as it falls out of the valid stream limits | ||||||
|  |  | ||||||
|  |     Args: | ||||||
|  |         name (str): Friendly name of the component calling this function with an audio component to validate | ||||||
|  |         audio_device (str): The configuration parameter name that contains the ID of an AUDIO_COMPONENT_SCHEMA derived component to validate against | ||||||
|  |         bits_per_sample (int, optional): The desired bits per sample | ||||||
|  |         channels (int, optional): The desired number of channels | ||||||
|  |         sample_rate (int, optional): The desired sample rate | ||||||
|  |         enabled_channels (list[int], optional): The desired enabled channels | ||||||
|  |         audio_device_issue (bool, optional): Format the error message to indicate the problem is in the configuration for the ``audio_device`` component. Defaults to False. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|     def validate_audio_compatiblity(audio_config): |     def validate_audio_compatiblity(audio_config): | ||||||
|         audio_schema = {} |         audio_schema = {} | ||||||
|  |  | ||||||
|  |         if bits_per_sample is not _UNDEF: | ||||||
|             try: |             try: | ||||||
|                 cv.int_range( |                 cv.int_range( | ||||||
|                     min=audio_config.get(CONF_MIN_BITS_PER_SAMPLE), |                     min=audio_config.get(CONF_MIN_BITS_PER_SAMPLE), | ||||||
|                     max=audio_config.get(CONF_MAX_BITS_PER_SAMPLE), |                     max=audio_config.get(CONF_MAX_BITS_PER_SAMPLE), | ||||||
|                 )(bits_per_sample) |                 )(bits_per_sample) | ||||||
|             except cv.Invalid as exc: |             except cv.Invalid as exc: | ||||||
|             raise cv.Invalid( |                 if audio_device_issue: | ||||||
|                 f"Invalid configuration for the {name} component. The {CONF_BITS_PER_SAMPLE} {str(exc)}" |                     error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires {bits_per_sample} bits per sample." | ||||||
|             ) from exc |                 else: | ||||||
|  |                     error_string = f"Invalid configuration for the {name} component. The {CONF_BITS_PER_SAMPLE} {str(exc)}" | ||||||
|  |                 raise cv.Invalid(error_string) from exc | ||||||
|  |  | ||||||
|  |         if channels is not _UNDEF: | ||||||
|             try: |             try: | ||||||
|                 cv.int_range( |                 cv.int_range( | ||||||
|                     min=audio_config.get(CONF_MIN_CHANNELS), |                     min=audio_config.get(CONF_MIN_CHANNELS), | ||||||
|                     max=audio_config.get(CONF_MAX_CHANNELS), |                     max=audio_config.get(CONF_MAX_CHANNELS), | ||||||
|                 )(channels) |                 )(channels) | ||||||
|             except cv.Invalid as exc: |             except cv.Invalid as exc: | ||||||
|             raise cv.Invalid( |                 if audio_device_issue: | ||||||
|                 f"Invalid configuration for the {name} component. The {CONF_NUM_CHANNELS} {str(exc)}" |                     error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires {channels} channels." | ||||||
|             ) from exc |                 else: | ||||||
|  |                     error_string = f"Invalid configuration for the {name} component. The {CONF_NUM_CHANNELS} {str(exc)}" | ||||||
|  |                 raise cv.Invalid(error_string) from exc | ||||||
|  |  | ||||||
|  |         if sample_rate is not _UNDEF: | ||||||
|             try: |             try: | ||||||
|                 cv.int_range( |                 cv.int_range( | ||||||
|                     min=audio_config.get(CONF_MIN_SAMPLE_RATE), |                     min=audio_config.get(CONF_MIN_SAMPLE_RATE), | ||||||
|                     max=audio_config.get(CONF_MAX_SAMPLE_RATE), |                     max=audio_config.get(CONF_MAX_SAMPLE_RATE), | ||||||
|                 )(sample_rate) |                 )(sample_rate) | ||||||
|             return cv.Schema(audio_schema, extra=cv.ALLOW_EXTRA)(audio_config) |  | ||||||
|             except cv.Invalid as exc: |             except cv.Invalid as exc: | ||||||
|             raise cv.Invalid( |                 if audio_device_issue: | ||||||
|                 f"Invalid configuration for the {name} component. The {CONF_SAMPLE_RATE} {str(exc)}" |                     error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires a {sample_rate} sample rate." | ||||||
|             ) from exc |                 else: | ||||||
|  |                     error_string = f"Invalid configuration for the {name} component. The {CONF_SAMPLE_RATE} {str(exc)}" | ||||||
|  |                 raise cv.Invalid(error_string) from exc | ||||||
|  |  | ||||||
|  |         if enabled_channels is not _UNDEF: | ||||||
|  |             for channel in enabled_channels: | ||||||
|  |                 try: | ||||||
|  |                     # Channels are 0-indexed | ||||||
|  |                     cv.int_range( | ||||||
|  |                         min=0, | ||||||
|  |                         max=audio_config.get(CONF_MAX_CHANNELS) - 1, | ||||||
|  |                     )(channel) | ||||||
|  |                 except cv.Invalid as exc: | ||||||
|  |                     if audio_device_issue: | ||||||
|  |                         error_string = f"Invalid configuration for the specified {audio_device}. The {name} component requires channel {channel}." | ||||||
|  |                     else: | ||||||
|  |                         error_string = f"Invalid configuration for the {name} component. Enabled channel {channel} {str(exc)}" | ||||||
|  |                     raise cv.Invalid(error_string) from exc | ||||||
|  |  | ||||||
|  |         return cv.Schema(audio_schema, extra=cv.ALLOW_EXTRA)(audio_config) | ||||||
|  |  | ||||||
|     return cv.Schema( |     return cv.Schema( | ||||||
|         { |         { | ||||||
|   | |||||||
| @@ -4,6 +4,8 @@ | |||||||
|  |  | ||||||
| #include "esphome/core/hal.h" | #include "esphome/core/hal.h" | ||||||
|  |  | ||||||
|  | #include <cstring> | ||||||
|  |  | ||||||
| namespace esphome { | namespace esphome { | ||||||
| namespace audio { | namespace audio { | ||||||
|  |  | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ | |||||||
| #include "audio_transfer_buffer.h" | #include "audio_transfer_buffer.h" | ||||||
|  |  | ||||||
| #include "esphome/core/defines.h" | #include "esphome/core/defines.h" | ||||||
|  | #include "esphome/core/helpers.h" | ||||||
| #include "esphome/core/ring_buffer.h" | #include "esphome/core/ring_buffer.h" | ||||||
|  |  | ||||||
| #ifdef USE_SPEAKER | #ifdef USE_SPEAKER | ||||||
|   | |||||||
| @@ -1,12 +1,21 @@ | |||||||
| from esphome import automation | from esphome import automation | ||||||
| from esphome.automation import maybe_simple_id | from esphome.automation import maybe_simple_id | ||||||
| import esphome.codegen as cg | import esphome.codegen as cg | ||||||
|  | from esphome.components import audio | ||||||
| import esphome.config_validation as cv | import esphome.config_validation as cv | ||||||
| from esphome.const import CONF_ID, CONF_TRIGGER_ID | from esphome.const import ( | ||||||
|  |     CONF_BITS_PER_SAMPLE, | ||||||
|  |     CONF_CHANNELS, | ||||||
|  |     CONF_GAIN_FACTOR, | ||||||
|  |     CONF_ID, | ||||||
|  |     CONF_MICROPHONE, | ||||||
|  |     CONF_TRIGGER_ID, | ||||||
|  | ) | ||||||
| from esphome.core import CORE | from esphome.core import CORE | ||||||
| from esphome.coroutine import coroutine_with_priority | from esphome.coroutine import coroutine_with_priority | ||||||
|  |  | ||||||
| CODEOWNERS = ["@jesserockz"] | AUTO_LOAD = ["audio"] | ||||||
|  | CODEOWNERS = ["@jesserockz", "@kahrendt"] | ||||||
|  |  | ||||||
| IS_PLATFORM_COMPONENT = True | IS_PLATFORM_COMPONENT = True | ||||||
|  |  | ||||||
| @@ -15,6 +24,7 @@ CONF_ON_DATA = "on_data" | |||||||
| microphone_ns = cg.esphome_ns.namespace("microphone") | microphone_ns = cg.esphome_ns.namespace("microphone") | ||||||
|  |  | ||||||
| Microphone = microphone_ns.class_("Microphone") | Microphone = microphone_ns.class_("Microphone") | ||||||
|  | MicrophoneSource = microphone_ns.class_("MicrophoneSource") | ||||||
|  |  | ||||||
| CaptureAction = microphone_ns.class_( | CaptureAction = microphone_ns.class_( | ||||||
|     "CaptureAction", automation.Action, cg.Parented.template(Microphone) |     "CaptureAction", automation.Action, cg.Parented.template(Microphone) | ||||||
| @@ -37,6 +47,7 @@ IsCapturingCondition = microphone_ns.class_( | |||||||
| async def setup_microphone_core_(var, config): | async def setup_microphone_core_(var, config): | ||||||
|     for conf in config.get(CONF_ON_DATA, []): |     for conf in config.get(CONF_ON_DATA, []): | ||||||
|         trigger = cg.new_Pvariable(conf[CONF_TRIGGER_ID], var) |         trigger = cg.new_Pvariable(conf[CONF_TRIGGER_ID], var) | ||||||
|  |         # Future PR will change the vector type to uint8 | ||||||
|         await automation.build_automation( |         await automation.build_automation( | ||||||
|             trigger, |             trigger, | ||||||
|             [(cg.std_vector.template(cg.int16).operator("ref").operator("const"), "x")], |             [(cg.std_vector.template(cg.int16).operator("ref").operator("const"), "x")], | ||||||
| @@ -50,7 +61,7 @@ async def register_microphone(var, config): | |||||||
|     await setup_microphone_core_(var, config) |     await setup_microphone_core_(var, config) | ||||||
|  |  | ||||||
|  |  | ||||||
| MICROPHONE_SCHEMA = cv.Schema( | MICROPHONE_SCHEMA = cv.Schema.extend(audio.AUDIO_COMPONENT_SCHEMA).extend( | ||||||
|     { |     { | ||||||
|         cv.Optional(CONF_ON_DATA): automation.validate_automation( |         cv.Optional(CONF_ON_DATA): automation.validate_automation( | ||||||
|             { |             { | ||||||
| @@ -64,7 +75,104 @@ MICROPHONE_SCHEMA = cv.Schema( | |||||||
| MICROPHONE_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Microphone)}) | MICROPHONE_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(Microphone)}) | ||||||
|  |  | ||||||
|  |  | ||||||
| async def media_player_action(config, action_id, template_arg, args): | def microphone_source_schema( | ||||||
|  |     min_bits_per_sample: int = 16, | ||||||
|  |     max_bits_per_sample: int = 16, | ||||||
|  |     min_channels: int = 1, | ||||||
|  |     max_channels: int = 1, | ||||||
|  | ): | ||||||
|  |     """Schema for a microphone source | ||||||
|  |  | ||||||
|  |     Components requesting microphone data should use this schema instead of accessing a microphone directly. | ||||||
|  |  | ||||||
|  |     Args: | ||||||
|  |       min_bits_per_sample (int, optional): Minimum number of bits per sample the requesting component supports. Defaults to 16. | ||||||
|  |       max_bits_per_sample (int, optional): Maximum number of bits per sample the requesting component supports. Defaults to 16. | ||||||
|  |       min_channels (int, optional): Minimum number of channels the requesting component supports. Defaults to 1. | ||||||
|  |       max_channels (int, optional): Maximum number of channels the requesting component supports. Defaults to 1. | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def _validate_unique_channels(config): | ||||||
|  |         if len(config) != len(set(config)): | ||||||
|  |             raise cv.Invalid("Channels must be unique") | ||||||
|  |         return config | ||||||
|  |  | ||||||
|  |     return cv.All( | ||||||
|  |         cv.maybe_simple_value( | ||||||
|  |             { | ||||||
|  |                 cv.GenerateID(CONF_ID): cv.declare_id(MicrophoneSource), | ||||||
|  |                 cv.Required(CONF_MICROPHONE): cv.use_id(Microphone), | ||||||
|  |                 cv.Optional(CONF_BITS_PER_SAMPLE, default=16): cv.int_range( | ||||||
|  |                     min_bits_per_sample, max_bits_per_sample | ||||||
|  |                 ), | ||||||
|  |                 cv.Optional(CONF_CHANNELS, default="0"): cv.All( | ||||||
|  |                     cv.ensure_list(cv.int_range(0, 7)), | ||||||
|  |                     cv.Length(min=min_channels, max=max_channels), | ||||||
|  |                     _validate_unique_channels, | ||||||
|  |                 ), | ||||||
|  |                 cv.Optional(CONF_GAIN_FACTOR, default="1"): cv.int_range(1, 64), | ||||||
|  |             }, | ||||||
|  |             key=CONF_MICROPHONE, | ||||||
|  |         ), | ||||||
|  |     ) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | _UNDEF = object() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def final_validate_microphone_source_schema( | ||||||
|  |     component_name: str, sample_rate: int = _UNDEF | ||||||
|  | ): | ||||||
|  |     """Validates that the microphone source can provide audio in the correct format. In particular it validates the sample rate and the enabled channels. | ||||||
|  |  | ||||||
|  |     Note that: | ||||||
|  |       - MicrophoneSource class automatically handles converting bits per sample, so no need to validate | ||||||
|  |       - microphone_source_schema already validates that channels are unique and specifies the max number of channels the component supports | ||||||
|  |  | ||||||
|  |     Args: | ||||||
|  |         component_name (str): The name of the component requesting mic audio | ||||||
|  |         sample_rate (int, optional): The sample rate the component requesting mic audio requires | ||||||
|  |     """ | ||||||
|  |  | ||||||
|  |     def _validate_audio_compatability(config): | ||||||
|  |         if sample_rate is not _UNDEF: | ||||||
|  |             # Issues require changing the microphone configuration | ||||||
|  |             #  - Verifies sample rates match | ||||||
|  |             audio.final_validate_audio_schema( | ||||||
|  |                 component_name, | ||||||
|  |                 audio_device=CONF_MICROPHONE, | ||||||
|  |                 sample_rate=sample_rate, | ||||||
|  |                 audio_device_issue=True, | ||||||
|  |             )(config) | ||||||
|  |  | ||||||
|  |         # Issues require changing the MicrophoneSource configuration | ||||||
|  |         # - Verifies that each of the enabled channels are available | ||||||
|  |         audio.final_validate_audio_schema( | ||||||
|  |             component_name, | ||||||
|  |             audio_device=CONF_MICROPHONE, | ||||||
|  |             enabled_channels=config[CONF_CHANNELS], | ||||||
|  |             audio_device_issue=False, | ||||||
|  |         )(config) | ||||||
|  |  | ||||||
|  |         return config | ||||||
|  |  | ||||||
|  |     return _validate_audio_compatability | ||||||
|  |  | ||||||
|  |  | ||||||
|  | async def microphone_source_to_code(config): | ||||||
|  |     mic = await cg.get_variable(config[CONF_MICROPHONE]) | ||||||
|  |     mic_source = cg.new_Pvariable( | ||||||
|  |         config[CONF_ID], | ||||||
|  |         mic, | ||||||
|  |         config[CONF_BITS_PER_SAMPLE], | ||||||
|  |         config[CONF_GAIN_FACTOR], | ||||||
|  |     ) | ||||||
|  |     for channel in config[CONF_CHANNELS]: | ||||||
|  |         cg.add(mic_source.add_channel(channel)) | ||||||
|  |     return mic_source | ||||||
|  |  | ||||||
|  |  | ||||||
|  | async def microphone_action(config, action_id, template_arg, args): | ||||||
|     var = cg.new_Pvariable(action_id, template_arg) |     var = cg.new_Pvariable(action_id, template_arg) | ||||||
|     await cg.register_parented(var, config[CONF_ID]) |     await cg.register_parented(var, config[CONF_ID]) | ||||||
|     return var |     return var | ||||||
| @@ -72,15 +180,15 @@ async def media_player_action(config, action_id, template_arg, args): | |||||||
|  |  | ||||||
| automation.register_action( | automation.register_action( | ||||||
|     "microphone.capture", CaptureAction, MICROPHONE_ACTION_SCHEMA |     "microphone.capture", CaptureAction, MICROPHONE_ACTION_SCHEMA | ||||||
| )(media_player_action) | )(microphone_action) | ||||||
|  |  | ||||||
| automation.register_action( | automation.register_action( | ||||||
|     "microphone.stop_capture", StopCaptureAction, MICROPHONE_ACTION_SCHEMA |     "microphone.stop_capture", StopCaptureAction, MICROPHONE_ACTION_SCHEMA | ||||||
| )(media_player_action) | )(microphone_action) | ||||||
|  |  | ||||||
| automation.register_condition( | automation.register_condition( | ||||||
|     "microphone.is_capturing", IsCapturingCondition, MICROPHONE_ACTION_SCHEMA |     "microphone.is_capturing", IsCapturingCondition, MICROPHONE_ACTION_SCHEMA | ||||||
| )(media_player_action) | )(microphone_action) | ||||||
|  |  | ||||||
|  |  | ||||||
| @coroutine_with_priority(100.0) | @coroutine_with_priority(100.0) | ||||||
|   | |||||||
| @@ -1,5 +1,7 @@ | |||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
|  | #include "esphome/components/audio/audio.h" | ||||||
|  |  | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <cstdint> | #include <cstdint> | ||||||
| #include <functional> | #include <functional> | ||||||
| @@ -28,9 +30,13 @@ class Microphone { | |||||||
|   bool is_running() const { return this->state_ == STATE_RUNNING; } |   bool is_running() const { return this->state_ == STATE_RUNNING; } | ||||||
|   bool is_stopped() const { return this->state_ == STATE_STOPPED; } |   bool is_stopped() const { return this->state_ == STATE_STOPPED; } | ||||||
|  |  | ||||||
|  |   audio::AudioStreamInfo get_audio_stream_info() { return this->audio_stream_info_; } | ||||||
|  |  | ||||||
|  protected: |  protected: | ||||||
|   State state_{STATE_STOPPED}; |   State state_{STATE_STOPPED}; | ||||||
|  |  | ||||||
|  |   audio::AudioStreamInfo audio_stream_info_; | ||||||
|  |  | ||||||
|   CallbackManager<void(const std::vector<int16_t> &)> data_callbacks_{}; |   CallbackManager<void(const std::vector<int16_t> &)> data_callbacks_{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										96
									
								
								esphome/components/microphone/microphone_source.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								esphome/components/microphone/microphone_source.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,96 @@ | |||||||
|  | #include "microphone_source.h" | ||||||
|  |  | ||||||
|  | namespace esphome { | ||||||
|  | namespace microphone { | ||||||
|  |  | ||||||
|  | void MicrophoneSource::add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback) { | ||||||
|  |   std::function<void(const std::vector<uint8_t> &)> filtered_callback = | ||||||
|  |       [this, data_callback](const std::vector<uint8_t> &data) { | ||||||
|  |         if (this->enabled_) { | ||||||
|  |           data_callback(this->process_audio_(data)); | ||||||
|  |         } | ||||||
|  |       }; | ||||||
|  |   // Future PR will uncomment this! It requires changing the callback vector to an uint8_t in every component using a | ||||||
|  |   // mic callback. | ||||||
|  |   // this->mic_->add_data_callback(std::move(filtered_callback)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void MicrophoneSource::start() { | ||||||
|  |   this->enabled_ = true; | ||||||
|  |   this->mic_->start(); | ||||||
|  | } | ||||||
|  | void MicrophoneSource::stop() { | ||||||
|  |   this->enabled_ = false; | ||||||
|  |   this->mic_->stop(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::vector<uint8_t> MicrophoneSource::process_audio_(const std::vector<uint8_t> &data) { | ||||||
|  |   // Bit depth conversions are obtained by truncating bits or padding with zeros - no dithering is applied. | ||||||
|  |  | ||||||
|  |   const size_t source_bytes_per_sample = this->mic_->get_audio_stream_info().samples_to_bytes(1); | ||||||
|  |   const size_t source_channels = this->mic_->get_audio_stream_info().get_channels(); | ||||||
|  |  | ||||||
|  |   const size_t source_bytes_per_frame = this->mic_->get_audio_stream_info().frames_to_bytes(1); | ||||||
|  |  | ||||||
|  |   const uint32_t total_frames = this->mic_->get_audio_stream_info().bytes_to_frames(data.size()); | ||||||
|  |   const size_t target_bytes_per_sample = (this->bits_per_sample_ + 7) / 8; | ||||||
|  |   const size_t target_bytes_per_frame = target_bytes_per_sample * this->channels_.count(); | ||||||
|  |  | ||||||
|  |   std::vector<uint8_t> filtered_data; | ||||||
|  |   filtered_data.reserve(target_bytes_per_frame * total_frames); | ||||||
|  |  | ||||||
|  |   const int32_t target_min_value = -(1 << (8 * target_bytes_per_sample - 1)); | ||||||
|  |   const int32_t target_max_value = (1 << (8 * target_bytes_per_sample - 1)) - 1; | ||||||
|  |  | ||||||
|  |   for (size_t frame_index = 0; frame_index < total_frames; ++frame_index) { | ||||||
|  |     for (size_t channel_index = 0; channel_index < source_channels; ++channel_index) { | ||||||
|  |       if (this->channels_.test(channel_index)) { | ||||||
|  |         // Channel's current sample is included in the target mask. Convert bits per sample, if necessary. | ||||||
|  |  | ||||||
|  |         size_t sample_index = frame_index * source_bytes_per_frame + channel_index * source_bytes_per_sample; | ||||||
|  |  | ||||||
|  |         int32_t sample = 0; | ||||||
|  |  | ||||||
|  |         // Copy the data into the most significant bits of the sample variable to ensure the sign bit is correct | ||||||
|  |         uint8_t bit_offset = (4 - source_bytes_per_sample) * 8; | ||||||
|  |         for (int i = 0; i < source_bytes_per_sample; ++i) { | ||||||
|  |           sample |= data[sample_index + i] << bit_offset; | ||||||
|  |           bit_offset += 8; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Shift data back to the least significant bits | ||||||
|  |         if (source_bytes_per_sample >= target_bytes_per_sample) { | ||||||
|  |           // Keep source bytes per sample of data so that the gain multiplication uses all significant bits instead of | ||||||
|  |           // shifting to the target bytes per sample immediately, potentially losing information. | ||||||
|  |           sample >>= (4 - source_bytes_per_sample) * 8;  // ``source_bytes_per_sample`` bytes of valid data | ||||||
|  |         } else { | ||||||
|  |           // Keep padded zeros to match the target bytes per sample | ||||||
|  |           sample >>= (4 - target_bytes_per_sample) * 8;  // ``target_bytes_per_sample`` bytes of valid data | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Apply gain using multiplication | ||||||
|  |         sample *= this->gain_factor_; | ||||||
|  |  | ||||||
|  |         // Match target output bytes by shifting out the least significant bits | ||||||
|  |         if (source_bytes_per_sample > target_bytes_per_sample) { | ||||||
|  |           sample >>= 8 * (source_bytes_per_sample - | ||||||
|  |                           target_bytes_per_sample);  //  ``target_bytes_per_sample`` bytes of valid data | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // Clamp ``sample`` to the target bytes per sample range in case gain multiplication overflows | ||||||
|  |         sample = clamp<int32_t>(sample, target_min_value, target_max_value); | ||||||
|  |  | ||||||
|  |         // Copy ``target_bytes_per_sample`` bytes to the output buffer. | ||||||
|  |         for (int i = 0; i < target_bytes_per_sample; ++i) { | ||||||
|  |           filtered_data.push_back(static_cast<uint8_t>(sample)); | ||||||
|  |           sample >>= 8; | ||||||
|  |         } | ||||||
|  |       } | ||||||
|  |     } | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   return filtered_data; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | }  // namespace microphone | ||||||
|  | }  // namespace esphome | ||||||
							
								
								
									
										63
									
								
								esphome/components/microphone/microphone_source.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								esphome/components/microphone/microphone_source.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | |||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <bitset> | ||||||
|  | #include <cstddef> | ||||||
|  | #include <cstdint> | ||||||
|  | #include <functional> | ||||||
|  | #include <vector> | ||||||
|  | #include "microphone.h" | ||||||
|  |  | ||||||
|  | namespace esphome { | ||||||
|  | namespace microphone { | ||||||
|  |  | ||||||
|  | class MicrophoneSource { | ||||||
|  |   /* | ||||||
|  |    * @brief Helper class that handles converting raw microphone data to a requested format. | ||||||
|  |    * Components requesting microphone audio should register a callback through this class instead of registering a | ||||||
|  |    * callback directly with the microphone if a particular format is required. | ||||||
|  |    * | ||||||
|  |    * Raw microphone data may have a different number of bits per sample and number of channels than the requesting | ||||||
|  |    * component needs. This class handles the conversion by: | ||||||
|  |    *   - Internally adds a callback to receive the raw microphone data | ||||||
|  |    *   - The ``process_audio_`` handles the raw data | ||||||
|  |    *     - Only the channels set in the ``channels_`` bitset are passed through | ||||||
|  |    *     - Passed through samples have the bits per sample converted | ||||||
|  |    *     - A gain factor is optionally applied to increase the volume - audio may clip! | ||||||
|  |    *   - The processed audio is passed to the callback of the component requesting microphone data | ||||||
|  |    *   - It tracks an internal enabled state, so it ignores raw microphone data when the component requesting | ||||||
|  |    *     microphone data is not actively requesting audio. | ||||||
|  |    * | ||||||
|  |    * Note that this class cannot convert sample rates! | ||||||
|  |    */ | ||||||
|  |  public: | ||||||
|  |   MicrophoneSource(Microphone *mic, uint8_t bits_per_sample, int32_t gain_factor) | ||||||
|  |       : mic_(mic), bits_per_sample_(bits_per_sample), gain_factor_(gain_factor) {} | ||||||
|  |  | ||||||
|  |   /// @brief Enables a channel to be processed through the callback. | ||||||
|  |   /// | ||||||
|  |   /// If the microphone component only has reads from one channel, it is always in channel number 0, regardless if it | ||||||
|  |   /// represents left or right. If the microphone reads from both left and right, channel number 0 and 1 represent the | ||||||
|  |   /// left and right channels respectively. | ||||||
|  |   /// | ||||||
|  |   /// @param channel 0-indexed channel number to enable | ||||||
|  |   void add_channel(uint8_t channel) { this->channels_.set(channel); } | ||||||
|  |  | ||||||
|  |   void add_data_callback(std::function<void(const std::vector<uint8_t> &)> &&data_callback); | ||||||
|  |  | ||||||
|  |   void start(); | ||||||
|  |   void stop(); | ||||||
|  |   bool is_running() const { return (this->mic_->is_running() && this->enabled_); } | ||||||
|  |   bool is_stopped() const { return !this->enabled_; } | ||||||
|  |  | ||||||
|  |  protected: | ||||||
|  |   std::vector<uint8_t> process_audio_(const std::vector<uint8_t> &data); | ||||||
|  |  | ||||||
|  |   Microphone *mic_; | ||||||
|  |   uint8_t bits_per_sample_; | ||||||
|  |   std::bitset<8> channels_; | ||||||
|  |   int32_t gain_factor_; | ||||||
|  |   bool enabled_{false}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | }  // namespace microphone | ||||||
|  | }  // namespace esphome | ||||||
		Reference in New Issue
	
	Block a user