mirror of
				https://github.com/esphome/esphome.git
				synced 2025-10-30 22:53:59 +00:00 
			
		
		
		
	Add more debugging logs to microWakeWord (#6238)
This commit is contained in:
		| @@ -261,7 +261,7 @@ CONFIG_SCHEMA = cv.All( | ||||
|         { | ||||
|             cv.GenerateID(): cv.declare_id(MicroWakeWord), | ||||
|             cv.GenerateID(CONF_MICROPHONE): cv.use_id(microphone.Microphone), | ||||
|             cv.Optional(CONF_PROBABILITY_CUTOFF): cv.float_, | ||||
|             cv.Optional(CONF_PROBABILITY_CUTOFF): cv.percentage, | ||||
|             cv.Optional(CONF_SLIDING_WINDOW_AVERAGE_SIZE): cv.positive_int, | ||||
|             cv.Optional(CONF_ON_WAKE_WORD_DETECTED): automation.validate_automation( | ||||
|                 single=True | ||||
|   | ||||
| @@ -53,8 +53,15 @@ static const LogString *micro_wake_word_state_to_string(State state) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Logs this component's configuration: the wake word, the probability cutoff, and the sliding window size. | ||||
| void MicroWakeWord::dump_config() { | ||||
|   ESP_LOGCONFIG(TAG, "microWakeWord:"); | ||||
|   ESP_LOGCONFIG(TAG, "  Wake Word: %s", this->get_wake_word().c_str()); | ||||
|   ESP_LOGCONFIG(TAG, "  Probability cutoff: %.3f", this->probability_cutoff_); | ||||
|   // NOTE(review): set_sliding_window_average_size() takes a size_t; if the member is also a size_t, "%d" is a | ||||
|   // mismatched format specifier -- confirm against the member declaration. | ||||
|   ESP_LOGCONFIG(TAG, "  Sliding window size: %d", this->sliding_window_average_size_); | ||||
| } | ||||
|  | ||||
| void MicroWakeWord::setup() { | ||||
|   ESP_LOGCONFIG(TAG, "Setting up Micro Wake Word..."); | ||||
|   ESP_LOGCONFIG(TAG, "Setting up microWakeWord..."); | ||||
|  | ||||
|   if (!this->initialize_models()) { | ||||
|     ESP_LOGE(TAG, "Failed to initialize models"); | ||||
| @@ -63,7 +70,7 @@ void MicroWakeWord::setup() { | ||||
|   } | ||||
|  | ||||
|   ExternalRAMAllocator<int16_t> allocator(ExternalRAMAllocator<int16_t>::ALLOW_FAILURE); | ||||
|   this->input_buffer_ = allocator.allocate(NEW_SAMPLES_TO_GET); | ||||
|   this->input_buffer_ = allocator.allocate(INPUT_BUFFER_SIZE * sizeof(int16_t)); | ||||
|   if (this->input_buffer_ == nullptr) { | ||||
|     ESP_LOGW(TAG, "Could not allocate input buffer"); | ||||
|     this->mark_failed(); | ||||
| @@ -81,7 +88,7 @@ void MicroWakeWord::setup() { | ||||
| } | ||||
|  | ||||
| int MicroWakeWord::read_microphone_() { | ||||
|   size_t bytes_read = this->microphone_->read(this->input_buffer_, NEW_SAMPLES_TO_GET * sizeof(int16_t)); | ||||
|   size_t bytes_read = this->microphone_->read(this->input_buffer_, INPUT_BUFFER_SIZE * sizeof(int16_t)); | ||||
|   if (bytes_read == 0) { | ||||
|     return 0; | ||||
|   } | ||||
| @@ -279,11 +286,6 @@ bool MicroWakeWord::initialize_models() { | ||||
| } | ||||
|  | ||||
| bool MicroWakeWord::update_features_() { | ||||
|   // Verify we have enough samples for a feature slice | ||||
|   if (!this->slice_available_()) { | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   // Retrieve strided audio samples | ||||
|   int16_t *audio_samples = nullptr; | ||||
|   if (!this->stride_audio_samples_(&audio_samples)) { | ||||
| @@ -369,20 +371,36 @@ void MicroWakeWord::set_sliding_window_average_size(size_t size) { | ||||
| // Reports whether the ring buffer holds more than NEW_SAMPLES_TO_GET int16_t samples (measured in bytes). | ||||
| // As a side effect, logs a warning whenever the ring buffer's free space is smaller than that same amount. | ||||
| bool MicroWakeWord::slice_available_() { | ||||
|   size_t available = this->ring_buffer_->available(); | ||||
|  | ||||
|   size_t free = this->ring_buffer_->free(); | ||||
|  | ||||
|   if (free < NEW_SAMPLES_TO_GET * sizeof(int16_t)) { | ||||
|     // If the ring buffer is within one audio slice of being full, then wake word detection will have issues. | ||||
|     // If this is constantly occurring, then some possibilities why are | ||||
|     //  1) there are too many other slow components configured | ||||
|     //  2) the ESP32 isn't fast enough; e.g., an ESP32 is much slower than an ESP32-S3 at inferences. | ||||
|     //  3) the model is too large | ||||
|     //  4) the model uses operations that are not optimized | ||||
|     // The second sentence of the message is only compiled in when USE_ESP32_VARIANT_ESP32S3 is not defined. | ||||
|     // NOTE(review): the message below spells "response" as "reponse"; it is a runtime string and is left as-is. | ||||
|     ESP_LOGW(TAG, | ||||
|              "Audio buffer is nearly full. Wake word detection may be less accurate and have slower reponse times. " | ||||
| #if !defined(USE_ESP32_VARIANT_ESP32S3) | ||||
|              "microWakeWord is designed for the ESP32-S3. The current platform is too slow for this model." | ||||
| #endif | ||||
|     ); | ||||
|   } | ||||
|  | ||||
|   // NOTE(review): this comparison is strict, so exactly one slice worth of bytes counts as "not available", | ||||
|   // whereas the check in stride_audio_samples_() uses `<` (exactly one slice passes) -- confirm which is intended. | ||||
|   return available > (NEW_SAMPLES_TO_GET * sizeof(int16_t)); | ||||
| } | ||||
|  | ||||
| bool MicroWakeWord::stride_audio_samples_(int16_t **audio_samples) { | ||||
|   if (!this->slice_available_()) { | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   // Copy 320 bytes (160 samples over 10 ms) into preprocessor_audio_buffer_ from history in | ||||
|   // preprocessor_stride_buffer_ | ||||
|   memcpy((void *) (this->preprocessor_audio_buffer_), (void *) (this->preprocessor_stride_buffer_), | ||||
|          HISTORY_SAMPLES_TO_KEEP * sizeof(int16_t)); | ||||
|  | ||||
|   if (this->ring_buffer_->available() < NEW_SAMPLES_TO_GET * sizeof(int16_t)) { | ||||
|     ESP_LOGD(TAG, "Audio Buffer not full enough"); | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   // Copy 640 bytes (320 samples over 20 ms) from the ring buffer | ||||
|   // The first 320 bytes (160 samples over 10 ms) will be from history | ||||
|   size_t bytes_read = this->ring_buffer_->read((void *) (this->preprocessor_audio_buffer_ + HISTORY_SAMPLES_TO_KEEP), | ||||
|   | ||||
| @@ -66,6 +66,7 @@ class MicroWakeWord : public Component { | ||||
|   void setup() override; | ||||
|   void loop() override; | ||||
|   float get_setup_priority() const override; | ||||
|   void dump_config() override; | ||||
|  | ||||
|   void start(); | ||||
|   void stop(); | ||||
| @@ -74,6 +75,8 @@ class MicroWakeWord : public Component { | ||||
|  | ||||
|   bool initialize_models(); | ||||
|  | ||||
|   std::string get_wake_word() { return this->wake_word_; } | ||||
|  | ||||
|   // Increasing either of these will reduce the rate of false acceptances while increasing the false rejection rate | ||||
|   void set_probability_cutoff(float probability_cutoff) { this->probability_cutoff_ = probability_cutoff; } | ||||
|   void set_sliding_window_average_size(size_t size); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user