Mirror of https://github.com/esphome/esphome.git (synced 2025-10-30 22:53:59 +00:00)

Merge branch 'integration' into memory_api
@@ -119,6 +119,12 @@ ENUM_SPECIAL_EFFECT = {
    "SEPIA": ESP32SpecialEffect.ESP32_SPECIAL_EFFECT_SEPIA,
}

camera_fb_location_t = cg.global_ns.enum("camera_fb_location_t")
ENUM_FB_LOCATION = {
    "PSRAM": cg.global_ns.CAMERA_FB_IN_PSRAM,
    "DRAM": cg.global_ns.CAMERA_FB_IN_DRAM,
}

# pin assignment
CONF_HREF_PIN = "href_pin"
CONF_PIXEL_CLOCK_PIN = "pixel_clock_pin"
@@ -149,6 +155,7 @@ CONF_MAX_FRAMERATE = "max_framerate"
CONF_IDLE_FRAMERATE = "idle_framerate"
# frame buffer
CONF_FRAME_BUFFER_COUNT = "frame_buffer_count"
CONF_FRAME_BUFFER_LOCATION = "frame_buffer_location"

# stream trigger
CONF_ON_STREAM_START = "on_stream_start"
@@ -230,6 +237,9 @@ CONFIG_SCHEMA = cv.All(
                cv.framerate, cv.Range(min=0, max=1)
            ),
            cv.Optional(CONF_FRAME_BUFFER_COUNT, default=1): cv.int_range(min=1, max=2),
            cv.Optional(CONF_FRAME_BUFFER_LOCATION, default="PSRAM"): cv.enum(
                ENUM_FB_LOCATION, upper=True
            ),
            cv.Optional(CONF_ON_STREAM_START): automation.validate_automation(
                {
                    cv.GenerateID(CONF_TRIGGER_ID): cv.declare_id(
@@ -301,6 +311,7 @@ SETTERS = {
    CONF_WB_MODE: "set_wb_mode",
    # test pattern
    CONF_TEST_PATTERN: "set_test_pattern",
    CONF_FRAME_BUFFER_LOCATION: "set_frame_buffer_location",
}


@@ -328,6 +339,7 @@ async def to_code(config):
    else:
        cg.add(var.set_idle_update_interval(1000 / config[CONF_IDLE_FRAMERATE]))
    cg.add(var.set_frame_buffer_count(config[CONF_FRAME_BUFFER_COUNT]))
    cg.add(var.set_frame_buffer_location(config[CONF_FRAME_BUFFER_LOCATION]))
    cg.add(var.set_frame_size(config[CONF_RESOLUTION]))

    cg.add_define("USE_CAMERA")

@@ -133,6 +133,7 @@ void ESP32Camera::dump_config() {
  ESP_LOGCONFIG(TAG,
                "  JPEG Quality: %u\n"
                "  Framebuffer Count: %u\n"
                "  Framebuffer Location: %s\n"
                "  Contrast: %d\n"
                "  Brightness: %d\n"
                "  Saturation: %d\n"
@@ -140,8 +141,9 @@ void ESP32Camera::dump_config() {
                "  Horizontal Mirror: %s\n"
                "  Special Effect: %u\n"
                "  White Balance Mode: %u",
                st.quality, conf.fb_count, st.contrast, st.brightness, st.saturation, ONOFF(st.vflip),
                ONOFF(st.hmirror), st.special_effect, st.wb_mode);
                st.quality, conf.fb_count, this->config_.fb_location == CAMERA_FB_IN_PSRAM ? "PSRAM" : "DRAM",
                st.contrast, st.brightness, st.saturation, ONOFF(st.vflip), ONOFF(st.hmirror), st.special_effect,
                st.wb_mode);
  // ESP_LOGCONFIG(TAG, "  Auto White Balance: %u", st.awb);
  // ESP_LOGCONFIG(TAG, "  Auto White Balance Gain: %u", st.awb_gain);
  ESP_LOGCONFIG(TAG,
@@ -350,6 +352,9 @@ void ESP32Camera::set_frame_buffer_count(uint8_t fb_count) {
  this->config_.fb_count = fb_count;
  this->set_frame_buffer_mode(fb_count > 1 ? CAMERA_GRAB_LATEST : CAMERA_GRAB_WHEN_EMPTY);
}
void ESP32Camera::set_frame_buffer_location(camera_fb_location_t fb_location) {
  this->config_.fb_location = fb_location;
}

/* ---------------- public API (specific) ---------------- */
void ESP32Camera::add_image_callback(std::function<void(std::shared_ptr<camera::CameraImage>)> &&callback) {

@@ -152,6 +152,7 @@ class ESP32Camera : public camera::Camera {
  /* -- frame buffer */
  void set_frame_buffer_mode(camera_grab_mode_t mode);
  void set_frame_buffer_count(uint8_t fb_count);
  void set_frame_buffer_location(camera_fb_location_t fb_location);

  /* public API (derivated) */
  void setup() override;

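The new setter only stores the value in the component's camera_config_t; the esp32-camera driver consumes it when the camera is initialized. Below is a minimal sketch (not part of this commit) of how fb_location ends up affecting the driver, assuming the accumulated config struct is handed to esp_camera_init() during setup, as the existing fb_count/grab_mode setters imply. Pin, clock, pixel-format, and resolution fields are omitted for brevity, so this fragment is illustrative rather than a working init.

#include "esp_camera.h"  // esp32-camera driver

// Hypothetical trimmed-down init: only the fields touched by the new option are shown.
void init_camera_sketch(bool use_psram) {
  camera_config_t config = {};
  config.fb_count = 1;                                  // set_frame_buffer_count(1)
  config.grab_mode = CAMERA_GRAB_WHEN_EMPTY;            // single buffer -> grab when empty
  config.fb_location = use_psram ? CAMERA_FB_IN_PSRAM   // frame_buffer_location: PSRAM
                                 : CAMERA_FB_IN_DRAM;   // frame_buffer_location: DRAM
  esp_err_t err = esp_camera_init(&config);             // driver allocates the frame buffer(s) here
  if (err != ESP_OK) {
    // With CAMERA_FB_IN_DRAM, larger resolutions can fail at this point because
    // the frame buffer must fit in internal RAM.
  }
}
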
@@ -29,7 +29,21 @@ CONFIG_SCHEMA = (
    .extend(
        {
            cv.Required(CONF_PIN): pins.gpio_input_pin_schema,
            cv.Optional(CONF_USE_INTERRUPT, default=True): cv.boolean,
            # Interrupts are disabled by default for bk72xx, ln882x, and rtl87xx platforms
            # due to hardware limitations or lack of reliable interrupt support. This ensures
            # stable operation on these platforms. Future maintainers should verify platform
            # capabilities before changing this default behavior.
            cv.SplitDefault(
                CONF_USE_INTERRUPT,
                bk72xx=False,
                esp32=True,
                esp8266=True,
                host=True,
                ln882x=False,
                nrf52=True,
                rp2040=True,
                rtl87xx=False,
            ): cv.boolean,
            cv.Optional(CONF_INTERRUPT_TYPE, default="ANY"): cv.enum(
                INTERRUPT_TYPES, upper=True
            ),

@@ -200,7 +200,7 @@ AudioPipelineState AudioPipeline::process_state() {
      if ((this->read_task_handle_ != nullptr) || (this->decode_task_handle_ != nullptr)) {
        this->delete_tasks_();
        if (this->hard_stop_) {
          // Stop command was sent, so immediately end of the playback
          // Stop command was sent, so immediately end the playback
          this->speaker_->stop();
          this->hard_stop_ = false;
        } else {
@@ -210,13 +210,25 @@ AudioPipelineState AudioPipeline::process_state() {
      }
    }
    this->is_playing_ = false;
    return AudioPipelineState::STOPPED;
    if (!this->speaker_->is_running()) {
      return AudioPipelineState::STOPPED;
    } else {
      this->is_finishing_ = true;
    }
  }

  if (this->pause_state_) {
    return AudioPipelineState::PAUSED;
  }

  if (this->is_finishing_) {
    if (!this->speaker_->is_running()) {
      this->is_finishing_ = false;
    } else {
      return AudioPipelineState::PLAYING;
    }
  }

  if ((this->read_task_handle_ == nullptr) && (this->decode_task_handle_ == nullptr)) {
    // No tasks are running, so the pipeline is stopped.
    xEventGroupClearBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP);

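The intent of this hunk: once the read/decode tasks are torn down, the pipeline no longer reports STOPPED immediately; if the speaker is still draining previously queued audio, it enters a finishing phase and keeps reporting PLAYING until the speaker actually stops. A compressed, free-standing sketch of that decision logic follows (illustrative only; the names mirror the diff, the real enum has more members, and the real method also handles event-group bits and task cleanup).

// Minimal state set for the sketch.
enum class AudioPipelineState { PLAYING, PAUSED, STOPPED };

AudioPipelineState process_state_sketch(bool tasks_deleted, bool speaker_running,
                                        bool paused, bool &is_finishing) {
  if (tasks_deleted) {
    if (!speaker_running) {
      return AudioPipelineState::STOPPED;  // nothing left queued, stop right away
    }
    is_finishing = true;  // tasks are gone, but buffered audio is still being drained
  }
  if (paused) {
    return AudioPipelineState::PAUSED;
  }
  if (is_finishing) {
    if (speaker_running) {
      return AudioPipelineState::PLAYING;  // keep reporting PLAYING until drained
    }
    is_finishing = false;  // speaker has finished; report STOPPED from here on
  }
  return AudioPipelineState::STOPPED;
}
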
@@ -114,6 +114,7 @@ class AudioPipeline {

  bool hard_stop_{false};
  bool is_playing_{false};
  bool is_finishing_{false};
  bool pause_state_{false};
  bool task_stack_in_psram_;

@@ -35,6 +35,27 @@ void VoiceAssistant::setup() {
      temp_ring_buffer->write((void *) data.data(), data.size());
    }
  });

#ifdef USE_MEDIA_PLAYER
  if (this->media_player_ != nullptr) {
    this->media_player_->add_on_state_callback([this]() {
      switch (this->media_player_->state) {
        case media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING:
          if (this->media_player_response_state_ == MediaPlayerResponseState::URL_SENT) {
            // State changed to announcing after receiving the url
            this->media_player_response_state_ = MediaPlayerResponseState::PLAYING;
          }
          break;
        default:
          if (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING) {
            // No longer announcing the TTS response
            this->media_player_response_state_ = MediaPlayerResponseState::FINISHED;
          }
          break;
      }
    });
  }
#endif
}

float VoiceAssistant::get_setup_priority() const { return setup_priority::AFTER_CONNECTION; }
@@ -223,6 +244,13 @@ void VoiceAssistant::loop() {
      msg.wake_word_phrase = this->wake_word_;
      this->wake_word_ = "";

      // Reset media player state tracking
#ifdef USE_MEDIA_PLAYER
      if (this->media_player_ != nullptr) {
        this->media_player_response_state_ = MediaPlayerResponseState::IDLE;
      }
#endif

      if (this->api_client_ == nullptr ||
          !this->api_client_->send_message(msg, api::VoiceAssistantRequest::MESSAGE_TYPE)) {
        ESP_LOGW(TAG, "Could not request start");
@@ -315,17 +343,10 @@ void VoiceAssistant::loop() {
#endif
#ifdef USE_MEDIA_PLAYER
      if (this->media_player_ != nullptr) {
        playing = (this->media_player_->state == media_player::MediaPlayerState::MEDIA_PLAYER_STATE_ANNOUNCING);
        playing = (this->media_player_response_state_ == MediaPlayerResponseState::PLAYING);

        if (playing && this->media_player_wait_for_announcement_start_) {
          // Announcement has started playing, wait for it to finish
          this->media_player_wait_for_announcement_start_ = false;
          this->media_player_wait_for_announcement_end_ = true;
        }

        if (!playing && this->media_player_wait_for_announcement_end_) {
          // Announcement has finished playing
          this->media_player_wait_for_announcement_end_ = false;
        if (this->media_player_response_state_ == MediaPlayerResponseState::FINISHED) {
          this->media_player_response_state_ = MediaPlayerResponseState::IDLE;
          this->cancel_timeout("playing");
          ESP_LOGD(TAG, "Announcement finished playing");
          this->set_state_(State::RESPONSE_FINISHED, State::RESPONSE_FINISHED);
@@ -556,7 +577,7 @@ void VoiceAssistant::request_stop() {
      break;
    case State::AWAITING_RESPONSE:
      this->signal_stop_();
      // Fallthrough intended to stop a streaming TTS announcement that has potentially started
      break;
    case State::STREAMING_RESPONSE:
#ifdef USE_MEDIA_PLAYER
      // Stop any ongoing media player announcement
@@ -566,6 +587,10 @@ void VoiceAssistant::request_stop() {
            .set_announcement(true)
            .perform();
      }
      if (this->started_streaming_tts_) {
        // Haven't reached the TTS_END stage, so send the stop signal to HA.
        this->signal_stop_();
      }
#endif
      break;
    case State::RESPONSE_FINISHED:
@@ -649,13 +674,16 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
      if (this->media_player_ != nullptr) {
        for (const auto &arg : msg.data) {
          if ((arg.name == "tts_start_streaming") && (arg.value == "1") && !this->tts_response_url_.empty()) {
            this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;

            this->media_player_->make_call().set_media_url(this->tts_response_url_).set_announcement(true).perform();

            this->media_player_wait_for_announcement_start_ = true;
            this->media_player_wait_for_announcement_end_ = false;
            this->started_streaming_tts_ = true;
            this->start_playback_timeout_();

            tts_url_for_trigger = this->tts_response_url_;
            this->tts_response_url_.clear();  // Reset streaming URL
            this->set_state_(State::STREAMING_RESPONSE, State::STREAMING_RESPONSE);
          }
        }
      }
@@ -714,18 +742,22 @@ void VoiceAssistant::on_event(const api::VoiceAssistantEventResponse &msg) {
      this->defer([this, url]() {
#ifdef USE_MEDIA_PLAYER
        if ((this->media_player_ != nullptr) && (!this->started_streaming_tts_)) {
          this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;

          this->media_player_->make_call().set_media_url(url).set_announcement(true).perform();

          this->media_player_wait_for_announcement_start_ = true;
          this->media_player_wait_for_announcement_end_ = false;
          // Start the playback timeout, as the media player state isn't immediately updated
          this->start_playback_timeout_();
        }
        this->started_streaming_tts_ = false;  // Helps indicate reaching the TTS_END stage
#endif
        this->tts_end_trigger_->trigger(url);
      });
      State new_state = this->local_output_ ? State::STREAMING_RESPONSE : State::IDLE;
      this->set_state_(new_state, new_state);
      if (new_state != this->state_) {
        // Don't needlessly change the state. The intent progress stage may have already changed the state to streaming
        // response.
        this->set_state_(new_state, new_state);
      }
      break;
    }
    case api::enums::VOICE_ASSISTANT_RUN_END: {
@@ -876,6 +908,9 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
#ifdef USE_MEDIA_PLAYER
  if (this->media_player_ != nullptr) {
    this->tts_start_trigger_->trigger(msg.text);

    this->media_player_response_state_ = MediaPlayerResponseState::URL_SENT;

    if (!msg.preannounce_media_id.empty()) {
      this->media_player_->make_call().set_media_url(msg.preannounce_media_id).set_announcement(true).perform();
    }
@@ -887,9 +922,6 @@ void VoiceAssistant::on_announce(const api::VoiceAssistantAnnounceRequest &msg)
        .perform();
    this->continue_conversation_ = msg.start_conversation;

    this->media_player_wait_for_announcement_start_ = true;
    this->media_player_wait_for_announcement_end_ = false;
    // Start the playback timeout, as the media player state isn't immediately updated
    this->start_playback_timeout_();

    if (this->continuous_) {

@@ -90,6 +90,15 @@ struct Configuration {
  uint32_t max_active_wake_words;
};

#ifdef USE_MEDIA_PLAYER
enum class MediaPlayerResponseState {
  IDLE,
  URL_SENT,
  PLAYING,
  FINISHED,
};
#endif

class VoiceAssistant : public Component {
 public:
  VoiceAssistant();
@@ -272,8 +281,8 @@ class VoiceAssistant : public Component {
  media_player::MediaPlayer *media_player_{nullptr};
  std::string tts_response_url_{""};
  bool started_streaming_tts_{false};
  bool media_player_wait_for_announcement_start_{false};
  bool media_player_wait_for_announcement_end_{false};

  MediaPlayerResponseState media_player_response_state_{MediaPlayerResponseState::IDLE};
#endif

  bool local_output_{false};

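Taken together, the new enum replaces the two wait_for_announcement_* booleans with an explicit progression: IDLE until a TTS or announcement URL is handed to the media player (URL_SENT), PLAYING once the player reports MEDIA_PLAYER_STATE_ANNOUNCING, FINISHED when it leaves that state, and back to IDLE when loop() finalizes the response. The following is a small, self-contained sketch of the callback half of that tracking; it mirrors the lambda added in setup() above but is not itself part of the commit, and `announcing` stands in for the real comparison against the media player's state.

enum class MediaPlayerResponseState { IDLE, URL_SENT, PLAYING, FINISHED };

// Sketch of the state-callback transitions; loop() later consumes FINISHED,
// cancels the "playing" timeout, and resets the state to IDLE.
MediaPlayerResponseState on_player_state_change(MediaPlayerResponseState current,
                                                bool announcing) {
  if (announcing && current == MediaPlayerResponseState::URL_SENT) {
    return MediaPlayerResponseState::PLAYING;   // announcement actually started
  }
  if (!announcing && current == MediaPlayerResponseState::PLAYING) {
    return MediaPlayerResponseState::FINISHED;  // announcement finished
  }
  return current;  // no transition
}
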
@@ -12,7 +12,7 @@ platformio==6.1.18  # When updating platformio, also update /docker/Dockerfile
esptool==4.9.0
click==8.1.7
esphome-dashboard==20250514.0
aioesphomeapi==37.0.0
aioesphomeapi==37.0.1
zeroconf==0.147.0
puremagic==1.30
ruamel.yaml==0.18.14 # dashboard_import

@@ -22,6 +22,7 @@ esp32_camera:
  power_down_pin: 1
  resolution: 640x480
  jpeg_quality: 10
  frame_buffer_location: PSRAM
  on_image:
    then:
      - lambda: |-