diff --git a/esphome/components/i2s_audio/microphone/i2s_audio_microphone.cpp b/esphome/components/i2s_audio/microphone/i2s_audio_microphone.cpp
index 44c73eb8fd..ec2fe258c9 100644
--- a/esphome/components/i2s_audio/microphone/i2s_audio_microphone.cpp
+++ b/esphome/components/i2s_audio/microphone/i2s_audio_microphone.cpp
@@ -37,6 +37,8 @@ void I2SAudioMicrophone::setup() {
 void I2SAudioMicrophone::start() {
   if (this->is_failed())
     return;
+  if (this->state_ == microphone::STATE_RUNNING)
+    return;  // Already running; leave state_ as-is instead of resetting it to STATE_STARTING
   this->state_ = microphone::STATE_STARTING;
 }
 void I2SAudioMicrophone::start_() {
diff --git a/esphome/components/voice_assistant/voice_assistant.cpp b/esphome/components/voice_assistant/voice_assistant.cpp
index 12fbdc97b4..27dc201073 100644
--- a/esphome/components/voice_assistant/voice_assistant.cpp
+++ b/esphome/components/voice_assistant/voice_assistant.cpp
@@ -287,6 +287,7 @@ void VoiceAssistant::loop() {
           }
         }
         if (this->wait_for_stream_end_) {
+          this->cancel_timeout("playing");
           break;  // We dont want to timeout here as the STREAM_END event will take care of that.
         }
         playing = this->speaker_->is_running();