From 84836f15db00e8eecc73fa06b169fa7265ed653b Mon Sep 17 00:00:00 2001 From: Kevin Ahrendt Date: Mon, 10 Feb 2025 13:00:23 -0600 Subject: [PATCH] [speaker] Media Player Components PR9 (#8171) Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com> --- CODEOWNERS | 1 + esphome/components/media_player/__init__.py | 59 +- esphome/components/media_player/automation.h | 13 +- .../components/media_player/media_player.cpp | 8 + .../components/media_player/media_player.h | 12 +- .../speaker/media_player/__init__.py | 458 ++++++++++++++ .../speaker/media_player/audio_pipeline.cpp | 560 +++++++++++++++++ .../speaker/media_player/audio_pipeline.h | 158 +++++ .../speaker/media_player/automation.h | 26 + .../media_player/speaker_media_player.cpp | 577 ++++++++++++++++++ .../media_player/speaker_media_player.h | 160 +++++ tests/components/media_player/common.yaml | 2 + .../speaker/common-media_player.yaml | 12 + .../speaker/media_player.esp32-idf.yaml | 9 + .../speaker/media_player.esp32-s3-idf.yaml | 9 + 15 files changed, 2043 insertions(+), 21 deletions(-) create mode 100644 esphome/components/speaker/media_player/__init__.py create mode 100644 esphome/components/speaker/media_player/audio_pipeline.cpp create mode 100644 esphome/components/speaker/media_player/audio_pipeline.h create mode 100644 esphome/components/speaker/media_player/automation.h create mode 100644 esphome/components/speaker/media_player/speaker_media_player.cpp create mode 100644 esphome/components/speaker/media_player/speaker_media_player.h create mode 100644 tests/components/speaker/common-media_player.yaml create mode 100644 tests/components/speaker/media_player.esp32-idf.yaml create mode 100644 tests/components/speaker/media_player.esp32-s3-idf.yaml diff --git a/CODEOWNERS b/CODEOWNERS index d4b3d7eff9..26e36befe5 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -390,6 +390,7 @@ esphome/components/sn74hc165/* @jesserockz esphome/components/socket/* @esphome/core 
esphome/components/sonoff_d1/* @anatoly-savchenkov esphome/components/speaker/* @jesserockz @kahrendt +esphome/components/speaker/media_player/* @kahrendt @synesthesiam esphome/components/spi/* @clydebarrow @esphome/core esphome/components/spi_device/* @clydebarrow esphome/components/spi_led_strip/* @clydebarrow diff --git a/esphome/components/media_player/__init__.py b/esphome/components/media_player/__init__.py index a46b30db29..b2543ac05f 100644 --- a/esphome/components/media_player/__init__.py +++ b/esphome/components/media_player/__init__.py @@ -1,5 +1,4 @@ from esphome import automation -from esphome.automation import maybe_simple_id import esphome.codegen as cg import esphome.config_validation as cv from esphome.const import ( @@ -21,6 +20,16 @@ media_player_ns = cg.esphome_ns.namespace("media_player") MediaPlayer = media_player_ns.class_("MediaPlayer") +MediaPlayerSupportedFormat = media_player_ns.struct("MediaPlayerSupportedFormat") + +MediaPlayerFormatPurpose = media_player_ns.enum( + "MediaPlayerFormatPurpose", is_class=True +) +MEDIA_PLAYER_FORMAT_PURPOSE_ENUM = { + "default": MediaPlayerFormatPurpose.PURPOSE_DEFAULT, + "announcement": MediaPlayerFormatPurpose.PURPOSE_ANNOUNCEMENT, +} + PlayAction = media_player_ns.class_( "PlayAction", automation.Action, cg.Parented.template(MediaPlayer) @@ -47,7 +56,7 @@ VolumeSetAction = media_player_ns.class_( "VolumeSetAction", automation.Action, cg.Parented.template(MediaPlayer) ) - +CONF_ANNOUNCEMENT = "announcement" CONF_ON_PLAY = "on_play" CONF_ON_PAUSE = "on_pause" CONF_ON_ANNOUNCEMENT = "on_announcement" @@ -125,7 +134,16 @@ MEDIA_PLAYER_SCHEMA = cv.ENTITY_BASE_SCHEMA.extend( ) -MEDIA_PLAYER_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(MediaPlayer)}) +MEDIA_PLAYER_ACTION_SCHEMA = cv.Schema( + { + cv.GenerateID(): cv.use_id(MediaPlayer), + cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.templatable(cv.boolean), + } +) + +MEDIA_PLAYER_CONDITION_SCHEMA = automation.maybe_simple_id( + 
{cv.GenerateID(): cv.use_id(MediaPlayer)} +) @automation.register_action( @@ -135,6 +153,7 @@ MEDIA_PLAYER_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(MediaPl { cv.GenerateID(): cv.use_id(MediaPlayer), cv.Required(CONF_MEDIA_URL): cv.templatable(cv.url), + cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.templatable(cv.boolean), }, key=CONF_MEDIA_URL, ), @@ -143,7 +162,9 @@ async def media_player_play_media_action(config, action_id, template_arg, args): var = cg.new_Pvariable(action_id, template_arg) await cg.register_parented(var, config[CONF_ID]) media_url = await cg.templatable(config[CONF_MEDIA_URL], args, cg.std_string) + announcement = await cg.templatable(config[CONF_ANNOUNCEMENT], args, cg.bool_) cg.add(var.set_media_url(media_url)) + cg.add(var.set_announcement(announcement)) return var @@ -161,19 +182,27 @@ async def media_player_play_media_action(config, action_id, template_arg, args): @automation.register_action( "media_player.volume_down", VolumeDownAction, MEDIA_PLAYER_ACTION_SCHEMA ) -@automation.register_condition( - "media_player.is_idle", IsIdleCondition, MEDIA_PLAYER_ACTION_SCHEMA -) -@automation.register_condition( - "media_player.is_paused", IsPausedCondition, MEDIA_PLAYER_ACTION_SCHEMA -) -@automation.register_condition( - "media_player.is_playing", IsPlayingCondition, MEDIA_PLAYER_ACTION_SCHEMA -) -@automation.register_condition( - "media_player.is_announcing", IsAnnouncingCondition, MEDIA_PLAYER_ACTION_SCHEMA -) async def media_player_action(config, action_id, template_arg, args): + var = cg.new_Pvariable(action_id, template_arg) + await cg.register_parented(var, config[CONF_ID]) + announcement = await cg.templatable(config[CONF_ANNOUNCEMENT], args, cg.bool_) + cg.add(var.set_announcement(announcement)) + return var + + +@automation.register_condition( + "media_player.is_idle", IsIdleCondition, MEDIA_PLAYER_CONDITION_SCHEMA +) +@automation.register_condition( + "media_player.is_paused", IsPausedCondition, 
MEDIA_PLAYER_CONDITION_SCHEMA +) +@automation.register_condition( + "media_player.is_playing", IsPlayingCondition, MEDIA_PLAYER_CONDITION_SCHEMA +) +@automation.register_condition( + "media_player.is_announcing", IsAnnouncingCondition, MEDIA_PLAYER_CONDITION_SCHEMA +) +async def media_player_condition(config, action_id, template_arg, args): var = cg.new_Pvariable(action_id, template_arg) await cg.register_parented(var, config[CONF_ID]) return var diff --git a/esphome/components/media_player/automation.h b/esphome/components/media_player/automation.h index 7b9220c4a5..422c224a85 100644 --- a/esphome/components/media_player/automation.h +++ b/esphome/components/media_player/automation.h @@ -10,7 +10,10 @@ namespace media_player { template class MediaPlayerCommandAction : public Action, public Parented { public: - void play(Ts... x) override { this->parent_->make_call().set_command(Command).perform(); } + TEMPLATABLE_VALUE(bool, announcement); + void play(Ts... x) override { + this->parent_->make_call().set_command(Command).set_announcement(this->announcement_.value(x...)).perform(); + } }; template @@ -28,7 +31,13 @@ using VolumeDownAction = MediaPlayerCommandAction class PlayMediaAction : public Action, public Parented { TEMPLATABLE_VALUE(std::string, media_url) - void play(Ts... x) override { this->parent_->make_call().set_media_url(this->media_url_.value(x...)).perform(); } + TEMPLATABLE_VALUE(bool, announcement) + void play(Ts... 
x) override { + this->parent_->make_call() + .set_media_url(this->media_url_.value(x...)) + .set_announcement(this->announcement_.value(x...)) + .perform(); + } }; template class VolumeSetAction : public Action, public Parented { diff --git a/esphome/components/media_player/media_player.cpp b/esphome/components/media_player/media_player.cpp index b5190d8573..01304d9135 100644 --- a/esphome/components/media_player/media_player.cpp +++ b/esphome/components/media_player/media_player.cpp @@ -41,6 +41,14 @@ const char *media_player_command_to_string(MediaPlayerCommand command) { return "VOLUME_UP"; case MEDIA_PLAYER_COMMAND_VOLUME_DOWN: return "VOLUME_DOWN"; + case MEDIA_PLAYER_COMMAND_ENQUEUE: + return "ENQUEUE"; + case MEDIA_PLAYER_COMMAND_REPEAT_ONE: + return "REPEAT_ONE"; + case MEDIA_PLAYER_COMMAND_REPEAT_OFF: + return "REPEAT_OFF"; + case MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST: + return "CLEAR_PLAYLIST"; default: return "UNKNOWN"; } diff --git a/esphome/components/media_player/media_player.h b/esphome/components/media_player/media_player.h index 78b3ed6216..ee5889901c 100644 --- a/esphome/components/media_player/media_player.h +++ b/esphome/components/media_player/media_player.h @@ -24,6 +24,10 @@ enum MediaPlayerCommand : uint8_t { MEDIA_PLAYER_COMMAND_TOGGLE = 5, MEDIA_PLAYER_COMMAND_VOLUME_UP = 6, MEDIA_PLAYER_COMMAND_VOLUME_DOWN = 7, + MEDIA_PLAYER_COMMAND_ENQUEUE = 8, + MEDIA_PLAYER_COMMAND_REPEAT_ONE = 9, + MEDIA_PLAYER_COMMAND_REPEAT_OFF = 10, + MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST = 11, }; const char *media_player_command_to_string(MediaPlayerCommand command); @@ -72,10 +76,10 @@ class MediaPlayerCall { void perform(); - const optional &get_command() const { return command_; } - const optional &get_media_url() const { return media_url_; } - const optional &get_volume() const { return volume_; } - const optional &get_announcement() const { return announcement_; } + const optional &get_command() const { return this->command_; } + const optional &get_media_url() 
const { return this->media_url_; } + const optional &get_volume() const { return this->volume_; } + const optional &get_announcement() const { return this->announcement_; } protected: void validate_(); diff --git a/esphome/components/speaker/media_player/__init__.py b/esphome/components/speaker/media_player/__init__.py new file mode 100644 index 0000000000..14b72cacc0 --- /dev/null +++ b/esphome/components/speaker/media_player/__init__.py @@ -0,0 +1,458 @@ +"""Speaker Media Player Setup.""" + +import hashlib +import logging +from pathlib import Path + +from esphome import automation, external_files +import esphome.codegen as cg +from esphome.components import audio, esp32, media_player, speaker +import esphome.config_validation as cv +from esphome.const import ( + CONF_BUFFER_SIZE, + CONF_FILE, + CONF_FILES, + CONF_FORMAT, + CONF_ID, + CONF_NUM_CHANNELS, + CONF_PATH, + CONF_RAW_DATA_ID, + CONF_SAMPLE_RATE, + CONF_SPEAKER, + CONF_TASK_STACK_IN_PSRAM, + CONF_TYPE, + CONF_URL, +) +from esphome.core import CORE, HexInt +from esphome.core.entity_helpers import inherit_property_from +from esphome.external_files import download_content + +_LOGGER = logging.getLogger(__name__) + +AUTO_LOAD = ["audio", "psram"] + +CODEOWNERS = ["@kahrendt", "@synesthesiam"] +DOMAIN = "media_player" + +TYPE_LOCAL = "local" +TYPE_WEB = "web" + +CONF_ANNOUNCEMENT = "announcement" +CONF_ANNOUNCEMENT_PIPELINE = "announcement_pipeline" +CONF_CODEC_SUPPORT_ENABLED = "codec_support_enabled" +CONF_ENQUEUE = "enqueue" +CONF_MEDIA_FILE = "media_file" +CONF_MEDIA_PIPELINE = "media_pipeline" +CONF_ON_MUTE = "on_mute" +CONF_ON_UNMUTE = "on_unmute" +CONF_ON_VOLUME = "on_volume" +CONF_STREAM = "stream" +CONF_VOLUME_INCREMENT = "volume_increment" +CONF_VOLUME_MIN = "volume_min" +CONF_VOLUME_MAX = "volume_max" + + +speaker_ns = cg.esphome_ns.namespace("speaker") +SpeakerMediaPlayer = speaker_ns.class_( + "SpeakerMediaPlayer", + media_player.MediaPlayer, + cg.Component, +) + +AudioPipeline = 
speaker_ns.class_("AudioPipeline") +AudioPipelineType = speaker_ns.enum("AudioPipelineType", is_class=True) +AUDIO_PIPELINE_TYPE_ENUM = { + "MEDIA": AudioPipelineType.MEDIA, + "ANNOUNCEMENT": AudioPipelineType.ANNOUNCEMENT, +} + +PlayOnDeviceMediaAction = speaker_ns.class_( + "PlayOnDeviceMediaAction", + automation.Action, + cg.Parented.template(SpeakerMediaPlayer), +) +StopStreamAction = speaker_ns.class_( + "StopStreamAction", automation.Action, cg.Parented.template(SpeakerMediaPlayer) +) + + +def _compute_local_file_path(value: dict) -> Path: + url = value[CONF_URL] + h = hashlib.new("sha256") + h.update(url.encode()) + key = h.hexdigest()[:8] + base_dir = external_files.compute_local_file_dir(DOMAIN) + _LOGGER.debug("_compute_local_file_path: base_dir=%s", base_dir / key) + return base_dir / key + + +def _download_web_file(value): + url = value[CONF_URL] + path = _compute_local_file_path(value) + + download_content(url, path) + _LOGGER.debug("download_web_file: path=%s", path) + return value + + +# Returns a media_player.MediaPlayerSupportedFormat struct with the configured +# format, sample rate, number of channels, purpose, and bytes per sample +def _get_supported_format_struct(pipeline, type): + args = [ + media_player.MediaPlayerSupportedFormat, + ] + + if pipeline[CONF_FORMAT] == "FLAC": + args.append(("format", "flac")) + elif pipeline[CONF_FORMAT] == "MP3": + args.append(("format", "mp3")) + elif pipeline[CONF_FORMAT] == "WAV": + args.append(("format", "wav")) + + args.append(("sample_rate", pipeline[CONF_SAMPLE_RATE])) + args.append(("num_channels", pipeline[CONF_NUM_CHANNELS])) + + if type == "MEDIA": + args.append( + ( + "purpose", + media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["default"], + ) + ) + elif type == "ANNOUNCEMENT": + args.append( + ( + "purpose", + media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["announcement"], + ) + ) + if pipeline[CONF_FORMAT] != "MP3": + args.append(("sample_bytes", 2)) + + return cg.StructInitializer(*args) + + +def 
_file_schema(value): + if isinstance(value, str): + return _validate_file_shorthand(value) + return TYPED_FILE_SCHEMA(value) + + +def _read_audio_file_and_type(file_config): + conf_file = file_config[CONF_FILE] + file_source = conf_file[CONF_TYPE] + if file_source == TYPE_LOCAL: + path = CORE.relative_config_path(conf_file[CONF_PATH]) + elif file_source == TYPE_WEB: + path = _compute_local_file_path(conf_file) + else: + raise cv.Invalid("Unsupported file source.") + + with open(path, "rb") as f: + data = f.read() + + import puremagic + + file_type: str = puremagic.from_string(data) + if file_type.startswith("."): + file_type = file_type[1:] + + media_file_type = audio.AUDIO_FILE_TYPE_ENUM["NONE"] + if file_type in ("wav"): + media_file_type = audio.AUDIO_FILE_TYPE_ENUM["WAV"] + elif file_type in ("mp3", "mpeg", "mpga"): + media_file_type = audio.AUDIO_FILE_TYPE_ENUM["MP3"] + elif file_type in ("flac"): + media_file_type = audio.AUDIO_FILE_TYPE_ENUM["FLAC"] + + return data, media_file_type + + +def _validate_file_shorthand(value): + value = cv.string_strict(value) + if value.startswith("http://") or value.startswith("https://"): + return _file_schema( + { + CONF_TYPE: TYPE_WEB, + CONF_URL: value, + } + ) + return _file_schema( + { + CONF_TYPE: TYPE_LOCAL, + CONF_PATH: value, + } + ) + + +def _validate_pipeline(config): + # Inherit transcoder settings from speaker if not manually set + inherit_property_from(CONF_NUM_CHANNELS, CONF_SPEAKER)(config) + inherit_property_from(CONF_SAMPLE_RATE, CONF_SPEAKER)(config) + + # Validate the transcoder settings is compatible with the speaker + audio.final_validate_audio_schema( + "speaker media_player", + audio_device=CONF_SPEAKER, + bits_per_sample=16, + channels=config.get(CONF_NUM_CHANNELS), + sample_rate=config.get(CONF_SAMPLE_RATE), + )(config) + + return config + + +def _validate_repeated_speaker(config): + if (announcement_config := config.get(CONF_ANNOUNCEMENT_PIPELINE)) and ( + media_config := 
config.get(CONF_MEDIA_PIPELINE) + ): + if announcement_config[CONF_SPEAKER] == media_config[CONF_SPEAKER]: + raise cv.Invalid( + "The announcement and media pipelines cannot use the same speaker. Use the `mixer` speaker component to create two source speakers." + ) + + return config + + +def _validate_supported_local_file(config): + for file_config in config.get(CONF_FILES, []): + _, media_file_type = _read_audio_file_and_type(file_config) + if str(media_file_type) == str(audio.AUDIO_FILE_TYPE_ENUM["NONE"]): + raise cv.Invalid("Unsupported local media file.") + if not config[CONF_CODEC_SUPPORT_ENABLED] and str(media_file_type) != str( + audio.AUDIO_FILE_TYPE_ENUM["WAV"] + ): + # Only wav files are supported + raise cv.Invalid( + f"Unsupported local media file type, set {CONF_CODEC_SUPPORT_ENABLED} to true or convert the media file to wav" + ) + + return config + + +LOCAL_SCHEMA = cv.Schema( + { + cv.Required(CONF_PATH): cv.file_, + } +) + +WEB_SCHEMA = cv.All( + { + cv.Required(CONF_URL): cv.url, + }, + _download_web_file, +) + + +TYPED_FILE_SCHEMA = cv.typed_schema( + { + TYPE_LOCAL: LOCAL_SCHEMA, + TYPE_WEB: WEB_SCHEMA, + }, +) + + +MEDIA_FILE_TYPE_SCHEMA = cv.Schema( + { + cv.Required(CONF_ID): cv.declare_id(audio.AudioFile), + cv.Required(CONF_FILE): _file_schema, + cv.GenerateID(CONF_RAW_DATA_ID): cv.declare_id(cg.uint8), + } +) + +PIPELINE_SCHEMA = cv.Schema( + { + cv.GenerateID(): cv.declare_id(AudioPipeline), + cv.Required(CONF_SPEAKER): cv.use_id(speaker.Speaker), + cv.Optional(CONF_FORMAT, default="FLAC"): cv.enum(audio.AUDIO_FILE_TYPE_ENUM), + cv.Optional(CONF_SAMPLE_RATE): cv.int_range(min=1), + cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2), + } +) + +CONFIG_SCHEMA = cv.All( + media_player.MEDIA_PLAYER_SCHEMA.extend( + { + cv.GenerateID(): cv.declare_id(SpeakerMediaPlayer), + cv.Required(CONF_ANNOUNCEMENT_PIPELINE): PIPELINE_SCHEMA, + cv.Optional(CONF_MEDIA_PIPELINE): PIPELINE_SCHEMA, + cv.Optional(CONF_BUFFER_SIZE, default=1000000): cv.int_range( 
+ min=4000, max=4000000 + ), + cv.Optional(CONF_CODEC_SUPPORT_ENABLED, default=True): cv.boolean, + cv.Optional(CONF_FILES): cv.ensure_list(MEDIA_FILE_TYPE_SCHEMA), + cv.Optional(CONF_TASK_STACK_IN_PSRAM, default=False): cv.boolean, + cv.Optional(CONF_VOLUME_INCREMENT, default=0.05): cv.percentage, + cv.Optional(CONF_VOLUME_MAX, default=1.0): cv.percentage, + cv.Optional(CONF_VOLUME_MIN, default=0.0): cv.percentage, + cv.Optional(CONF_ON_MUTE): automation.validate_automation(single=True), + cv.Optional(CONF_ON_UNMUTE): automation.validate_automation(single=True), + cv.Optional(CONF_ON_VOLUME): automation.validate_automation(single=True), + } + ), + cv.only_with_esp_idf, + _validate_repeated_speaker, +) + + +FINAL_VALIDATE_SCHEMA = cv.All( + cv.Schema( + { + cv.Optional(CONF_ANNOUNCEMENT_PIPELINE): _validate_pipeline, + cv.Optional(CONF_MEDIA_PIPELINE): _validate_pipeline, + }, + extra=cv.ALLOW_EXTRA, + ), + _validate_supported_local_file, +) + + +async def to_code(config): + if config[CONF_CODEC_SUPPORT_ENABLED]: + # Compile all supported audio codecs and optimize the wifi settings + + cg.add_define("USE_AUDIO_FLAC_SUPPORT", True) + cg.add_define("USE_AUDIO_MP3_SUPPORT", True) + + # Wifi settings based on https://github.com/espressif/esp-adf/issues/297#issuecomment-783811702 + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM", 16) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM", 512) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_TX_BUFFER", True) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_TX_BUFFER_TYPE", 0) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_TX_BUFFER_NUM", 8) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_CACHE_TX_BUFFER_NUM", 32) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_AMPDU_TX_ENABLED", True) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_TX_BA_WIN", 16) + esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_AMPDU_RX_ENABLED", True) + 
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_RX_BA_WIN", 32) + esp32.add_idf_sdkconfig_option("CONFIG_LWIP_MAX_ACTIVE_TCP", 16) + esp32.add_idf_sdkconfig_option("CONFIG_LWIP_MAX_LISTENING_TCP", 16) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_MAXRTX", 12) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_SYNMAXRTX", 6) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_MSS", 1436) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_MSL", 60000) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_SND_BUF_DEFAULT", 65535) + esp32.add_idf_sdkconfig_option( + "CONFIG_TCP_WND_DEFAULT", 65535 + ) # Adjusted from referenced settings to avoid compilation error + esp32.add_idf_sdkconfig_option("CONFIG_TCP_RECVMBOX_SIZE", 512) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_QUEUE_OOSEQ", True) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_OVERSIZE_MSS", True) + esp32.add_idf_sdkconfig_option("CONFIG_LWIP_WND_SCALE", True) + esp32.add_idf_sdkconfig_option("CONFIG_TCP_RCV_SCALE", 3) + esp32.add_idf_sdkconfig_option("CONFIG_LWIP_TCPIP_RECVMBOX_SIZE", 512) + + # Allocate wifi buffers in PSRAM + esp32.add_idf_sdkconfig_option("CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP", True) + + var = cg.new_Pvariable(config[CONF_ID]) + await cg.register_component(var, config) + await media_player.register_media_player(var, config) + + cg.add_define("USE_OTA_STATE_CALLBACK") + + cg.add(var.set_buffer_size(config[CONF_BUFFER_SIZE])) + + cg.add(var.set_task_stack_in_psram(config[CONF_TASK_STACK_IN_PSRAM])) + if config[CONF_TASK_STACK_IN_PSRAM]: + esp32.add_idf_sdkconfig_option( + "CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True + ) + + cg.add(var.set_volume_increment(config[CONF_VOLUME_INCREMENT])) + cg.add(var.set_volume_max(config[CONF_VOLUME_MAX])) + cg.add(var.set_volume_min(config[CONF_VOLUME_MIN])) + + announcement_pipeline_config = config[CONF_ANNOUNCEMENT_PIPELINE] + spkr = await cg.get_variable(announcement_pipeline_config[CONF_SPEAKER]) + cg.add(var.set_announcement_speaker(spkr)) + if 
announcement_pipeline_config[CONF_FORMAT] != "NONE": + cg.add( + var.set_announcement_format( + _get_supported_format_struct( + announcement_pipeline_config, "ANNOUNCEMENT" + ) + ) + ) + + if media_pipeline_config := config.get(CONF_MEDIA_PIPELINE): + spkr = await cg.get_variable(media_pipeline_config[CONF_SPEAKER]) + cg.add(var.set_media_speaker(spkr)) + if media_pipeline_config[CONF_FORMAT] != "NONE": + cg.add( + var.set_media_format( + _get_supported_format_struct(media_pipeline_config, "MEDIA") + ) + ) + + if on_mute := config.get(CONF_ON_MUTE): + await automation.build_automation( + var.get_mute_trigger(), + [], + on_mute, + ) + if on_unmute := config.get(CONF_ON_UNMUTE): + await automation.build_automation( + var.get_unmute_trigger(), + [], + on_unmute, + ) + if on_volume := config.get(CONF_ON_VOLUME): + await automation.build_automation( + var.get_volume_trigger(), + [(cg.float_, "x")], + on_volume, + ) + + for file_config in config.get(CONF_FILES, []): + data, media_file_type = _read_audio_file_and_type(file_config) + + rhs = [HexInt(x) for x in data] + prog_arr = cg.progmem_array(file_config[CONF_RAW_DATA_ID], rhs) + + media_files_struct = cg.StructInitializer( + audio.AudioFile, + ( + "data", + prog_arr, + ), + ( + "length", + len(rhs), + ), + ( + "file_type", + media_file_type, + ), + ) + + cg.new_Pvariable( + file_config[CONF_ID], + media_files_struct, + ) + + +@automation.register_action( + "media_player.speaker.play_on_device_media_file", + PlayOnDeviceMediaAction, + cv.maybe_simple_value( + { + cv.GenerateID(): cv.use_id(SpeakerMediaPlayer), + cv.Required(CONF_MEDIA_FILE): cv.use_id(audio.AudioFile), + cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.templatable(cv.boolean), + cv.Optional(CONF_ENQUEUE, default=False): cv.templatable(cv.boolean), + }, + key=CONF_MEDIA_FILE, + ), +) +async def play_on_device_media_media_action(config, action_id, template_arg, args): + var = cg.new_Pvariable(action_id, template_arg) + await cg.register_parented(var, 
config[CONF_ID]) + media_file = await cg.get_variable(config[CONF_MEDIA_FILE]) + announcement = await cg.templatable(config[CONF_ANNOUNCEMENT], args, cg.bool_) + enqueue = await cg.templatable(config[CONF_ENQUEUE], args, cg.bool_) + + cg.add(var.set_audio_file(media_file)) + cg.add(var.set_announcement(announcement)) + cg.add(var.set_enqueue(enqueue)) + return var diff --git a/esphome/components/speaker/media_player/audio_pipeline.cpp b/esphome/components/speaker/media_player/audio_pipeline.cpp new file mode 100644 index 0000000000..73ec5a3334 --- /dev/null +++ b/esphome/components/speaker/media_player/audio_pipeline.cpp @@ -0,0 +1,560 @@ +#include "audio_pipeline.h" + +#ifdef USE_ESP_IDF + +#include "esphome/core/defines.h" +#include "esphome/core/hal.h" +#include "esphome/core/helpers.h" +#include "esphome/core/log.h" + +namespace esphome { +namespace speaker { + +static const uint32_t INITIAL_BUFFER_MS = 1000; // Start playback after buffering this duration of the file + +static const uint32_t READ_TASK_STACK_SIZE = 5 * 1024; +static const uint32_t DECODE_TASK_STACK_SIZE = 3 * 1024; + +static const uint32_t INFO_ERROR_QUEUE_COUNT = 5; + +static const char *const TAG = "speaker_media_player.pipeline"; + +enum EventGroupBits : uint32_t { + // MESSAGE_* bits are only set by their respective tasks + + // Stops all activity in the pipeline elements; cleared by process_state() and set by stop() or by each task + PIPELINE_COMMAND_STOP = (1 << 0), + + // Read audio from an HTTP source; cleared by reader task and set by start_url + READER_COMMAND_INIT_HTTP = (1 << 4), + // Read audio from an audio file from the flash; cleared by reader task and set by start_file + READER_COMMAND_INIT_FILE = (1 << 5), + + // Audio file type is read after checking it is supported; cleared by decoder task + READER_MESSAGE_LOADED_MEDIA_TYPE = (1 << 6), + // Reader is done (either through a failure or just end of the stream); cleared by reader task + READER_MESSAGE_FINISHED = (1 << 7), + // 
Error reading the file; cleared by process_state() + READER_MESSAGE_ERROR = (1 << 8), + + // Decoder is done (either through a failure or the end of the stream); cleared by decoder task + DECODER_MESSAGE_FINISHED = (1 << 12), + // Error decoding the file; cleared by process_state() + DECODER_MESSAGE_ERROR = (1 << 13), +}; + +AudioPipeline::AudioPipeline(speaker::Speaker *speaker, size_t buffer_size, bool task_stack_in_psram, + std::string base_name, UBaseType_t priority) + : base_name_(std::move(base_name)), + priority_(priority), + task_stack_in_psram_(task_stack_in_psram), + speaker_(speaker), + buffer_size_(buffer_size) { + this->allocate_communications_(); + this->transfer_buffer_size_ = std::min(buffer_size_ / 4, DEFAULT_TRANSFER_BUFFER_SIZE); +} + +void AudioPipeline::start_url(const std::string &uri) { + if (this->is_playing_) { + xEventGroupSetBits(this->event_group_, PIPELINE_COMMAND_STOP); + } + this->current_uri_ = uri; + this->pending_url_ = true; +} + +void AudioPipeline::start_file(audio::AudioFile *audio_file) { + if (this->is_playing_) { + xEventGroupSetBits(this->event_group_, PIPELINE_COMMAND_STOP); + } + this->current_audio_file_ = audio_file; + this->pending_file_ = true; +} + +esp_err_t AudioPipeline::stop() { + xEventGroupSetBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP); + + return ESP_OK; +} +void AudioPipeline::set_pause_state(bool pause_state) { + this->speaker_->set_pause_state(pause_state); + + this->pause_state_ = pause_state; +} + +void AudioPipeline::suspend_tasks() { + if (this->read_task_handle_ != nullptr) { + vTaskSuspend(this->read_task_handle_); + } + if (this->decode_task_handle_ != nullptr) { + vTaskSuspend(this->decode_task_handle_); + } +} + +void AudioPipeline::resume_tasks() { + if (this->read_task_handle_ != nullptr) { + vTaskResume(this->read_task_handle_); + } + if (this->decode_task_handle_ != nullptr) { + vTaskResume(this->decode_task_handle_); + } +} + +AudioPipelineState
AudioPipeline::process_state() { + /* + * Log items from info error queue + */ + InfoErrorEvent event; + if (this->info_error_queue_ != nullptr) { + while (xQueueReceive(this->info_error_queue_, &event, 0)) { + switch (event.source) { + case InfoErrorSource::READER: + if (event.err.has_value()) { + ESP_LOGE(TAG, "Media reader encountered an error: %s", esp_err_to_name(event.err.value())); + } else if (event.file_type.has_value()) { + ESP_LOGD(TAG, "Reading %s file type", audio_file_type_to_string(event.file_type.value())); + } + + break; + case InfoErrorSource::DECODER: + if (event.err.has_value()) { + ESP_LOGE(TAG, "Decoder encountered an error: %s", esp_err_to_name(event.err.value())); + } + + if (event.audio_stream_info.has_value()) { + ESP_LOGD(TAG, "Decoded audio has %d channels, %" PRId32 " Hz sample rate, and %d bits per sample", + event.audio_stream_info.value().get_channels(), event.audio_stream_info.value().get_sample_rate(), + event.audio_stream_info.value().get_bits_per_sample()); + } + + if (event.decoding_err.has_value()) { + switch (event.decoding_err.value()) { + case DecodingError::FAILED_HEADER: + ESP_LOGE(TAG, "Failed to parse the file's header."); + break; + case DecodingError::INCOMPATIBLE_BITS_PER_SAMPLE: + ESP_LOGE(TAG, "Incompatible bits per sample. Only 16 bits per sample is supported"); + break; + case DecodingError::INCOMPATIBLE_CHANNELS: + ESP_LOGE(TAG, "Incompatible number of channels. 
Only 1 or 2 channel audio is supported."); + break; + } + } + break; + } + } + } + + /* + * Determine the current state based on the event group bits and tasks' status + */ + + EventBits_t event_bits = xEventGroupGetBits(this->event_group_); + + if (this->pending_url_ || this->pending_file_) { + // Init command pending + if (!(event_bits & EventGroupBits::PIPELINE_COMMAND_STOP)) { + // Only start if there is no pending stop command + if ((this->read_task_handle_ == nullptr) || (this->decode_task_handle_ == nullptr)) { + // At least one task isn't running + this->start_tasks_(); + } + + if (this->pending_url_) { + xEventGroupSetBits(this->event_group_, EventGroupBits::READER_COMMAND_INIT_HTTP); + this->playback_ms_ = 0; + this->pending_url_ = false; + } else if (this->pending_file_) { + xEventGroupSetBits(this->event_group_, EventGroupBits::READER_COMMAND_INIT_FILE); + this->playback_ms_ = 0; + this->pending_file_ = false; + } + + this->is_playing_ = true; + return AudioPipelineState::PLAYING; + } + } + + if ((event_bits & EventGroupBits::READER_MESSAGE_FINISHED) && + (!(event_bits & EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE) && + (event_bits & EventGroupBits::DECODER_MESSAGE_FINISHED))) { + // Tasks are finished and there's no media in between the reader and decoder + + if (event_bits & EventGroupBits::PIPELINE_COMMAND_STOP) { + // Stop command is fully processed, so clear the command bit + xEventGroupClearBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP); + } + + if (!this->is_playing_) { + // The tasks have been stopped for two ``process_state`` calls in a row, so delete the tasks + if ((this->read_task_handle_ != nullptr) || (this->decode_task_handle_ != nullptr)) { + this->delete_tasks_(); + this->speaker_->stop(); + } + } + this->is_playing_ = false; + return AudioPipelineState::STOPPED; + } + + if ((event_bits & EventGroupBits::READER_MESSAGE_ERROR)) { + xEventGroupClearBits(this->event_group_, EventGroupBits::READER_MESSAGE_ERROR); + 
return AudioPipelineState::ERROR_READING; + } + + if ((event_bits & EventGroupBits::DECODER_MESSAGE_ERROR)) { + xEventGroupClearBits(this->event_group_, EventGroupBits::DECODER_MESSAGE_ERROR); + return AudioPipelineState::ERROR_DECODING; + } + + if (this->pause_state_) { + return AudioPipelineState::PAUSED; + } + + if ((this->read_task_handle_ == nullptr) && (this->decode_task_handle_ == nullptr)) { + // No tasks are running, so the pipeline is stopped. + xEventGroupClearBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP); + return AudioPipelineState::STOPPED; + } + + this->is_playing_ = true; + return AudioPipelineState::PLAYING; +} + +// Creates the event group and the info/error queue if they do not exist yet; already-created objects are reused. +// Returns ESP_ERR_NO_MEM if either one is still null after the creation attempt, ESP_OK otherwise. +esp_err_t AudioPipeline::allocate_communications_() { + if (this->event_group_ == nullptr) + this->event_group_ = xEventGroupCreate(); + + if (this->event_group_ == nullptr) { + return ESP_ERR_NO_MEM; + } + + if (this->info_error_queue_ == nullptr) + this->info_error_queue_ = xQueueCreate(INFO_ERROR_QUEUE_COUNT, sizeof(InfoErrorEvent)); + + if (this->info_error_queue_ == nullptr) + return ESP_ERR_NO_MEM; + + return ESP_OK; +} + +// Creates the read and decode tasks with xTaskCreateStatic if their handles are null. Each task's stack buffer is +// allocated on first use with ALLOC_EXTERNAL when task_stack_in_psram_ is set and ALLOC_INTERNAL otherwise. +// Returns ESP_ERR_NO_MEM if a stack buffer cannot be allocated and ESP_ERR_INVALID_STATE if a task handle is still +// null after the creation attempt. +esp_err_t AudioPipeline::start_tasks_() { + if (this->read_task_handle_ == nullptr) { + if (this->read_task_stack_buffer_ == nullptr) { + if (this->task_stack_in_psram_) { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_EXTERNAL); + this->read_task_stack_buffer_ = stack_allocator.allocate(READ_TASK_STACK_SIZE); + } else { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_INTERNAL); + this->read_task_stack_buffer_ = stack_allocator.allocate(READ_TASK_STACK_SIZE); + } + } + + if (this->read_task_stack_buffer_ == nullptr) { + return ESP_ERR_NO_MEM; + } + + if (this->read_task_handle_ == nullptr) { + this->read_task_handle_ = + xTaskCreateStatic(read_task, (this->base_name_ + "_read").c_str(), READ_TASK_STACK_SIZE, (void *) this, + this->priority_, this->read_task_stack_buffer_, &this->read_task_stack_); + } + + if (this->read_task_handle_ == nullptr) { + return ESP_ERR_INVALID_STATE; + } + } + + 
if (this->decode_task_handle_ == nullptr) { + if (this->decode_task_stack_buffer_ == nullptr) { + if (this->task_stack_in_psram_) { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_EXTERNAL); + this->decode_task_stack_buffer_ = stack_allocator.allocate(DECODE_TASK_STACK_SIZE); + } else { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_INTERNAL); + this->decode_task_stack_buffer_ = stack_allocator.allocate(DECODE_TASK_STACK_SIZE); + } + } + + if (this->decode_task_stack_buffer_ == nullptr) { + return ESP_ERR_NO_MEM; + } + + if (this->decode_task_handle_ == nullptr) { + this->decode_task_handle_ = + xTaskCreateStatic(decode_task, (this->base_name_ + "_decode").c_str(), DECODE_TASK_STACK_SIZE, (void *) this, + this->priority_, this->decode_task_stack_buffer_, &this->decode_task_stack_); + } + + if (this->decode_task_handle_ == nullptr) { + return ESP_ERR_INVALID_STATE; + } + } + + return ESP_OK; +} + +// Deletes each running task and frees its stack buffer with the allocator type that matches task_stack_in_psram_. +// NOTE(review): each task handle is only reset to nullptr inside the stack-buffer null check - confirm a task can +// never exist without its stack buffer. +void AudioPipeline::delete_tasks_() { + if (this->read_task_handle_ != nullptr) { + vTaskDelete(this->read_task_handle_); + + if (this->read_task_stack_buffer_ != nullptr) { + if (this->task_stack_in_psram_) { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_EXTERNAL); + stack_allocator.deallocate(this->read_task_stack_buffer_, READ_TASK_STACK_SIZE); + } else { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_INTERNAL); + stack_allocator.deallocate(this->read_task_stack_buffer_, READ_TASK_STACK_SIZE); + } + + this->read_task_stack_buffer_ = nullptr; + this->read_task_handle_ = nullptr; + } + } + + if (this->decode_task_handle_ != nullptr) { + vTaskDelete(this->decode_task_handle_); + + if (this->decode_task_stack_buffer_ != nullptr) { + if (this->task_stack_in_psram_) { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_EXTERNAL); + stack_allocator.deallocate(this->decode_task_stack_buffer_, DECODE_TASK_STACK_SIZE); + } else { + RAMAllocator stack_allocator(RAMAllocator::ALLOC_INTERNAL); + stack_allocator.deallocate(this->decode_task_stack_buffer_, 
DECODE_TASK_STACK_SIZE); + } + + this->decode_task_stack_buffer_ = nullptr; + this->decode_task_handle_ = nullptr; + } + } +} + +// FreeRTOS task body for the reader. Each outer iteration signals READER_MESSAGE_FINISHED, waits for an init or stop +// command, then reads the file or url into the ring buffer until the reader finishes, fails, or a stop is requested. +void AudioPipeline::read_task(void *params) { + AudioPipeline *this_pipeline = (AudioPipeline *) params; + + while (true) { + xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED); + + // Wait until the pipeline notifies us the source of the media file + EventBits_t event_bits = + xEventGroupWaitBits(this_pipeline->event_group_, + EventGroupBits::READER_COMMAND_INIT_FILE | EventGroupBits::READER_COMMAND_INIT_HTTP | + EventGroupBits::PIPELINE_COMMAND_STOP, // Bits to wait for + pdFALSE, // Do not clear the bits on exit; they are cleared explicitly below + pdFALSE, // Return when any one of the bits is set + portMAX_DELAY); // Block indefinitely until a bit is set + + if (!(event_bits & EventGroupBits::PIPELINE_COMMAND_STOP)) { + xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED | + EventGroupBits::READER_COMMAND_INIT_FILE | + EventGroupBits::READER_COMMAND_INIT_HTTP); + InfoErrorEvent event; + event.source = InfoErrorSource::READER; + esp_err_t err = ESP_OK; + + std::unique_ptr reader = + make_unique(this_pipeline->transfer_buffer_size_); + + if (event_bits & EventGroupBits::READER_COMMAND_INIT_FILE) { + err = reader->start(this_pipeline->current_audio_file_, this_pipeline->current_audio_file_type_); + } else { + err = reader->start(this_pipeline->current_uri_, this_pipeline->current_audio_file_type_); + } + + if (err == ESP_OK) { + size_t file_ring_buffer_size = this_pipeline->buffer_size_; + + std::shared_ptr temp_ring_buffer; + + if (!this_pipeline->raw_file_ring_buffer_.use_count()) { + temp_ring_buffer = RingBuffer::create(file_ring_buffer_size); + this_pipeline->raw_file_ring_buffer_ = temp_ring_buffer; + } + + if (!this_pipeline->raw_file_ring_buffer_.use_count()) { + err = ESP_ERR_NO_MEM; + } else { + reader->add_sink(this_pipeline->raw_file_ring_buffer_); + } + } + + if (err != ESP_OK) { + // Send specific error 
message + event.err = err; + xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY); + + // Setting up the reader failed, stop the pipeline + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + } else { + // Send the file type to the pipeline + event.file_type = this_pipeline->current_audio_file_type_; + xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY); + xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE); + } + + while (true) { + event_bits = xEventGroupGetBits(this_pipeline->event_group_); + + if (event_bits & EventGroupBits::PIPELINE_COMMAND_STOP) { + break; + } + + audio::AudioReaderState reader_state = reader->read(); + + if (reader_state == audio::AudioReaderState::FINISHED) { + break; + } else if (reader_state == audio::AudioReaderState::FAILED) { + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + break; + } + } + event_bits = xEventGroupGetBits(this_pipeline->event_group_); + if ((event_bits & EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE) || + (this_pipeline->raw_file_ring_buffer_.use_count() == 1)) { + // Decoder task hasn't started yet, so delay a bit before releasing ownership of the ring buffer + delay(10); + } + } + } +} + +// FreeRTOS task body for the decoder. Each outer iteration signals DECODER_MESSAGE_FINISHED, then waits until the +// reader has published the media type or a stop is requested. +void AudioPipeline::decode_task(void *params) { + AudioPipeline *this_pipeline = (AudioPipeline *) params; + + while (true) { + xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED); + + // Wait until the reader notifies us that the media type is available + EventBits_t event_bits = xEventGroupWaitBits(this_pipeline->event_group_, + EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE | + EventGroupBits::PIPELINE_COMMAND_STOP, // Bits to wait for + pdFALSE, // Do not clear the bits on exit; they are cleared explicitly below + pdFALSE, // Return when any one of the bits is set + portMAX_DELAY); // Block indefinitely 
until bit is set + + if (!(event_bits & EventGroupBits::PIPELINE_COMMAND_STOP)) { + xEventGroupClearBits(this_pipeline->event_group_, + EventGroupBits::DECODER_MESSAGE_FINISHED | EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE); + InfoErrorEvent event; + event.source = InfoErrorSource::DECODER; + + std::unique_ptr decoder = + make_unique(this_pipeline->transfer_buffer_size_, this_pipeline->transfer_buffer_size_); + + esp_err_t err = decoder->start(this_pipeline->current_audio_file_type_); + decoder->add_source(this_pipeline->raw_file_ring_buffer_); + + if (err != ESP_OK) { + // Send specific error message + event.err = err; + xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY); + + // Setting up the decoder failed, stop the pipeline + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + } + + bool has_stream_info = false; + bool started_playback = false; + + size_t initial_bytes_to_buffer = 0; + + while (true) { + event_bits = xEventGroupGetBits(this_pipeline->event_group_); + + if (event_bits & EventGroupBits::PIPELINE_COMMAND_STOP) { + break; + } + + // Update pause state: keep the decoder output paused until playback has started (enough data buffered or the reader already finished); afterwards follow the pipeline's pause_state_ + if (!started_playback) { + if (!(event_bits & EventGroupBits::READER_MESSAGE_FINISHED)) { + decoder->set_pause_output_state(true); + } else { + started_playback = true; + } + } else { + decoder->set_pause_output_state(this_pipeline->pause_state_); + } + + // Stop gracefully if the reader has finished + audio::AudioDecoderState decoder_state = decoder->decode(event_bits & EventGroupBits::READER_MESSAGE_FINISHED); + + if ((decoder_state == audio::AudioDecoderState::DECODING) || + (decoder_state == audio::AudioDecoderState::FINISHED)) { + this_pipeline->playback_ms_ = decoder->get_playback_ms(); + } + + if (decoder_state == audio::AudioDecoderState::FINISHED) { + break; + } else if (decoder_state == audio::AudioDecoderState::FAILED) { + if (!has_stream_info) { + event.decoding_err = 
DecodingError::FAILED_HEADER; + xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY); + } + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + break; + } + + if (!has_stream_info && decoder->get_audio_stream_info().has_value()) { + has_stream_info = true; + + this_pipeline->current_audio_stream_info_ = decoder->get_audio_stream_info().value(); + + // Send the stream information to the pipeline + event.audio_stream_info = this_pipeline->current_audio_stream_info_; + + if (this_pipeline->current_audio_stream_info_.get_bits_per_sample() != 16) { + // Error state, incompatible bits per sample + event.decoding_err = DecodingError::INCOMPATIBLE_BITS_PER_SAMPLE; + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + } else if ((this_pipeline->current_audio_stream_info_.get_channels() > 2)) { + // Error state, incompatible number of channels + event.decoding_err = DecodingError::INCOMPATIBLE_CHANNELS; + xEventGroupSetBits(this_pipeline->event_group_, + EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP); + } else { + // Send audio directly to the speaker + this_pipeline->speaker_->set_audio_stream_info(this_pipeline->current_audio_stream_info_); + decoder->add_sink(this_pipeline->speaker_); + } + + initial_bytes_to_buffer = std::min(this_pipeline->current_audio_stream_info_.ms_to_bytes(INITIAL_BUFFER_MS), + this_pipeline->buffer_size_ * 3 / 4); + + switch (this_pipeline->current_audio_file_type_) { +#ifdef USE_AUDIO_MP3_SUPPORT + case audio::AudioFileType::MP3: + initial_bytes_to_buffer /= 8; // Estimate the MP3 compression factor is 8 + break; +#endif +#ifdef USE_AUDIO_FLAC_SUPPORT + case audio::AudioFileType::FLAC: + initial_bytes_to_buffer /= 2; // Estimate the FLAC compression factor is 2 + break; +#endif + default: + break; + } + 
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY); + } + + if (!started_playback && has_stream_info) { + // Verify enough data is available before starting playback. NOTE(review): the lock() result is dereferenced without a null check - confirm the decoder's source keeps the ring buffer alive here + std::shared_ptr temp_ring_buffer = this_pipeline->raw_file_ring_buffer_.lock(); + if (temp_ring_buffer->available() >= initial_bytes_to_buffer) { + started_playback = true; + } + } + } + } + } +} + +} // namespace speaker +} // namespace esphome + +#endif diff --git a/esphome/components/speaker/media_player/audio_pipeline.h b/esphome/components/speaker/media_player/audio_pipeline.h new file mode 100644 index 0000000000..c382e1eebe --- /dev/null +++ b/esphome/components/speaker/media_player/audio_pipeline.h @@ -0,0 +1,158 @@ +#pragma once + +#ifdef USE_ESP_IDF + +#include "esphome/components/audio/audio.h" +#include "esphome/components/audio/audio_reader.h" +#include "esphome/components/audio/audio_decoder.h" +#include "esphome/components/speaker/speaker.h" + +#include "esphome/core/ring_buffer.h" + +#include "esp_err.h" + +#include +#include +#include + +namespace esphome { +namespace speaker { + +// Default size of the internal sink/source transfer buffers for the reader and decoder (presumably in bytes) +static const size_t DEFAULT_TRANSFER_BUFFER_SIZE = 24 * 1024; + +enum class AudioPipelineType : uint8_t { + MEDIA, + ANNOUNCEMENT, +}; + +enum class AudioPipelineState : uint8_t { + STARTING_FILE, + STARTING_URL, + PLAYING, + STOPPING, + STOPPED, + PAUSED, + ERROR_READING, + ERROR_DECODING, +}; + +enum class InfoErrorSource : uint8_t { + READER = 0, + DECODER, +}; + +enum class DecodingError : uint8_t { + FAILED_HEADER = 0, + INCOMPATIBLE_BITS_PER_SAMPLE, + INCOMPATIBLE_CHANNELS, +}; + +// Used to pass information from each task. 
+struct InfoErrorEvent { + InfoErrorSource source; + optional err; + optional file_type; + optional audio_stream_info; + optional decoding_err; +}; + +class AudioPipeline { + public: + /// @param speaker ESPHome speaker component for pipeline's audio output + /// @param buffer_size Size of the buffer in bytes between the reader and decoder + /// @param task_stack_in_psram True if the task stack should be allocated in PSRAM, false otherwise + /// @param base_name FreeRTOS task base name; "_read" and "_decode" are appended for the two tasks + /// @param priority FreeRTOS task priority + AudioPipeline(speaker::Speaker *speaker, size_t buffer_size, bool task_stack_in_psram, std::string base_name, + UBaseType_t priority); + + /// @brief Starts an audio pipeline given a media url + /// @param uri media file url + /// @note Declared void, so no status is returned directly to the caller + void start_url(const std::string &uri); + + /// @brief Starts an audio pipeline given an AudioFile pointer + /// @param audio_file pointer to an AudioFile object + /// @note Declared void, so no status is returned directly to the caller + void start_file(audio::AudioFile *audio_file); + + /// @brief Stops the pipeline. Sends a stop signal to each task (if running) and clears the ring buffers. + /// @return ESP_OK if successful or ESP_ERR_TIMEOUT if the tasks did not indicate they stopped + esp_err_t stop(); + + /// @brief Processes the state of the audio pipeline based on the info_error_queue_ and event_group_. Handles creating + /// and stopping the pipeline tasks. Needs to be regularly called to update the internal pipeline state. + /// @return AudioPipelineState + AudioPipelineState process_state(); + + /// @brief Suspends any running tasks + void suspend_tasks(); + /// @brief Resumes any running tasks + void resume_tasks(); + + uint32_t get_playback_ms() { return this->playback_ms_; } + + void set_pause_state(bool pause_state); + + protected: + /// @brief Allocates the event group and info error queue. 
+ /// @return ESP_OK if successful or ESP_ERR_NO_MEM if it is unable to allocate all parts + esp_err_t allocate_communications_(); + + /// @brief Common start code for the pipeline, regardless if the source is a file or url. + /// @return ESP_OK if successful or an appropriate error if not + esp_err_t start_tasks_(); + + /// @brief Resets the task related pointers and deallocates their stacks. + void delete_tasks_(); + + std::string base_name_; + UBaseType_t priority_; + + uint32_t playback_ms_{0}; + + bool is_playing_{false}; + bool pause_state_{false}; + bool task_stack_in_psram_; + + // Pending file start state used to ensure the pipeline fully stops before attempting to start the next file + bool pending_url_{false}; + bool pending_file_{false}; + + speaker::Speaker *speaker_{nullptr}; + + std::string current_uri_{}; + audio::AudioFile *current_audio_file_{nullptr}; + + audio::AudioFileType current_audio_file_type_; + audio::AudioStreamInfo current_audio_stream_info_; + + size_t buffer_size_; // Ring buffer between reader and decoder + size_t transfer_buffer_size_; // Internal source/sink buffers for the audio reader and decoder + + std::weak_ptr raw_file_ring_buffer_; + + // Handles basic control/state of the read and decode tasks + EventGroupHandle_t event_group_{nullptr}; + + // Receives detailed info (file type, stream info) or specific errors from the read and decode tasks + QueueHandle_t info_error_queue_{nullptr}; + + // Handles reading the media file from flash or a url + static void read_task(void *params); + TaskHandle_t read_task_handle_{nullptr}; + StaticTask_t read_task_stack_; + StackType_t *read_task_stack_buffer_{nullptr}; + + // Decodes the media file into PCM audio + static void decode_task(void *params); + TaskHandle_t decode_task_handle_{nullptr}; + StaticTask_t decode_task_stack_; + StackType_t *decode_task_stack_buffer_{nullptr}; +}; + +} // namespace speaker +} // namespace esphome + +#endif diff --git 
a/esphome/components/speaker/media_player/automation.h b/esphome/components/speaker/media_player/automation.h new file mode 100644 index 0000000000..d1a01aabc4 --- /dev/null +++ b/esphome/components/speaker/media_player/automation.h @@ -0,0 +1,26 @@ +#pragma once + +#include "speaker_media_player.h" + +#ifdef USE_ESP_IDF + +#include "esphome/components/audio/audio.h" +#include "esphome/core/automation.h" + +namespace esphome { +namespace speaker { + +/// Automation action that plays an audio file on the parent media player: play() evaluates the templatable +/// audio_file, announcement, and enqueue values and forwards them to the parent's play_file(). +template class PlayOnDeviceMediaAction : public Action, public Parented { + TEMPLATABLE_VALUE(audio::AudioFile *, audio_file) + TEMPLATABLE_VALUE(bool, announcement) + TEMPLATABLE_VALUE(bool, enqueue) + void play(Ts... x) override { + this->parent_->play_file(this->audio_file_.value(x...), this->announcement_.value(x...), + this->enqueue_.value(x...)); + } +}; + +} // namespace speaker +} // namespace esphome + +#endif diff --git a/esphome/components/speaker/media_player/speaker_media_player.cpp b/esphome/components/speaker/media_player/speaker_media_player.cpp new file mode 100644 index 0000000000..0a2585ce60 --- /dev/null +++ b/esphome/components/speaker/media_player/speaker_media_player.cpp @@ -0,0 +1,577 @@ +#include "speaker_media_player.h" + +#ifdef USE_ESP_IDF + +#include "esphome/core/log.h" + +#include "esphome/components/audio/audio.h" +#ifdef USE_OTA +#include "esphome/components/ota/ota_backend.h" +#endif + +namespace esphome { +namespace speaker { + +// Framework: +// - Media player that can handle two streams: one for media and one for announcements +// - Each stream has an individual speaker component for output +// - Each stream is handled by an ``AudioPipeline`` object with two parts/tasks +// - ``AudioReader`` handles reading from an HTTP source or from a PROGMEM flash set at compile time +// - ``AudioDecoder`` handles decoding the audio file. 
All formats are limited to two channels and 16 bits per sample +// - FLAC +// - MP3 (based on the libhelix decoder) +// - WAV +// - Each task runs until it is done processing the file or it receives a stop command +// - Inter-task communication uses a FreeRTOS Event Group +// - The ``AudioPipeline`` sets up a ring buffer between the reader and decoder tasks. The decoder task outputs audio +// directly to a speaker component. +// - The pipeline's internal state needs to be processed by regularly calling ``process_state``. +// - Generic media player commands are received by the ``control`` function. The commands are added to the +// ``media_control_command_queue_`` to be processed in the component's loop +// - Local file playback is initiated with ``play_file``, which adds it to the ``media_control_command_queue_`` +// - Starting a stream initializes the appropriate pipeline or stops it if it is already running +// - Volume and mute commands are achieved by the ``mute``, ``unmute``, ``set_volume`` functions. +// - Volume commands are ignored if the media control queue is full to avoid crashing with rapid volume +// increases/decreases. +// - These functions all send the appropriate information to the speakers to implement. +// - Pausing is implemented in the decoder task and is also sent directly to the media speaker component to decrease +// latency. +// - The component's main loop performs housekeeping: +// - It reads the media control queue and processes it directly +// - It determines the overall state of the media player by considering the state of each pipeline +// - announcement playback takes highest priority +// - Handles playlists and repeating by starting the appropriate file when a previous file is finished +// - Logging only happens in the main loop task to reduce task stack memory usage. 
+ +static const uint32_t MEDIA_CONTROLS_QUEUE_LENGTH = 20; + +static const UBaseType_t MEDIA_PIPELINE_TASK_PRIORITY = 1; +static const UBaseType_t ANNOUNCEMENT_PIPELINE_TASK_PRIORITY = 1; + +static const float FIRST_BOOT_DEFAULT_VOLUME = 0.5f; + +static const char *const TAG = "speaker_media_player"; + +void SpeakerMediaPlayer::setup() { + state = media_player::MEDIA_PLAYER_STATE_IDLE; + + this->media_control_command_queue_ = xQueueCreate(MEDIA_CONTROLS_QUEUE_LENGTH, sizeof(MediaCallCommand)); + + this->pref_ = global_preferences->make_preference(this->get_object_id_hash()); + + VolumeRestoreState volume_restore_state; + if (this->pref_.load(&volume_restore_state)) { + this->set_volume_(volume_restore_state.volume); + this->set_mute_state_(volume_restore_state.is_muted); + } else { + this->set_volume_(FIRST_BOOT_DEFAULT_VOLUME); + this->set_mute_state_(false); + } + +#ifdef USE_OTA + ota::get_global_ota_callback()->add_on_state_callback( + [this](ota::OTAState state, float progress, uint8_t error, ota::OTAComponent *comp) { + if (state == ota::OTA_STARTED) { + if (this->media_pipeline_ != nullptr) { + this->media_pipeline_->suspend_tasks(); + } + if (this->announcement_pipeline_ != nullptr) { + this->announcement_pipeline_->suspend_tasks(); + } + } else if (state == ota::OTA_ERROR) { + if (this->media_pipeline_ != nullptr) { + this->media_pipeline_->resume_tasks(); + } + if (this->announcement_pipeline_ != nullptr) { + this->announcement_pipeline_->resume_tasks(); + } + } + }); +#endif + + this->announcement_pipeline_ = + make_unique(this->announcement_speaker_, this->buffer_size_, this->task_stack_in_psram_, "ann", + ANNOUNCEMENT_PIPELINE_TASK_PRIORITY); + + if (this->announcement_pipeline_ == nullptr) { + ESP_LOGE(TAG, "Failed to create announcement pipeline"); + this->mark_failed(); + } + + if (!this->single_pipeline_()) { + this->media_pipeline_ = make_unique(this->media_speaker_, this->buffer_size_, + this->task_stack_in_psram_, "ann", 
MEDIA_PIPELINE_TASK_PRIORITY); + + if (this->media_pipeline_ == nullptr) { + ESP_LOGE(TAG, "Failed to create media pipeline"); + this->mark_failed(); + } + + // Setup callback to track the duration of audio played by the media pipeline + this->media_speaker_->add_audio_output_callback( + [this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) { + this->playback_ms_ += new_playback_ms; + this->remainder_us_ = remainder_us; + this->pending_ms_ = pending_ms; + this->last_audio_write_timestamp_ = write_timestamp; + this->playback_us_ = this->playback_ms_ * 1000 + this->remainder_us_; + }); + } + + ESP_LOGI(TAG, "Set up speaker media player"); +} + +void SpeakerMediaPlayer::set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms) { + switch (pipeline_type) { + case AudioPipelineType::ANNOUNCEMENT: + this->announcement_playlist_delay_ms_ = delay_ms; + break; + case AudioPipelineType::MEDIA: + this->media_playlist_delay_ms_ = delay_ms; + break; + } +} + +void SpeakerMediaPlayer::watch_media_commands_() { + if (!this->is_ready()) { + return; + } + + MediaCallCommand media_command; + esp_err_t err = ESP_OK; + + if (xQueueReceive(this->media_control_command_queue_, &media_command, 0) == pdTRUE) { + bool new_url = media_command.new_url.has_value() && media_command.new_url.value(); + bool new_file = media_command.new_file.has_value() && media_command.new_file.value(); + + if (new_url || new_file) { + bool enqueue = media_command.enqueue.has_value() && media_command.enqueue.value(); + + if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) { + // Announcement playlist/pipeline + + if (!enqueue) { + // Clear the queue and ensure the loaded next item doesn't start playing + this->cancel_timeout("next_ann"); + this->announcement_playlist_.clear(); + } + + PlaylistItem playlist_item; + if (new_url) { + playlist_item.url = this->announcement_url_; + if (!enqueue) { + // 
Not adding to the queue, so directly start playback and internally unpause the pipeline + this->announcement_pipeline_->start_url(playlist_item.url.value()); + this->announcement_pipeline_->set_pause_state(false); + } + } else { + playlist_item.file = this->announcement_file_; + if (!enqueue) { + // Not adding to the queue, so directly start playback and internally unpause the pipeline + this->announcement_pipeline_->start_file(playlist_item.file.value()); + this->announcement_pipeline_->set_pause_state(false); + } + } + this->announcement_playlist_.push_back(playlist_item); + } else { + // Media playlist/pipeline + + if (!enqueue) { + // Clear the queue and ensure the loaded next item doesn't start playing + this->cancel_timeout("next_media"); + this->media_playlist_.clear(); + } + + this->is_paused_ = false; + PlaylistItem playlist_item; + if (new_url) { + playlist_item.url = this->media_url_; + if (!enqueue) { + // Not adding to the queue, so directly start playback and internally unpause the pipeline + this->media_pipeline_->start_url(playlist_item.url.value()); + this->media_pipeline_->set_pause_state(false); + } + } else { + playlist_item.file = this->media_file_; + if (!enqueue) { + // Not adding to the queue, so directly start playback and internally unpause the pipeline + this->media_pipeline_->start_file(playlist_item.file.value()); + this->media_pipeline_->set_pause_state(false); + } + } + this->media_playlist_.push_back(playlist_item); + } + + if (err != ESP_OK) { + ESP_LOGE(TAG, "Error starting the audio pipeline: %s", esp_err_to_name(err)); + this->status_set_error(); + } else { + this->status_clear_error(); + } + + return; // Don't process the new file play command further + } + + if (media_command.volume.has_value()) { + this->set_volume_(media_command.volume.value()); + this->publish_state(); + } + + if (media_command.command.has_value()) { + switch (media_command.command.value()) { + case media_player::MEDIA_PLAYER_COMMAND_PLAY: + if 
((this->media_pipeline_ != nullptr) && (this->is_paused_)) { + this->media_pipeline_->set_pause_state(false); + } + this->is_paused_ = false; + break; + case media_player::MEDIA_PLAYER_COMMAND_PAUSE: + if ((this->media_pipeline_ != nullptr) && (!this->is_paused_)) { + this->media_pipeline_->set_pause_state(true); + } + this->is_paused_ = true; + break; + case media_player::MEDIA_PLAYER_COMMAND_STOP: + if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) { + if (this->announcement_pipeline_ != nullptr) { + this->cancel_timeout("next_ann"); + this->announcement_playlist_.clear(); + this->announcement_pipeline_->stop(); + } + } else { + if (this->media_pipeline_ != nullptr) { + this->cancel_timeout("next_media"); + this->media_playlist_.clear(); + this->media_pipeline_->stop(); + } + } + break; + case media_player::MEDIA_PLAYER_COMMAND_TOGGLE: + if (this->media_pipeline_ != nullptr) { + if (this->is_paused_) { + this->media_pipeline_->set_pause_state(false); + this->is_paused_ = false; + } else { + this->media_pipeline_->set_pause_state(true); + this->is_paused_ = true; + } + } + break; + case media_player::MEDIA_PLAYER_COMMAND_MUTE: { + this->set_mute_state_(true); + + this->publish_state(); + break; + } + case media_player::MEDIA_PLAYER_COMMAND_UNMUTE: + this->set_mute_state_(false); + this->publish_state(); + break; + case media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP: + this->set_volume_(std::min(1.0f, this->volume + this->volume_increment_)); + this->publish_state(); + break; + case media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN: + this->set_volume_(std::max(0.0f, this->volume - this->volume_increment_)); + this->publish_state(); + break; + case media_player::MEDIA_PLAYER_COMMAND_REPEAT_ONE: + if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) { + this->announcement_repeat_one_ = true; + } else { + this->media_repeat_one_ = true; + } + break; + case 
media_player::MEDIA_PLAYER_COMMAND_REPEAT_OFF: + if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) { + this->announcement_repeat_one_ = false; + } else { + this->media_repeat_one_ = false; + } + break; + case media_player::MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST: + if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) { + if (this->announcement_playlist_.empty()) { + this->announcement_playlist_.resize(1); + } + } else { + if (this->media_playlist_.empty()) { + this->media_playlist_.resize(1); + } + } + break; + default: + break; + } + } + } +} + +void SpeakerMediaPlayer::loop() { + this->watch_media_commands_(); + + // Determine state of the media player + media_player::MediaPlayerState old_state = this->state; + + AudioPipelineState old_media_pipeline_state = this->media_pipeline_state_; + if (this->media_pipeline_ != nullptr) { + this->media_pipeline_state_ = this->media_pipeline_->process_state(); + this->decoded_playback_ms_ = this->media_pipeline_->get_playback_ms(); + } + + if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) { + ESP_LOGE(TAG, "The media pipeline's file reader encountered an error."); + } else if (this->media_pipeline_state_ == AudioPipelineState::ERROR_DECODING) { + ESP_LOGE(TAG, "The media pipeline's audio decoder encountered an error."); + } + + AudioPipelineState old_announcement_pipeline_state = this->announcement_pipeline_state_; + if (this->announcement_pipeline_ != nullptr) { + this->announcement_pipeline_state_ = this->announcement_pipeline_->process_state(); + } + + if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_READING) { + ESP_LOGE(TAG, "The announcement pipeline's file reader encountered an error."); + } else if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_DECODING) { + ESP_LOGE(TAG, "The announcement pipeline's audio decoder encountered an error."); + } + + if 
(this->announcement_pipeline_state_ != AudioPipelineState::STOPPED) { + this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING; + } else { + if (!this->announcement_playlist_.empty()) { + uint32_t timeout_ms = 0; + if (old_announcement_pipeline_state == AudioPipelineState::PLAYING) { + // Finished the current announcement file + if (!this->announcement_repeat_one_) { + // Pop item off the playlist if repeat is disabled + this->announcement_playlist_.pop_front(); + } + // Only delay starting playback if moving on the next playlist item or repeating the current item + timeout_ms = this->announcement_playlist_delay_ms_; + } + + if (!this->announcement_playlist_.empty()) { + // Start the next announcement file + PlaylistItem playlist_item = this->announcement_playlist_.front(); + if (playlist_item.url.has_value()) { + this->announcement_pipeline_->start_url(playlist_item.url.value()); + } else if (playlist_item.file.has_value()) { + this->announcement_pipeline_->start_file(playlist_item.file.value()); + } + + if (timeout_ms > 0) { + // Pause pipeline internally to facilitate delay between items + this->announcement_pipeline_->set_pause_state(true); + // Internally unpause the pipeline after the delay between playlist items + this->set_timeout("next_ann", timeout_ms, + [this]() { this->announcement_pipeline_->set_pause_state(this->is_paused_); }); + } + } + } else { + if (this->is_paused_) { + this->state = media_player::MEDIA_PLAYER_STATE_PAUSED; + } else if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) { + this->state = media_player::MEDIA_PLAYER_STATE_PLAYING; + } else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) { + // Reset playback durations + this->decoded_playback_ms_ = 0; + this->playback_us_ = 0; + this->playback_ms_ = 0; + this->remainder_us_ = 0; + this->pending_ms_ = 0; + + if (!media_playlist_.empty()) { + uint32_t timeout_ms = 0; + if (old_media_pipeline_state == AudioPipelineState::PLAYING) { + // Finished the 
current media file + if (!this->media_repeat_one_) { + // Pop item off the playlist if repeat is disabled + this->media_playlist_.pop_front(); + } + // Only delay starting playback if moving on the next playlist item or repeating the current item; the media playlist uses its own configured delay, not the announcement one + timeout_ms = this->media_playlist_delay_ms_; + } + if (!this->media_playlist_.empty()) { + PlaylistItem playlist_item = this->media_playlist_.front(); + if (playlist_item.url.has_value()) { + this->media_pipeline_->start_url(playlist_item.url.value()); + } else if (playlist_item.file.has_value()) { + this->media_pipeline_->start_file(playlist_item.file.value()); + } + + if (timeout_ms > 0) { + // Pause pipeline internally to facilitate delay between items + this->media_pipeline_->set_pause_state(true); + // Internally unpause the pipeline after the delay between playlist items + this->set_timeout("next_media", timeout_ms, + [this]() { this->media_pipeline_->set_pause_state(this->is_paused_); }); + } + } + } else { + this->state = media_player::MEDIA_PLAYER_STATE_IDLE; + } + } + } + } + + if (this->state != old_state) { + this->publish_state(); + ESP_LOGD(TAG, "State changed to %s", media_player::media_player_state_to_string(this->state)); + } +} + +void SpeakerMediaPlayer::play_file(audio::AudioFile *media_file, bool announcement, bool enqueue) { + if (!this->is_ready()) { + // Ignore any commands sent before the media player is setup + return; + } + + MediaCallCommand media_command; + + media_command.new_file = true; + if (this->single_pipeline_() || announcement) { + this->announcement_file_ = media_file; + media_command.announce = true; + } else { + this->media_file_ = media_file; + media_command.announce = false; + } + media_command.enqueue = enqueue; + xQueueSend(this->media_control_command_queue_, &media_command, portMAX_DELAY); +} + +void SpeakerMediaPlayer::control(const media_player::MediaPlayerCall &call) { + if (!this->is_ready()) { + // Ignore any commands sent before the media player is setup + 
return;
+  }
+
+  MediaCallCommand media_command;
+
+  if (this->single_pipeline_() || (call.get_announcement().has_value() && call.get_announcement().value())) {
+    media_command.announce = true;
+  } else {
+    media_command.announce = false;
+  }
+
+  if (call.get_media_url().has_value()) {
+    std::string new_uri = call.get_media_url().value();
+
+    media_command.new_url = true;
+    if (this->single_pipeline_() || (call.get_announcement().has_value() && call.get_announcement().value())) {
+      this->announcement_url_ = new_uri;
+    } else {
+      this->media_url_ = new_uri;
+    }
+
+    if (call.get_command().has_value()) {
+      if (call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_ENQUEUE) {
+        media_command.enqueue = true;
+      }
+    }
+
+    xQueueSend(this->media_control_command_queue_, &media_command, portMAX_DELAY);
+    return;
+  }
+
+  if (call.get_volume().has_value()) {
+    media_command.volume = call.get_volume().value();
+    // Don't block on a full queue (0 ticks); volume changes aren't that important
+    xQueueSend(this->media_control_command_queue_, &media_command, 0);
+    return;
+  }
+
+  if (call.get_command().has_value()) {
+    media_command.command = call.get_command().value();
+    TickType_t ticks_to_wait = portMAX_DELAY;
+    if ((call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP) ||
+        (call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN)) {
+      ticks_to_wait = 0;  // Don't block on a full queue; volume changes aren't that important
+    }
+    xQueueSend(this->media_control_command_queue_, &media_command, ticks_to_wait);
+    return;
+  }
+}
+
+media_player::MediaPlayerTraits SpeakerMediaPlayer::get_traits() {
+  auto traits = media_player::MediaPlayerTraits();
+  if (!this->single_pipeline_()) {
+    traits.set_supports_pause(true);
+  }
+
+  if (this->announcement_format_.has_value()) {
+    traits.get_supported_formats().push_back(this->announcement_format_.value());
+  }
+  if (this->media_format_.has_value()) {
+    traits.get_supported_formats().push_back(this->media_format_.value());
+  } else if (this->single_pipeline_() && this->announcement_format_.has_value()) {
+    // Only one pipeline is defined, so use the announcement format (if configured) for the default purpose
+    media_player::MediaPlayerSupportedFormat media_format = this->announcement_format_.value();
+    media_format.purpose = media_player::MediaPlayerFormatPurpose::PURPOSE_DEFAULT;
+    traits.get_supported_formats().push_back(media_format);
+  }
+
+  return traits;
+}
+
+void SpeakerMediaPlayer::save_volume_restore_state_() {
+  VolumeRestoreState volume_restore_state;
+  volume_restore_state.volume = this->volume;
+  volume_restore_state.is_muted = this->is_muted_;
+  this->pref_.save(&volume_restore_state);
+}
+
+void SpeakerMediaPlayer::set_mute_state_(bool mute_state) {
+  if (this->media_speaker_ != nullptr) {
+    this->media_speaker_->set_mute_state(mute_state);
+  }
+  if (this->announcement_speaker_ != nullptr) {
+    this->announcement_speaker_->set_mute_state(mute_state);
+  }
+
+  bool old_mute_state = this->is_muted_;
+  this->is_muted_ = mute_state;
+
+  this->save_volume_restore_state_();
+
+  if (old_mute_state != mute_state) {
+    if (mute_state) {
+      this->defer([this]() { this->mute_trigger_->trigger(); });
+    } else {
+      this->defer([this]() { this->unmute_trigger_->trigger(); });
+    }
+  }
+}
+
+void SpeakerMediaPlayer::set_volume_(float volume, bool publish) {
+  // Remap the volume to fit within the configured limits
+  float bounded_volume = remap(volume,
0.0f, 1.0f, this->volume_min_, this->volume_max_); + + if (this->media_speaker_ != nullptr) { + this->media_speaker_->set_volume(bounded_volume); + } + + if (this->announcement_speaker_ != nullptr) { + this->announcement_speaker_->set_volume(bounded_volume); + } + + if (publish) { + this->volume = volume; + this->save_volume_restore_state_(); + } + + // Turn on the mute state if the volume is effectively zero, off otherwise + if (volume < 0.001) { + this->set_mute_state_(true); + } else { + this->set_mute_state_(false); + } + + this->defer([this, volume]() { this->volume_trigger_->trigger(volume); }); +} + +} // namespace speaker +} // namespace esphome + +#endif diff --git a/esphome/components/speaker/media_player/speaker_media_player.h b/esphome/components/speaker/media_player/speaker_media_player.h new file mode 100644 index 0000000000..6cbce91866 --- /dev/null +++ b/esphome/components/speaker/media_player/speaker_media_player.h @@ -0,0 +1,160 @@ +#pragma once + +#ifdef USE_ESP_IDF + +#include "audio_pipeline.h" + +#include "esphome/components/audio/audio.h" + +#include "esphome/components/media_player/media_player.h" +#include "esphome/components/speaker/speaker.h" + +#include "esphome/core/automation.h" +#include "esphome/core/component.h" +#include "esphome/core/preferences.h" + +#include +#include +#include + +namespace esphome { +namespace speaker { + +struct MediaCallCommand { + optional command; + optional volume; + optional announce; + optional new_url; + optional new_file; + optional enqueue; +}; + +struct PlaylistItem { + optional url; + optional file; +}; + +struct VolumeRestoreState { + float volume; + bool is_muted; +}; + +class SpeakerMediaPlayer : public Component, public media_player::MediaPlayer { + public: + float get_setup_priority() const override { return esphome::setup_priority::PROCESSOR; } + void setup() override; + void loop() override; + + // MediaPlayer implementations + media_player::MediaPlayerTraits get_traits() override; + bool 
is_muted() const override { return this->is_muted_; }
+
+  void set_buffer_size(size_t buffer_size) { this->buffer_size_ = buffer_size; }
+  void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
+
+  // Percentage to increase or decrease the volume for volume up or volume down commands
+  void set_volume_increment(float volume_increment) { this->volume_increment_ = volume_increment; }
+
+  void set_volume_max(float volume_max) { this->volume_max_ = volume_max; }
+  void set_volume_min(float volume_min) { this->volume_min_ = volume_min; }
+
+  void set_announcement_speaker(Speaker *announcement_speaker) { this->announcement_speaker_ = announcement_speaker; }
+  void set_announcement_format(const media_player::MediaPlayerSupportedFormat &announcement_format) {
+    this->announcement_format_ = announcement_format;
+  }
+  void set_media_speaker(Speaker *media_speaker) { this->media_speaker_ = media_speaker; }
+  void set_media_format(const media_player::MediaPlayerSupportedFormat &media_format) {
+    this->media_format_ = media_format;
+  }
+
+  Trigger<> *get_mute_trigger() const { return this->mute_trigger_; }
+  Trigger<> *get_unmute_trigger() const { return this->unmute_trigger_; }
+  Trigger<float> *get_volume_trigger() const { return this->volume_trigger_; }
+
+  void play_file(audio::AudioFile *media_file, bool announcement, bool enqueue);
+
+  uint32_t get_playback_ms() const { return this->playback_ms_; }
+  uint32_t get_playback_us() const { return this->playback_us_; }
+  uint32_t get_decoded_playback_ms() const { return this->decoded_playback_ms_; }
+
+  void set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms);
+
+ protected:
+  // Receives commands from HA or from the voice assistant component
+  // Sends commands to the media_control_command_queue_
+  void control(const media_player::MediaPlayerCall &call) override;
+
+  /// @brief Updates this->volume and saves volume/mute state to flash for restoration if
publish is true.
+  void set_volume_(float volume, bool publish = true);
+
+  /// @brief Sets the mute state on the configured speakers; fires the mute/unmute trigger on a change.
+  /// Always saves volume/mute state to flash for restoration.
+  /// @param mute_state If true, audio will be muted. If false, audio will be unmuted
+  void set_mute_state_(bool mute_state);
+
+  /// @brief Saves the current volume and mute state to the flash for restoration.
+  void save_volume_restore_state_();
+
+  /// Returns true if the media player has only the announcement pipeline defined, false if both the announcement and
+  /// media pipelines are defined.
+  inline bool single_pipeline_() { return (this->media_speaker_ == nullptr); }
+
+  // Processes commands from media_control_command_queue_.
+  void watch_media_commands_();
+
+  std::unique_ptr<AudioPipeline> announcement_pipeline_;
+  std::unique_ptr<AudioPipeline> media_pipeline_;
+  Speaker *media_speaker_{nullptr};
+  Speaker *announcement_speaker_{nullptr};
+
+  optional<media_player::MediaPlayerSupportedFormat> media_format_;
+  AudioPipelineState media_pipeline_state_{AudioPipelineState::STOPPED};
+  std::string media_url_{};         // only modified by control function
+  audio::AudioFile *media_file_{};  // only modified by play_file function
+  bool media_repeat_one_{false};
+  uint32_t media_playlist_delay_ms_{0};
+
+  optional<media_player::MediaPlayerSupportedFormat> announcement_format_;
+  AudioPipelineState announcement_pipeline_state_{AudioPipelineState::STOPPED};
+  std::string announcement_url_{};         // only modified by control function
+  audio::AudioFile *announcement_file_{};  // only modified by play_file function
+  bool announcement_repeat_one_{false};
+  uint32_t announcement_playlist_delay_ms_{0};
+
+  QueueHandle_t media_control_command_queue_;
+
+  std::deque<PlaylistItem> announcement_playlist_;
+  std::deque<PlaylistItem> media_playlist_;
+
+  size_t buffer_size_;
+
+  bool task_stack_in_psram_;
+
+  bool is_paused_{false};
+  bool is_muted_{false};
+
+  // The amount to change the volume on volume up/down commands
+  float volume_increment_;
+
+  float volume_max_;
+  float volume_min_;
+
+  // Used to save
volume/mute state for restoration on reboot + ESPPreferenceObject pref_; + + Trigger<> *mute_trigger_ = new Trigger<>(); + Trigger<> *unmute_trigger_ = new Trigger<>(); + Trigger *volume_trigger_ = new Trigger(); + + uint32_t decoded_playback_ms_{0}; + uint32_t playback_us_{0}; + uint32_t playback_ms_{0}; + uint32_t remainder_us_{0}; + uint32_t pending_ms_{0}; + uint32_t last_audio_write_timestamp_{0}; +}; + +} // namespace speaker +} // namespace esphome + +#endif diff --git a/tests/components/media_player/common.yaml b/tests/components/media_player/common.yaml index af0d5c7765..763bc231c0 100644 --- a/tests/components/media_player/common.yaml +++ b/tests/components/media_player/common.yaml @@ -21,6 +21,8 @@ media_player: - media_player.pause: on_play: - media_player.stop: + - media_player.stop: + announcement: true on_pause: - media_player.toggle: - wait_until: diff --git a/tests/components/speaker/common-media_player.yaml b/tests/components/speaker/common-media_player.yaml new file mode 100644 index 0000000000..edc9f670fc --- /dev/null +++ b/tests/components/speaker/common-media_player.yaml @@ -0,0 +1,12 @@ +<<: !include common.yaml + +media_player: + - platform: speaker + id: speaker_media_player_id + announcement_pipeline: + speaker: speaker_id + buffer_size: 1000000 + volume_increment: 0.02 + volume_max: 0.95 + volume_min: 0.0 + task_stack_in_psram: true diff --git a/tests/components/speaker/media_player.esp32-idf.yaml b/tests/components/speaker/media_player.esp32-idf.yaml new file mode 100644 index 0000000000..4712e4bae8 --- /dev/null +++ b/tests/components/speaker/media_player.esp32-idf.yaml @@ -0,0 +1,9 @@ +substitutions: + scl_pin: GPIO16 + sda_pin: GPIO17 + i2s_bclk_pin: GPIO27 + i2s_lrclk_pin: GPIO26 + i2s_mclk_pin: GPIO25 + i2s_dout_pin: GPIO23 + +<<: !include common-media_player.yaml diff --git a/tests/components/speaker/media_player.esp32-s3-idf.yaml b/tests/components/speaker/media_player.esp32-s3-idf.yaml new file mode 100644 index 
0000000000..b3eec04d23 --- /dev/null +++ b/tests/components/speaker/media_player.esp32-s3-idf.yaml @@ -0,0 +1,9 @@ +substitutions: + scl_pin: GPIO2 + sda_pin: GPIO3 + i2s_bclk_pin: GPIO4 + i2s_lrclk_pin: GPIO5 + i2s_mclk_pin: GPIO6 + i2s_dout_pin: GPIO7 + +<<: !include common-media_player.yaml