
[speaker] Media Player Components PR9 (#8171)

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
Kevin Ahrendt 2025-02-10 13:00:23 -06:00 committed by GitHub
parent 8be9f02693
commit 84836f15db
15 changed files with 2043 additions and 21 deletions

View File

@@ -390,6 +390,7 @@ esphome/components/sn74hc165/* @jesserockz
esphome/components/socket/* @esphome/core
esphome/components/sonoff_d1/* @anatoly-savchenkov
esphome/components/speaker/* @jesserockz @kahrendt
esphome/components/speaker/media_player/* @kahrendt @synesthesiam
esphome/components/spi/* @clydebarrow @esphome/core
esphome/components/spi_device/* @clydebarrow
esphome/components/spi_led_strip/* @clydebarrow

View File

@@ -1,5 +1,4 @@
from esphome import automation
from esphome.automation import maybe_simple_id
import esphome.codegen as cg
import esphome.config_validation as cv
from esphome.const import (
@@ -21,6 +20,16 @@ media_player_ns = cg.esphome_ns.namespace("media_player")
MediaPlayer = media_player_ns.class_("MediaPlayer")
MediaPlayerSupportedFormat = media_player_ns.struct("MediaPlayerSupportedFormat")
MediaPlayerFormatPurpose = media_player_ns.enum(
"MediaPlayerFormatPurpose", is_class=True
)
MEDIA_PLAYER_FORMAT_PURPOSE_ENUM = {
"default": MediaPlayerFormatPurpose.PURPOSE_DEFAULT,
"announcement": MediaPlayerFormatPurpose.PURPOSE_ANNOUNCEMENT,
}
PlayAction = media_player_ns.class_(
"PlayAction", automation.Action, cg.Parented.template(MediaPlayer)
@@ -47,7 +56,7 @@ VolumeSetAction = media_player_ns.class_(
"VolumeSetAction", automation.Action, cg.Parented.template(MediaPlayer)
)
CONF_ANNOUNCEMENT = "announcement"
CONF_ON_PLAY = "on_play"
CONF_ON_PAUSE = "on_pause"
CONF_ON_ANNOUNCEMENT = "on_announcement"
@@ -125,7 +134,16 @@ MEDIA_PLAYER_SCHEMA = cv.ENTITY_BASE_SCHEMA.extend(
)
MEDIA_PLAYER_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(MediaPlayer)})
MEDIA_PLAYER_ACTION_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.use_id(MediaPlayer),
cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.templatable(cv.boolean),
}
)
MEDIA_PLAYER_CONDITION_SCHEMA = automation.maybe_simple_id(
{cv.GenerateID(): cv.use_id(MediaPlayer)}
)
@automation.register_action(
@@ -135,6 +153,7 @@ MEDIA_PLAYER_ACTION_SCHEMA = maybe_simple_id({cv.GenerateID(): cv.use_id(MediaPl
{
cv.GenerateID(): cv.use_id(MediaPlayer),
cv.Required(CONF_MEDIA_URL): cv.templatable(cv.url),
cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.templatable(cv.boolean),
},
key=CONF_MEDIA_URL,
),
@@ -143,7 +162,9 @@ async def media_player_play_media_action(config, action_id, template_arg, args):
var = cg.new_Pvariable(action_id, template_arg)
await cg.register_parented(var, config[CONF_ID])
media_url = await cg.templatable(config[CONF_MEDIA_URL], args, cg.std_string)
announcement = await cg.templatable(config[CONF_ANNOUNCEMENT], args, cg.bool_)
cg.add(var.set_media_url(media_url))
cg.add(var.set_announcement(announcement))
return var
@@ -161,19 +182,27 @@ async def media_player_play_media_action(config, action_id, template_arg, args):
@automation.register_action(
"media_player.volume_down", VolumeDownAction, MEDIA_PLAYER_ACTION_SCHEMA
)
@automation.register_condition(
"media_player.is_idle", IsIdleCondition, MEDIA_PLAYER_ACTION_SCHEMA
)
@automation.register_condition(
"media_player.is_paused", IsPausedCondition, MEDIA_PLAYER_ACTION_SCHEMA
)
@automation.register_condition(
"media_player.is_playing", IsPlayingCondition, MEDIA_PLAYER_ACTION_SCHEMA
)
@automation.register_condition(
"media_player.is_announcing", IsAnnouncingCondition, MEDIA_PLAYER_ACTION_SCHEMA
)
async def media_player_action(config, action_id, template_arg, args):
var = cg.new_Pvariable(action_id, template_arg)
await cg.register_parented(var, config[CONF_ID])
announcement = await cg.templatable(config[CONF_ANNOUNCEMENT], args, cg.bool_)
cg.add(var.set_announcement(announcement))
return var
@automation.register_condition(
"media_player.is_idle", IsIdleCondition, MEDIA_PLAYER_CONDITION_SCHEMA
)
@automation.register_condition(
"media_player.is_paused", IsPausedCondition, MEDIA_PLAYER_CONDITION_SCHEMA
)
@automation.register_condition(
"media_player.is_playing", IsPlayingCondition, MEDIA_PLAYER_CONDITION_SCHEMA
)
@automation.register_condition(
"media_player.is_announcing", IsAnnouncingCondition, MEDIA_PLAYER_CONDITION_SCHEMA
)
async def media_player_condition(config, action_id, template_arg, args):
var = cg.new_Pvariable(action_id, template_arg)
await cg.register_parented(var, config[CONF_ID])
return var

View File

@@ -10,7 +10,10 @@ namespace media_player {
template<MediaPlayerCommand Command, typename... Ts>
class MediaPlayerCommandAction : public Action<Ts...>, public Parented<MediaPlayer> {
public:
void play(Ts... x) override { this->parent_->make_call().set_command(Command).perform(); }
TEMPLATABLE_VALUE(bool, announcement);
void play(Ts... x) override {
this->parent_->make_call().set_command(Command).set_announcement(this->announcement_.value(x...)).perform();
}
};
template<typename... Ts>
@@ -28,7 +31,13 @@ using VolumeDownAction = MediaPlayerCommandAction<MediaPlayerCommand::MEDIA_PLAY
template<typename... Ts> class PlayMediaAction : public Action<Ts...>, public Parented<MediaPlayer> {
TEMPLATABLE_VALUE(std::string, media_url)
void play(Ts... x) override { this->parent_->make_call().set_media_url(this->media_url_.value(x...)).perform(); }
TEMPLATABLE_VALUE(bool, announcement)
void play(Ts... x) override {
this->parent_->make_call()
.set_media_url(this->media_url_.value(x...))
.set_announcement(this->announcement_.value(x...))
.perform();
}
};
template<typename... Ts> class VolumeSetAction : public Action<Ts...>, public Parented<MediaPlayer> {

View File

@@ -41,6 +41,14 @@ const char *media_player_command_to_string(MediaPlayerCommand command) {
return "VOLUME_UP";
case MEDIA_PLAYER_COMMAND_VOLUME_DOWN:
return "VOLUME_DOWN";
case MEDIA_PLAYER_COMMAND_ENQUEUE:
return "ENQUEUE";
case MEDIA_PLAYER_COMMAND_REPEAT_ONE:
return "REPEAT_ONE";
case MEDIA_PLAYER_COMMAND_REPEAT_OFF:
return "REPEAT_OFF";
case MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST:
return "CLEAR_PLAYLIST";
default:
return "UNKNOWN";
}

View File

@@ -24,6 +24,10 @@ enum MediaPlayerCommand : uint8_t {
MEDIA_PLAYER_COMMAND_TOGGLE = 5,
MEDIA_PLAYER_COMMAND_VOLUME_UP = 6,
MEDIA_PLAYER_COMMAND_VOLUME_DOWN = 7,
MEDIA_PLAYER_COMMAND_ENQUEUE = 8,
MEDIA_PLAYER_COMMAND_REPEAT_ONE = 9,
MEDIA_PLAYER_COMMAND_REPEAT_OFF = 10,
MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST = 11,
};
const char *media_player_command_to_string(MediaPlayerCommand command);
@@ -72,10 +76,10 @@ class MediaPlayerCall {
void perform();
const optional<MediaPlayerCommand> &get_command() const { return command_; }
const optional<std::string> &get_media_url() const { return media_url_; }
const optional<float> &get_volume() const { return volume_; }
const optional<bool> &get_announcement() const { return announcement_; }
const optional<MediaPlayerCommand> &get_command() const { return this->command_; }
const optional<std::string> &get_media_url() const { return this->media_url_; }
const optional<float> &get_volume() const { return this->volume_; }
const optional<bool> &get_announcement() const { return this->announcement_; }
protected:
void validate_();
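For context, a minimal sketch (not part of this commit) of how the new announcement flag travels through a MediaPlayerCall, for example from a lambda; my_player is a hypothetical media player id and the URL is illustrative:

// Hypothetical lambda snippet: set_announcement(true) asks the media player to treat
// the stream as an announcement (speaker media players route it to the announcement pipeline).
id(my_player).make_call()
    .set_media_url("https://example.com/doorbell.flac")
    .set_announcement(true)
    .perform();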

View File

@@ -0,0 +1,458 @@
"""Speaker Media Player Setup."""
import hashlib
import logging
from pathlib import Path
from esphome import automation, external_files
import esphome.codegen as cg
from esphome.components import audio, esp32, media_player, speaker
import esphome.config_validation as cv
from esphome.const import (
CONF_BUFFER_SIZE,
CONF_FILE,
CONF_FILES,
CONF_FORMAT,
CONF_ID,
CONF_NUM_CHANNELS,
CONF_PATH,
CONF_RAW_DATA_ID,
CONF_SAMPLE_RATE,
CONF_SPEAKER,
CONF_TASK_STACK_IN_PSRAM,
CONF_TYPE,
CONF_URL,
)
from esphome.core import CORE, HexInt
from esphome.core.entity_helpers import inherit_property_from
from esphome.external_files import download_content
_LOGGER = logging.getLogger(__name__)
AUTO_LOAD = ["audio", "psram"]
CODEOWNERS = ["@kahrendt", "@synesthesiam"]
DOMAIN = "media_player"
TYPE_LOCAL = "local"
TYPE_WEB = "web"
CONF_ANNOUNCEMENT = "announcement"
CONF_ANNOUNCEMENT_PIPELINE = "announcement_pipeline"
CONF_CODEC_SUPPORT_ENABLED = "codec_support_enabled"
CONF_ENQUEUE = "enqueue"
CONF_MEDIA_FILE = "media_file"
CONF_MEDIA_PIPELINE = "media_pipeline"
CONF_ON_MUTE = "on_mute"
CONF_ON_UNMUTE = "on_unmute"
CONF_ON_VOLUME = "on_volume"
CONF_STREAM = "stream"
CONF_VOLUME_INCREMENT = "volume_increment"
CONF_VOLUME_MIN = "volume_min"
CONF_VOLUME_MAX = "volume_max"
speaker_ns = cg.esphome_ns.namespace("speaker")
SpeakerMediaPlayer = speaker_ns.class_(
"SpeakerMediaPlayer",
media_player.MediaPlayer,
cg.Component,
)
AudioPipeline = speaker_ns.class_("AudioPipeline")
AudioPipelineType = speaker_ns.enum("AudioPipelineType", is_class=True)
AUDIO_PIPELINE_TYPE_ENUM = {
"MEDIA": AudioPipelineType.MEDIA,
"ANNOUNCEMENT": AudioPipelineType.ANNOUNCEMENT,
}
PlayOnDeviceMediaAction = speaker_ns.class_(
"PlayOnDeviceMediaAction",
automation.Action,
cg.Parented.template(SpeakerMediaPlayer),
)
StopStreamAction = speaker_ns.class_(
"StopStreamAction", automation.Action, cg.Parented.template(SpeakerMediaPlayer)
)
def _compute_local_file_path(value: dict) -> Path:
url = value[CONF_URL]
h = hashlib.new("sha256")
h.update(url.encode())
key = h.hexdigest()[:8]
base_dir = external_files.compute_local_file_dir(DOMAIN)
_LOGGER.debug("_compute_local_file_path: base_dir=%s", base_dir / key)
return base_dir / key
def _download_web_file(value):
url = value[CONF_URL]
path = _compute_local_file_path(value)
download_content(url, path)
_LOGGER.debug("download_web_file: path=%s", path)
return value
# Returns a media_player.MediaPlayerSupportedFormat struct with the configured
# format, sample rate, number of channels, purpose, and bytes per sample
def _get_supported_format_struct(pipeline, type):
args = [
media_player.MediaPlayerSupportedFormat,
]
if pipeline[CONF_FORMAT] == "FLAC":
args.append(("format", "flac"))
elif pipeline[CONF_FORMAT] == "MP3":
args.append(("format", "mp3"))
elif pipeline[CONF_FORMAT] == "WAV":
args.append(("format", "wav"))
args.append(("sample_rate", pipeline[CONF_SAMPLE_RATE]))
args.append(("num_channels", pipeline[CONF_NUM_CHANNELS]))
if type == "MEDIA":
args.append(
(
"purpose",
media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["default"],
)
)
elif type == "ANNOUNCEMENT":
args.append(
(
"purpose",
media_player.MEDIA_PLAYER_FORMAT_PURPOSE_ENUM["announcement"],
)
)
if pipeline[CONF_FORMAT] != "MP3":
args.append(("sample_bytes", 2))
return cg.StructInitializer(*args)
def _file_schema(value):
if isinstance(value, str):
return _validate_file_shorthand(value)
return TYPED_FILE_SCHEMA(value)
def _read_audio_file_and_type(file_config):
conf_file = file_config[CONF_FILE]
file_source = conf_file[CONF_TYPE]
if file_source == TYPE_LOCAL:
path = CORE.relative_config_path(conf_file[CONF_PATH])
elif file_source == TYPE_WEB:
path = _compute_local_file_path(conf_file)
else:
raise cv.Invalid("Unsupported file source.")
with open(path, "rb") as f:
data = f.read()
import puremagic
file_type: str = puremagic.from_string(data)
if file_type.startswith("."):
file_type = file_type[1:]
media_file_type = audio.AUDIO_FILE_TYPE_ENUM["NONE"]
if file_type in ("wav",):
media_file_type = audio.AUDIO_FILE_TYPE_ENUM["WAV"]
elif file_type in ("mp3", "mpeg", "mpga"):
media_file_type = audio.AUDIO_FILE_TYPE_ENUM["MP3"]
elif file_type in ("flac",):
media_file_type = audio.AUDIO_FILE_TYPE_ENUM["FLAC"]
return data, media_file_type
def _validate_file_shorthand(value):
value = cv.string_strict(value)
if value.startswith("http://") or value.startswith("https://"):
return _file_schema(
{
CONF_TYPE: TYPE_WEB,
CONF_URL: value,
}
)
return _file_schema(
{
CONF_TYPE: TYPE_LOCAL,
CONF_PATH: value,
}
)
def _validate_pipeline(config):
# Inherit transcoder settings from speaker if not manually set
inherit_property_from(CONF_NUM_CHANNELS, CONF_SPEAKER)(config)
inherit_property_from(CONF_SAMPLE_RATE, CONF_SPEAKER)(config)
# Validate that the transcoder settings are compatible with the speaker
audio.final_validate_audio_schema(
"speaker media_player",
audio_device=CONF_SPEAKER,
bits_per_sample=16,
channels=config.get(CONF_NUM_CHANNELS),
sample_rate=config.get(CONF_SAMPLE_RATE),
)(config)
return config
def _validate_repeated_speaker(config):
if (announcement_config := config.get(CONF_ANNOUNCEMENT_PIPELINE)) and (
media_config := config.get(CONF_MEDIA_PIPELINE)
):
if announcement_config[CONF_SPEAKER] == media_config[CONF_SPEAKER]:
raise cv.Invalid(
"The announcement and media pipelines cannot use the same speaker. Use the `mixer` speaker component to create two source speakers."
)
return config
def _validate_supported_local_file(config):
for file_config in config.get(CONF_FILES, []):
_, media_file_type = _read_audio_file_and_type(file_config)
if str(media_file_type) == str(audio.AUDIO_FILE_TYPE_ENUM["NONE"]):
raise cv.Invalid("Unsupported local media file.")
if not config[CONF_CODEC_SUPPORT_ENABLED] and str(media_file_type) != str(
audio.AUDIO_FILE_TYPE_ENUM["WAV"]
):
# Only wav files are supported
raise cv.Invalid(
f"Unsupported local media file type, set {CONF_CODEC_SUPPORT_ENABLED} to true or convert the media file to wav"
)
return config
LOCAL_SCHEMA = cv.Schema(
{
cv.Required(CONF_PATH): cv.file_,
}
)
WEB_SCHEMA = cv.All(
{
cv.Required(CONF_URL): cv.url,
},
_download_web_file,
)
TYPED_FILE_SCHEMA = cv.typed_schema(
{
TYPE_LOCAL: LOCAL_SCHEMA,
TYPE_WEB: WEB_SCHEMA,
},
)
MEDIA_FILE_TYPE_SCHEMA = cv.Schema(
{
cv.Required(CONF_ID): cv.declare_id(audio.AudioFile),
cv.Required(CONF_FILE): _file_schema,
cv.GenerateID(CONF_RAW_DATA_ID): cv.declare_id(cg.uint8),
}
)
PIPELINE_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.declare_id(AudioPipeline),
cv.Required(CONF_SPEAKER): cv.use_id(speaker.Speaker),
cv.Optional(CONF_FORMAT, default="FLAC"): cv.enum(audio.AUDIO_FILE_TYPE_ENUM),
cv.Optional(CONF_SAMPLE_RATE): cv.int_range(min=1),
cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2),
}
)
CONFIG_SCHEMA = cv.All(
media_player.MEDIA_PLAYER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(SpeakerMediaPlayer),
cv.Required(CONF_ANNOUNCEMENT_PIPELINE): PIPELINE_SCHEMA,
cv.Optional(CONF_MEDIA_PIPELINE): PIPELINE_SCHEMA,
cv.Optional(CONF_BUFFER_SIZE, default=1000000): cv.int_range(
min=4000, max=4000000
),
cv.Optional(CONF_CODEC_SUPPORT_ENABLED, default=True): cv.boolean,
cv.Optional(CONF_FILES): cv.ensure_list(MEDIA_FILE_TYPE_SCHEMA),
cv.Optional(CONF_TASK_STACK_IN_PSRAM, default=False): cv.boolean,
cv.Optional(CONF_VOLUME_INCREMENT, default=0.05): cv.percentage,
cv.Optional(CONF_VOLUME_MAX, default=1.0): cv.percentage,
cv.Optional(CONF_VOLUME_MIN, default=0.0): cv.percentage,
cv.Optional(CONF_ON_MUTE): automation.validate_automation(single=True),
cv.Optional(CONF_ON_UNMUTE): automation.validate_automation(single=True),
cv.Optional(CONF_ON_VOLUME): automation.validate_automation(single=True),
}
),
cv.only_with_esp_idf,
_validate_repeated_speaker,
)
FINAL_VALIDATE_SCHEMA = cv.All(
cv.Schema(
{
cv.Optional(CONF_ANNOUNCEMENT_PIPELINE): _validate_pipeline,
cv.Optional(CONF_MEDIA_PIPELINE): _validate_pipeline,
},
extra=cv.ALLOW_EXTRA,
),
_validate_supported_local_file,
)
async def to_code(config):
if config[CONF_CODEC_SUPPORT_ENABLED]:
# Compile all supported audio codecs and optimize the wifi settings
cg.add_define("USE_AUDIO_FLAC_SUPPORT", True)
cg.add_define("USE_AUDIO_MP3_SUPPORT", True)
# Wifi settings based on https://github.com/espressif/esp-adf/issues/297#issuecomment-783811702
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_RX_BUFFER_NUM", 16)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_DYNAMIC_RX_BUFFER_NUM", 512)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_TX_BUFFER", True)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_TX_BUFFER_TYPE", 0)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_STATIC_TX_BUFFER_NUM", 8)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_CACHE_TX_BUFFER_NUM", 32)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_AMPDU_TX_ENABLED", True)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_TX_BA_WIN", 16)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_AMPDU_RX_ENABLED", True)
esp32.add_idf_sdkconfig_option("CONFIG_ESP32_WIFI_RX_BA_WIN", 32)
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_MAX_ACTIVE_TCP", 16)
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_MAX_LISTENING_TCP", 16)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_MAXRTX", 12)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_SYNMAXRTX", 6)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_MSS", 1436)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_MSL", 60000)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_SND_BUF_DEFAULT", 65535)
esp32.add_idf_sdkconfig_option(
"CONFIG_TCP_WND_DEFAULT", 65535
) # Adjusted from referenced settings to avoid compilation error
esp32.add_idf_sdkconfig_option("CONFIG_TCP_RECVMBOX_SIZE", 512)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_QUEUE_OOSEQ", True)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_OVERSIZE_MSS", True)
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_WND_SCALE", True)
esp32.add_idf_sdkconfig_option("CONFIG_TCP_RCV_SCALE", 3)
esp32.add_idf_sdkconfig_option("CONFIG_LWIP_TCPIP_RECVMBOX_SIZE", 512)
# Allocate wifi buffers in PSRAM
esp32.add_idf_sdkconfig_option("CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP", True)
var = cg.new_Pvariable(config[CONF_ID])
await cg.register_component(var, config)
await media_player.register_media_player(var, config)
cg.add_define("USE_OTA_STATE_CALLBACK")
cg.add(var.set_buffer_size(config[CONF_BUFFER_SIZE]))
cg.add(var.set_task_stack_in_psram(config[CONF_TASK_STACK_IN_PSRAM]))
if config[CONF_TASK_STACK_IN_PSRAM]:
esp32.add_idf_sdkconfig_option(
"CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True
)
cg.add(var.set_volume_increment(config[CONF_VOLUME_INCREMENT]))
cg.add(var.set_volume_max(config[CONF_VOLUME_MAX]))
cg.add(var.set_volume_min(config[CONF_VOLUME_MIN]))
announcement_pipeline_config = config[CONF_ANNOUNCEMENT_PIPELINE]
spkr = await cg.get_variable(announcement_pipeline_config[CONF_SPEAKER])
cg.add(var.set_announcement_speaker(spkr))
if announcement_pipeline_config[CONF_FORMAT] != "NONE":
cg.add(
var.set_announcement_format(
_get_supported_format_struct(
announcement_pipeline_config, "ANNOUNCEMENT"
)
)
)
if media_pipeline_config := config.get(CONF_MEDIA_PIPELINE):
spkr = await cg.get_variable(media_pipeline_config[CONF_SPEAKER])
cg.add(var.set_media_speaker(spkr))
if media_pipeline_config[CONF_FORMAT] != "NONE":
cg.add(
var.set_media_format(
_get_supported_format_struct(media_pipeline_config, "MEDIA")
)
)
if on_mute := config.get(CONF_ON_MUTE):
await automation.build_automation(
var.get_mute_trigger(),
[],
on_mute,
)
if on_unmute := config.get(CONF_ON_UNMUTE):
await automation.build_automation(
var.get_unmute_trigger(),
[],
on_unmute,
)
if on_volume := config.get(CONF_ON_VOLUME):
await automation.build_automation(
var.get_volume_trigger(),
[(cg.float_, "x")],
on_volume,
)
for file_config in config.get(CONF_FILES, []):
data, media_file_type = _read_audio_file_and_type(file_config)
rhs = [HexInt(x) for x in data]
prog_arr = cg.progmem_array(file_config[CONF_RAW_DATA_ID], rhs)
media_files_struct = cg.StructInitializer(
audio.AudioFile,
(
"data",
prog_arr,
),
(
"length",
len(rhs),
),
(
"file_type",
media_file_type,
),
)
cg.new_Pvariable(
file_config[CONF_ID],
media_files_struct,
)
@automation.register_action(
"media_player.speaker.play_on_device_media_file",
PlayOnDeviceMediaAction,
cv.maybe_simple_value(
{
cv.GenerateID(): cv.use_id(SpeakerMediaPlayer),
cv.Required(CONF_MEDIA_FILE): cv.use_id(audio.AudioFile),
cv.Optional(CONF_ANNOUNCEMENT, default=False): cv.templatable(cv.boolean),
cv.Optional(CONF_ENQUEUE, default=False): cv.templatable(cv.boolean),
},
key=CONF_MEDIA_FILE,
),
)
async def play_on_device_media_action(config, action_id, template_arg, args):
var = cg.new_Pvariable(action_id, template_arg)
await cg.register_parented(var, config[CONF_ID])
media_file = await cg.get_variable(config[CONF_MEDIA_FILE])
announcement = await cg.templatable(config[CONF_ANNOUNCEMENT], args, cg.bool_)
enqueue = await cg.templatable(config[CONF_ENQUEUE], args, cg.bool_)
cg.add(var.set_audio_file(media_file))
cg.add(var.set_announcement(announcement))
cg.add(var.set_enqueue(enqueue))
return var
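For illustration (not part of the diff), roughly the C++ initializer that _get_supported_format_struct above generates for a hypothetical 48 kHz, mono, FLAC announcement pipeline; the field names mirror MediaPlayerSupportedFormat in media_player.h:

// Approximate generated code (illustrative values only)
media_player::MediaPlayerSupportedFormat{
    .format = "flac",
    .sample_rate = 48000,
    .num_channels = 1,
    .purpose = media_player::MediaPlayerFormatPurpose::PURPOSE_ANNOUNCEMENT,
    .sample_bytes = 2,  // appended because the format is not MP3
}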

View File

@@ -0,0 +1,560 @@
#include "audio_pipeline.h"
#ifdef USE_ESP_IDF
#include "esphome/core/defines.h"
#include "esphome/core/hal.h"
#include "esphome/core/helpers.h"
#include "esphome/core/log.h"
namespace esphome {
namespace speaker {
static const uint32_t INITIAL_BUFFER_MS = 1000; // Start playback after buffering this duration of the file
static const uint32_t READ_TASK_STACK_SIZE = 5 * 1024;
static const uint32_t DECODE_TASK_STACK_SIZE = 3 * 1024;
static const uint32_t INFO_ERROR_QUEUE_COUNT = 5;
static const char *const TAG = "speaker_media_player.pipeline";
enum EventGroupBits : uint32_t {
// MESSAGE_* bits are only set by their respective tasks
// Stops all activity in the pipeline elements; cleared by process_state() and set by stop() or by each task
PIPELINE_COMMAND_STOP = (1 << 0),
// Read audio from an HTTP source; cleared by reader task and set by start_url
READER_COMMAND_INIT_HTTP = (1 << 4),
// Read audio from an audio file from the flash; cleared by reader task and set by start_file
READER_COMMAND_INIT_FILE = (1 << 5),
// Audio file type is read after checking it is supported; cleared by decoder task
READER_MESSAGE_LOADED_MEDIA_TYPE = (1 << 6),
// Reader is done (either through a failure or just end of the stream); cleared by reader task
READER_MESSAGE_FINISHED = (1 << 7),
// Error reading the file; cleared by process_state()
READER_MESSAGE_ERROR = (1 << 8),
// Decoder is done (either through a failure or the end of the stream); cleared by decoder task
DECODER_MESSAGE_FINISHED = (1 << 12),
// Error decoding the file; cleared by process_state() and set by decoder task
DECODER_MESSAGE_ERROR = (1 << 13),
};
AudioPipeline::AudioPipeline(speaker::Speaker *speaker, size_t buffer_size, bool task_stack_in_psram,
std::string base_name, UBaseType_t priority)
: base_name_(std::move(base_name)),
priority_(priority),
task_stack_in_psram_(task_stack_in_psram),
speaker_(speaker),
buffer_size_(buffer_size) {
this->allocate_communications_();
this->transfer_buffer_size_ = std::min(buffer_size_ / 4, DEFAULT_TRANSFER_BUFFER_SIZE);
}
void AudioPipeline::start_url(const std::string &uri) {
if (this->is_playing_) {
xEventGroupSetBits(this->event_group_, PIPELINE_COMMAND_STOP);
}
this->current_uri_ = uri;
this->pending_url_ = true;
}
void AudioPipeline::start_file(audio::AudioFile *audio_file) {
if (this->is_playing_) {
xEventGroupSetBits(this->event_group_, PIPELINE_COMMAND_STOP);
}
this->current_audio_file_ = audio_file;
this->pending_file_ = true;
}
esp_err_t AudioPipeline::stop() {
xEventGroupSetBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP);
return ESP_OK;
}
void AudioPipeline::set_pause_state(bool pause_state) {
this->speaker_->set_pause_state(pause_state);
this->pause_state_ = pause_state;
}
void AudioPipeline::suspend_tasks() {
if (this->read_task_handle_ != nullptr) {
vTaskSuspend(this->read_task_handle_);
}
if (this->decode_task_handle_ != nullptr) {
vTaskSuspend(this->decode_task_handle_);
}
}
void AudioPipeline::resume_tasks() {
if (this->read_task_handle_ != nullptr) {
vTaskResume(this->read_task_handle_);
}
if (this->decode_task_handle_ != nullptr) {
vTaskResume(this->decode_task_handle_);
}
}
AudioPipelineState AudioPipeline::process_state() {
/*
* Log items from info error queue
*/
InfoErrorEvent event;
if (this->info_error_queue_ != nullptr) {
while (xQueueReceive(this->info_error_queue_, &event, 0)) {
switch (event.source) {
case InfoErrorSource::READER:
if (event.err.has_value()) {
ESP_LOGE(TAG, "Media reader encountered an error: %s", esp_err_to_name(event.err.value()));
} else if (event.file_type.has_value()) {
ESP_LOGD(TAG, "Reading %s file type", audio_file_type_to_string(event.file_type.value()));
}
break;
case InfoErrorSource::DECODER:
if (event.err.has_value()) {
ESP_LOGE(TAG, "Decoder encountered an error: %s", esp_err_to_name(event.err.value()));
}
if (event.audio_stream_info.has_value()) {
ESP_LOGD(TAG, "Decoded audio has %d channels, %" PRId32 " Hz sample rate, and %d bits per sample",
event.audio_stream_info.value().get_channels(), event.audio_stream_info.value().get_sample_rate(),
event.audio_stream_info.value().get_bits_per_sample());
}
if (event.decoding_err.has_value()) {
switch (event.decoding_err.value()) {
case DecodingError::FAILED_HEADER:
ESP_LOGE(TAG, "Failed to parse the file's header.");
break;
case DecodingError::INCOMPATIBLE_BITS_PER_SAMPLE:
ESP_LOGE(TAG, "Incompatible bits per sample. Only 16 bits per sample is supported");
break;
case DecodingError::INCOMPATIBLE_CHANNELS:
ESP_LOGE(TAG, "Incompatible number of channels. Only 1 or 2 channel audio is supported.");
break;
}
}
break;
}
}
}
/*
* Determine the current state based on the event group bits and tasks' status
*/
EventBits_t event_bits = xEventGroupGetBits(this->event_group_);
if (this->pending_url_ || this->pending_file_) {
// Init command pending
if (!(event_bits & EventGroupBits::PIPELINE_COMMAND_STOP)) {
// Only start if there is no pending stop command
if ((this->read_task_handle_ == nullptr) || (this->decode_task_handle_ == nullptr)) {
// At least one task isn't running
this->start_tasks_();
}
if (this->pending_url_) {
xEventGroupSetBits(this->event_group_, EventGroupBits::READER_COMMAND_INIT_HTTP);
this->playback_ms_ = 0;
this->pending_url_ = false;
} else if (this->pending_file_) {
xEventGroupSetBits(this->event_group_, EventGroupBits::READER_COMMAND_INIT_FILE);
this->playback_ms_ = 0;
this->pending_file_ = false;
}
this->is_playing_ = true;
return AudioPipelineState::PLAYING;
}
}
if ((event_bits & EventGroupBits::READER_MESSAGE_FINISHED) &&
(!(event_bits & EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE) &&
(event_bits & EventGroupBits::DECODER_MESSAGE_FINISHED))) {
// Tasks are finished and there's no media in between the reader and decoder
if (event_bits & EventGroupBits::PIPELINE_COMMAND_STOP) {
// Stop command is fully processed, so clear the command bit
xEventGroupClearBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP);
}
if (!this->is_playing_) {
// The tasks have been stopped for two ``process_state`` calls in a row, so delete the tasks
if ((this->read_task_handle_ != nullptr) || (this->decode_task_handle_ != nullptr)) {
this->delete_tasks_();
this->speaker_->stop();
}
}
this->is_playing_ = false;
return AudioPipelineState::STOPPED;
}
if ((event_bits & EventGroupBits::READER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, EventGroupBits::READER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_READING;
}
if ((event_bits & EventGroupBits::DECODER_MESSAGE_ERROR)) {
xEventGroupClearBits(this->event_group_, EventGroupBits::DECODER_MESSAGE_ERROR);
return AudioPipelineState::ERROR_DECODING;
}
if (this->pause_state_) {
return AudioPipelineState::PAUSED;
}
if ((this->read_task_handle_ == nullptr) && (this->decode_task_handle_ == nullptr)) {
// No tasks are running, so the pipeline is stopped.
xEventGroupClearBits(this->event_group_, EventGroupBits::PIPELINE_COMMAND_STOP);
return AudioPipelineState::STOPPED;
}
this->is_playing_ = true;
return AudioPipelineState::PLAYING;
}
esp_err_t AudioPipeline::allocate_communications_() {
if (this->event_group_ == nullptr)
this->event_group_ = xEventGroupCreate();
if (this->event_group_ == nullptr) {
return ESP_ERR_NO_MEM;
}
if (this->info_error_queue_ == nullptr)
this->info_error_queue_ = xQueueCreate(INFO_ERROR_QUEUE_COUNT, sizeof(InfoErrorEvent));
if (this->info_error_queue_ == nullptr)
return ESP_ERR_NO_MEM;
return ESP_OK;
}
esp_err_t AudioPipeline::start_tasks_() {
if (this->read_task_handle_ == nullptr) {
if (this->read_task_stack_buffer_ == nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
this->read_task_stack_buffer_ = stack_allocator.allocate(READ_TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
this->read_task_stack_buffer_ = stack_allocator.allocate(READ_TASK_STACK_SIZE);
}
}
if (this->read_task_stack_buffer_ == nullptr) {
return ESP_ERR_NO_MEM;
}
if (this->read_task_handle_ == nullptr) {
this->read_task_handle_ =
xTaskCreateStatic(read_task, (this->base_name_ + "_read").c_str(), READ_TASK_STACK_SIZE, (void *) this,
this->priority_, this->read_task_stack_buffer_, &this->read_task_stack_);
}
if (this->read_task_handle_ == nullptr) {
return ESP_ERR_INVALID_STATE;
}
}
if (this->decode_task_handle_ == nullptr) {
if (this->decode_task_stack_buffer_ == nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
this->decode_task_stack_buffer_ = stack_allocator.allocate(DECODE_TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
this->decode_task_stack_buffer_ = stack_allocator.allocate(DECODE_TASK_STACK_SIZE);
}
}
if (this->decode_task_stack_buffer_ == nullptr) {
return ESP_ERR_NO_MEM;
}
if (this->decode_task_handle_ == nullptr) {
this->decode_task_handle_ =
xTaskCreateStatic(decode_task, (this->base_name_ + "_decode").c_str(), DECODE_TASK_STACK_SIZE, (void *) this,
this->priority_, this->decode_task_stack_buffer_, &this->decode_task_stack_);
}
if (this->decode_task_handle_ == nullptr) {
return ESP_ERR_INVALID_STATE;
}
}
return ESP_OK;
}
void AudioPipeline::delete_tasks_() {
if (this->read_task_handle_ != nullptr) {
vTaskDelete(this->read_task_handle_);
if (this->read_task_stack_buffer_ != nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
stack_allocator.deallocate(this->read_task_stack_buffer_, READ_TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
stack_allocator.deallocate(this->read_task_stack_buffer_, READ_TASK_STACK_SIZE);
}
this->read_task_stack_buffer_ = nullptr;
this->read_task_handle_ = nullptr;
}
}
if (this->decode_task_handle_ != nullptr) {
vTaskDelete(this->decode_task_handle_);
if (this->decode_task_stack_buffer_ != nullptr) {
if (this->task_stack_in_psram_) {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_EXTERNAL);
stack_allocator.deallocate(this->decode_task_stack_buffer_, DECODE_TASK_STACK_SIZE);
} else {
RAMAllocator<StackType_t> stack_allocator(RAMAllocator<StackType_t>::ALLOC_INTERNAL);
stack_allocator.deallocate(this->decode_task_stack_buffer_, DECODE_TASK_STACK_SIZE);
}
this->decode_task_stack_buffer_ = nullptr;
this->decode_task_handle_ = nullptr;
}
}
}
void AudioPipeline::read_task(void *params) {
AudioPipeline *this_pipeline = (AudioPipeline *) params;
while (true) {
xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED);
// Wait until the pipeline notifies us the source of the media file
EventBits_t event_bits =
xEventGroupWaitBits(this_pipeline->event_group_,
EventGroupBits::READER_COMMAND_INIT_FILE | EventGroupBits::READER_COMMAND_INIT_HTTP |
EventGroupBits::PIPELINE_COMMAND_STOP, // Bit message to read
pdFALSE, // Don't clear the bits on exit
pdFALSE, // Don't wait for all the bits; any one of them unblocks
portMAX_DELAY); // Block indefinitely until bit is set
if (!(event_bits & EventGroupBits::PIPELINE_COMMAND_STOP)) {
xEventGroupClearBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_FINISHED |
EventGroupBits::READER_COMMAND_INIT_FILE |
EventGroupBits::READER_COMMAND_INIT_HTTP);
InfoErrorEvent event;
event.source = InfoErrorSource::READER;
esp_err_t err = ESP_OK;
std::unique_ptr<audio::AudioReader> reader =
make_unique<audio::AudioReader>(this_pipeline->transfer_buffer_size_);
if (event_bits & EventGroupBits::READER_COMMAND_INIT_FILE) {
err = reader->start(this_pipeline->current_audio_file_, this_pipeline->current_audio_file_type_);
} else {
err = reader->start(this_pipeline->current_uri_, this_pipeline->current_audio_file_type_);
}
if (err == ESP_OK) {
size_t file_ring_buffer_size = this_pipeline->buffer_size_;
std::shared_ptr<RingBuffer> temp_ring_buffer;
if (!this_pipeline->raw_file_ring_buffer_.use_count()) {
temp_ring_buffer = RingBuffer::create(file_ring_buffer_size);
this_pipeline->raw_file_ring_buffer_ = temp_ring_buffer;
}
if (!this_pipeline->raw_file_ring_buffer_.use_count()) {
err = ESP_ERR_NO_MEM;
} else {
reader->add_sink(this_pipeline->raw_file_ring_buffer_);
}
}
if (err != ESP_OK) {
// Send specific error message
event.err = err;
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
// Setting up the reader failed, stop the pipeline
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
} else {
// Send the file type to the pipeline
event.file_type = this_pipeline->current_audio_file_type_;
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE);
}
while (true) {
event_bits = xEventGroupGetBits(this_pipeline->event_group_);
if (event_bits & EventGroupBits::PIPELINE_COMMAND_STOP) {
break;
}
audio::AudioReaderState reader_state = reader->read();
if (reader_state == audio::AudioReaderState::FINISHED) {
break;
} else if (reader_state == audio::AudioReaderState::FAILED) {
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::READER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
break;
}
}
event_bits = xEventGroupGetBits(this_pipeline->event_group_);
if ((event_bits & EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE) ||
(this_pipeline->raw_file_ring_buffer_.use_count() == 1)) {
// Decoder task hasn't started yet, so delay a bit before releasing ownership of the ring buffer
delay(10);
}
}
}
}
void AudioPipeline::decode_task(void *params) {
AudioPipeline *this_pipeline = (AudioPipeline *) params;
while (true) {
xEventGroupSetBits(this_pipeline->event_group_, EventGroupBits::DECODER_MESSAGE_FINISHED);
// Wait until the reader notifies us that the media type is available
EventBits_t event_bits = xEventGroupWaitBits(this_pipeline->event_group_,
EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE |
EventGroupBits::PIPELINE_COMMAND_STOP, // Bit message to read
pdFALSE, // Don't clear the bits on exit
pdFALSE, // Don't wait for all the bits; any one of them unblocks
portMAX_DELAY); // Block indefinitely until bit is set
if (!(event_bits & EventGroupBits::PIPELINE_COMMAND_STOP)) {
xEventGroupClearBits(this_pipeline->event_group_,
EventGroupBits::DECODER_MESSAGE_FINISHED | EventGroupBits::READER_MESSAGE_LOADED_MEDIA_TYPE);
InfoErrorEvent event;
event.source = InfoErrorSource::DECODER;
std::unique_ptr<audio::AudioDecoder> decoder =
make_unique<audio::AudioDecoder>(this_pipeline->transfer_buffer_size_, this_pipeline->transfer_buffer_size_);
esp_err_t err = decoder->start(this_pipeline->current_audio_file_type_);
decoder->add_source(this_pipeline->raw_file_ring_buffer_);
if (err != ESP_OK) {
// Send specific error message
event.err = err;
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
// Setting up the decoder failed, stop the pipeline
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
}
bool has_stream_info = false;
bool started_playback = false;
size_t initial_bytes_to_buffer = 0;
while (true) {
event_bits = xEventGroupGetBits(this_pipeline->event_group_);
if (event_bits & EventGroupBits::PIPELINE_COMMAND_STOP) {
break;
}
// Update pause state
if (!started_playback) {
if (!(event_bits & EventGroupBits::READER_MESSAGE_FINISHED)) {
decoder->set_pause_output_state(true);
} else {
started_playback = true;
}
} else {
decoder->set_pause_output_state(this_pipeline->pause_state_);
}
// Stop gracefully if the reader has finished
audio::AudioDecoderState decoder_state = decoder->decode(event_bits & EventGroupBits::READER_MESSAGE_FINISHED);
if ((decoder_state == audio::AudioDecoderState::DECODING) ||
(decoder_state == audio::AudioDecoderState::FINISHED)) {
this_pipeline->playback_ms_ = decoder->get_playback_ms();
}
if (decoder_state == audio::AudioDecoderState::FINISHED) {
break;
} else if (decoder_state == audio::AudioDecoderState::FAILED) {
if (!has_stream_info) {
event.decoding_err = DecodingError::FAILED_HEADER;
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
}
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
break;
}
if (!has_stream_info && decoder->get_audio_stream_info().has_value()) {
has_stream_info = true;
this_pipeline->current_audio_stream_info_ = decoder->get_audio_stream_info().value();
// Send the stream information to the pipeline
event.audio_stream_info = this_pipeline->current_audio_stream_info_;
if (this_pipeline->current_audio_stream_info_.get_bits_per_sample() != 16) {
// Error state, incompatible bits per sample
event.decoding_err = DecodingError::INCOMPATIBLE_BITS_PER_SAMPLE;
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
} else if ((this_pipeline->current_audio_stream_info_.get_channels() > 2)) {
// Error state, incompatible number of channels
event.decoding_err = DecodingError::INCOMPATIBLE_CHANNELS;
xEventGroupSetBits(this_pipeline->event_group_,
EventGroupBits::DECODER_MESSAGE_ERROR | EventGroupBits::PIPELINE_COMMAND_STOP);
} else {
// Send audio directly to the speaker
this_pipeline->speaker_->set_audio_stream_info(this_pipeline->current_audio_stream_info_);
decoder->add_sink(this_pipeline->speaker_);
}
initial_bytes_to_buffer = std::min(this_pipeline->current_audio_stream_info_.ms_to_bytes(INITIAL_BUFFER_MS),
this_pipeline->buffer_size_ * 3 / 4);
switch (this_pipeline->current_audio_file_type_) {
#ifdef USE_AUDIO_MP3_SUPPORT
case audio::AudioFileType::MP3:
initial_bytes_to_buffer /= 8; // Estimate the MP3 compression factor is 8
break;
#endif
#ifdef USE_AUDIO_FLAC_SUPPORT
case audio::AudioFileType::FLAC:
initial_bytes_to_buffer /= 2; // Estimate the FLAC compression factor is 2
break;
#endif
default:
break;
}
xQueueSend(this_pipeline->info_error_queue_, &event, portMAX_DELAY);
}
if (!started_playback && has_stream_info) {
// Verify enough data is available before starting playback
std::shared_ptr<RingBuffer> temp_ring_buffer = this_pipeline->raw_file_ring_buffer_.lock();
if (temp_ring_buffer->available() >= initial_bytes_to_buffer) {
started_playback = true;
}
}
}
}
}
}
} // namespace speaker
} // namespace esphome
#endif

View File

@@ -0,0 +1,158 @@
#pragma once
#ifdef USE_ESP_IDF
#include "esphome/components/audio/audio.h"
#include "esphome/components/audio/audio_reader.h"
#include "esphome/components/audio/audio_decoder.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/ring_buffer.h"
#include "esp_err.h"
#include <freertos/FreeRTOS.h>
#include <freertos/event_groups.h>
#include <freertos/queue.h>
namespace esphome {
namespace speaker {
// Internal sink/source buffers for reader and decoder
static const size_t DEFAULT_TRANSFER_BUFFER_SIZE = 24 * 1024;
enum class AudioPipelineType : uint8_t {
MEDIA,
ANNOUNCEMENT,
};
enum class AudioPipelineState : uint8_t {
STARTING_FILE,
STARTING_URL,
PLAYING,
STOPPING,
STOPPED,
PAUSED,
ERROR_READING,
ERROR_DECODING,
};
enum class InfoErrorSource : uint8_t {
READER = 0,
DECODER,
};
enum class DecodingError : uint8_t {
FAILED_HEADER = 0,
INCOMPATIBLE_BITS_PER_SAMPLE,
INCOMPATIBLE_CHANNELS,
};
// Used to pass information from each task.
struct InfoErrorEvent {
InfoErrorSource source;
optional<esp_err_t> err;
optional<audio::AudioFileType> file_type;
optional<audio::AudioStreamInfo> audio_stream_info;
optional<DecodingError> decoding_err;
};
class AudioPipeline {
public:
/// @param speaker ESPHome speaker component for pipeline's audio output
/// @param buffer_size Size of the buffer in bytes between the reader and decoder
/// @param task_stack_in_psram True if the task stack should be allocated in PSRAM, false otherwise
/// @param base_name FreeRTOS task base name
/// @param priority FreeRTOS task priority
AudioPipeline(speaker::Speaker *speaker, size_t buffer_size, bool task_stack_in_psram, std::string base_name,
UBaseType_t priority);
/// @brief Starts an audio pipeline given a media url
/// @param uri media file url
void start_url(const std::string &uri);
/// @brief Starts an audio pipeline given an AudioFile pointer
/// @param audio_file pointer to an AudioFile object
void start_file(audio::AudioFile *audio_file);
/// @brief Stops the pipeline by sending a stop command to each task (if running).
/// @return ESP_OK
esp_err_t stop();
/// @brief Processes the state of the audio pipeline based on the info_error_queue_ and event_group_. Handles creating
/// and stopping the pipeline tasks. Needs to be regularly called to update the internal pipeline state.
/// @return AudioPipelineState
AudioPipelineState process_state();
/// @brief Suspends any running tasks
void suspend_tasks();
/// @brief Resumes any running tasks
void resume_tasks();
uint32_t get_playback_ms() { return this->playback_ms_; }
void set_pause_state(bool pause_state);
protected:
/// @brief Allocates the event group and info error queue.
/// @return ESP_OK if successful or ESP_ERR_NO_MEM if it is unable to allocate all parts
esp_err_t allocate_communications_();
/// @brief Common start code for the pipeline, regardless of whether the source is a file or url.
/// @return ESP_OK if successful or an appropriate error if not
esp_err_t start_tasks_();
/// @brief Resets the task related pointers and deallocates their stacks.
void delete_tasks_();
std::string base_name_;
UBaseType_t priority_;
uint32_t playback_ms_{0};
bool is_playing_{false};
bool pause_state_{false};
bool task_stack_in_psram_;
// Pending file start state used to ensure the pipeline fully stops before attempting to start the next file
bool pending_url_{false};
bool pending_file_{false};
speaker::Speaker *speaker_{nullptr};
std::string current_uri_{};
audio::AudioFile *current_audio_file_{nullptr};
audio::AudioFileType current_audio_file_type_;
audio::AudioStreamInfo current_audio_stream_info_;
size_t buffer_size_; // Ring buffer between reader and decoder
size_t transfer_buffer_size_; // Internal source/sink buffers for the audio reader and decoder
std::weak_ptr<RingBuffer> raw_file_ring_buffer_;
// Handles basic control/state of the two tasks
EventGroupHandle_t event_group_{nullptr};
// Receives detailed info (file type, stream info) or specific errors from the two tasks
QueueHandle_t info_error_queue_{nullptr};
// Handles reading the media file from flash or a url
static void read_task(void *params);
TaskHandle_t read_task_handle_{nullptr};
StaticTask_t read_task_stack_;
StackType_t *read_task_stack_buffer_{nullptr};
// Decodes the media file into PCM audio
static void decode_task(void *params);
TaskHandle_t decode_task_handle_{nullptr};
StaticTask_t decode_task_stack_;
StackType_t *decode_task_stack_buffer_{nullptr};
};
} // namespace speaker
} // namespace esphome
#endif
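A minimal sketch (not part of this commit) of how the owning component drives AudioPipeline, assuming speaker points to a configured speaker::Speaker; SpeakerMediaPlayer does the equivalent from its setup() and loop():

// Construction: 1 MB reader/decoder ring buffer, task stacks in internal RAM,
// task base name "ann", FreeRTOS priority 1.
std::unique_ptr<AudioPipeline> pipeline =
    make_unique<AudioPipeline>(speaker, 1000000, /*task_stack_in_psram=*/false, "ann", /*priority=*/1);

pipeline->start_url("https://example.com/announcement.flac");  // or pipeline->start_file(file)

// Poll regularly (e.g. from loop()): this creates the reader/decoder tasks on demand,
// logs anything queued in the info/error queue, and eventually tears the tasks down again.
AudioPipelineState state = pipeline->process_state();
if (state == AudioPipelineState::ERROR_READING || state == AudioPipelineState::ERROR_DECODING) {
  // surface the error, e.g. through the media player's status
}

// pipeline->stop() requests a stop; keep calling process_state() until it reports STOPPED.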

View File

@@ -0,0 +1,26 @@
#pragma once
#include "speaker_media_player.h"
#ifdef USE_ESP_IDF
#include "esphome/components/audio/audio.h"
#include "esphome/core/automation.h"
namespace esphome {
namespace speaker {
template<typename... Ts> class PlayOnDeviceMediaAction : public Action<Ts...>, public Parented<SpeakerMediaPlayer> {
TEMPLATABLE_VALUE(audio::AudioFile *, audio_file)
TEMPLATABLE_VALUE(bool, announcement)
TEMPLATABLE_VALUE(bool, enqueue)
void play(Ts... x) override {
this->parent_->play_file(this->audio_file_.value(x...), this->announcement_.value(x...),
this->enqueue_.value(x...));
}
};
} // namespace speaker
} // namespace esphome
#endif
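At run time the action above reduces to a single call on the parent media player; a minimal sketch, where player is the SpeakerMediaPlayer instance and chime is a hypothetical compiled-in audio::AudioFile pointer:

// Equivalent of PlayOnDeviceMediaAction::play() with fixed (non-templated) values.
player->play_file(chime, /*announcement=*/true, /*enqueue=*/false);
// With enqueue == true the file is appended to the current playlist instead of replacing it.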

View File

@@ -0,0 +1,577 @@
#include "speaker_media_player.h"
#ifdef USE_ESP_IDF
#include "esphome/core/log.h"
#include "esphome/components/audio/audio.h"
#ifdef USE_OTA
#include "esphome/components/ota/ota_backend.h"
#endif
namespace esphome {
namespace speaker {
// Framework:
// - Media player that can handle two streams: one for media and one for announcements
// - Each stream has an individual speaker component for output
// - Each stream is handled by an ``AudioPipeline`` object with two parts/tasks
// - ``AudioReader`` handles reading from an HTTP source or from an audio file stored in flash (PROGMEM) at compile time
// - ``AudioDecoder`` handles decoding the audio file. All formats are limited to two channels and 16 bits per sample
// - FLAC
// - MP3 (based on the libhelix decoder)
// - WAV
// - Each task runs until it is done processing the file or it receives a stop command
// - Inter-task communication uses a FreeRTOS Event Group
// - The ``AudioPipeline`` sets up a ring buffer between the reader and decoder tasks. The decoder task outputs audio
// directly to a speaker component.
// - The pipeline's internal state needs to be processed by regularly calling ``process_state``.
// - Generic media player commands are received by the ``control`` function. The commands are added to the
// ``media_control_command_queue_`` to be processed in the component's loop
// - Local file playback is initiated with ``play_file``, which adds it to the ``media_control_command_queue_``
// - Starting a stream initializes the appropriate pipeline or stops it if it is already running
// - Volume and mute commands are handled by the ``mute``, ``unmute``, and ``set_volume`` functions.
// - Volume commands are ignored if the media control queue is full to avoid crashing with rapid volume
// increases/decreases.
// - These functions all send the appropriate information to the speakers to implement.
// - Pausing is implemented in the decoder task and is also sent directly to the media speaker component to decrease
// latency.
// - The component's main loop performs housekeeping:
// - It reads the media control queue and processes it directly
// - It determines the overall state of the media player by considering the state of each pipeline
// - announcement playback takes highest priority
// - Handles playlists and repeating by starting the appropriate file when a previous file is finished
// - Logging only happens in the main loop task to reduce task stack memory usage.
static const uint32_t MEDIA_CONTROLS_QUEUE_LENGTH = 20;
static const UBaseType_t MEDIA_PIPELINE_TASK_PRIORITY = 1;
static const UBaseType_t ANNOUNCEMENT_PIPELINE_TASK_PRIORITY = 1;
static const float FIRST_BOOT_DEFAULT_VOLUME = 0.5f;
static const char *const TAG = "speaker_media_player";
void SpeakerMediaPlayer::setup() {
state = media_player::MEDIA_PLAYER_STATE_IDLE;
this->media_control_command_queue_ = xQueueCreate(MEDIA_CONTROLS_QUEUE_LENGTH, sizeof(MediaCallCommand));
this->pref_ = global_preferences->make_preference<VolumeRestoreState>(this->get_object_id_hash());
VolumeRestoreState volume_restore_state;
if (this->pref_.load(&volume_restore_state)) {
this->set_volume_(volume_restore_state.volume);
this->set_mute_state_(volume_restore_state.is_muted);
} else {
this->set_volume_(FIRST_BOOT_DEFAULT_VOLUME);
this->set_mute_state_(false);
}
#ifdef USE_OTA
ota::get_global_ota_callback()->add_on_state_callback(
[this](ota::OTAState state, float progress, uint8_t error, ota::OTAComponent *comp) {
if (state == ota::OTA_STARTED) {
if (this->media_pipeline_ != nullptr) {
this->media_pipeline_->suspend_tasks();
}
if (this->announcement_pipeline_ != nullptr) {
this->announcement_pipeline_->suspend_tasks();
}
} else if (state == ota::OTA_ERROR) {
if (this->media_pipeline_ != nullptr) {
this->media_pipeline_->resume_tasks();
}
if (this->announcement_pipeline_ != nullptr) {
this->announcement_pipeline_->resume_tasks();
}
}
});
#endif
this->announcement_pipeline_ =
make_unique<AudioPipeline>(this->announcement_speaker_, this->buffer_size_, this->task_stack_in_psram_, "ann",
ANNOUNCEMENT_PIPELINE_TASK_PRIORITY);
if (this->announcement_pipeline_ == nullptr) {
ESP_LOGE(TAG, "Failed to create announcement pipeline");
this->mark_failed();
}
if (!this->single_pipeline_()) {
this->media_pipeline_ = make_unique<AudioPipeline>(this->media_speaker_, this->buffer_size_,
this->task_stack_in_psram_, "med", MEDIA_PIPELINE_TASK_PRIORITY);
if (this->media_pipeline_ == nullptr) {
ESP_LOGE(TAG, "Failed to create media pipeline");
this->mark_failed();
}
// Setup callback to track the duration of audio played by the media pipeline
this->media_speaker_->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
this->playback_ms_ += new_playback_ms;
this->remainder_us_ = remainder_us;
this->pending_ms_ = pending_ms;
this->last_audio_write_timestamp_ = write_timestamp;
this->playback_us_ = this->playback_ms_ * 1000 + this->remainder_us_;
});
}
ESP_LOGI(TAG, "Set up speaker media player");
}
void SpeakerMediaPlayer::set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms) {
switch (pipeline_type) {
case AudioPipelineType::ANNOUNCEMENT:
this->announcement_playlist_delay_ms_ = delay_ms;
break;
case AudioPipelineType::MEDIA:
this->media_playlist_delay_ms_ = delay_ms;
break;
}
}
void SpeakerMediaPlayer::watch_media_commands_() {
if (!this->is_ready()) {
return;
}
MediaCallCommand media_command;
esp_err_t err = ESP_OK;
if (xQueueReceive(this->media_control_command_queue_, &media_command, 0) == pdTRUE) {
bool new_url = media_command.new_url.has_value() && media_command.new_url.value();
bool new_file = media_command.new_file.has_value() && media_command.new_file.value();
if (new_url || new_file) {
bool enqueue = media_command.enqueue.has_value() && media_command.enqueue.value();
if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) {
// Announcement playlist/pipeline
if (!enqueue) {
// Clear the queue and ensure the loaded next item doesn't start playing
this->cancel_timeout("next_ann");
this->announcement_playlist_.clear();
}
PlaylistItem playlist_item;
if (new_url) {
playlist_item.url = this->announcement_url_;
if (!enqueue) {
// Not adding to the queue, so directly start playback and internally unpause the pipeline
this->announcement_pipeline_->start_url(playlist_item.url.value());
this->announcement_pipeline_->set_pause_state(false);
}
} else {
playlist_item.file = this->announcement_file_;
if (!enqueue) {
// Not adding to the queue, so directly start playback and internally unpause the pipeline
this->announcement_pipeline_->start_file(playlist_item.file.value());
this->announcement_pipeline_->set_pause_state(false);
}
}
this->announcement_playlist_.push_back(playlist_item);
} else {
// Media playlist/pipeline
if (!enqueue) {
// Clear the queue and ensure the loaded next item doesn't start playing
this->cancel_timeout("next_media");
this->media_playlist_.clear();
}
this->is_paused_ = false;
PlaylistItem playlist_item;
if (new_url) {
playlist_item.url = this->media_url_;
if (!enqueue) {
// Not adding to the queue, so directly start playback and internally unpause the pipeline
this->media_pipeline_->start_url(playlist_item.url.value());
this->media_pipeline_->set_pause_state(false);
}
} else {
playlist_item.file = this->media_file_;
if (!enqueue) {
// Not adding to the queue, so directly start playback and internally unpause the pipeline
this->media_pipeline_->start_file(playlist_item.file.value());
this->media_pipeline_->set_pause_state(false);
}
}
this->media_playlist_.push_back(playlist_item);
}
if (err != ESP_OK) {
ESP_LOGE(TAG, "Error starting the audio pipeline: %s", esp_err_to_name(err));
this->status_set_error();
} else {
this->status_clear_error();
}
return; // Don't process the new file play command further
}
if (media_command.volume.has_value()) {
this->set_volume_(media_command.volume.value());
this->publish_state();
}
if (media_command.command.has_value()) {
switch (media_command.command.value()) {
case media_player::MEDIA_PLAYER_COMMAND_PLAY:
if ((this->media_pipeline_ != nullptr) && (this->is_paused_)) {
this->media_pipeline_->set_pause_state(false);
}
this->is_paused_ = false;
break;
case media_player::MEDIA_PLAYER_COMMAND_PAUSE:
if ((this->media_pipeline_ != nullptr) && (!this->is_paused_)) {
this->media_pipeline_->set_pause_state(true);
}
this->is_paused_ = true;
break;
case media_player::MEDIA_PLAYER_COMMAND_STOP:
if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) {
if (this->announcement_pipeline_ != nullptr) {
this->cancel_timeout("next_ann");
this->announcement_playlist_.clear();
this->announcement_pipeline_->stop();
}
} else {
if (this->media_pipeline_ != nullptr) {
this->cancel_timeout("next_media");
this->media_playlist_.clear();
this->media_pipeline_->stop();
}
}
break;
case media_player::MEDIA_PLAYER_COMMAND_TOGGLE:
if (this->media_pipeline_ != nullptr) {
if (this->is_paused_) {
this->media_pipeline_->set_pause_state(false);
this->is_paused_ = false;
} else {
this->media_pipeline_->set_pause_state(true);
this->is_paused_ = true;
}
}
break;
case media_player::MEDIA_PLAYER_COMMAND_MUTE: {
this->set_mute_state_(true);
this->publish_state();
break;
}
case media_player::MEDIA_PLAYER_COMMAND_UNMUTE:
this->set_mute_state_(false);
this->publish_state();
break;
case media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP:
this->set_volume_(std::min(1.0f, this->volume + this->volume_increment_));
this->publish_state();
break;
case media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN:
this->set_volume_(std::max(0.0f, this->volume - this->volume_increment_));
this->publish_state();
break;
case media_player::MEDIA_PLAYER_COMMAND_REPEAT_ONE:
if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) {
this->announcement_repeat_one_ = true;
} else {
this->media_repeat_one_ = true;
}
break;
case media_player::MEDIA_PLAYER_COMMAND_REPEAT_OFF:
if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) {
this->announcement_repeat_one_ = false;
} else {
this->media_repeat_one_ = false;
}
break;
case media_player::MEDIA_PLAYER_COMMAND_CLEAR_PLAYLIST:
if (this->single_pipeline_() || (media_command.announce.has_value() && media_command.announce.value())) {
if (this->announcement_playlist_.empty()) {
this->announcement_playlist_.resize(1);
}
} else {
if (this->media_playlist_.empty()) {
this->media_playlist_.resize(1);
}
}
break;
default:
break;
}
}
}
}
void SpeakerMediaPlayer::loop() {
this->watch_media_commands_();
// Determine state of the media player
media_player::MediaPlayerState old_state = this->state;
AudioPipelineState old_media_pipeline_state = this->media_pipeline_state_;
if (this->media_pipeline_ != nullptr) {
this->media_pipeline_state_ = this->media_pipeline_->process_state();
this->decoded_playback_ms_ = this->media_pipeline_->get_playback_ms();
}
if (this->media_pipeline_state_ == AudioPipelineState::ERROR_READING) {
ESP_LOGE(TAG, "The media pipeline's file reader encountered an error.");
} else if (this->media_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
ESP_LOGE(TAG, "The media pipeline's audio decoder encountered an error.");
}
AudioPipelineState old_announcement_pipeline_state = this->announcement_pipeline_state_;
if (this->announcement_pipeline_ != nullptr) {
this->announcement_pipeline_state_ = this->announcement_pipeline_->process_state();
}
if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_READING) {
ESP_LOGE(TAG, "The announcement pipeline's file reader encountered an error.");
} else if (this->announcement_pipeline_state_ == AudioPipelineState::ERROR_DECODING) {
ESP_LOGE(TAG, "The announcement pipeline's audio decoder encountered an error.");
}
if (this->announcement_pipeline_state_ != AudioPipelineState::STOPPED) {
this->state = media_player::MEDIA_PLAYER_STATE_ANNOUNCING;
} else {
if (!this->announcement_playlist_.empty()) {
uint32_t timeout_ms = 0;
if (old_announcement_pipeline_state == AudioPipelineState::PLAYING) {
// Finished the current announcement file
if (!this->announcement_repeat_one_) {
// Pop item off the playlist if repeat is disabled
this->announcement_playlist_.pop_front();
}
// Only delay starting playback if moving on to the next playlist item or repeating the current item
timeout_ms = this->announcement_playlist_delay_ms_;
}
if (!this->announcement_playlist_.empty()) {
// Start the next announcement file
PlaylistItem playlist_item = this->announcement_playlist_.front();
if (playlist_item.url.has_value()) {
this->announcement_pipeline_->start_url(playlist_item.url.value());
} else if (playlist_item.file.has_value()) {
this->announcement_pipeline_->start_file(playlist_item.file.value());
}
if (timeout_ms > 0) {
// Pause pipeline internally to facilitate the delay between items
this->announcement_pipeline_->set_pause_state(true);
// Internally unpause the pipeline after the delay between playlist items
this->set_timeout("next_ann", timeout_ms,
[this]() { this->announcement_pipeline_->set_pause_state(this->is_paused_); });
}
}
} else {
if (this->is_paused_) {
this->state = media_player::MEDIA_PLAYER_STATE_PAUSED;
} else if (this->media_pipeline_state_ == AudioPipelineState::PLAYING) {
this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
} else if (this->media_pipeline_state_ == AudioPipelineState::STOPPED) {
// Reset playback durations
this->decoded_playback_ms_ = 0;
this->playback_us_ = 0;
this->playback_ms_ = 0;
this->remainder_us_ = 0;
this->pending_ms_ = 0;
if (!this->media_playlist_.empty()) {
uint32_t timeout_ms = 0;
if (old_media_pipeline_state == AudioPipelineState::PLAYING) {
// Finished the current media file
if (!this->media_repeat_one_) {
// Pop item off the playlist if repeat is disabled
this->media_playlist_.pop_front();
}
// Only delay starting playback if moving on to the next playlist item or repeating the current item
timeout_ms = this->media_playlist_delay_ms_;
}
if (!this->media_playlist_.empty()) {
PlaylistItem playlist_item = this->media_playlist_.front();
if (playlist_item.url.has_value()) {
this->media_pipeline_->start_url(playlist_item.url.value());
} else if (playlist_item.file.has_value()) {
this->media_pipeline_->start_file(playlist_item.file.value());
}
if (timeout_ms > 0) {
// Pause pipeline internally to facilitate the delay between items
this->media_pipeline_->set_pause_state(true);
// Internally unpause the pipeline after the delay between playlist items
this->set_timeout("next_media", timeout_ms,
[this]() { this->media_pipeline_->set_pause_state(this->is_paused_); });
}
}
} else {
this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
}
}
}
}
if (this->state != old_state) {
this->publish_state();
ESP_LOGD(TAG, "State changed to %s", media_player::media_player_state_to_string(this->state));
}
}
void SpeakerMediaPlayer::play_file(audio::AudioFile *media_file, bool announcement, bool enqueue) {
if (!this->is_ready()) {
// Ignore any commands sent before the media player is set up
return;
}
MediaCallCommand media_command;
media_command.new_file = true;
if (this->single_pipeline_() || announcement) {
this->announcement_file_ = media_file;
media_command.announce = true;
} else {
this->media_file_ = media_file;
media_command.announce = false;
}
media_command.enqueue = enqueue;
xQueueSend(this->media_control_command_queue_, &media_command, portMAX_DELAY);
}
void SpeakerMediaPlayer::control(const media_player::MediaPlayerCall &call) {
if (!this->is_ready()) {
// Ignore any commands sent before the media player is set up
return;
}
MediaCallCommand media_command;
if (this->single_pipeline_() || (call.get_announcement().has_value() && call.get_announcement().value())) {
media_command.announce = true;
} else {
media_command.announce = false;
}
if (call.get_media_url().has_value()) {
std::string new_uri = call.get_media_url().value();
media_command.new_url = true;
if (this->single_pipeline_() || (call.get_announcement().has_value() && call.get_announcement().value())) {
this->announcement_url_ = new_uri;
} else {
this->media_url_ = new_uri;
}
if (call.get_command().has_value()) {
if (call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_ENQUEUE) {
media_command.enqueue = true;
}
}
xQueueSend(this->media_control_command_queue_, &media_command, portMAX_DELAY);
return;
}
if (call.get_volume().has_value()) {
media_command.volume = call.get_volume().value();
// Wait 0 ticks for queue to be free, volume sets aren't that important!
xQueueSend(this->media_control_command_queue_, &media_command, 0);
return;
}
if (call.get_command().has_value()) {
media_command.command = call.get_command().value();
TickType_t ticks_to_wait = portMAX_DELAY;
if ((call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_UP) ||
(call.get_command().value() == media_player::MEDIA_PLAYER_COMMAND_VOLUME_DOWN)) {
ticks_to_wait = 0; // Wait 0 ticks for queue to be free, volume sets aren't that important!
}
xQueueSend(this->media_control_command_queue_, &media_command, ticks_to_wait);
return;
}
}
media_player::MediaPlayerTraits SpeakerMediaPlayer::get_traits() {
auto traits = media_player::MediaPlayerTraits();
if (!this->single_pipeline_()) {
traits.set_supports_pause(true);
}
if (this->announcement_format_.has_value()) {
traits.get_supported_formats().push_back(this->announcement_format_.value());
}
if (this->media_format_.has_value()) {
traits.get_supported_formats().push_back(this->media_format_.value());
} else if (this->single_pipeline_() && this->announcement_format_.has_value()) {
// Only one pipeline is defined, so use the announcement format (if configured) for the default purpose
media_player::MediaPlayerSupportedFormat media_format = this->announcement_format_.value();
media_format.purpose = media_player::MediaPlayerFormatPurpose::PURPOSE_DEFAULT;
traits.get_supported_formats().push_back(media_format);
}
return traits;
}
void SpeakerMediaPlayer::save_volume_restore_state_() {
VolumeRestoreState volume_restore_state;
volume_restore_state.volume = this->volume;
volume_restore_state.is_muted = this->is_muted_;
this->pref_.save(&volume_restore_state);
}
void SpeakerMediaPlayer::set_mute_state_(bool mute_state) {
if (this->media_speaker_ != nullptr) {
this->media_speaker_->set_mute_state(mute_state);
}
if (this->announcement_speaker_ != nullptr) {
this->announcement_speaker_->set_mute_state(mute_state);
}
bool old_mute_state = this->is_muted_;
this->is_muted_ = mute_state;
this->save_volume_restore_state_();
if (old_mute_state != mute_state) {
if (mute_state) {
this->defer([this]() { this->mute_trigger_->trigger(); });
} else {
this->defer([this]() { this->unmute_trigger_->trigger(); });
}
}
}
void SpeakerMediaPlayer::set_volume_(float volume, bool publish) {
// Remap the volume to fit within the configured limits
float bounded_volume = remap<float, float>(volume, 0.0f, 1.0f, this->volume_min_, this->volume_max_);
if (this->media_speaker_ != nullptr) {
this->media_speaker_->set_volume(bounded_volume);
}
if (this->announcement_speaker_ != nullptr) {
this->announcement_speaker_->set_volume(bounded_volume);
}
if (publish) {
this->volume = volume;
this->save_volume_restore_state_();
}
// Turn on the mute state if the volume is effectively zero, off otherwise
if (volume < 0.001) {
this->set_mute_state_(true);
} else {
this->set_mute_state_(false);
}
this->defer([this, volume]() { this->volume_trigger_->trigger(volume); });
}
} // namespace speaker
} // namespace esphome
#endif

View File

@ -0,0 +1,160 @@
#pragma once
#ifdef USE_ESP_IDF
#include "audio_pipeline.h"
#include "esphome/components/audio/audio.h"
#include "esphome/components/media_player/media_player.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/automation.h"
#include "esphome/core/component.h"
#include "esphome/core/preferences.h"
#include <deque>
#include <freertos/FreeRTOS.h>
#include <freertos/queue.h>
namespace esphome {
namespace speaker {
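// Command message passed through media_control_command_queue_ from control()/play_file() to watch_media_commands_()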
struct MediaCallCommand {
optional<media_player::MediaPlayerCommand> command;
optional<float> volume;
optional<bool> announce;
optional<bool> new_url;
optional<bool> new_file;
optional<bool> enqueue;
};
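// A single playlist entry: either a URL or a pointer to an audio::AudioFile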
struct PlaylistItem {
optional<std::string> url;
optional<audio::AudioFile *> file;
};
struct VolumeRestoreState {
float volume;
bool is_muted;
};
class SpeakerMediaPlayer : public Component, public media_player::MediaPlayer {
public:
float get_setup_priority() const override { return esphome::setup_priority::PROCESSOR; }
void setup() override;
void loop() override;
// MediaPlayer implementations
media_player::MediaPlayerTraits get_traits() override;
bool is_muted() const override { return this->is_muted_; }
void set_buffer_size(size_t buffer_size) { this->buffer_size_ = buffer_size; }
void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
// Fraction (0.0-1.0) by which to increase or decrease the volume for volume up or volume down commands
void set_volume_increment(float volume_increment) { this->volume_increment_ = volume_increment; }
void set_volume_max(float volume_max) { this->volume_max_ = volume_max; }
void set_volume_min(float volume_min) { this->volume_min_ = volume_min; }
void set_announcement_speaker(Speaker *announcement_speaker) { this->announcement_speaker_ = announcement_speaker; }
void set_announcement_format(const media_player::MediaPlayerSupportedFormat &announcement_format) {
this->announcement_format_ = announcement_format;
}
void set_media_speaker(Speaker *media_speaker) { this->media_speaker_ = media_speaker; }
void set_media_format(const media_player::MediaPlayerSupportedFormat &media_format) {
this->media_format_ = media_format;
}
Trigger<> *get_mute_trigger() const { return this->mute_trigger_; }
Trigger<> *get_unmute_trigger() const { return this->unmute_trigger_; }
Trigger<float> *get_volume_trigger() const { return this->volume_trigger_; }
void play_file(audio::AudioFile *media_file, bool announcement, bool enqueue);
uint32_t get_playback_ms() const { return this->playback_ms_; }
uint32_t get_playback_us() const { return this->playback_us_; }
uint32_t get_decoded_playback_ms() const { return this->decoded_playback_ms_; }
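// Sets the delay applied between consecutive playlist items for the given pipeline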
void set_playlist_delay_ms(AudioPipelineType pipeline_type, uint32_t delay_ms);
protected:
// Receives commands from HA or from the voice assistant component
// Sends commands to the media_control_command_queue_
void control(const media_player::MediaPlayerCall &call) override;
/// @brief Updates this->volume and saves the volume/mute state to flash for restoration if publish is true.
void set_volume_(float volume, bool publish = true);
/// @brief Sets the mute state. Restores previous volume if unmuting. Always saves volume/mute state to flash for
/// restoration.
/// @param mute_state If true, audio will be muted. If false, audio will be unmuted
void set_mute_state_(bool mute_state);
/// @brief Saves the current volume and mute state to the flash for restoration.
void save_volume_restore_state_();
/// Returns true if the media player has only the announcement pipeline defined, false if both the announcement and
/// media pipelines are defined.
inline bool single_pipeline_() { return (this->media_speaker_ == nullptr); }
// Processes commands from media_control_command_queue_.
void watch_media_commands_();
std::unique_ptr<AudioPipeline> announcement_pipeline_;
std::unique_ptr<AudioPipeline> media_pipeline_;
Speaker *media_speaker_{nullptr};
Speaker *announcement_speaker_{nullptr};
optional<media_player::MediaPlayerSupportedFormat> media_format_;
AudioPipelineState media_pipeline_state_{AudioPipelineState::STOPPED};
std::string media_url_{}; // only modified by control function
audio::AudioFile *media_file_{}; // only modified by play_file function
bool media_repeat_one_{false};
uint32_t media_playlist_delay_ms_{0};
optional<media_player::MediaPlayerSupportedFormat> announcement_format_;
AudioPipelineState announcement_pipeline_state_{AudioPipelineState::STOPPED};
std::string announcement_url_{}; // only modified by control function
audio::AudioFile *announcement_file_{}; // only modified by play_file function
bool announcement_repeat_one_{false};
uint32_t announcement_playlist_delay_ms_{0};
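// FreeRTOS queue carrying MediaCallCommand messages from control()/play_file() to watch_media_commands_()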
QueueHandle_t media_control_command_queue_;
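// Pending playlist items for each pipeline; the front entry is the one currently playing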
std::deque<PlaylistItem> announcement_playlist_;
std::deque<PlaylistItem> media_playlist_;
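// Pipeline buffer size and task stack placement, configured from YAML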
size_t buffer_size_;
bool task_stack_in_psram_;
bool is_paused_{false};
bool is_muted_{false};
// The amount to change the volume on volume up/down commands
float volume_increment_;
float volume_max_;
float volume_min_;
// Used to save volume/mute state for restoration on reboot
ESPPreferenceObject pref_;
Trigger<> *mute_trigger_ = new Trigger<>();
Trigger<> *unmute_trigger_ = new Trigger<>();
Trigger<float> *volume_trigger_ = new Trigger<float>();
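// Playback position bookkeeping; reset when the media pipeline stops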
uint32_t decoded_playback_ms_{0};
uint32_t playback_us_{0};
uint32_t playback_ms_{0};
uint32_t remainder_us_{0};
uint32_t pending_ms_{0};
uint32_t last_audio_write_timestamp_{0};
};
} // namespace speaker
} // namespace esphome
#endif

View File

@ -21,6 +21,8 @@ media_player:
- media_player.pause:
on_play:
- media_player.stop:
- media_player.stop:
announcement: true
on_pause:
- media_player.toggle:
- wait_until:

View File

@ -0,0 +1,12 @@
<<: !include common.yaml
media_player:
  - platform: speaker
    id: speaker_media_player_id
    announcement_pipeline:
      speaker: speaker_id
    buffer_size: 1000000
    volume_increment: 0.02
    volume_max: 0.95
    volume_min: 0.0
    task_stack_in_psram: true

View File

@ -0,0 +1,9 @@
substitutions:
  scl_pin: GPIO16
  sda_pin: GPIO17
  i2s_bclk_pin: GPIO27
  i2s_lrclk_pin: GPIO26
  i2s_mclk_pin: GPIO25
  i2s_dout_pin: GPIO23
<<: !include common-media_player.yaml

View File

@ -0,0 +1,9 @@
substitutions:
  scl_pin: GPIO2
  sda_pin: GPIO3
  i2s_bclk_pin: GPIO4
  i2s_lrclk_pin: GPIO5
  i2s_mclk_pin: GPIO6
  i2s_dout_pin: GPIO7
<<: !include common-media_player.yaml
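
For context, a minimal sketch of the dual-pipeline case that single_pipeline_() and get_traits() distinguish; the media_pipeline key and the speaker IDs below are assumed by symmetry with the announcement_pipeline test config above, not taken verbatim from the files in this diff:

media_player:
  - platform: speaker
    id: speaker_media_player_id
    media_pipeline:
      speaker: media_speaker_id       # assumed ID of a configured speaker component
    announcement_pipeline:
      speaker: announcement_speaker_id  # assumed ID of a second speaker component

With both pipelines defined, single_pipeline_() returns false, so get_traits() advertises pause support and announcements can play independently of ongoing media.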