1
0
mirror of https://github.com/esphome/esphome.git synced 2025-03-12 13:48:14 +00:00

[resampler] Media Player Components PR7 (#8169)

Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
Kevin Ahrendt 2025-02-04 14:18:11 -06:00 committed by GitHub
parent bd34697715
commit 847cff06b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 590 additions and 4 deletions

View File

@ -343,6 +343,7 @@ esphome/components/radon_eye_rd200/* @jeffeb3
esphome/components/rc522/* @glmnet esphome/components/rc522/* @glmnet
esphome/components/rc522_i2c/* @glmnet esphome/components/rc522_i2c/* @glmnet
esphome/components/rc522_spi/* @glmnet esphome/components/rc522_spi/* @glmnet
esphome/components/resampler/speaker/* @kahrendt
esphome/components/restart/* @esphome/core esphome/components/restart/* @esphome/core
esphome/components/rf_bridge/* @jesserockz esphome/components/rf_bridge/* @jesserockz
esphome/components/rgbct/* @jesserockz esphome/components/rgbct/* @jesserockz

View File

@ -280,8 +280,7 @@ FileDecoderState AudioDecoder::decode_mp3_() {
if (err) { if (err) {
switch (err) { switch (err) {
case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY: case esp_audio_libs::helix_decoder::ERR_MP3_OUT_OF_MEMORY:
return FileDecoderState::FAILED; // Intentional fallthrough
break;
case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER: case esp_audio_libs::helix_decoder::ERR_MP3_NULL_POINTER:
return FileDecoderState::FAILED; return FileDecoderState::FAILED;
break; break;

View File

@ -5,12 +5,13 @@
#include "audio.h" #include "audio.h"
#include "audio_transfer_buffer.h" #include "audio_transfer_buffer.h"
#include "esphome/core/defines.h"
#include "esphome/core/ring_buffer.h"
#ifdef USE_SPEAKER #ifdef USE_SPEAKER
#include "esphome/components/speaker/speaker.h" #include "esphome/components/speaker/speaker.h"
#endif #endif
#include "esphome/core/ring_buffer.h"
#include "esp_err.h" #include "esp_err.h"
#include <resampler.h> // esp-audio-libs #include <resampler.h> // esp-audio-libs

View File

View File

@ -0,0 +1,103 @@
import esphome.codegen as cg
from esphome.components import audio, esp32, speaker
import esphome.config_validation as cv
from esphome.const import (
CONF_BITS_PER_SAMPLE,
CONF_BUFFER_DURATION,
CONF_FILTERS,
CONF_ID,
CONF_NUM_CHANNELS,
CONF_OUTPUT_SPEAKER,
CONF_SAMPLE_RATE,
CONF_TASK_STACK_IN_PSRAM,
PLATFORM_ESP32,
)
from esphome.core.entity_helpers import inherit_property_from
# Components listed for automatic loading alongside this platform (ESPHome AUTO_LOAD convention).
AUTO_LOAD = ["audio"]
# Maintainers of this component.
CODEOWNERS = ["@kahrendt"]
# Code-generation handles for the C++ namespace and the ResamplerSpeaker class,
# which is declared as deriving from Component and speaker.Speaker.
resampler_ns = cg.esphome_ns.namespace("resampler")
ResamplerSpeaker = resampler_ns.class_(
"ResamplerSpeaker", cg.Component, speaker.Speaker
)
# Configuration key for the number of taps passed to set_taps() in to_code().
CONF_TAPS = "taps"
def _set_stream_limits(config):
    """Register the accepted bits-per-sample range (16 to 32) for this speaker.

    The limits are applied by the validator returned from
    ``audio.set_stream_limits``; the configuration is then handed back so the
    function can be chained inside ``cv.All``.
    """
    apply_limits = audio.set_stream_limits(
        min_bits_per_sample=16,
        max_bits_per_sample=32,
    )
    apply_limits(config)
    return config
def _validate_audio_compatability(config):
    """Final-validation hook checking the resampler against its output speaker.

    Bits per sample, channel count and sample rate are first inherited from the
    output speaker (in that order), then the resulting values are passed to
    ``audio.final_validate_audio_schema`` for the output speaker.
    """
    for inherited_key in (CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE):
        inherit_property_from(inherited_key, CONF_OUTPUT_SPEAKER)(config)

    check_output = audio.final_validate_audio_schema(
        "source_speaker",
        audio_device=CONF_OUTPUT_SPEAKER,
        bits_per_sample=config.get(CONF_BITS_PER_SAMPLE),
        channels=config.get(CONF_NUM_CHANNELS),
        sample_rate=config.get(CONF_SAMPLE_RATE),
    )
    check_output(config)
def _validate_taps(taps):
    """Validate the tap count: an integer in [16, 128] that is a multiple of 4.

    Returns the validated integer; raises ``cv.Invalid`` when it is not
    divisible by 4 (range errors come from ``cv.int_range``).
    """
    tap_count = cv.int_range(min=16, max=128)(taps)
    if tap_count % 4:
        raise cv.Invalid("Number of taps must be divisible by 4")
    return tap_count
# Configuration schema for the resampler speaker platform: the base speaker
# schema extended with the options below, restricted to ESP32, and followed by
# the stream-limit registration.
CONFIG_SCHEMA = cv.All(
speaker.SPEAKER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(ResamplerSpeaker),
# The speaker that receives the (possibly resampled) audio.
cv.Required(CONF_OUTPUT_SPEAKER): cv.use_id(speaker.Speaker),
# Duration used to size the input ring buffer (see set_buffer_duration).
cv.Optional(
CONF_BUFFER_DURATION, default="100ms"
): cv.positive_time_period_milliseconds,
# Only valid with ESP-IDF; defaults to False there.
cv.SplitDefault(CONF_TASK_STACK_IN_PSRAM, esp32_idf=False): cv.All(
cv.boolean, cv.only_with_esp_idf
),
# Resampler tuning parameters forwarded to set_filters() / set_taps().
cv.Optional(CONF_FILTERS, default=16): cv.int_range(min=2, max=1024),
cv.Optional(CONF_TAPS, default=16): _validate_taps,
}
).extend(cv.COMPONENT_SCHEMA),
cv.only_on([PLATFORM_ESP32]),
_set_stream_limits,
)
# Cross-component check against the configured output speaker.
FINAL_VALIDATE_SCHEMA = _validate_audio_compatability
async def to_code(config):
    """Generate the C++ setup code for a resampler speaker.

    Creates and registers the component and speaker, wires the output speaker,
    and forwards the buffer duration, target stream format, filter count and
    tap count to the generated object. When the task stack is placed in PSRAM,
    the matching ESP-IDF sdkconfig option is enabled as well.
    """
    var = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(var, config)
    await speaker.register_speaker(var, config)

    output_spkr = await cg.get_variable(config[CONF_OUTPUT_SPEAKER])
    cg.add(var.set_output_speaker(output_spkr))

    cg.add(var.set_buffer_duration(config[CONF_BUFFER_DURATION]))

    # The walrus condition already guarantees the value is truthy, so no
    # further checks of the same option are needed inside this branch.
    if task_stack_in_psram := config.get(CONF_TASK_STACK_IN_PSRAM):
        cg.add(var.set_task_stack_in_psram(task_stack_in_psram))
        esp32.add_idf_sdkconfig_option(
            "CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY", True
        )

    cg.add(var.set_target_bits_per_sample(config[CONF_BITS_PER_SAMPLE]))
    cg.add(var.set_target_sample_rate(config[CONF_SAMPLE_RATE]))

    cg.add(var.set_filters(config[CONF_FILTERS]))
    cg.add(var.set_taps(config[CONF_TAPS]))

View File

@ -0,0 +1,318 @@
#include "resampler_speaker.h"
#ifdef USE_ESP32
#include "esphome/components/audio/audio_resampler.h"
#include "esphome/core/helpers.h"
#include "esphome/core/log.h"
#include <algorithm>
#include <cstring>
namespace esphome {
namespace resampler {
// Priority passed to xTaskCreateStatic() for the resampler task.
static const UBaseType_t RESAMPLER_TASK_PRIORITY = 1;
// Duration used to size the resampler's input and output transfer buffers.
static const uint32_t TRANSFER_BUFFER_DURATION_MS = 50;
// NOTE(review): not referenced anywhere in the visible part of this file - confirm whether it is still needed.
static const uint32_t TASK_DELAY_MS = 20;
// Stack size used both for the stack allocation and for xTaskCreateStatic().
static const uint32_t TASK_STACK_SIZE = 3072;
// Logging tag for this component.
static const char *const TAG = "resampler_speaker";
// Event group bits exchanged between loop() and resample_task().
// COMMAND_* bits are set by the component (stop_()) and read by the task.
// STATE_* and ERR_* bits are set by the task and handled and cleared in loop().
enum ResamplingEventGroupBits : uint32_t {
COMMAND_STOP = (1 << 0), // stops the resampler task
STATE_STARTING = (1 << 10),
STATE_RUNNING = (1 << 11),
STATE_STOPPING = (1 << 12),
STATE_STOPPED = (1 << 13),
ERR_ESP_NO_MEM = (1 << 19),
ERR_ESP_NOT_SUPPORTED = (1 << 20),
ERR_ESP_FAIL = (1 << 21),
ALL_BITS = 0x00FFFFFF, // All valid FreeRTOS event group bits
};
// Creates the event group used to communicate with the resampler task and hooks into the output speaker's
// audio output callback. The component is marked failed if the event group cannot be created.
void ResamplerSpeaker::setup() {
this->event_group_ = xEventGroupCreate();
if (this->event_group_ == nullptr) {
ESP_LOGE(TAG, "Failed to create event group");
this->mark_failed();
return;
}
// Forward the output speaker's playback reports to this speaker's own callback, after adding the playback
// differential accumulated by the resampler task.
this->output_speaker_->add_audio_output_callback(
[this](uint32_t new_playback_ms, uint32_t remainder_us, uint32_t pending_ms, uint32_t write_timestamp) {
// The consumed amount is subtracted instead of zeroing the member.
// NOTE(review): presumably so increments added concurrently by the resampler task are not lost - confirm.
int32_t adjustment = this->playback_differential_ms_;
this->playback_differential_ms_ -= adjustment;
int32_t adjusted_playback_ms = static_cast<int32_t>(new_playback_ms) + adjustment;
this->audio_output_callback_(adjusted_playback_ms, remainder_us, pending_ms, write_timestamp);
});
}
// Processes the event bits reported by the resampler task and then advances the speaker state machine.
//
// Task events: state bits are logged and cleared; error bits set an error status and move the speaker to
// STATE_STOPPING; STATE_STOPPED triggers cleanup of the task resources and, on success, clears every bit.
//
// State machine: STARTING -> RUNNING (or STOPPING on failure), RUNNING -> STOPPING once the output speaker
// has stopped, STOPPING -> STOPPED after stop_() has been issued.
void ResamplerSpeaker::loop() {
  uint32_t event_group_bits = xEventGroupGetBits(this->event_group_);

  if (event_group_bits & ResamplingEventGroupBits::STATE_STARTING) {
    ESP_LOGD(TAG, "Starting resampler task");
    xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::STATE_STARTING);
  }

  if (event_group_bits & ResamplingEventGroupBits::ERR_ESP_NO_MEM) {
    this->status_set_error("Resampler task failed to allocate the internal buffers");
    xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ERR_ESP_NO_MEM);
    this->state_ = speaker::STATE_STOPPING;
  }
  if (event_group_bits & ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED) {
    this->status_set_error("Cannot resample due to an unsupported audio stream");
    xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED);
    this->state_ = speaker::STATE_STOPPING;
  }
  if (event_group_bits & ResamplingEventGroupBits::ERR_ESP_FAIL) {
    this->status_set_error("Resampler task failed");
    xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ERR_ESP_FAIL);
    this->state_ = speaker::STATE_STOPPING;
  }

  if (event_group_bits & ResamplingEventGroupBits::STATE_RUNNING) {
    ESP_LOGD(TAG, "Started resampler task");
    this->status_clear_error();
    xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::STATE_RUNNING);
  }
  if (event_group_bits & ResamplingEventGroupBits::STATE_STOPPING) {
    ESP_LOGD(TAG, "Stopping resampler task");
    xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::STATE_STOPPING);
  }
  if (event_group_bits & ResamplingEventGroupBits::STATE_STOPPED) {
    // Only clear the bits once the task has really finished; otherwise retry on the next loop iteration
    if (this->delete_task_() == ESP_OK) {
      ESP_LOGD(TAG, "Stopped resampler task");
      xEventGroupClearBits(this->event_group_, ResamplingEventGroupBits::ALL_BITS);
    }
  }

  switch (this->state_) {
    case speaker::STATE_STARTING: {
      esp_err_t err = this->start_();
      if (err == ESP_OK) {
        this->status_clear_error();
        this->state_ = speaker::STATE_RUNNING;
      } else {
        switch (err) {
          case ESP_ERR_INVALID_STATE:
            this->status_set_error("Failed to start resampler: resampler task failed to start");
            break;
          case ESP_ERR_NO_MEM:
            this->status_set_error("Failed to start resampler: not enough memory for task stack");
            // Without this break the generic message below was also issued for out-of-memory failures
            break;
          default:
            this->status_set_error("Failed to start resampler");
            break;
        }

        this->state_ = speaker::STATE_STOPPING;
      }
      break;
    }
    case speaker::STATE_RUNNING:
      if (this->output_speaker_->is_stopped()) {
        this->state_ = speaker::STATE_STOPPING;
      }
      break;
    case speaker::STATE_STOPPING:
      this->stop_();
      this->state_ = speaker::STATE_STOPPED;
      break;
    case speaker::STATE_STOPPED:
      break;
  }
}
size_t ResamplerSpeaker::play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) {
if (this->is_stopped()) {
this->start();
}
size_t bytes_written = 0;
if ((this->output_speaker_->is_running()) && (!this->requires_resampling_())) {
bytes_written = this->output_speaker_->play(data, length, ticks_to_wait);
} else {
if (this->ring_buffer_.use_count() == 1) {
std::shared_ptr<RingBuffer> temp_ring_buffer = this->ring_buffer_.lock();
bytes_written = temp_ring_buffer->write_without_replacement(data, length, ticks_to_wait);
}
}
return bytes_written;
}
// Requests a start by setting the state only; the actual start is performed by loop() via start_().
void ResamplerSpeaker::start() { this->state_ = speaker::STATE_STARTING; }
// Configures the output speaker with the target stream format and starts it. The resampler task is only
// started when the incoming stream differs from the target format.
// @return ESP_OK when no resampling is needed, otherwise the result of start_task_()
esp_err_t ResamplerSpeaker::start_() {
  this->target_stream_info_ = audio::AudioStreamInfo(
      this->target_bits_per_sample_, this->audio_stream_info_.get_channels(), this->target_sample_rate_);

  this->output_speaker_->set_audio_stream_info(this->target_stream_info_);
  this->output_speaker_->start();

  if (!this->requires_resampling_()) {
    // Pass-through mode: play() writes directly to the output speaker
    return ESP_OK;
  }

  // A separate task handles converting between the stream formats
  return this->start_task_();
}
// Allocates the task stack (unless one is already allocated) and creates the resampler task (unless one
// already exists).
// @return ESP_OK if the task exists afterwards
//         ESP_ERR_NO_MEM if the task stack couldn't be allocated
//         ESP_ERR_INVALID_STATE if the task couldn't be created
esp_err_t ResamplerSpeaker::start_task_() {
  if (this->task_stack_buffer_ == nullptr) {
    // The configured memory type decides which allocator flag is used for the stack
    RAMAllocator<StackType_t> stack_allocator(this->task_stack_in_psram_
                                                  ? RAMAllocator<StackType_t>::ALLOC_EXTERNAL
                                                  : RAMAllocator<StackType_t>::ALLOC_INTERNAL);
    this->task_stack_buffer_ = stack_allocator.allocate(TASK_STACK_SIZE);

    if (this->task_stack_buffer_ == nullptr) {
      return ESP_ERR_NO_MEM;
    }
  }

  if (this->task_handle_ == nullptr) {
    this->task_handle_ = xTaskCreateStatic(resample_task, "sample", TASK_STACK_SIZE, (void *) this,
                                           RESAMPLER_TASK_PRIORITY, this->task_stack_buffer_, &this->task_stack_);

    if (this->task_handle_ == nullptr) {
      return ESP_ERR_INVALID_STATE;
    }
  }

  return ESP_OK;
}
// Requests a stop by setting the state only; loop() performs the actual stop via stop_().
void ResamplerSpeaker::stop() { this->state_ = speaker::STATE_STOPPING; }
// Signals the resampler task (if one exists) to stop and then stops the output speaker.
void ResamplerSpeaker::stop_() {
  const bool task_exists = (this->task_handle_ != nullptr);
  if (task_exists) {
    // The task polls this bit and leaves its processing loop once it is set
    xEventGroupSetBits(this->event_group_, ResamplingEventGroupBits::COMMAND_STOP);
  }

  this->output_speaker_->stop();
}
// Releases the resources of a finished resampler task: clears the task handle and frees the task stack.
// @return ESP_OK if the resources were released
//         ESP_ERR_INVALID_STATE if the task is still marked as created
esp_err_t ResamplerSpeaker::delete_task_() {
  if (this->task_created_) {
    // The task clears this flag itself right before it deletes itself
    return ESP_ERR_INVALID_STATE;
  }

  this->task_handle_ = nullptr;

  if (this->task_stack_buffer_ != nullptr) {
    // Deallocate with the same allocator flag that was used for the allocation
    RAMAllocator<StackType_t> stack_allocator(this->task_stack_in_psram_
                                                  ? RAMAllocator<StackType_t>::ALLOC_EXTERNAL
                                                  : RAMAllocator<StackType_t>::ALLOC_INTERNAL);
    stack_allocator.deallocate(this->task_stack_buffer_, TASK_STACK_SIZE);
    this->task_stack_buffer_ = nullptr;
  }

  return ESP_OK;
}
// Forwards the finish request to the output speaker.
void ResamplerSpeaker::finish() { this->output_speaker_->finish(); }
bool ResamplerSpeaker::has_buffered_data() const {
bool has_ring_buffer_data = false;
if (this->requires_resampling_() && (this->ring_buffer_.use_count() > 0)) {
has_ring_buffer_data = (this->ring_buffer_.lock()->available() > 0);
}
return (has_ring_buffer_data || this->output_speaker_->has_buffered_data());
}
// Stores the mute state locally and forwards it to the output speaker.
void ResamplerSpeaker::set_mute_state(bool mute_state) {
this->mute_state_ = mute_state;
this->output_speaker_->set_mute_state(mute_state);
}
// Stores the volume locally and forwards it to the output speaker.
void ResamplerSpeaker::set_volume(float volume) {
this->volume_ = volume;
this->output_speaker_->set_volume(volume);
}
// Resampling is needed unless both the sample rate and the bits per sample of the incoming stream already
// match the configured target values.
bool ResamplerSpeaker::requires_resampling_() const {
  return !((this->audio_stream_info_.get_sample_rate() == this->target_sample_rate_) &&
           (this->audio_stream_info_.get_bits_per_sample() == this->target_bits_per_sample_));
}
void ResamplerSpeaker::resample_task(void *params) {
ResamplerSpeaker *this_resampler = (ResamplerSpeaker *) params;
this_resampler->task_created_ = true;
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_STARTING);
std::unique_ptr<audio::AudioResampler> resampler =
make_unique<audio::AudioResampler>(this_resampler->audio_stream_info_.ms_to_bytes(TRANSFER_BUFFER_DURATION_MS),
this_resampler->target_stream_info_.ms_to_bytes(TRANSFER_BUFFER_DURATION_MS));
esp_err_t err = resampler->start(this_resampler->audio_stream_info_, this_resampler->target_stream_info_,
this_resampler->taps_, this_resampler->filters_);
if (err == ESP_OK) {
std::shared_ptr<RingBuffer> temp_ring_buffer =
RingBuffer::create(this_resampler->audio_stream_info_.ms_to_bytes(this_resampler->buffer_duration_ms_));
if (temp_ring_buffer.use_count() == 0) {
err = ESP_ERR_NO_MEM;
} else {
this_resampler->ring_buffer_ = temp_ring_buffer;
resampler->add_source(this_resampler->ring_buffer_);
this_resampler->output_speaker_->set_audio_stream_info(this_resampler->target_stream_info_);
resampler->add_sink(this_resampler->output_speaker_);
}
}
if (err == ESP_OK) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_RUNNING);
} else if (err == ESP_ERR_NO_MEM) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NO_MEM);
} else if (err == ESP_ERR_NOT_SUPPORTED) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_NOT_SUPPORTED);
}
this_resampler->playback_differential_ms_ = 0;
while (err == ESP_OK) {
uint32_t event_bits = xEventGroupGetBits(this_resampler->event_group_);
if (event_bits & ResamplingEventGroupBits::COMMAND_STOP) {
break;
}
// Stop gracefully if the decoder is done
int32_t ms_differential = 0;
audio::AudioResamplerState resampler_state = resampler->resample(false, &ms_differential);
this_resampler->playback_differential_ms_ += ms_differential;
if (resampler_state == audio::AudioResamplerState::FINISHED) {
break;
} else if (resampler_state == audio::AudioResamplerState::FAILED) {
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::ERR_ESP_FAIL);
break;
}
}
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_STOPPING);
resampler.reset();
xEventGroupSetBits(this_resampler->event_group_, ResamplingEventGroupBits::STATE_STOPPED);
this_resampler->task_created_ = false;
vTaskDelete(nullptr);
}
} // namespace resampler
} // namespace esphome
#endif

View File

@ -0,0 +1,107 @@
#pragma once
#ifdef USE_ESP32
#include "esphome/components/audio/audio.h"
#include "esphome/components/audio/audio_transfer_buffer.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/component.h"
#include <freertos/event_groups.h>
#include <freertos/FreeRTOS.h>
namespace esphome {
namespace resampler {
/// @brief Speaker that forwards audio to another speaker, resampling it first when the incoming stream's sample
/// rate or bits per sample differ from the configured target values.
class ResamplerSpeaker : public Component, public speaker::Speaker {
 public:
  float get_setup_priority() const override { return esphome::setup_priority::DATA; }
  void setup() override;
  void loop() override;

  size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;
  size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); }

  void start() override;
  void stop() override;
  void finish() override;

  void set_pause_state(bool pause_state) override { this->output_speaker_->set_pause_state(pause_state); }
  bool get_pause_state() const override { return this->output_speaker_->get_pause_state(); }

  bool has_buffered_data() const override;

  /// @brief Mute state changes are passed to the parent's output speaker
  void set_mute_state(bool mute_state) override;

  /// @brief Volume state changes are passed to the parent's output speaker
  void set_volume(float volume) override;

  void set_output_speaker(speaker::Speaker *speaker) { this->output_speaker_ = speaker; }
  void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }

  void set_target_bits_per_sample(uint8_t target_bits_per_sample) {
    this->target_bits_per_sample_ = target_bits_per_sample;
  }
  void set_target_sample_rate(uint32_t target_sample_rate) { this->target_sample_rate_ = target_sample_rate; }

  void set_filters(uint16_t filters) { this->filters_ = filters; }
  void set_taps(uint16_t taps) { this->taps_ = taps; }

  void set_buffer_duration(uint32_t buffer_duration_ms) { this->buffer_duration_ms_ = buffer_duration_ms; }

 protected:
  /// @brief Starts the output speaker after setting the resampled stream info. If resampling is required, it starts the
  /// task.
  /// @return ESP_OK if resampling is not required
  ///         return value of start_task_() if resampling is required
  esp_err_t start_();

  /// @brief Starts the resampler task after allocating the task stack
  /// @return ESP_OK if successful,
  ///         ESP_ERR_NO_MEM if the task stack couldn't be allocated
  ///         ESP_ERR_INVALID_STATE if the task wasn't created
  esp_err_t start_task_();

  /// @brief Stops the output speaker. If the resampling task is running, it sends the stop command.
  void stop_();

  /// @brief Deallocates the task stack and resets the pointers.
  /// @return ESP_OK if successful
  ///         ESP_ERR_INVALID_STATE if the task hasn't stopped itself
  esp_err_t delete_task_();

  /// @brief Checks whether the incoming stream's sample rate or bits per sample differ from the target values.
  inline bool requires_resampling_() const;

  /// @brief Entry point of the resampler task; params is the owning ResamplerSpeaker.
  static void resample_task(void *params);

  EventGroupHandle_t event_group_{nullptr};

  std::weak_ptr<RingBuffer> ring_buffer_;

  speaker::Speaker *output_speaker_{nullptr};

  bool task_stack_in_psram_{false};
  bool task_created_{false};

  TaskHandle_t task_handle_{nullptr};
  StaticTask_t task_stack_;
  StackType_t *task_stack_buffer_{nullptr};

  audio::AudioStreamInfo target_stream_info_;

  // The values below are normally overwritten through the setters above. The initializers only guarantee that
  // no member is ever read while indeterminate.
  uint16_t taps_{16};
  uint16_t filters_{16};

  uint8_t target_bits_per_sample_{0};
  uint32_t target_sample_rate_{0};

  uint32_t buffer_duration_ms_{100};

  int32_t playback_differential_ms_{0};
};
} // namespace resampler
} // namespace esphome
#endif

View File

@ -16,6 +16,8 @@
// Feature flags // Feature flags
#define USE_ALARM_CONTROL_PANEL #define USE_ALARM_CONTROL_PANEL
#define USE_AUDIO_FLAC_SUPPORT
#define USE_AUDIO_MP3_SUPPORT
#define USE_API #define USE_API
#define USE_API_NOISE #define USE_API_NOISE
#define USE_API_PLAINTEXT #define USE_API_PLAINTEXT

View File

@ -0,0 +1,13 @@
# Test configuration: an external-DAC I2S speaker wrapped by a resampler speaker.
# The ${...} pin values are substitutions that must be defined by the file using this configuration.
i2s_audio:
i2s_lrclk_pin: ${lrclk_pin}
i2s_bclk_pin: ${bclk_pin}
i2s_mclk_pin: ${mclk_pin}
speaker:
- platform: i2s_audio
id: speaker_id
dac_type: external
i2s_dout_pin: ${dout_pin}
# The resampler speaker sends its audio to the I2S speaker declared above.
- platform: resampler
id: resampler_speaker_id
output_speaker: speaker_id

View File

@ -0,0 +1,7 @@
# I2S pin assignments for this test target; the names are referenced as ${...} substitutions.
substitutions:
lrclk_pin: GPIO16
bclk_pin: GPIO17
mclk_pin: GPIO15
dout_pin: GPIO14
# Merge in the shared configuration from common.yaml.
<<: !include common.yaml

View File

@ -0,0 +1,7 @@
# I2S pin assignments for this test target; the names are referenced as ${...} substitutions.
substitutions:
lrclk_pin: GPIO4
bclk_pin: GPIO5
mclk_pin: GPIO6
dout_pin: GPIO7
# Merge in the shared configuration from common.yaml.
<<: !include common.yaml

View File

@ -0,0 +1,7 @@
# I2S pin assignments for this test target; the names are referenced as ${...} substitutions.
substitutions:
lrclk_pin: GPIO4
bclk_pin: GPIO5
mclk_pin: GPIO6
dout_pin: GPIO7
# Merge in the shared configuration from common.yaml.
<<: !include common.yaml

View File

@ -0,0 +1,7 @@
# I2S pin assignments for this test target; the names are referenced as ${...} substitutions.
substitutions:
lrclk_pin: GPIO16
bclk_pin: GPIO17
mclk_pin: GPIO15
dout_pin: GPIO14
# Merge in the shared configuration from common.yaml.
<<: !include common.yaml

View File

@ -0,0 +1,7 @@
# I2S pin assignments for this test target; the names are referenced as ${...} substitutions.
substitutions:
lrclk_pin: GPIO4
bclk_pin: GPIO5
mclk_pin: GPIO6
dout_pin: GPIO7
# Merge in the shared configuration from common.yaml.
<<: !include common.yaml

View File

@ -0,0 +1,7 @@
# I2S pin assignments for this test target; the names are referenced as ${...} substitutions.
substitutions:
lrclk_pin: GPIO4
bclk_pin: GPIO5
mclk_pin: GPIO6
dout_pin: GPIO7
# Merge in the shared configuration from common.yaml.
<<: !include common.yaml