1
0
mirror of https://github.com/esphome/esphome.git synced 2025-04-15 23:30:28 +01:00
Kevin Ahrendt 6f4e8f1fbf
[mixer] Media Player Components PR8 (#8170)
Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
2025-02-04 23:00:02 +00:00

208 lines
9.8 KiB
C++

#pragma once
#ifdef USE_ESP32
#include "esphome/components/audio/audio.h"
#include "esphome/components/audio/audio_transfer_buffer.h"
#include "esphome/components/speaker/speaker.h"
#include "esphome/core/component.h"
#include <freertos/event_groups.h>
#include <freertos/FreeRTOS.h>
namespace esphome {
namespace mixer_speaker {
/* Classes for mixing several source speaker audio streams and writing it to another speaker component.
* - Volume controls are passed through to the output speaker
* - Directly handles pausing at the SourceSpeaker level; pause state is not passed through to the output speaker.
* - Audio sent to the SourceSpeaker's must have 16 bits per sample.
* - Audio sent to the SourceSpeaker can have any number of channels. They are duplicated or ignored as needed to match
* the number of channels required for the output speaker.
* - In queue mode, the audio sent to the SoureSpeakers can have different sample rates.
* - In non-queue mode, the audio sent to the SourceSpeakers must have the same sample rates.
* - SourceSpeaker has an internal ring buffer. It also allocates a shared_ptr for an AudioTranserBuffer object.
* - Audio Data Flow:
* - Audio data played on a SourceSpeaker first writes to its internal ring buffer.
* - MixerSpeaker task temporarily takes shared ownership of each SourceSpeaker's AudioTransferBuffer.
* - MixerSpeaker calls SourceSpeaker's `process_data_from_source`, which tranfers audio from the SourceSpeaker's
* ring buffer to its AudioTransferBuffer. Audio ducking is applied at this step.
* - In queue mode, MixerSpeaker prioritizes the earliest configured SourceSpeaker with audio data. Audio data is
* sent to the output speaker.
* - In non-queue mode, MixerSpeaker adds all the audio data in each SourceSpeaker into one stream that is written
* to the output speaker.
*/
class MixerSpeaker;
class SourceSpeaker : public speaker::Speaker, public Component {
public:
void dump_config() override;
void setup() override;
void loop() override;
size_t play(const uint8_t *data, size_t length, TickType_t ticks_to_wait) override;
size_t play(const uint8_t *data, size_t length) override { return this->play(data, length, 0); }
void start() override;
void stop() override;
void finish() override;
bool has_buffered_data() const override;
/// @brief Mute state changes are passed to the parent's output speaker
void set_mute_state(bool mute_state) override;
/// @brief Volume state changes are passed to the parent's output speaker
void set_volume(float volume) override;
void set_pause_state(bool pause_state) override { this->pause_state_ = pause_state; }
bool get_pause_state() const override { return this->pause_state_; }
/// @brief Transfers audio from the ring buffer into the transfer buffer. Ducks audio while transferring.
/// @param ticks_to_wait FreeRTOS ticks to wait while waiting to read from the ring buffer.
/// @return Number of bytes transferred from the ring buffer.
size_t process_data_from_source(TickType_t ticks_to_wait);
/// @brief Sets the ducking level for the source speaker.
/// @param decibel_reduction (uint8_t) The dB reduction level. For example, 0 is no change, 10 is a reduction by 10 dB
/// @param duration (uint32_t) The number of milliseconds to transition from the current level to the new level
void apply_ducking(uint8_t decibel_reduction, uint32_t duration);
void set_buffer_duration(uint32_t buffer_duration_ms) { this->buffer_duration_ms_ = buffer_duration_ms; }
void set_parent(MixerSpeaker *parent) { this->parent_ = parent; }
void set_timeout(uint32_t ms) { this->timeout_ms_ = ms; }
std::weak_ptr<audio::AudioSourceTransferBuffer> get_transfer_buffer() { return this->transfer_buffer_; }
protected:
friend class MixerSpeaker;
esp_err_t start_();
void stop_();
/// @brief Ducks audio samples by a specified amount. When changing the ducking amount, it can transition gradually
/// over a specified amount of samples.
/// @param input_buffer buffer with audio samples to be ducked in place
/// @param input_samples_to_duck number of samples to process in ``input_buffer``
/// @param current_ducking_db_reduction pointer to the current dB reduction
/// @param ducking_transition_samples_remaining pointer to the total number of samples left before the the
/// transition is finished
/// @param samples_per_ducking_step total number of samples per ducking step for the transition
/// @param db_change_per_ducking_step the change in dB reduction per step
static void duck_samples(int16_t *input_buffer, uint32_t input_samples_to_duck, int8_t *current_ducking_db_reduction,
uint32_t *ducking_transition_samples_remaining, uint32_t samples_per_ducking_step,
int8_t db_change_per_ducking_step);
MixerSpeaker *parent_;
std::shared_ptr<audio::AudioSourceTransferBuffer> transfer_buffer_;
std::weak_ptr<RingBuffer> ring_buffer_;
uint32_t buffer_duration_ms_;
uint32_t last_seen_data_ms_{0};
optional<uint32_t> timeout_ms_;
bool stop_gracefully_{false};
bool pause_state_{false};
int8_t target_ducking_db_reduction_{0};
int8_t current_ducking_db_reduction_{0};
int8_t db_change_per_ducking_step_{1};
uint32_t ducking_transition_samples_remaining_{0};
uint32_t samples_per_ducking_step_{0};
uint32_t accumulated_frames_read_{0};
uint32_t pending_playback_ms_{0};
};
class MixerSpeaker : public Component {
public:
void dump_config() override;
void setup() override;
void loop() override;
void add_source_speaker(SourceSpeaker *source_speaker) { this->source_speakers_.push_back(source_speaker); }
/// @brief Starts the mixer task. Called by a source speaker giving the current audio stream information
/// @param stream_info The calling source speakers audio stream information
/// @return ESP_ERR_NOT_SUPPORTED if the incoming stream is incompatible due to unsupported bits per sample
/// ESP_ERR_INVALID_ARG if the incoming stream is incompatible to be mixed with the other input audio stream
/// ESP_ERR_NO_MEM if there isn't enough memory for the task's stack
/// ESP_ERR_INVALID_STATE if the task fails to start
/// ESP_OK if the incoming stream is compatible and the mixer task starts
esp_err_t start(audio::AudioStreamInfo &stream_info);
void stop();
void set_output_channels(uint8_t output_channels) { this->output_channels_ = output_channels; }
void set_output_speaker(speaker::Speaker *speaker) { this->output_speaker_ = speaker; }
void set_queue_mode(bool queue_mode) { this->queue_mode_ = queue_mode; }
void set_task_stack_in_psram(bool task_stack_in_psram) { this->task_stack_in_psram_ = task_stack_in_psram; }
speaker::Speaker *get_output_speaker() const { return this->output_speaker_; }
protected:
/// @brief Copies audio frames from the input buffer to the output buffer taking into account the number of channels
/// in each stream. If the output stream has more channels, the input samples are duplicated. If the output stream has
/// less channels, the extra channel input samples are dropped.
/// @param input_buffer
/// @param input_stream_info
/// @param output_buffer
/// @param output_stream_info
/// @param frames_to_transfer number of frames (consisting of a sample for each channel) to copy from the input buffer
static void copy_frames(const int16_t *input_buffer, audio::AudioStreamInfo input_stream_info, int16_t *output_buffer,
audio::AudioStreamInfo output_stream_info, uint32_t frames_to_transfer);
/// @brief Mixes the primary and secondary streams taking into account the number of channels in each stream. Primary
/// and secondary samples are duplicated or dropped as necessary to ensure the output stream has the configured number
/// of channels. Output samples are clamped to the corresponding int16 min or max values if the mixed sample
/// overflows.
/// @param primary_buffer (int16_t *) samples buffer for the primary stream
/// @param primary_stream_info stream info for the primary stream
/// @param secondary_buffer (int16_t *) samples buffer for secondary stream
/// @param secondary_stream_info stream info for the secondary stream
/// @param output_buffer (int16_t *) buffer for the mixed samples
/// @param output_stream_info stream info for the output buffer
/// @param frames_to_mix number of frames in the primary and secondary buffers to mix together
static void mix_audio_samples(const int16_t *primary_buffer, audio::AudioStreamInfo primary_stream_info,
const int16_t *secondary_buffer, audio::AudioStreamInfo secondary_stream_info,
int16_t *output_buffer, audio::AudioStreamInfo output_stream_info,
uint32_t frames_to_mix);
static void audio_mixer_task(void *params);
/// @brief Starts the mixer task after allocating memory for the task stack.
/// @return ESP_ERR_NO_MEM if there isn't enough memory for the task's stack
/// ESP_ERR_INVALID_STATE if the task didn't start
/// ESP_OK if successful
esp_err_t start_task_();
/// @brief If the task is stopped, it sets the task handle to the nullptr and deallocates its stack
/// @return ESP_OK if the task was stopped, ESP_ERR_INVALID_STATE otherwise.
esp_err_t delete_task_();
EventGroupHandle_t event_group_{nullptr};
std::vector<SourceSpeaker *> source_speakers_;
speaker::Speaker *output_speaker_{nullptr};
uint8_t output_channels_;
bool queue_mode_;
bool task_stack_in_psram_{false};
bool task_created_{false};
TaskHandle_t task_handle_{nullptr};
StaticTask_t task_stack_;
StackType_t *task_stack_buffer_{nullptr};
optional<audio::AudioStreamInfo> audio_stream_info_;
};
} // namespace mixer_speaker
} // namespace esphome
#endif