mirror of
https://github.com/esphome/esphome.git
synced 2025-10-29 06:04:01 +00:00
[audio, i2s_audio, speaker] Media Player Components PR2 (#8164)
Co-authored-by: Jesse Hills <3060199+jesserockz@users.noreply.github.com>
This commit is contained in:
@@ -1,9 +1,121 @@
|
||||
import esphome.codegen as cg
|
||||
import esphome.config_validation as cv
|
||||
from esphome.const import CONF_BITS_PER_SAMPLE, CONF_NUM_CHANNELS, CONF_SAMPLE_RATE
|
||||
import esphome.final_validate as fv
|
||||
|
||||
# Code owners for this component.
CODEOWNERS = ["@kahrendt"]

# Code generation handles for the C++ ``audio`` namespace and the types it declares.
audio_ns = cg.esphome_ns.namespace("audio")
AudioFile = audio_ns.struct("AudioFile")
AudioFileType = audio_ns.enum("AudioFileType", is_class=True)

# Maps each file type name to the matching ``AudioFileType`` enumerator.
AUDIO_FILE_TYPE_ENUM = {
    file_type: getattr(AudioFileType, file_type)
    for file_type in ("NONE", "WAV", "MP3", "FLAC")
}

# Config keys under which ``set_stream_limits`` stores a component's stream limits and from which
# ``final_validate_audio_schema`` reads them back.
CONF_MIN_BITS_PER_SAMPLE = "min_bits_per_sample"
CONF_MAX_BITS_PER_SAMPLE = "max_bits_per_sample"
CONF_MIN_CHANNELS = "min_channels"
CONF_MAX_CHANNELS = "max_channels"
CONF_MIN_SAMPLE_RATE = "min_sample_rate"
CONF_MAX_SAMPLE_RATE = "max_sample_rate"

# The audio component itself takes no options.
CONFIG_SCHEMA = cv.All(
    cv.Schema({}),
)

# Optional stream settings shared by audio components.
AUDIO_COMPONENT_SCHEMA = cv.Schema(
    {
        cv.Optional(CONF_BITS_PER_SAMPLE): cv.int_range(8, 32),
        cv.Optional(CONF_NUM_CHANNELS): cv.int_range(1, 2),
        cv.Optional(CONF_SAMPLE_RATE): cv.int_range(8000, 48000),
    }
)

# Sentinel that distinguishes "argument not passed" from any real value in ``set_stream_limits``.
_UNDEF = object()
|
||||
|
||||
|
||||
def set_stream_limits(
    min_bits_per_sample: int = _UNDEF,
    max_bits_per_sample: int = _UNDEF,
    min_channels: int = _UNDEF,
    max_channels: int = _UNDEF,
    min_sample_rate: int = _UNDEF,
    max_sample_rate: int = _UNDEF,
):
    """Build a callable that records audio stream limits in a config dictionary.

    Only the limits that are explicitly passed are written; any limit left at its default is not
    touched in the config.

    Returns:
        A function taking a config dictionary. It stores each provided limit under its
        ``CONF_MIN_*``/``CONF_MAX_*`` key, mutating the dictionary in place, and returns that same
        dictionary.
    """
    # Insertion order matches the order in which the keys are written to the config.
    limits = {
        CONF_MIN_BITS_PER_SAMPLE: min_bits_per_sample,
        CONF_MAX_BITS_PER_SAMPLE: max_bits_per_sample,
        CONF_MIN_CHANNELS: min_channels,
        CONF_MAX_CHANNELS: max_channels,
        CONF_MIN_SAMPLE_RATE: min_sample_rate,
        CONF_MAX_SAMPLE_RATE: max_sample_rate,
    }

    def set_limits_in_config(config):
        for key, value in limits.items():
            if value is not _UNDEF:
                config[key] = value
        # Returning the config keeps it intact when this callable is chained as a validator;
        # callers that ignore the return value are unaffected.
        return config

    return set_limits_in_config
|
||||
|
||||
|
||||
def final_validate_audio_schema(
    name: str,
    *,
    audio_device: str,
    bits_per_sample: int,
    channels: int,
    sample_rate: int,
):
    """Build a final-validation schema checking stream settings against an audio device's limits.

    Args:
        name: Component name used in the error messages.
        audio_device: Config key holding the ID of the audio device to check against.
        bits_per_sample: Bits per sample the component will use.
        channels: Number of channels the component will use.
        sample_rate: Sample rate the component will use.

    Returns:
        A ``cv.Schema`` that requires ``audio_device`` and validates it through
        ``fv.id_declaration_match_schema`` (presumably against the config of the component that
        declares the ID; confirm in ``final_validate``). Other keys are allowed.

    The inner validator raises ``cv.Invalid`` for the first setting that falls outside the range
    given by the device's ``CONF_MIN_*``/``CONF_MAX_*`` entries. A missing entry is passed to
    ``cv.int_range`` as ``None``.
    """
    # (setting named in the error, key of the minimum, key of the maximum, requested value)
    checks = (
        (
            CONF_BITS_PER_SAMPLE,
            CONF_MIN_BITS_PER_SAMPLE,
            CONF_MAX_BITS_PER_SAMPLE,
            bits_per_sample,
        ),
        (CONF_NUM_CHANNELS, CONF_MIN_CHANNELS, CONF_MAX_CHANNELS, channels),
        (CONF_SAMPLE_RATE, CONF_MIN_SAMPLE_RATE, CONF_MAX_SAMPLE_RATE, sample_rate),
    )

    def validate_audio_compatiblity(audio_config):
        for setting, min_key, max_key, requested in checks:
            try:
                cv.int_range(
                    min=audio_config.get(min_key),
                    max=audio_config.get(max_key),
                )(requested)
            except cv.Invalid as exc:
                raise cv.Invalid(
                    f"Invalid configuration for the {name} component. The {setting} {str(exc)}"
                ) from exc

        # Kept outside the try blocks so an error raised here is never reported as a
        # sample rate problem.
        return cv.Schema({}, extra=cv.ALLOW_EXTRA)(audio_config)

    return cv.Schema(
        {
            cv.Required(audio_device): fv.id_declaration_match_schema(
                validate_audio_compatiblity
            )
        },
        extra=cv.ALLOW_EXTRA,
    )
|
||||
|
||||
|
||||
async def to_code(config):
    """Add the ``esphome/esp-audio-libs`` library (version 1.1.1) to the build.

    ``config`` is not read.
    """
    cg.add_library("esphome/esp-audio-libs", "1.1.1")
|
||||
|
||||
67
esphome/components/audio/audio.cpp
Normal file
67
esphome/components/audio/audio.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#include "audio.h"
|
||||
|
||||
namespace esphome {
|
||||
namespace audio {
|
||||
|
||||
// Greatest common divisor of `a` and `b`, computed with the Euclidean algorithm.
// Returns `a` unchanged when `b` is 0.
static uint32_t gcd(uint32_t a, uint32_t b) {
  while (b != 0) {
    const uint32_t remainder = a % b;
    a = b;
    b = remainder;
  }
  return a;
}
|
||||
|
||||
// Stores the stream parameters and precomputes the two derived conversion factors.
AudioStreamInfo::AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate)
    : bits_per_sample_(bits_per_sample), channels_(channels), sample_rate_(sample_rate) {
  // Round up to whole bytes, e.g. 24 bits -> 3 bytes and 12 bits -> 2 bytes.
  this->bytes_per_sample_ = (bits_per_sample + 7) / 8;
  // GCD of 1000 (milliseconds per second) and the sample rate.
  this->ms_sample_rate_gcd_ = gcd(1000, sample_rate);
}
|
||||
|
||||
uint32_t AudioStreamInfo::frames_to_microseconds(uint32_t frames) const {
|
||||
return (frames * 1000000 + (this->sample_rate_ >> 1)) / this->sample_rate_;
|
||||
}
|
||||
|
||||
uint32_t AudioStreamInfo::frames_to_milliseconds_with_remainder(uint32_t *total_frames) const {
|
||||
uint32_t unprocessable_frames = *total_frames % (this->sample_rate_ / this->ms_sample_rate_gcd_);
|
||||
uint32_t frames_for_ms_calculation = *total_frames - unprocessable_frames;
|
||||
|
||||
uint32_t playback_ms = (frames_for_ms_calculation * 1000) / this->sample_rate_;
|
||||
*total_frames = unprocessable_frames;
|
||||
return playback_ms;
|
||||
}
|
||||
|
||||
// Two stream infos are equal when bits per sample, channel count, and sample rate all match.
bool AudioStreamInfo::operator==(const AudioStreamInfo &rhs) const {
  if (this->bits_per_sample_ != rhs.get_bits_per_sample()) {
    return false;
  }
  if (this->channels_ != rhs.get_channels()) {
    return false;
  }
  return this->sample_rate_ == rhs.get_sample_rate();
}
|
||||
|
||||
// Returns a readable name for `file_type`. Any value without a case compiled in, including
// AudioFileType::NONE, yields "unknown".
const char *audio_file_type_to_string(AudioFileType file_type) {
#ifdef USE_AUDIO_FLAC_SUPPORT
  if (file_type == AudioFileType::FLAC) {
    return "FLAC";
  }
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
  if (file_type == AudioFileType::MP3) {
    return "MP3";
  }
#endif
  if (file_type == AudioFileType::WAV) {
    return "WAV";
  }
  return "unknown";
}
|
||||
|
||||
// Multiplies each int16 sample by the Q15 `scale_factor` and writes the result to `output_buffer`.
// `output_buffer` may be the same buffer as `audio_samples`; each sample is read before it is written.
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
                         size_t samples_to_scale) {
  // Note the assembly dsps_mulc function has audio glitches if the input and output buffers are the same.
  // The index is size_t to match `samples_to_scale`, avoiding a signed/unsigned comparison.
  for (size_t i = 0; i < samples_to_scale; i++) {
    // The product of two int16 values always fits in 32 bits; shifting by 15 removes the Q15 scaling.
    const int32_t acc = static_cast<int32_t>(audio_samples[i]) * static_cast<int32_t>(scale_factor);
    output_buffer[i] = static_cast<int16_t>(acc >> 15);
  }
}
|
||||
|
||||
} // namespace audio
|
||||
} // namespace esphome
|
||||
@@ -1,21 +1,139 @@
|
||||
#pragma once
|
||||
|
||||
#include "esphome/core/defines.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
namespace esphome {
|
||||
namespace audio {
|
||||
|
||||
class AudioStreamInfo {
  /* Class to represent important parameters of the audio stream that also provides helper functions to convert
   * between various audio related units.
   *
   *  - An audio sample represents a unit of audio for one channel.
   *  - A frame represents a unit of audio with a sample for every channel.
   *
   * In general, converting between bytes, samples, and frames shouldn't result in rounding errors so long as frames
   * are used as the main unit when transferring audio data. Durations may result in rounding for certain sample rates;
   * e.g., 44.1 KHz. The ``frames_to_milliseconds_with_remainder`` function should be used for accuracy, as it takes
   * into account the remainder rather than just ignoring any rounding.
   *
   * Duration conversions that multiply before dividing use 64-bit intermediates so the product cannot wrap in 32 bits.
   */
 public:
  AudioStreamInfo()
      : AudioStreamInfo(16, 1, 16000) {}  // Default values represent ESPHome's audio components historical values
  AudioStreamInfo(uint8_t bits_per_sample, uint8_t channels, uint32_t sample_rate);

  uint8_t get_bits_per_sample() const { return this->bits_per_sample_; }
  uint8_t get_channels() const { return this->channels_; }
  uint32_t get_sample_rate() const { return this->sample_rate_; }

  /// @brief Convert bytes to duration in milliseconds.
  /// @param bytes Number of bytes to convert
  /// @return Duration in milliseconds that will store `bytes` bytes of audio. May round down for certain sample rates
  ///         or values of `bytes`.
  uint32_t bytes_to_ms(size_t bytes) const {
    return static_cast<uint32_t>(static_cast<uint64_t>(bytes) * 1000 /
                                 (this->sample_rate_ * this->bytes_per_sample_ * this->channels_));
  }

  /// @brief Convert bytes to frames.
  /// @param bytes Number of bytes to convert
  /// @return Audio frames that will store `bytes` bytes.
  uint32_t bytes_to_frames(size_t bytes) const { return (bytes / (this->bytes_per_sample_ * this->channels_)); }

  /// @brief Convert bytes to samples.
  /// @param bytes Number of bytes to convert
  /// @return Audio samples that will store `bytes` bytes.
  uint32_t bytes_to_samples(size_t bytes) const { return (bytes / this->bytes_per_sample_); }

  /// @brief Converts frames to bytes.
  /// @param frames Number of frames to convert.
  /// @return Number of bytes that will store `frames` frames of audio.
  size_t frames_to_bytes(uint32_t frames) const { return frames * this->bytes_per_sample_ * this->channels_; }

  /// @brief Converts samples to bytes.
  /// @param samples Number of samples to convert.
  /// @return Number of bytes that will store `samples` samples of audio.
  size_t samples_to_bytes(uint32_t samples) const { return samples * this->bytes_per_sample_; }

  /// @brief Converts duration to frames.
  /// @param ms Duration in milliseconds
  /// @return Audio frames that will store `ms` milliseconds of audio. May round down for certain sample rates.
  uint32_t ms_to_frames(uint32_t ms) const {
    return static_cast<uint32_t>(static_cast<uint64_t>(ms) * this->sample_rate_ / 1000);
  }

  /// @brief Converts duration to samples.
  /// @param ms Duration in milliseconds
  /// @return Audio samples that will store `ms` milliseconds of audio. May round down for certain sample rates.
  uint32_t ms_to_samples(uint32_t ms) const {
    return static_cast<uint32_t>(static_cast<uint64_t>(ms) * this->channels_ * this->sample_rate_ / 1000);
  }

  /// @brief Converts duration to bytes. May round down for certain sample rates.
  /// @param ms Duration in milliseconds
  /// @return Bytes that will store `ms` milliseconds of audio. May round down for certain sample rates.
  size_t ms_to_bytes(uint32_t ms) const {
    return static_cast<size_t>(static_cast<uint64_t>(ms) * this->bytes_per_sample_ * this->channels_ *
                               this->sample_rate_ / 1000);
  }

  /// @brief Computes the duration, in microseconds, the given amount of frames represents.
  /// @param frames Number of audio frames
  /// @return Duration in microseconds `frames` represents. May be slightly inaccurate due to integer division rounding
  ///         for certain sample rates.
  uint32_t frames_to_microseconds(uint32_t frames) const;

  /// @brief Computes the duration, in milliseconds, the given amount of frames represents. Avoids
  ///        accumulating rounding errors by updating `frames` with the remainder after converting.
  /// @param frames Pointer to uint32_t with the number of audio frames. Replaced with the remainder.
  /// @return Duration in milliseconds `frames` represents. Always less than or equal to the actual value due to
  ///         rounding.
  uint32_t frames_to_milliseconds_with_remainder(uint32_t *frames) const;

  // Class comparison operators
  bool operator==(const AudioStreamInfo &rhs) const;
  bool operator!=(const AudioStreamInfo &rhs) const { return !operator==(rhs); }

 protected:
  uint8_t bits_per_sample_;
  uint8_t channels_;
  uint32_t sample_rate_;

  // The greatest common divisor between 1000 ms = 1 second and the sample rate. Used to avoid accumulating error when
  // converting from frames to duration. Computed at construction.
  uint32_t ms_sample_rate_gcd_;

  // Conversion factor derived from the number of bits per sample. Assumes audio data is aligned to the byte. Computed
  // at construction.
  size_t bytes_per_sample_;
};
|
||||
|
||||
/// @brief Audio file formats. FLAC and MP3 are only declared when the matching USE_AUDIO_*_SUPPORT macro is
/// defined, so the numeric values of the enumerators after NONE depend on the build configuration.
enum class AudioFileType : uint8_t {
  NONE = 0,
#ifdef USE_AUDIO_FLAC_SUPPORT
  FLAC,
#endif
#ifdef USE_AUDIO_MP3_SUPPORT
  MP3,
#endif
  WAV,
};
|
||||
|
||||
struct AudioFile {
|
||||
const uint8_t *data;
|
||||
size_t length;
|
||||
AudioFileType file_type;
|
||||
};
|
||||
|
||||
/// @brief Helper function to convert file type to a const char string
|
||||
/// @param file_type Audio file type to convert
|
||||
/// @return const char pointer to the readable file type
|
||||
const char *audio_file_type_to_string(AudioFileType file_type);
|
||||
|
||||
/// @brief Scales Q15 fixed point audio samples. Scales in place if audio_samples == output_buffer.
|
||||
/// @param audio_samples PCM int16 audio samples
|
||||
/// @param output_buffer Buffer to store the scaled samples
|
||||
/// @param scale_factor Q15 fixed point scaling factor
|
||||
/// @param samples_to_scale Number of samples to scale
|
||||
void scale_audio_samples(const int16_t *audio_samples, int16_t *output_buffer, int16_t scale_factor,
|
||||
size_t samples_to_scale);
|
||||
|
||||
} // namespace audio
|
||||
} // namespace esphome
|
||||
|
||||
Reference in New Issue
Block a user