This commit is contained in:
2026-02-12 21:00:02 -08:00
parent cb1f2b0efd
commit 40714a3a68
1141 changed files with 1010880 additions and 2 deletions

View File

@@ -0,0 +1,43 @@
#pragma once
// All codecs, so that we can find any compile errors easily
// This only works, when you have all codecs installed!
#include "AudioTools/AudioCodecs/AudioEncoded.h"
#include "AudioTools/AudioCodecs/ContainerOgg.h"
#include "AudioTools/AudioCodecs/CodecOpus.h"
#include "AudioTools/AudioCodecs/CodecOpusOgg.h"
#include "AudioTools/AudioCodecs/CodecFLAC.h"
#include "AudioTools/AudioCodecs/CodecVorbis.h"
#include "AudioTools/AudioCodecs/CodecADPCM.h"
//#include "AudioTools/AudioCodecs/CodecCodec2.h"
#include "AudioTools/AudioCodecs/CodecGSM.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/AudioCodecs/CodecAACHelix.h"
#include "AudioTools/AudioCodecs/ContainerBinary.h"
#include "AudioTools/AudioCodecs/CodecADPCMXQ.h"
#include "AudioTools/AudioCodecs/CodecCopy.h"
#include "AudioTools/AudioCodecs/CodecHelix.h"
#include "AudioTools/AudioCodecs/CodecMP3LAME.h"
#include "AudioTools/AudioCodecs/CodecSBC.h"
#include "AudioTools/AudioCodecs/ContainerM4A.h"
#include "AudioTools/AudioCodecs/AudioFormat.h"
#include "AudioTools/AudioCodecs/CodecADTS.h"
#include "AudioTools/AudioCodecs/CodecILBC.h"
#include "AudioTools/AudioCodecs/CodecMP3MAD.h"
#include "AudioTools/AudioCodecs/CodecAACFAAD.h"
#include "AudioTools/AudioCodecs/CodecAPTX.h"
#include "AudioTools/AudioCodecs/CodecFloat.h"
#include "AudioTools/AudioCodecs/CodecL16.h"
#include "AudioTools/AudioCodecs/CodecWAV.h"
#include "AudioTools/AudioCodecs/CodecAACFDK.h"
#include "AudioTools/AudioCodecs/CodecBase64.h"
#include "AudioTools/AudioCodecs/CodecG722.h"
#include "AudioTools/AudioCodecs/CodecL8.h"
#include "AudioTools/AudioCodecs/CodecMTS.h"
#include "AudioTools/AudioCodecs/CodecWavIMA.h"
#include "AudioTools/AudioCodecs/CodecBasic.h"
#include "AudioTools/AudioCodecs/CodecG7xx.h"
#include "AudioTools/AudioCodecs/CodecLC3.h"
#include "AudioTools/AudioCodecs/ContainerAVI.h"
#include "AudioTools/AudioCodecs/StreamingDecoder.h"
//#include "AudioTools/AudioCodecs/CodecMP3Mini.h"

View File

@@ -0,0 +1,32 @@
#pragma once
/**
* @defgroup codecs Codecs
* @ingroup main
* @brief Audio Coder and Decoder
**/
/**
* @defgroup encoder Encoder
* @ingroup codecs
* @brief Audio Encoder
**/
/**
* @defgroup decoder Decoder
* @ingroup codecs
* @brief Audio Decoder
**/
// codecs that do not require any additional library
#include "AudioTools/AudioCodecs/CodecWAV.h"
#include "AudioTools/AudioCodecs/CodecCopy.h"
#include "AudioTools/AudioCodecs/CodecL8.h"
#include "AudioTools/AudioCodecs/CodecFloat.h"
#include "AudioTools/AudioCodecs/CodecBase64.h"
#include "AudioTools/AudioCodecs/CodecMTS.h"
#include "AudioTools/AudioCodecs/CodecADTS.h"
#include "AudioTools/AudioCodecs/CodecNetworkFormat.h"
#include "AudioTools/AudioCodecs/CodecFactory.h"
#include "AudioTools/AudioCodecs/StreamingDecoder.h"
#include "AudioTools/AudioCodecs/MultiDecoder.h"

View File

@@ -0,0 +1,166 @@
#pragma once
#include "AudioToolsConfig.h"
#include "AudioLogger.h"
#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
#include "AudioTools/CoreAudio/AudioTypes.h"
#include "AudioTools/CoreAudio/BaseStream.h"
#include "AudioTools/CoreAudio/AudioOutput.h"
namespace audio_tools {
/**
* @brief Decoding of encoded audio into PCM data
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AudioDecoder : public AudioWriter, public AudioInfoSource {
 public:
  AudioDecoder() = default;
  virtual ~AudioDecoder() = default;
  // Decoders own codec state; copying would alias that state, so forbid it.
  AudioDecoder(AudioDecoder const &) = delete;
  AudioDecoder &operator=(AudioDecoder const &) = delete;

  /// Provides the last determined audio format (channels, sample rate, bits)
  AudioInfo audioInfo() override { return info; }

  /// for most decoders this is not needed: stores the new format and notifies
  /// all registered listeners when it actually changed
  void setAudioInfo(AudioInfo from) override {
    TRACED();
    if (info != from) {
      info = from;
      notifyAudioChange(from);
    }
  }

  /// Defines where the decoded result is written to
  virtual void setOutput(AudioStream &out_stream) {
    Print *p_print = &out_stream;
    setOutput(*p_print);
    addNotifyAudioChange(out_stream);
  }

  /// Defines where the decoded result is written to
  virtual void setOutput(AudioOutput &out_stream) {
    Print *p_print = &out_stream;
    setOutput(*p_print);
    addNotifyAudioChange(out_stream);
  }

  /// Defines where the decoded result is written to
  virtual void setOutput(Print &out_stream) override { p_print = &out_stream; }

  /// Returns true to indicate that the decoding result is PCM data
  virtual bool isResultPCM() { return true; }

  /// Defines the format and starts the processing
  virtual bool begin(AudioInfo info) override {
    setAudioInfo(info);
    return begin();
  }

  bool begin() override { return true; }
  void end() override {}

  /// custom id to be used by application
  /// (initialized to 0: was previously uninitialized, so reading it before
  /// assignment was undefined behavior)
  int id = 0;

  /// Provides the output that was defined via setOutput() (may be nullptr)
  Print* getOutput(){
    return p_print;
  }

  /// Some decoders need e.g. a magic cookie to provide the relevant info for
  /// decoding; the default implementation just reports an error
  virtual bool setCodecConfig(const uint8_t* data, size_t len){
    LOGE("not implemented");
    return false;
  }

 protected:
  Print *p_print = nullptr;  // decoded PCM is written here
  AudioInfo info;            // current audio format
};
/**
* @brief Parent class for all container formats
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ContainerDecoder : public AudioDecoder {
  // NOTE(review): this private override is identical to the base
  // implementation; it only documents that container decoders produce PCM.
  bool isResultPCM() override { return true; }
};
/**
* @brief Encoding of PCM data
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AudioEncoder : public AudioWriter {
 public:
  AudioEncoder() = default;
  virtual ~AudioEncoder() = default;
  // Encoders own codec state; copying would alias that state, so forbid it.
  AudioEncoder(AudioEncoder const &) = delete;
  AudioEncoder &operator=(AudioEncoder const &) = delete;
  /// Provides the mime type of the encoded result
  virtual const char *mime() = 0;
  /// Defines the sample rate, number of channels and bits per sample
  void setAudioInfo(AudioInfo from) override { info = from; }
  /// Provides the currently defined input format
  AudioInfo audioInfo() override { return info; }
  /// Default output assignment (encoders may override to store Print reference)
  virtual void setOutput(Print &out_stream) override { (void)out_stream; }
  /// Optional rtsp function: provide the frame duration in microseconds
  /// (0 = not supported by this encoder)
  virtual uint32_t frameDurationUs() { return 0;};
  /// Optional rtsp function: provide samples per the frame
  /// (0 = not supported by this encoder)
  virtual uint16_t samplesPerFrame() { return 0;};

 protected:
  AudioInfo info;  // input PCM format
};
/// @brief AudioDecoder that supports an adjustable processing block size
class AudioDecoderExt : public AudioDecoder {
 public:
  /// Defines the number of bytes processed per decoding step
  virtual void setBlockSize(int blockSize) = 0;
};
/// @brief AudioEncoder that reports its processing block size
class AudioEncoderExt : public AudioEncoder {
 public:
  /// Provides the number of bytes processed per encoding step
  virtual int blockSize() = 0;
};
/**
* @brief Dummy no implmentation Codec. This is used so that we can initialize
* some pointers to decoders and encoders to make sure that they do not point to
* null.
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class CodecNOP : public AudioDecoder, public AudioEncoder {
 public:
  /// Provides the shared singleton instance
  static CodecNOP *instance() {
    static CodecNOP self;
    return &self;
  }

  virtual bool begin() { return true; }
  virtual void end() {}
  virtual void setOutput(Print &out_stream) {}
  virtual void addNotifyAudioChange(AudioInfoSupport &bi) {}
  virtual void setAudioInfo(AudioInfo info) {}

  /// Provides a default-constructed (empty) AudioInfo
  virtual AudioInfo audioInfo() {
    AudioInfo info;
    return info;
  }

  /// Always reports inactive so the NOP is easy to detect
  virtual operator bool() { return false; }
  virtual int readStream(Stream &in) { return 0; };

  /// Consumes and discards the data.
  /// FIX: the previous implementation cast away const and memset() the
  /// caller's input buffer to zero — undefined behavior for read-only
  /// buffers and a violation of the const contract. A NOP sink must not
  /// modify its input; it just pretends everything was written.
  virtual size_t write(const uint8_t *data, size_t len) {
    (void)data;
    return len;
  }

  virtual const char *mime() { return nullptr; }
};
} // namespace audio_tools

View File

@@ -0,0 +1,508 @@
#pragma once
#include "AudioCodecsBase.h"
#include "AudioToolsConfig.h"
#include "AudioTools/CoreAudio/AudioLogger.h"
#include "AudioTools/CoreAudio/AudioIO.h"
#include "AudioTools/CoreAudio/AudioOutput.h"
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "AudioTools/CoreAudio/AudioTypes.h"
namespace audio_tools {
/**
* @brief A more natural Print class to process encoded data (aac, wav,
* mp3...). Just define the output and the decoder and write the encoded
* data.
* @ingroup transform
* @author Phil Schatzmann
* @copyright GPLv3
*/
class EncodedAudioOutput : public ModifyingOutput {
 public:
  EncodedAudioOutput() { active = false; }

  EncodedAudioOutput(AudioDecoder *decoder) {
    setDecoder(decoder);
    active = false;
  }

  EncodedAudioOutput(AudioEncoder *encoder) {
    setEncoder(encoder);
    active = false;
  }

  EncodedAudioOutput(AudioStream *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(AudioOutput *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(Print *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(Print *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(AudioOutput *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(AudioStream *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(outputStream);
    active = false;
  }

  virtual ~EncodedAudioOutput() { end(); }

  /// Define object which need to be notified if the audio info is changing
  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    TRACEI();
    decoder_ptr->addNotifyAudioChange(bi);
  }

  /// Provides a default-constructed (empty) configuration
  AudioInfo defaultConfig() {
    AudioInfo cfg;
    return cfg;
  }

  /// Forwards a changed (and valid) format to both decoder and encoder
  virtual void setAudioInfo(AudioInfo newInfo) override {
    TRACED();
    if (this->cfg != newInfo && newInfo) {
      this->cfg = newInfo;
      decoder_ptr->setAudioInfo(cfg);
      encoder_ptr->setAudioInfo(cfg);
    }
  }

  /// Provide audio info from decoder if relevant
  AudioInfo audioInfo() override {
    // return info from decoder if available
    if (decoder_ptr != undefined && *decoder_ptr){
      AudioInfo info = decoder_ptr->audioInfo();
      if (info) return info;
    }
    return ModifyingOutput::audioInfo();
  }

  /// Defines the output: forwarded to whichever codec is defined
  void setOutput(Print *outputStream) {
    ptr_out = outputStream;
    if (decoder_ptr != undefined) {
      decoder_ptr->setOutput(*ptr_out);
    }
    if (encoder_ptr != undefined) {
      encoder_ptr->setOutput(*ptr_out);
    }
  }

  /// Defines the output; the stream is also registered for change notification
  void setOutput(AudioStream* out) {
    setOutput((Print*)out);
    to_notify = out;
  }

  /// Defines the output; the output is also registered for change notification
  void setOutput(AudioOutput*out){
    setOutput((Print*)out);
    to_notify = out;
  }

  void setOutput(Print &outputStream) override { setOutput(&outputStream); }
  void setOutput(AudioOutput &outputStream) { setOutput(&outputStream); }
  void setOutput(AudioStream &outputStream) { setOutput(&outputStream); }

  /// Defines the encoder; nullptr is replaced by the NOP codec so that the
  /// pointer is always safe to dereference
  void setEncoder(AudioEncoder *encoder) {
    if (encoder == nullptr) {
      encoder = undefined;
    }
    encoder_ptr = encoder;
    writer_ptr = encoder;
    if (ptr_out != nullptr) {
      encoder_ptr->setOutput(*ptr_out);
    }
  }

  AudioEncoder *getEncoder() { return encoder_ptr; }

  /// Defines the decoder; nullptr is replaced by the NOP codec so that the
  /// pointer is always safe to dereference
  void setDecoder(AudioDecoder *decoder) {
    if (decoder == nullptr) {
      decoder = undefined;
    }
    decoder_ptr = decoder;
    writer_ptr = decoder;
    if (ptr_out != nullptr) {
      decoder_ptr->setOutput(*ptr_out);
    }
  }

  AudioDecoder *getDecoder() { return decoder_ptr; }

  /// Starts the processing - sets the status to active
  bool begin() override {
    TRACED();
    if (!active) {
      TRACED();
      // Setup notification
      if (to_notify != nullptr) {
        decoder_ptr->addNotifyAudioChange(*to_notify);
      }
      // Get notifications from decoder
      decoder_ptr->addNotifyAudioChange(*this);
      // at least one real codec must be defined
      if (decoder_ptr != undefined || encoder_ptr != undefined) {
        active = true;
        // both are started: the undefined side is the NOP codec
        if (!decoder_ptr->begin(cfg)) active = false;
        if (!encoder_ptr->begin(cfg)) active = false;
      } else {
        LOGW("no decoder or encoder defined");
      }
    }
    return active;
  }

  /// Starts the processing - sets the status to active
  virtual bool begin(AudioInfo newInfo) override {
    setAudioInfo(newInfo);
    return begin();
  }

  /// Ends the processing
  void end() override {
    if (active) {
      TRACEI();
      decoder_ptr->end();
      encoder_ptr->end();
      active = false;
    }
  }

  /// encoder decode the data: forwards to the active writer (codec)
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (len == 0) {
      // LOGI("write: %d", 0);
      return 0;
    }
    LOGD("EncodedAudioOutput::write: %d", (int)len);
    if (writer_ptr == nullptr || data == nullptr) {
      LOGE("NPE");
      return 0;
    }
    // optional backpressure: refuse data when the sink has no room
    if (check_available_for_write && availableForWrite() == 0) {
      return 0;
    }
    size_t result = writer_ptr->write(data, len);
    LOGD("EncodedAudioOutput::write: %d -> %d", (int)len, (int)result);
    return result;
  }

  int availableForWrite() override {
    if (!check_available_for_write) return frame_size;
    return min(ptr_out->availableForWrite(), frame_size);
  }

  /// Returns true if status is active and we still have data to be processed
  operator bool() override { return active; }

  /// Provides the initialized decoder
  AudioDecoder &decoder() { return *decoder_ptr; }

  /// Provides the initialized encoder
  AudioEncoder &encoder() { return *encoder_ptr; }

  /// Is Available for Write check activated ?
  bool isCheckAvailableForWrite() { return check_available_for_write; }

  /// defines the size of the decoded frame in bytes
  void setFrameSize(int size) { frame_size = size; }

  // NOTE(review): writer_ptr and to_notify are NOT copied here — verify this
  // is intentional; `is_active` is not declared in this class, so it is
  // presumably inherited from a base class — TODO confirm.
  EncodedAudioOutput& operator=(EncodedAudioOutput const& src) {
    decoder_ptr = src.decoder_ptr;
    encoder_ptr = src.encoder_ptr;
    ptr_out = src.ptr_out;
    active = src.active;
    check_available_for_write = src.check_available_for_write;
    frame_size = src.frame_size;
    cfg = src.cfg;
    is_active = src.is_active;
    return *this;
  }

 protected:
  // AudioInfo info;
  CodecNOP* undefined = CodecNOP::instance();  // safe placeholder codec
  AudioDecoder *decoder_ptr = undefined; // decoder
  AudioEncoder *encoder_ptr = undefined; // encoder
  AudioWriter *writer_ptr = nullptr;     // the codec that write() feeds
  Print *ptr_out = nullptr;              // final output sink
  AudioInfoSupport *to_notify = nullptr; // registered change listener
  bool active = false;
  bool check_available_for_write = false;
  int frame_size = DEFAULT_BUFFER_SIZE;
};
/// @brief Legacy alias for EncodedAudioOutput
/// @ingroup codecs
using EncodedAudioPrint = EncodedAudioOutput;
/**
* @brief A more natural Stream class to process encoded data (aac, wav,
* mp3...) which also supports the decoding by calling readBytes().
* @ingroup transform
* @author Phil Schatzmann
* @copyright GPLv3
*/
class EncodedAudioStream : public ReformatBaseStream {
 public:
  EncodedAudioStream() = default;

  EncodedAudioStream(AudioStream *ioStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setStream(*ioStream);
  }

  EncodedAudioStream(Stream *ioStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setStream(*ioStream);
  }

  EncodedAudioStream(AudioOutput *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(*outputStream);
  }

  EncodedAudioStream(Print *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(*outputStream);
  }

  EncodedAudioStream(Print *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(*outputStream);
  }

  EncodedAudioStream(AudioDecoder *decoder) { setDecoder(decoder); }
  EncodedAudioStream(AudioEncoder *encoder) { setEncoder(encoder); }

  virtual ~EncodedAudioStream() { end(); }

  // codec definition is delegated to the internal EncodedAudioOutput
  void setEncoder(AudioEncoder *encoder) { enc_out.setEncoder(encoder); }
  void setDecoder(AudioDecoder *decoder) { enc_out.setDecoder(decoder); }
  AudioEncoder *getEncoder() { return enc_out.getEncoder(); }
  AudioDecoder *getDecoder() { return enc_out.getDecoder(); }

  /// Provides the initialized decoder
  AudioDecoder &decoder() { return *getDecoder(); }

  /// Provides the initialized encoder
  AudioEncoder &encoder() { return *getEncoder(); }

  void setStream(Stream *stream) { setStream(*stream); }
  void setStream(AudioStream *stream) { setStream(*stream); }
  void setOutput(AudioOutput *stream) { setOutput(*stream); }
  void setOutput(Print *stream) { setOutput(*stream); }

  void setStream(AudioStream &stream) override {
    ReformatBaseStream::setStream(stream);
    enc_out.setOutput(&stream);
  }

  void setStream(Stream &stream) override {
    ReformatBaseStream::setStream(stream);
    enc_out.setOutput(&stream);
  }

  void setOutput(AudioOutput &stream) override {
    ReformatBaseStream::setOutput(stream);
    enc_out.setOutput(&stream);
  }

  void setOutput(Print &out) override {
    ReformatBaseStream::setOutput(out);
    enc_out.setOutput(&out);
  }

  /// Provides a default-constructed (empty) configuration
  AudioInfo defaultConfig() {
    AudioInfo ai;
    return ai;
  }

  bool begin(AudioInfo info) {
    setAudioInfo(info);
    return begin();
  }

  bool begin() override {
    // is_output_notify = false;
    // order matters: the reader must be set up before the base is started
    setupReader();
    ReformatBaseStream::begin();
    enc_out.addNotifyAudioChange(*this);
    return enc_out.begin(audioInfo());
  }

  void end() override {
    enc_out.end();
    reader.end();
  }

  int availableForWrite() override { return enc_out.availableForWrite(); }

  size_t write(const uint8_t *data, size_t len) override {
    // addNotifyOnFirstWrite();
    return enc_out.write(data, len);
  }

  /// decoded data is provided via the reader set up in begin()
  size_t readBytes(uint8_t *data, size_t len) override {
    return reader.readBytes(data, len);
  }

  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    enc_out.addNotifyAudioChange(bi);
  }

  /// approx compression factor: e.g. mp3 is around 4
  float getByteFactor() override { return byte_factor; }
  void setByteFactor(float factor) { byte_factor = factor; }

  /// defines the size of the decoded frame in bytes
  void setFrameSize(int size) { enc_out.setFrameSize(size); }

  // NOTE(review): p_stream, p_print and info are presumably base-class
  // members — TODO confirm they cover the full copyable state.
  EncodedAudioStream& operator=(EncodedAudioStream const& src) {
    enc_out = src.enc_out;
    byte_factor = src.byte_factor;
    p_stream = src.p_stream;
    p_print = src.p_print;
    info = src.info;
    return *this;
  };

  AudioInfo audioInfo() override {
    return enc_out.audioInfo();;
  }

  void setAudioInfo(AudioInfo newInfo) override {
    ReformatBaseStream::setAudioInfo(newInfo);
    enc_out.setAudioInfo(newInfo);
  }

 protected:
  EncodedAudioOutput enc_out;  // does the actual encoding/decoding
  float byte_factor = 2.0f;    // assumed compression ratio
};
/**
* @brief Adapter class which lets an AudioWriter behave like a Print
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
class AudioWriterToAudioOutput : public AudioOutputAdapter {
 public:
  /// Defines the writer to which all write() calls are forwarded
  void setWriter(AudioWriter *writer) { p_writer = writer; }

  /// Forwards the data to the writer.
  /// FIX: p_writer defaults to nullptr, so the original implementation
  /// crashed (null dereference) when write() was called before setWriter();
  /// now it safely reports 0 bytes written instead.
  size_t write(const uint8_t *data, size_t len) {
    if (p_writer == nullptr) return 0;
    return p_writer->write(data, len);
  };

 protected:
  AudioWriter *p_writer = nullptr;
};
/**
* @brief ContainerTarget: forwards requests to both the output and the
* encoder/decoder and sets up the output chain for Containers. We also
* manage the proper sequence of the output classes
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ContainerTarget {
 public:
  virtual bool begin() = 0;
  virtual void end() = 0;

  /// Forwards a changed (and valid) format to both registered writers
  virtual void setAudioInfo(AudioInfo info) {
    if (this->info != info && info.channels != 0 && info.sample_rate != 0) {
      this->info = info;
      if (p_writer1 != nullptr) p_writer1->setAudioInfo(info);
      if (p_writer2 != nullptr) p_writer2->setAudioInfo(info);
    }
  }

  virtual size_t write(uint8_t *data, size_t size) = 0;

 protected:
  AudioInfo info;                    // last accepted format
  AudioWriter *p_writer1 = nullptr;  // container writer
  AudioWriter *p_writer2 = nullptr;  // codec writer (optional)
  // NOTE(review): this member is shadowed by ContainerTargetPrint::print2
  // and appears to be unused — verify whether it can be removed.
  AudioWriterToAudioOutput print2;
  bool active = false;
};
class ContainerTargetPrint : public ContainerTarget {
 public:
  /// Defines container writer (writer1), codec writer (writer2) and final output
  void setupOutput(AudioWriter *writer1, AudioWriter *writer2, Print &print) {
    p_print = &print;
    p_writer1 = writer1;
    p_writer2 = writer2;
    print2.setWriter(p_writer2);
  }

  /// Output chain without a second (codec) writer
  void setupOutput(AudioWriter *writer1, Print &print) {
    p_print = &print;
    p_writer1 = writer1;
  }

  /// Wires up the chain and starts the writers; assumes setupOutput() was
  /// called first (p_writer1 is dereferenced without a null check)
  virtual bool begin() {
    if (!active) {
      active = true;
      if (p_writer2 != nullptr) {
        // chain: writer1 -> print2 (adapter) -> writer2 -> p_print
        p_writer1->setOutput(print2);
        p_writer2->setOutput(*p_print);
        p_writer1->begin();
        p_writer2->begin();
      } else {
        // chain: writer1 -> p_print
        p_writer1->setOutput(*p_print);
        p_writer1->begin();
      }
    }
    return true;
  }

  virtual void end() {
    if (active) {
      if (p_writer1 != nullptr) p_writer1->end();
      if (p_writer2 != nullptr) p_writer2->end();
    }
    active = false;
  }

  /// Data enters the chain via the first (container) writer
  virtual size_t write(uint8_t *data, size_t size) {
    TRACED();
    return p_writer1->write(data, size);
  }

 protected:
  Print *p_print = nullptr;
  // NOTE(review): shadows ContainerTarget::print2 — the chain set up in
  // setupOutput()/begin() uses this instance; verify the base member is dead.
  AudioWriterToAudioOutput print2;
};
} // namespace audio_tools

View File

@@ -0,0 +1,292 @@
/**
* @file AudioFormat.h
* @author Phil Schatzmann
* @brief WAV Audio Formats used by Microsoft e.g. in AVI video files
* @version 0.1
* @date 2023-05-06
*
* @copyright Copyright (c) 2023
*
*/
#pragma once
namespace audio_tools {
/**
* @brief Audio format codes used by Microsoft e.g. in avi or wav files
* @ingroup video
*/
enum class AudioFormat : uint16_t {
  // Values correspond to the registered Microsoft WAVE format tags
  // (wFormatTag in WAVEFORMATEX); tags not used by this library are
  // kept commented out for reference.
  UNKNOWN = 0x0000, /* Microsoft Corporation */
  PCM = 0x0001,
  ADPCM = 0x0002, /* Microsoft Corporation */
  IEEE_FLOAT = 0x0003, /* Microsoft Corporation */
  // VSELP = 0x0004, /* Compaq Computer Corp. */
  // IBM_CVSD = 0x0005, /* IBM Corporation */
  ALAW = 0x0006, /* Microsoft Corporation */
  MULAW = 0x0007, /* Microsoft Corporation */
  // DTS = 0x0008, /* Microsoft Corporation */
  // DRM = 0x0009, /* Microsoft Corporation */
  // WMAVOICE9 = 0x000A, /* Microsoft Corporation */
  // WMAVOICE10 = 0x000B, /* Microsoft Corporation */
  OKI_ADPCM = 0x0010, /* OKI */
  DVI_ADPCM = 0x0011, /* Intel Corporation */
  //IMA_ADPCM(DVI_ADPCM), /* Intel Corporation */
  MEDIASPACE_ADPCM = 0x0012, /* Videologic */
  SIERRA_ADPCM = 0x0013, /* Sierra Semiconductor Corp */
  G723_ADPCM = 0x0014, /* Antex Electronics Corporation */
  // DIGISTD = 0x0015, /* DSP Solutions, Inc. */
  // DIGIFIX = 0x0016, /* DSP Solutions, Inc. */
  DIALOGIC_OKI_ADPCM = 0x0017, /* Dialogic Corporation */
  MEDIAVISION_ADPCM = 0x0018, /* Media Vision, Inc. */
  // CU_CODEC = 0x0019, /* Hewlett-Packard Company */
  // HP_DYN_VOICE = 0x001A, /* Hewlett-Packard Company */
  YAMAHA_ADPCM = 0x0020, /* Yamaha Corporation of America */
  // SONARC = 0x0021, /* Speech Compression */
  // DSPGROUP_TRUESPEECH = 0x0022, /* DSP Group, Inc */
  // ECHOSC1 = 0x0023, /* Echo Speech Corporation */
  // AUDIOFILE_AF36 = 0x0024, /* Virtual Music, Inc. */
  // APTX = 0x0025, /* Audio Processing Technology */
  // AUDIOFILE_AF10 = 0x0026, /* Virtual Music, Inc. */
  // PROSODY_1612 = 0x0027, /* Aculab plc */
  // LRC = 0x0028, /* Merging Technologies S.A. */
  // DOLBY_AC2 = 0x0030, /* Dolby Laboratories */
  // GSM610 = 0x0031, /* Microsoft Corporation */
  // MSNAUDIO = 0x0032, /* Microsoft Corporation */
  ANTEX_ADPCME = 0x0033, /* Antex Electronics Corporation */
  // CONTROL_RES_VQLPC = 0x0034, /* Control Resources Limited */
  // DIGIREAL = 0x0035, /* DSP Solutions, Inc. */
  DIGIADPCM = 0x0036, /* DSP Solutions, Inc. */
  // CONTROL_RES_CR10 = 0x0037, /* Control Resources Limited */
  NMS_VBXADPCM = 0x0038, /* Natural MicroSystems */
  CS_IMAADPCM = 0x0039, /* Crystal Semiconductor IMA ADPCM */
  // ECHOSC3 = 0x003A, /* Echo Speech Corporation */
  ROCKWELL_ADPCM = 0x003B, /* Rockwell International */
  // ROCKWELL_DIGITALK = 0x003C, /* Rockwell International */
  // XEBEC = 0x003D, /* Xebec Multimedia Solutions Limited */
  G721_ADPCM = 0x0040, /* Antex Electronics Corporation */
  // G728_CELP = 0x0041, /* Antex Electronics Corporation */
  // MSG723 = 0x0042, /* Microsoft Corporation */
  // INTEL_G723_1 = 0x0043, /* Intel Corp. */
  // INTEL_G729 = 0x0044, /* Intel Corp. */
  // SHARP_G726 = 0x0045, /* Sharp */
  // MPEG = 0x0050, /* Microsoft Corporation */
  // RT24 = 0x0052, /* InSoft, Inc. */
  // PAC = 0x0053, /* InSoft, Inc. */
  // MPEGLAYER3 = 0x0055, /* ISO/MPEG Layer3 Format Tag */
  // LUCENT_G723 = 0x0059, /* Lucent Technologies */
  // CIRRUS = 0x0060, /* Cirrus Logic */
  // ESPCM = 0x0061, /* ESS Technology */
  // VOXWARE = 0x0062, /* Voxware Inc */
  // CANOPUS_ATRAC = 0x0063, /* Canopus, co., Ltd. */
  G726_ADPCM = 0x0064, /* APICOM */
  G722_ADPCM = 0x0065, /* APICOM */
  // DSAT = 0x0066, /* Microsoft Corporation */
  // DSAT_DISPLAY = 0x0067, /* Microsoft Corporation */
  // VOXWARE_BYTE_ALIGNED = 0x0069, /* Voxware Inc */
  // VOXWARE_AC8 = 0x0070, /* Voxware Inc */
  // VOXWARE_AC10 = 0x0071, /* Voxware Inc */
  // VOXWARE_AC16 = 0x0072, /* Voxware Inc */
  // VOXWARE_AC20 = 0x0073, /* Voxware Inc */
  // VOXWARE_RT24 = 0x0074, /* Voxware Inc */
  // VOXWARE_RT29 = 0x0075, /* Voxware Inc */
  // VOXWARE_RT29HW = 0x0076, /* Voxware Inc */
  // VOXWARE_VR12 = 0x0077, /* Voxware Inc */
  // VOXWARE_VR18 = 0x0078, /* Voxware Inc */
  // VOXWARE_TQ40 = 0x0079, /* Voxware Inc */
  // VOXWARE_SC3 = 0x007A, /* Voxware Inc */
  // VOXWARE_SC3_1 = 0x007B, /* Voxware Inc */
  // SOFTSOUND = 0x0080, /* Softsound, Ltd. */
  // VOXWARE_TQ60 = 0x0081, /* Voxware Inc */
  // MSRT24 = 0x0082, /* Microsoft Corporation */
  // G729A = 0x0083, /* AT&T Labs, Inc. */
  // MVI_MVI2 = 0x0084, /* Motion Pixels */
  // DF_G726 = 0x0085, /* DataFusion Systems (Pty) (Ltd) */
  // DF_GSM610 = 0x0086, /* DataFusion Systems (Pty) (Ltd) */
  // ISIAUDIO = 0x0088, /* Iterated Systems, Inc. */
  // ONLIVE = 0x0089, /* OnLive! Technologies, Inc. */
  // MULTITUDE_FT_SX20 = 0x008A, /* Multitude Inc. */
  INFOCOM_ITS_G721_ADPCM = 0x008B, /* Infocom */
  // CONVEDIA_G729 = 0x008C, /* Convedia Corp. */
  // CONGRUENCY = 0x008D, /* Congruency Inc. */
  // SBC24 = 0x0091, /* Siemens Business Communications Sys */
  // DOLBY_AC3_SPDIF = 0x0092, /* Sonic Foundry */
  // MEDIASONIC_G723 = 0x0093, /* MediaSonic */
  // PROSODY_8KBPS = 0x0094, /* Aculab plc */
  ZYXEL_ADPCM = 0x0097, /* ZyXEL Communications, Inc. */
  // PHILIPS_LPCBB = 0x0098, /* Philips Speech Processing */
  // PACKED = 0x0099, /* Studer Professional Audio AG */
  // MALDEN_PHONYTALK = 0x00A0, /* Malden Electronics Ltd. */
  // RACAL_RECORDER_GSM = 0x00A1, /* Racal recorders */
  // RACAL_RECORDER_G720_A = 0x00A2, /* Racal recorders */
  // RACAL_RECORDER_G723_1 = 0x00A3, /* Racal recorders */
  // RACAL_RECORDER_TETRA_ACELP = 0x00A4, /* Racal recorders */
  // NEC_AAC = 0x00B0, /* NEC Corp. */
  // RAW_AAC1 = 0x00FF, /* For Raw AAC, with format block
  RHETOREX_ADPCM = 0x0100, /* Rhetorex Inc. */
  // IRAT = 0x0101, /* BeCubed Software Inc. */
  // VIVO_G723 = 0x0111, /* Vivo Software */
  // VIVO_SIREN = 0x0112, /* Vivo Software */
  // PHILIPS_CELP = 0x0120, /* Philips Speech Processing */
  // PHILIPS_GRUNDIG = 0x0121, /* Philips Speech Processing */
  // DIGITAL_G723 = 0x0123, /* Digital Equipment Corporation */
  SANYO_LD_ADPCM = 0x0125, /* Sanyo Electric Co., Ltd. */
  // SIPROLAB_ACEPLNET = 0x0130, /* Sipro Lab Telecom Inc. */
  // SIPROLAB_ACELP4800 = 0x0131, /* Sipro Lab Telecom Inc. */
  // SIPROLAB_ACELP8V3 = 0x0132, /* Sipro Lab Telecom Inc. */
  // SIPROLAB_G729 = 0x0133, /* Sipro Lab Telecom Inc. */
  // SIPROLAB_G729A = 0x0134, /* Sipro Lab Telecom Inc. */
  // SIPROLAB_KELVIN = 0x0135, /* Sipro Lab Telecom Inc. */
  // VOICEAGE_AMR = 0x0136, /* VoiceAge Corp. */
  G726ADPCM = 0x0140, /* Dictaphone Corporation */
  // DICTAPHONE_CELP68 = 0x0141, /* Dictaphone Corporation */
  // DICTAPHONE_CELP54 = 0x0142, /* Dictaphone Corporation */
  // QUALCOMM_PUREVOICE = 0x0150, /* Qualcomm, Inc. */
  // QUALCOMM_HALFRATE = 0x0151, /* Qualcomm, Inc. */
  // TUBGSM = 0x0155, /* Ring Zero Systems, Inc. */
  // MSAUDIO1 = 0x0160, /* Microsoft Corporation */
  // WMAUDIO2 = 0x0161, /* Microsoft Corporation */
  // WMAUDIO3 = 0x0162, /* Microsoft Corporation */
  // WMAUDIO_LOSSLESS = 0x0163, /* Microsoft Corporation */
  // WMASPDIF = 0x0164, /* Microsoft Corporation */
  UNISYS_NAP_ADPCM = 0x0170, /* Unisys Corp. */
  // UNISYS_NAP_ULAW = 0x0171, /* Unisys Corp. */
  // UNISYS_NAP_ALAW = 0x0172, /* Unisys Corp. */
  // UNISYS_NAP_16K = 0x0173, /* Unisys Corp. */
  // SYCOM_ACM_SYC008 = 0x0174, /* SyCom Technologies */
  // SYCOM_ACM_SYC701_G726L = 0x0175, /* SyCom Technologies */
  // SYCOM_ACM_SYC701_CELP54 = 0x0176, /* SyCom Technologies */
  // SYCOM_ACM_SYC701_CELP68 = 0x0177, /* SyCom Technologies */
  // KNOWLEDGE_ADVENTURE_ADPCM = 0x0178, /* Knowledge Adventure, Inc.*/
  // FRAUNHOFER_IIS_MPEG2_AAC = 0x0180, /* Fraunhofer IIS */
  // DTS_DS = 0x0190, /* Digital Theatre Systems, Inc. */
  // CREATIVE_ADPCM = 0x0200, /* Creative Labs, Inc */
  // CREATIVE_FASTSPEECH8 = 0x0202, /* Creative Labs, Inc */
  // CREATIVE_FASTSPEECH10 = 0x0203, /* Creative Labs, Inc */
  // UHER_ADPCM = 0x0210, /* UHER informatic GmbH */
  // ULEAD_DV_AUDIO = 0x0215, /* Ulead Systems, Inc. */
  // ULEAD_DV_AUDIO_1 = 0x0216, /* Ulead Systems, Inc. */
  // QUARTERDECK = 0x0220, /* Quarterdeck Corporation */
  // ILINK_VC = 0x0230, /* I-link Worldwide */
  // RAW_SPORT = 0x0240, /* Aureal Semiconductor */
  // ESST_AC3 = 0x0241, /* ESS Technology, Inc. */
  // GENERIC_PASSTHRU = 0x0249,
  // IPI_HSX = 0x0250, /* Interactive Products, Inc. */
  // IPI_RPELP = 0x0251, /* Interactive Products, Inc. */
  // CS2 = 0x0260, /* Consistent Software */
  // SONY_SCX = 0x0270, /* Sony Corp. */
  // SONY_SCY = 0x0271, /* Sony Corp. */
  // SONY_ATRAC3 = 0x0272, /* Sony Corp. */
  // SONY_SPC = 0x0273, /* Sony Corp. */
  // TELUM_AUDIO = 0x0280, /* Telum Inc. */
  // TELUM_IA_AUDIO = 0x0281, /* Telum Inc. */
  // NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285, /* Norcom Electronics Corp.*/
  // FM_TOWNS_SND = 0x0300, /* Fujitsu Corp. */
  // MICRONAS = 0x0350, /* Micronas Semiconductors, Inc. */
  // MICRONAS_CELP833 = 0x0351, /* Micronas Semiconductors, Inc. */
  // BTV_DIGITAL = 0x0400, /* Brooktree Corporation */
  // INTEL_MUSIC_CODER = 0x0401, /* Intel Corp. */
  // INDEO_AUDIO = 0x0402, /* Ligo */
  // QDESIGN_MUSIC = 0x0450, /* QDeign Corporation */
  // ON2_VP7_AUDIO = 0x0500, /* On2 echnologies */
  // ON2_VP6_AUDIO = 0x0501, /* On2 Tchnologies */
  // VME_VMPCM = 0x0680, /* AT&T Labs,Inc. */
  // TPC = 0x0681, /* AT&T Labs, Inc. *
  // LIGHTWAVE_LOSSLESS = 0x08AE, /* Clerjump */
  // OLIGSM = 0x1000, /* Ing C. Olivetti C., S.p.A. */
  // OLIADPCM = 0x1001, /* Ing C. Olivetti& C., S.p.A.*/
  // OLICELP = 0x1002, /* Ing C. Olivetti &C., S.p.A. */
  // OLISBC = 0x1003, /* Ing C. Olivetti & C, S.p.A. */
  // OLIOPR = 0x1004, /* Ing C. Olivetti & C. S.p.A. */
  // LH_CODEC = 0x1100, /* Lernout & Hauspie *
  // LH_CODEC_CELP = 0x1101, /* Lernout & Hauspie *
  // LH_CODEC_SBC8 = 0x1102, /* Lernout & Hauspie */
  // LH_CODEC_SBC12 = 0x1103, /* Lernout & Hauspie */
  // LH_CODEC_SBC16 = 0x1104, /* Lernout & Hauspie */
  // NORRIS = 0x1400, /* Norris Communications, Inc. */
  // ISIAUDIO_2 = 0x1401, /* ISIAudio */
  // SOUNDSPACE_MUSICOMPRESS = 0x1500, /* AT&T Labs, Inc. */
  // MPEG_ADTS_AAC = 0x1600, /* Microsoft Corporation */
  // MPEG_RAW_AAC = 0x1601, /* Microsoft Corporation */
  // MPEG_LOAS = 0x1602, /* Microsoft Corporation (MPEG-4 Audio Transport Streams (LOAS/LATM)
  // NOKIA_MPEG_ADTS_AAC = 0x1608, /* Microsoft Corporation */
  // NOKIA_MPEG_RAW_AAC = 0x1609, /* Microsoft Corporation */
  // VODAFONE_MPEG_ADTS_AAC = 0x160A, /* Microsoft Corporation */
  // VODAFONE_MPEG_RAW_AAC = 0x160B, /* Microsoft Corporation */
  // MPEG_HEAAC =
  // 0x1610, /* Microsoft Corporation (MPEG-2 AAC or MPEG-4 HE-AAC
  // v1/v2 streams with any payload (ADTS, ADIF, LOAS/LATM, RAW). Format block
  // icludes MP4 AudioSpecificConfig() -- see HEAACWAVEFORMAT below
  // */
  // VOXWARE_RT24_SPEECH = 0x181C, /* Voxware Inc. */
  // SONICFOUNDRY_LOSSLESS = 0x1971, /* Sonic Foundry */
  // INNINGS_TELECOM_ADPCM = 0x1979, /* Innings Telecom Inc. */
  // LUCENT_SX8300P = 0x1C07, /* Lucent Technologies */
  // LUCENT_SX5363S = 0x1C0C, /* Lucent Technologies */
  // CUSEEME = 0x1F03, /* CUSeeMe */
  // NTCSOFT_ALF2CM_ACM = 0x1FC4, /* NTCSoft */
  // DVM = 0x2000, /* FAST Multimedia AG */
  // DTS2 = 0x2001,
  // MAKEAVIS = 0x3313,
  // DIVIO_MPEG4_AAC = 0x4143, /* Divio, Inc. */
  // NOKIA_ADAPTIVE_MULTIRATE = 0x4201, /* Nokia */
  // DIVIO_G726 = 0x4243, /* Divio, Inc. */
  // LEAD_SPEECH = 0x434C, /* LEAD Technologies */
  // LEAD_VORBIS = 0x564C, /* LEAD Technologies */
  // WAVPACK_AUDIO = 0x5756, /* xiph.org */
  // ALAC = 0x6C61, /* Apple Lossless */
  // OGG_VORBIS_MODE_1 = 0x674F, /* Ogg Vorbis */
  // OGG_VORBIS_MODE_2 = 0x6750, /* Ogg Vorbis */
  // OGG_VORBIS_MODE_3 = 0x6751, /* Ogg Vorbis */
  // OGG_VORBIS_MODE_1_PLUS = 0x676F, /* Ogg Vorbis */
  // OGG_VORBIS_MODE_2_PLUS = 0x6770, /* Ogg Vorbis */
  // OGG_VORBIS_MODE_3_PLUS = 0x6771, /* Ogg Vorbis */
  // F3COM_NBX = 0x7000, /* 3COM Corp. */
  // OPUS = 0x704F, /* Opus */
  // FAAD_AAC = 0x706D,
  // AMR_NB = 0x7361, /* AMR Narrowband */
  // AMR_WB = 0x7362, /* AMR Wideband */
  // AMR_WP = 0x7363, /* AMR Wideband Plus */
  // GSM_AMR_CBR = 0x7A21, /* GSMA/3GPP */
  // GSM_AMR_VBR_SID = 0x7A22, /* GSMA/3GPP */
  // COMVERSE_INFOSYS_G723_1 = 0xA100, /* Comverse Infosys */
  // COMVERSE_INFOSYS_AVQSBC = 0xA101, /* Comverse Infosys */
  // COMVERSE_INFOSYS_SBC = 0xA102, /* Comverse Infosys */
  // SYMBOL_G729_A = 0xA103, /* Symbol Technologies */
  // VOICEAGE_AMR_WB = 0xA104, /* VoiceAge Corp. */
  // INGENIENT_G726 = 0xA105, /* Ingenient Technologies, Inc. */
  // MPEG4_AAC = 0xA106, /* ISO/MPEG-4 */
  // ENCORE_G726 = 0xA107, /* Encore Software */
  // ZOLL_ASAO = 0xA108, /* ZOLL Medical Corp. */
  // SPEEX_VOICE = 0xA109, /* xiph.org */
  // VIANIX_MASC = 0xA10A, /* Vianix LLC */
  // WM9_SPECTRUM_ANALYZER = 0xA10B, /* Microsoft */
  // WMF_SPECTRUM_ANAYZER = 0xA10C, /* Microsoft */
  // GSM_610 = 0xA0D,
  // GSM_620 = 0xA1E,
  // GSM_660 = 0xA10,
  // GSM_690 = 0xA110,
  // GSM_ADAPTIVE_MULTIRATE_WB = 0xA111,
  // POLYCOM_G722 = 0xA112 ,/* Polycom */
  // POLYCOM_G728 = 0xA113, /* Polycom */
  // POLYCOM_G729_A = 0xA114, /* Polycom */
  // POLYCOM_SIREN = 0xA115, /* Polycom */
  // GLOBAL_IP_ILBC = 0xA116, /* Global IP */
  // RADIOTIME_TIME_SHIFT_RADIO = 0xA117, /* RadioTime */
  // NICE_ACA = 0xA118, /* Nice Systems */
  // NICE_ADPCM = 0xA119, /* Nice Systems */
  // VOCORD_G721 = 0xA11A, /* Vocord Telecom */
  // VOCORD_G726 = 0xA11B, /* Vocord Telecom */
  // VOCORD_G722_1 = 0xA11C, /* Vocord Telecom */
  // VOCORD_G728 = 0xA11D, /* Vocord Telecom */
  // VOCORD_G729 = 0xA11E, /* Vocord Telecom */
  // VOCORD_G729_A = 0xA11F, /* Vocord Telecom */
  // VOCORD_G723_1 = 0xA120, /* Vocord Telecom */
  // VOCORD_LBC = 0xA121, /* Vocord Telecom */
  // NICE_G728 = 0xA122, /* Nice Systems */
  // FRACE_TELECOM_G729 = 0xA123, /* France Telecom */
  // CODIAN = 0xA124, /* CODIAN */
  // FLAC = 0xF1AC, /* flac.sourceforge.net */
};
}

View File

@@ -0,0 +1,191 @@
#pragma once
// #include "Stream.h"
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "faad.h"
#ifndef FAAD_INPUT_BUFFER_SIZE
#define FAAD_INPUT_BUFFER_SIZE 1024*2
#endif
// to prevent Decoding error: Maximum number of bitstream elements exceeded
#ifndef FAAD_UNDERFLOW_LIMIT
#define FAAD_UNDERFLOW_LIMIT 500
#endif
namespace audio_tools {
/**
* @brief AAC Decoder using faad: https://github.com/pschatzmann/arduino-libfaad
* This needs a stack of around 60000 and you need to make sure that memory is allocated on PSRAM.
* See https://www.pschatzmann.ch/home/2023/09/12/arduino-audio-tools-faat-aac-decoder/
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AACDecoderFAAD : public AudioDecoder {
public:
AACDecoderFAAD() {
info.channels = 2;
info.sample_rate = 44100;
info.bits_per_sample = 16;
};
~AACDecoderFAAD() { end(); }
/// Starts the processing
bool begin() {
TRACED();
unsigned long cap = NeAACDecGetCapabilities();
// Check if decoder has the needed capabilities
if (!cap & FIXED_POINT_CAP) {
LOGE("Fixed Point");
return false;
}
// Open the library
hAac = NeAACDecOpen();
// // Get the current config
conf = NeAACDecGetCurrentConfiguration(hAac);
// // If needed change some of the values in conf
conf->outputFormat = FAAD_FMT_16BIT;
//conf->defObjectType = LC;
conf->defSampleRate = info.sample_rate;
conf->downMatrix = true; // 5.1 channel downmatrixed to 2 channel
conf->useOldADTSFormat = false;
conf->dontUpSampleImplicitSBR = false;
// Set the new configuration
if (!NeAACDecSetConfiguration(hAac, conf)) {
LOGE("NeAACDecSetConfiguration");
return false;
}
// setup input buffer
if (input_buffer.size() != buffer_size_input){
input_buffer.resize(buffer_size_input);
}
is_init = false;
return true;
}
/// Releases the reserved memory
virtual void end() {
TRACED();
flush();
if (hAac != nullptr) {
NeAACDecClose(hAac);
hAac = nullptr;
}
}
/// Write AAC data to decoder
size_t write(const uint8_t *data, size_t len) {
// Write supplied data to input buffer
size_t result = input_buffer.writeArray((uint8_t *)data, len);
// Decode from input buffer
decode(underflow_limit);
return result;
}
void flush() {
decode(0);
}
/// Defines the input buffer size
void setInputBufferSize(int len){
buffer_size_input = len;
}
/// Defines the min number of bytes that are submitted to the decoder
void setUnderflowLimit(int len){
underflow_limit = len;
}
/// checks if the class is active
virtual operator bool() { return hAac != nullptr; }
protected:
int buffer_size_input = FAAD_INPUT_BUFFER_SIZE;
int underflow_limit = FAAD_UNDERFLOW_LIMIT;
NeAACDecHandle hAac = nullptr;
NeAACDecConfigurationPtr conf;
SingleBuffer<uint8_t> input_buffer{0};
bool is_init = false;
void init(uint8_t *data, size_t len) {
TRACEI();
// Initialise the library using one of the initialization functions
unsigned long samplerate = info.sample_rate;
unsigned char channels = info.channels;
if (NeAACDecInit(hAac, data, len, &samplerate, &channels)==-1) {
LOGE("NeAACDecInit");
}
info.sample_rate = samplerate;
info.channels = channels;
is_init = true;
}
void decode(int minBufferSize) {
TRACED();
NeAACDecFrameInfo hInfo;
// decode until we do not conume any bytes
while (input_buffer.available()>minBufferSize) {
int eff_len = input_buffer.available();
if (!is_init) {
init(input_buffer.data(), eff_len);
}
uint8_t *sample_buffer=(uint8_t *)NeAACDecDecode(hAac, &hInfo, input_buffer.address(), eff_len);
LOGD("bytesconsumed: %d of %d", (int)hInfo.bytesconsumed, (int)eff_len);
if (hInfo.error != 0) {
LOGW("Decoding error: %s", NeAACDecGetErrorMessage(hInfo.error));
}
if (hInfo.bytesconsumed == 0 ) {
break;
}
LOGD("Decoded %lu samples", hInfo.samples);
LOGD(" bytesconsumed: %lu", hInfo.bytesconsumed);
LOGD(" channels: %d", hInfo.channels);
LOGD(" samplerate: %lu", hInfo.samplerate);
LOGD(" sbr: %u", hInfo.sbr);
LOGD(" object_type: %u", hInfo.object_type);
LOGD(" header_type: %u", hInfo.header_type);
LOGD(" num_front_channels: %u", hInfo.num_front_channels);
LOGD(" num_side_channels: %u", hInfo.num_side_channels);
LOGD(" num_back_channels: %u", hInfo.num_back_channels);
LOGD(" num_lfe_channels: %u", hInfo.num_lfe_channels);
LOGD(" ps: %u", hInfo.ps);
// removed consumed data
input_buffer.clearArray(hInfo.bytesconsumed);
// check for changes in config
AudioInfo tmp{(sample_rate_t)hInfo.samplerate, hInfo.channels, 16};
if (tmp != info) {
setAudioInfo(tmp);
}
int bytes = hInfo.samples * sizeof(int16_t);
size_t len = p_print->write(sample_buffer, bytes);
if (len != bytes) {
TRACEE();
}
}
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,319 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AACDecoderFDK.h"
#include "AACEncoderFDK.h"
namespace audio_tools {
// Audio change notification target shared by all AACDecoderFDK instances.
// NOTE(review): a non-inline variable definition in a header yields one
// definition per translation unit that includes it (ODR violation at link
// time when included from several .cpp files) — consider 'inline' (C++17)
// or moving the definition into a .cpp file; verify the project's C++ level.
AudioInfoSupport *audioChangeFDK = nullptr;
/**
 * @brief Audio Decoder which decodes AAC into a PCM stream
 * This is basically just a wrapper using https://github.com/pschatzmann/arduino-fdk-aac
 * which uses AudioInfo and provides the handling of AudioInfo changes.
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AACDecoderFDK : public AudioDecoder {
 public:
  /// Default constructor: the output must be provided later via setOutput()
  AACDecoderFDK(int output_buffer_size = FDK_OUT_BUFFER_DEFAULT_SIZE){
    TRACED();
    dec = new aac_fdk::AACDecoderFDK(output_buffer_size);
  }

  /// Constructor which defines the PCM output stream
  AACDecoderFDK(Print &out_stream, int output_buffer_size = 2048){
    TRACED();
    dec = new aac_fdk::AACDecoderFDK(out_stream, output_buffer_size);
  }

  virtual ~AACDecoderFDK(){
    delete dec;
  }

  /// Defines the output stream
  void setOutput(Print &out_stream) override {
    dec->setOutput(out_stream);
  }

  /// Opens the decoder with the default transport (ADTS, 1 layer)
  bool begin() override {
    return dec->begin(TT_MP4_ADTS, 1);
  }

  // opens the decoder
  bool begin(TRANSPORT_TYPE transportType, UINT nrOfLayers){
    return dec->begin(transportType, nrOfLayers);
  }

  /**
   * @brief Explicitly configure the decoder by passing a raw AudioSpecificConfig (ASC) or a StreamMuxConfig
   * (SMC), contained in a binary buffer. This is required for MPEG-4 and Raw Packets file format bitstreams
   * as well as for LATM bitstreams with no in-band SMC. If the transport format is LATM with or without
   * LOAS, configuration is assumed to be an SMC, for all other file formats an ASC.
   *
   **/
  AAC_DECODER_ERROR configure(uint8_t *conf, const uint32_t &length) {
    return dec->configure(conf, length);
  }

  // write AAC data to be converted to PCM data
  virtual size_t write(const uint8_t *data, size_t len) override {
    return dec->write(data, len);
  }

  // provides detailed information about the stream (FDK native structure)
  CStreamInfo audioInfoEx(){
    return dec->audioInfo();
  }

  // provides common information: FDK always outputs 16 bit PCM samples
  AudioInfo audioInfo() override {
    AudioInfo result;
    CStreamInfo i = audioInfoEx();
    result.channels = i.numChannels;
    result.sample_rate = i.sampleRate;
    result.bits_per_sample = 16;
    return result;
  }

  // release the resources
  void end() override {
    TRACED();
    dec->end();
  }

  virtual operator bool() override {
    return (bool)*dec;
  }

  /// Provides access to the underlying FDK driver object
  aac_fdk::AACDecoderFDK *driver() {
    return dec;
  }

  /// Translates the FDK stream-info callback into an AudioInfo notification
  /// (forwarded to the globally registered audioChangeFDK target)
  static void audioChangeCallback(CStreamInfo &info){
    if (audioChangeFDK!=nullptr){
      AudioInfo base;
      base.channels = info.numChannels;
      base.sample_rate = info.sampleRate;
      base.bits_per_sample = 16;
      // notify audio change
      audioChangeFDK->setAudioInfo(base);
    }
  }

  /// Registers a listener which is notified on audio format changes.
  /// NOTE(review): the target is stored in a single file-scope variable, so
  /// only one listener is supported across all decoder instances — confirm
  /// this limitation is acceptable for the callers.
  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    audioChangeFDK = &bi;
    // register audio change handler
    dec->setInfoCallback(audioChangeCallback);
  }

 protected:
  aac_fdk::AACDecoderFDK *dec=nullptr;  // owned FDK driver (deleted in dtor)
};
/**
 * @brief Encodes PCM data to the AAC format and writes the result to a stream
 * This is basically just a wrapper using https://github.com/pschatzmann/arduino-fdk-aac
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AACEncoderFDK : public AudioEncoder {
 public:
  /// Default constructor: the output must be provided later via setOutput()
  AACEncoderFDK(){
    enc = new aac_fdk::AACEncoderFDK();
  }

  /// Constructor which defines the AAC output stream
  AACEncoderFDK(Print &out_stream){
    enc = new aac_fdk::AACEncoderFDK();
    enc->setOutput(out_stream);
  }

  ~AACEncoderFDK(){
    delete enc;
  }

  /// Defines the output
  void setOutput(Print &out_stream) override {
    enc->setOutput(out_stream);
  }

  /** @brief Total encoder bitrate. This parameter is
            mandatory and interacts with ::AACENC_BITRATEMODE.
            - CBR: Bitrate in bits/second.
            - VBR: Variable bitrate. Bitrate argument will
            be ignored. See \ref suppBitrates for details. */
  virtual void setBitrate(int bitrate){
    enc->setBitrate(bitrate);
  }

  /** @brief Audio object type. See ::AUDIO_OBJECT_TYPE in FDK_audio.h.
            - 2: MPEG-4 AAC Low Complexity.
            - 5: MPEG-4 AAC Low Complexity with Spectral Band Replication
            (HE-AAC).
            - 29: MPEG-4 AAC Low Complexity with Spectral Band
            Replication and Parametric Stereo (HE-AAC v2). This
            configuration can be used only with stereo input audio data.
            - 23: MPEG-4 AAC Low-Delay.
            - 39: MPEG-4 AAC Enhanced Low-Delay. Since there is no
            ::AUDIO_OBJECT_TYPE for ELD in combination with SBR defined,
            enable SBR explicitely by ::AACENC_SBR_MODE parameter. The ELD
            v2 212 configuration can be configured by ::AACENC_CHANNELMODE
            parameter.
            - 129: MPEG-2 AAC Low Complexity.
            - 132: MPEG-2 AAC Low Complexity with Spectral Band
            Replication (HE-AAC).
            Please note that the virtual MPEG-2 AOT's basically disables
            non-existing Perceptual Noise Substitution tool in AAC encoder
            and controls the MPEG_ID flag in adts header. The virtual
            MPEG-2 AOT doesn't prohibit specific transport formats. */
  virtual void setAudioObjectType(int aot){
    enc->setAudioObjectType(aot);
  }

  /** @brief  This parameter controls the use of the afterburner feature.
            The afterburner is a type of analysis by synthesis algorithm
            which increases the audio quality but also the required
            processing power. It is recommended to always activate this if
            additional memory consumption and processing power consumption
            is not a problem. If increased MHz and memory consumption are
            an issue then the MHz and memory cost of this optional module
            need to be evaluated against the improvement in audio quality
            on a case by case basis.
            - 0: Disable afterburner (default).
            - 1: Enable afterburner. */
  virtual void setAfterburner(bool afterburner){
    enc->setAfterburner(afterburner);
  }

  /** @brief Configure SBR independently of the chosen Audio
            Object Type ::AUDIO_OBJECT_TYPE. This parameter
            is for ELD audio object type only.
            - -1: Use ELD SBR auto configurator (default).
            - 0: Disable Spectral Band Replication.
            - 1: Enable Spectral Band Replication. */
  virtual void setSpectralBandReplication(int eld_sbr){
    enc->setSpectralBandReplication(eld_sbr);
  }

  /** @brief  Bitrate mode. Configuration can be different
            kind of bitrate configurations:
            - 0: Constant bitrate, use bitrate according
            to ::AACENC_BITRATE. (default) Within none
            LD/ELD ::AUDIO_OBJECT_TYPE, the CBR mode makes
            use of full allowed bitreservoir. In contrast,
            at Low-Delay ::AUDIO_OBJECT_TYPE the
            bitreservoir is kept very small.
            - 1: Variable bitrate mode, \ref vbrmode
            "very low bitrate".
            - 2: Variable bitrate mode, \ref vbrmode
            "low bitrate".
            - 3: Variable bitrate mode, \ref vbrmode
            "medium bitrate".
            - 4: Variable bitrate mode, \ref vbrmode
            "high bitrate".
            - 5: Variable bitrate mode, \ref vbrmode
            "very high bitrate". */
  virtual void setVariableBitrateMode(int vbr){
    enc->setVariableBitrateMode(vbr);
  }

  /**
   * @brief Set the Output Buffer Size object
   *
   * @param outbuf_size
   */
  virtual void setOutputBufferSize(int outbuf_size){
    enc->setOutputBufferSize(outbuf_size);
  }

  /// Defines the Audio Info: updates both the base class and the FDK driver
  void setAudioInfo(AudioInfo from) override {
    TRACED();
    AudioEncoder::setAudioInfo(from);
    aac_fdk::AudioInfo info;
    info.channels = from.channels;
    info.sample_rate = from.sample_rate;
    info.bits_per_sample = from.bits_per_sample;
    enc->setAudioInfo(info);
  }

  /**
   * @brief Opens the encoder
   *
   * NOTE(review): this delegates directly to the driver and does not update
   * the stored AudioInfo via setAudioInfo() — verify whether the base class
   * info should be synced here as well.
   *
   * @param info
   * @return int
   */
  virtual bool begin(AudioInfo info) override {
    TRACED();
    return enc->begin(info.channels,info.sample_rate, info.bits_per_sample);
  }

  /**
   * @brief Opens the encoder
   *
   * @param input_channels
   * @param input_sample_rate
   * @param input_bits_per_sample
   * @return int 0 => ok; error with negative number
   */
  virtual bool begin(int input_channels=2, int input_sample_rate=44100, int input_bits_per_sample=16) {
    TRACED();
    return enc->begin(input_channels,input_sample_rate, input_bits_per_sample);
  }

  // starts the processing with the previously defined audio info
  bool begin() override {
    enc->begin();
    return true;
  }

  // convert PCM data to AAC
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write %d bytes", (int)len);
    return enc->write((uint8_t*)data, len);
  }

  // release resources
  void end() override {
    TRACED();
    enc->end();
  }

  /// Reads a raw FDK encoder parameter (see AACENC_PARAM in the FDK API)
  UINT getParameter(const AACENC_PARAM param) {
    return enc->getParameter(param);
  }

  /// Sets a raw FDK encoder parameter (see AACENC_PARAM in the FDK API)
  int setParameter(AACENC_PARAM param, uint32_t value){
    return enc->setParameter(param, value);
  }

  /// Provides access to the underlying FDK driver object
  aac_fdk::AACEncoderFDK *driver() {
    return enc;
  }

  /// Mime type of the produced data
  const char *mime() override {
    return "audio/aac";
  }

  operator bool() override {
    return (bool) *enc;
  }

 protected:
  aac_fdk::AACEncoderFDK *enc=nullptr;  // owned FDK driver (deleted in dtor)
};
}

View File

@@ -0,0 +1,182 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#ifndef HELIX_PRINT
#define HELIX_PRINT
#endif
#include "AACDecoderHelix.h"
namespace audio_tools {
/**
 * @brief AAC Decoder using libhelix:
 * https://github.com/pschatzmann/arduino-libhelix This is basically just a
 * simple wrapper to provide AudioInfo and AudioInfoSupport
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AACDecoderHelix : public AudioDecoder {
 public:
  AACDecoderHelix() {
    TRACED();
    aac = new libhelix::AACDecoderHelix();
    if (aac != nullptr) {
      aac->setReference(this);
    } else {
      LOGE("Not enough memory for libhelix");
    }
  }

  /**
   * @brief Construct a new AACDecoderMini object
   *
   * @param out_stream
   */
  AACDecoderHelix(Print &out_stream) {
    TRACED();
    aac = new libhelix::AACDecoderHelix(out_stream);
    if (aac != nullptr) {
      aac->setReference(this);
    } else {
      LOGE("Not enough memory for libhelix");
    }
  }

  /**
   * @brief Construct a new AACDecoderMini object. The decoded output will go to
   * the print object.
   *
   * @param out_stream
   * @param bi
   */
  AACDecoderHelix(Print &out_stream, AudioInfoSupport &bi) {
    TRACED();
    aac = new libhelix::AACDecoderHelix(out_stream);
    if (aac != nullptr) {
      aac->setReference(this);
    } else {
      LOGE("Not enough memory for libhelix");
    }
    addNotifyAudioChange(bi);
  }

  /**
   * @brief Destroy the AACDecoderMini object
   *
   */
  ~AACDecoderHelix() {
    TRACED();
    if (aac != nullptr) delete aac;
  }

  // void setRaw(bool flag){
  //   if (aac!=nullptr) aac->setRaw(flag);
  // }

  /// Defines the output Stream
  virtual void setOutput(Print &out_stream) override {
    TRACED();
    AudioDecoder::setOutput(out_stream);
    if (aac != nullptr) aac->setOutput(out_stream);
  }

  /// Starts the processing
  bool begin() override {
    TRACED();
    if (aac != nullptr) {
      // aac->setDelay(CODEC_DELAY_MS);
      aac->setInfoCallback(infoCallback, this);
      aac->begin();
    }
    return true;
  }

  /// Releases the reserved memory
  virtual void end() override {
    TRACED();
    if (aac != nullptr) aac->end();
  }

  /// Provides the libhelix frame information of the last decoded frame
  virtual _AACFrameInfo audioInfoEx() { return aac->audioInfo(); }

  /// Provides the common audio info derived from the last decoded frame
  AudioInfo audioInfo() override {
    AudioInfo result;
    auto i = audioInfoEx();
    if (i.nChans != 0 && i.bitsPerSample != 0 && i.sampRateOut != 0) {
      result.channels = i.nChans;
      result.sample_rate = i.sampRateOut;
      result.bits_per_sample = i.bitsPerSample;
    }
    return result;
  }

  /// Stores the audio info and (optionally) notifies the registered listeners
  void setAudioInfo(AudioInfo info) override {
    this->info = info;
    if (info_notifications_active) {
      notifyAudioChange(info);
    }
  }

  /// Write AAC data to decoder: the data is forwarded to libhelix in chunks
  /// of at most DEFAULT_BUFFER_SIZE bytes
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("AACDecoderHelix::write: %d", (int)len);
    if (aac == nullptr) return 0;
    int open = len;
    int processed = 0;
    uint8_t *data8 = (uint8_t *)data;
    while (open > 0) {
      int act_write =
          aac->write(data8 + processed, min(open, DEFAULT_BUFFER_SIZE));
      // Bug fix: if the decoder makes no progress (0 or negative result)
      // the original loop would spin forever
      if (act_write <= 0) {
        LOGE("AACDecoderHelix::write failed after %d bytes", processed);
        break;
      }
      open -= act_write;
      processed += act_write;
    }
    return processed;
  }

  /// checks if the class is active
  virtual operator bool() override { return aac != nullptr && (bool)*aac; }

  void flush() {
    //    aac->flush();
  }

  /// notifies the subscriber about a change
  static void infoCallback(_AACFrameInfo &i, void *ref) {
    AACDecoderHelix *p_helix = (AACDecoderHelix *)ref;
    if (p_helix != nullptr) {
      TRACED();
      AudioInfo baseInfo;
      baseInfo.channels = i.nChans;
      baseInfo.sample_rate = i.sampRateOut;
      baseInfo.bits_per_sample = i.bitsPerSample;
      // p_helix->audioChangeAACHelix->setAudioInfo(baseInfo);
      LOGW("sample_rate: %d", i.sampRateOut);
      p_helix->setAudioInfo(baseInfo);
    }
  }

  /// Provides the maximum frame size - this is allocated on the heap and you
  /// can reduce the heap size by minimizing this value
  size_t maxFrameSize() { return aac->maxFrameSize(); }

  /// Define your optimized maximum frame size
  void setMaxFrameSize(size_t len) { aac->setMaxFrameSize(len); }

  /// Activates/deactivates the audio change notifications
  void setAudioInfoNotifications(bool active) {
    info_notifications_active = active;
  }

  /// Provides the maximum pwm buffer size - this is allocated on the heap and
  /// you can reduce the heap size by minimizing this value
  size_t maxPCMSize() { return aac->maxPCMSize(); }

  /// Define your optimized maximum pwm buffer size
  void setMaxPCMSize(size_t len) { aac->setMaxPCMSize(len); }

 protected:
  libhelix::AACDecoderHelix *aac = nullptr;  // owned driver (deleted in dtor)
  bool info_notifications_active = true;
};
} // namespace audio_tools

View File

@@ -0,0 +1,334 @@
#pragma once
#include "ADPCM.h" // https://github.com/pschatzmann/adpcm
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
 * @brief Decoder for ADPCM. Depends on https://github.com/pschatzmann/adpcm
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class ADPCMDecoder : public AudioDecoderExt {
 public:
  ADPCMDecoder() = default;

  /// Constructor defining the ADPCM variant and the encoded block size
  ADPCMDecoder(AVCodecID id, int blockSize = ADAPCM_DEFAULT_BLOCK_SIZE) {
    setBlockSize(blockSize);
    setId(id);
  }

  /// Destructor
  ~ADPCMDecoder() {
    if (p_decoder) delete p_decoder;
  }

  // (re) defines the codec id: set the block size first
  void setId(AVCodecID id) {
    codec_id = id;
    // only rebuild the implementation if one was already created
    if (p_decoder != nullptr) {
      setImplementation();
    }
  }

  // defines the block size (= size of encoded frame)
  void setBlockSize(int blockSize) override {
    block_size = blockSize;
    if (p_decoder == nullptr) return;
    p_decoder->setBlockSize(blockSize);
  }

  /// Provides the block size (= size of encoded frame) (only available after
  /// calling begin)
  int blockSize() {
    if (p_decoder == nullptr) return block_size;
    return p_decoder->blockSize();
  }

  /// Provides the frame size (size of decoded frame) (only available after
  /// calling begin)
  int frameSize() {
    if (p_decoder == nullptr) return 0;
    // frameSize() counts samples; * 2 converts to bytes (16 bit samples)
    return p_decoder->frameSize() * 2;
  }

  /// Creates the implementation (if needed), opens the decoder and sizes the
  /// encoded-block accumulation buffer
  bool begin() override {
    TRACEI();
    if (p_decoder == nullptr) {
      setImplementation();
    }
    if (is_started) return true;
    current_byte = 0;
    LOGI("sample_rate: %d, channels: %d", info.sample_rate, info.channels);
    p_decoder->begin(info.sample_rate, info.channels);
    LOGI("frameSize: %d", (int)frameSize());
    LOGI("blockSize: %d", (int)blockSize());
    block_size = p_decoder->blockSize();
    assert(block_size > 0);
    assert(p_decoder->frameSize() > 0);
    adpcm_block.resize(block_size);
    notifyAudioChange(info);
    is_started = true;
    return true;
  }

  void end() override {
    TRACEI();
    if (p_decoder != nullptr) p_decoder->end();
    adpcm_block.resize(0);
    is_started = false;
  }

  virtual void setOutput(Print &out_stream) override { p_print = &out_stream; }

  /// Accepts encoded ADPCM data byte by byte; a full block triggers a decode
  virtual size_t write(const uint8_t *data, size_t len) override {
    TRACED();
    uint8_t *input_buffer8 = (uint8_t *)data;
    LOGD("write: %d", (int)len);
    for (int j = 0; j < len; j++) {
      decode(input_buffer8[j]);
    }
    return len;
  }

  void flush() {
    if (p_decoder != nullptr) p_decoder->flush();
  }

  operator bool() override { return is_started; }

 protected:
  adpcm_ffmpeg::ADPCMDecoder *p_decoder = nullptr;  // owned implementation
  Vector<uint8_t> adpcm_block;  // accumulates one encoded block
  Print *p_print = nullptr;     // PCM output target
  int current_byte = 0;         // fill level of adpcm_block
  int block_size = ADAPCM_DEFAULT_BLOCK_SIZE;
  AVCodecID codec_id = AV_CODEC_ID_ADPCM_MS;
  bool is_started = false;

  /// Collects bytes until a complete block is available, then decodes it and
  /// writes the resulting PCM samples to the output
  virtual bool decode(uint8_t byte) {
    if (p_decoder == nullptr) return false;
    adpcm_block[current_byte++] = byte;
    if (current_byte >= block_size) {
      TRACED();
      adpcm_ffmpeg::AVFrame &frame =
          p_decoder->decode(&adpcm_block[0], block_size);
      // print the result
      int16_t *data = (int16_t *)frame.data[0];
      size_t byte_count = frame.nb_samples * sizeof(int16_t) * info.channels;
      size_t written = p_print->write((uint8_t *)data, byte_count);
      if (written != byte_count) {
        LOGE("decode %d -> %d -> %d", block_size, (int)byte_count,
             (int)written);
      } else {
        LOGD("decode %d -> %d -> %d", block_size, (int)byte_count,
             (int)written);
      }
      // restart from array begin
      current_byte = 0;
    }
    return true;
  }

  /// change the decoder implementation
  void setImplementation() {
    // delete the old decoder
    if (p_decoder != nullptr) {
      p_decoder->end();
      delete p_decoder;
      p_decoder = nullptr;
    }
    // IMA AMV is fixed to 22050 Hz mono 16 bit
    if (codec_id == AV_CODEC_ID_ADPCM_IMA_AMV) {
      info.sample_rate = 22050;
      info.channels = 1;
      info.bits_per_sample = 16;
    }
    p_decoder = adpcm_ffmpeg::ADPCMDecoderFactory::create(codec_id);
    if (p_decoder != nullptr) {
      p_decoder->setCodecID(codec_id);
      p_decoder->setBlockSize(block_size);
    } else {
      LOGE("Decoder not implemented");
    }
  }
};
/**
 * @brief Encoder for ADPCM - Depends on https://github.com/pschatzmann/adpcm
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class ADPCMEncoder : public AudioEncoderExt {
 public:
  ADPCMEncoder() = default;

  /// Constructor defining the ADPCM variant and the encoded block size
  ADPCMEncoder(AVCodecID id, int blockSize = ADAPCM_DEFAULT_BLOCK_SIZE) {
    setId(id);
    setBlockSize(blockSize);
  }

  /// Destructor
  ~ADPCMEncoder() {
    if (p_encoder != nullptr) delete p_encoder;
  }

  /// (re) defines the codec id
  void setId(AVCodecID id) {
    codec_id = id;
    // only rebuild the implementation if one was already created
    if (p_encoder != nullptr) {
      setImplementation();
    }
  }

  /// (re) defines the block size
  void setBlockSize(int blockSize) {
    block_size = blockSize;
    if (p_encoder == nullptr) return;
    p_encoder->setBlockSize(blockSize);
  }

  /// Provides the block size (size of encoded frame) (only available after
  /// calling begin)
  int blockSize() override {
    if (p_encoder == nullptr) return 0;
    return p_encoder->blockSize();
  }

  /// Provides the frame size (size of decoded frame) (only available after
  /// calling begin)
  int frameSize() {
    if (p_encoder == nullptr) return 0;
    // frameSize() counts samples; * 2 converts to bytes (16 bit samples)
    return p_encoder->frameSize() * 2;
  }

  /// Creates the implementation (if needed), opens the encoder and sizes the
  /// PCM accumulation buffer
  bool begin() override {
    TRACEI();
    if (p_encoder == nullptr) {
      setImplementation();
    };
    if (is_started) return true;
    LOGI("sample_rate: %d, channels: %d", info.sample_rate, info.channels);
    p_encoder->begin(info.sample_rate, info.channels);
    LOGI("frameSize: %d", (int)frameSize());
    LOGI("blockSize: %d", (int)blockSize());
    assert(info.sample_rate != 0);
    assert(p_encoder->frameSize() != 0);
    total_samples = p_encoder->frameSize() * info.channels;
    pcm_block.resize(total_samples);
    current_sample = 0;
    is_started = true;
    return true;
  }

  void end() override {
    TRACEI();
    pcm_block.resize(0);
    if (p_encoder == nullptr) return;
    p_encoder->end();
    is_started = false;
  }

  const char *mime() override { return "audio/adpcm"; }

  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  operator bool() override { return is_started; }

  /// Accepts 16 bit PCM samples; a full frame triggers an encode
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", (int)len);
    int16_t *data16 = (int16_t *)data;
    for (int j = 0; j < len / 2; j++) {
      encode(data16[j]);
    }
    return len;
  }

  /// provides the frame duration in us (for rtsp)
  virtual uint32_t frameDurationUs() override {
    if (p_encoder == nullptr || info.sample_rate == 0) {
      return 20000;  // Default 20ms if not initialized
    }
    // Get the number of samples per frame from the encoder
    int samplesPerFrame = p_encoder->frameSize();
    if (samplesPerFrame <= 0) {
      return 20000;  // Default 20ms if invalid frame size
    }
    // Calculate frame duration: (samples_per_frame / sample_rate) * 1000000 us
    uint32_t durationUs = (samplesPerFrame * 1000000) / info.sample_rate;
    return durationUs;
  }

 protected:
  AVCodecID codec_id = AV_CODEC_ID_ADPCM_MS;
  adpcm_ffmpeg::ADPCMEncoder *p_encoder = nullptr;  // owned implementation
  Vector<int16_t> pcm_block;  // accumulates one frame of PCM samples
  Print *p_print = nullptr;   // encoded output target
  bool is_started = false;
  int current_sample = 0;     // fill level of pcm_block
  int total_samples = 0;      // samples per frame * channels
  int current_id = -1;
  int block_size = ADAPCM_DEFAULT_BLOCK_SIZE;

  /// Collects samples until a complete frame is available, then encodes it
  /// and writes the resulting packet to the output
  virtual bool encode(int16_t sample) {
    if (p_encoder == nullptr) return false;
    pcm_block[current_sample++] = sample;
    if (current_sample >= total_samples) {
      TRACED();
      adpcm_ffmpeg::AVPacket &packet =
          p_encoder->encode(&pcm_block[0], total_samples);
      if (packet.size > 0) {
        size_t written = p_print->write(packet.data, packet.size);
        if (written != packet.size) {
          LOGE("encode %d->%d->%d", 2 * total_samples, (int)packet.size,
               (int)written);
        } else {
          LOGD("encode %d->%d->%d", 2 * total_samples, (int)packet.size,
               (int)written);
        }
      }
      // restart from array begin
      current_sample = 0;
    }
    return true;
  }

  /// change the encoder implementation
  bool setImplementation() {
    bool rc = true;
    // delete the old encoder
    if (p_encoder != nullptr) {
      p_encoder->end();
      delete p_encoder;
      p_encoder = nullptr;
    }
    // IMA AMV is fixed to 22050 Hz mono 16 bit
    if (codec_id == AV_CODEC_ID_ADPCM_IMA_AMV) {
      info.sample_rate = 22050;
      info.channels = 1;
      info.bits_per_sample = 16;
    }
    p_encoder = adpcm_ffmpeg::ADPCMEncoderFactory::create(codec_id);
    if (p_encoder != nullptr) {
      p_encoder->setCodecID(codec_id);
      p_encoder->setBlockSize(block_size);
    } else {
      LOGE("Encoder not implemented");
      rc = false;
    }
    return rc;
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,266 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "adpcm-lib.h" // https://github.com/pschatzmann/arduino-adpcm-xq
#define DEFAULT_NOISE_SHAPING NOISE_SHAPING_OFF
#define DEFAULT_LOOKAHEAD 0
#define DEFAULT_BLOCKSIZE_POW2 0
namespace audio_tools {
enum class ADPCMNoiseShaping {
AD_NOISE_SHAPING_OFF = 0, // flat noise (no shaping)
AD_NOISE_SHAPING_STATIC = 1, // first-order highpass shaping
AD_NOISE_SHAPING_DYNAMIC = 2
};
/**
 * @brief Decoder for ADPCM-XQ. Depends on
 * https://github.com/pschatzmann/arduino-adpcm-xq
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class ADPCMDecoderXQ : public AudioDecoder {
 public:
  ADPCMDecoderXQ() {
    info.sample_rate = 44100;
    info.channels = 2;
    info.bits_per_sample = 16;
  }

  /// set block sizes as 2^pow: valid range from 8 to 15
  void setBlockSizePower(int pow) {
    // Bug fix: the original condition 'pow >= 8 && pow >= 15' only
    // accepted values >= 15 instead of the documented 8..15 range
    if (pow >= 8 && pow <= 15) {
      block_size_pow2 = pow;
    }
  }

  /// Set look ahead bytes from 0 to 8
  void setLookahead(int value) {
    // reject negative values as well (the original only checked the upper bound)
    if (value >= 0 && value <= 8) {
      lookahead = value;
    }
  }

  /// Defines the noise shaping
  void setNoiseShaping(ADPCMNoiseShaping ns) { noise_shaping = (int)ns; }

  /// Creates the decoding context and computes the block / frame geometry
  bool begin() override {
    TRACEI();
    current_byte = 0;
    if (adpcm_cnxt == nullptr) {
      adpcm_cnxt = adpcm_create_context(info.channels, lookahead, noise_shaping,
                                        initial_deltas);
      if (block_size_pow2)
        block_size = 1 << block_size_pow2;
      else
        block_size = 256 * info.channels *
                     (info.sample_rate < 11000 ? 1 : info.sample_rate / 11000);
      // (channels ^ 3) maps 1 -> 2 and 2 -> 1: samples encoded per byte
      samples_per_block =
          (block_size - info.channels * 4) * (info.channels ^ 3) + 1;
      pcm_block.resize(samples_per_block * info.channels);
      adpcm_block.resize(block_size);
    }
    notifyAudioChange(info);
    return true;
  }

  void end() override {
    TRACEI();
    if (adpcm_cnxt != nullptr) {
      adpcm_free_context(adpcm_cnxt);
      adpcm_cnxt = nullptr;
    }
    pcm_block.resize(0);
    adpcm_block.resize(0);
  }

  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() override { return adpcm_cnxt != nullptr; }

  /// Accepts encoded ADPCM data byte by byte; a full block triggers a decode
  virtual size_t write(const uint8_t *data, size_t len) {
    uint8_t *input_buffer8 = (uint8_t *)data;
    LOGD("write: %d", (int)len);
    for (int j = 0; j < len; j++) {
      adpcm_block[current_byte++] = input_buffer8[j];
      if (current_byte == block_size) {
        decode(current_byte);
        current_byte = 0;
      }
    }
    return len;
  }

 protected:
  int current_byte = 0;          // fill level of adpcm_block
  void *adpcm_cnxt = nullptr;    // adpcm-xq decoding context
  Vector<int16_t> pcm_block;     // decoded PCM samples of one block
  Vector<uint8_t> adpcm_block;   // accumulates one encoded block
  int32_t initial_deltas[2] = {0};
  Print *p_print = nullptr;      // PCM output target
  int samples_per_block = 0, lookahead = DEFAULT_LOOKAHEAD,
      noise_shaping = (int)DEFAULT_NOISE_SHAPING,
      block_size_pow2 = DEFAULT_BLOCKSIZE_POW2, block_size = 0;

  /// Decodes one complete block and writes the PCM result to the output
  bool decode(int this_block_adpcm_samples) {
    // guard against a missing output definition
    if (p_print == nullptr) return false;
    int result = adpcm_decode_block(pcm_block.data(), adpcm_block.data(),
                                    block_size, info.channels);
    if (result != samples_per_block) {
      LOGE("adpcm_decode_block: %d instead %d", result,
           this_block_adpcm_samples);
      return false;
    }
    // samples * channels * 2 bytes per 16 bit sample
    int write_size = samples_per_block * info.channels * 2;
    p_print->write((uint8_t *)pcm_block.data(), write_size);
    return true;
  }
};
/**
 * @brief Encoder for ADPCM-XQ - Depends on
 * https://github.com/pschatzmann/arduino-adpcm-xq
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class ADPCMEncoderXQ : public AudioEncoder {
 public:
  ADPCMEncoderXQ() {
    info.sample_rate = 44100;
    info.channels = 2;
    info.bits_per_sample = 16;
  }

  /// set block sizes as 2^pow: valid range from 8 to 15
  void setBlockSizePower(int pow) {
    // Bug fix: the original condition 'pow >= 8 && pow >= 15' only
    // accepted values >= 15 instead of the documented 8..15 range
    if (pow >= 8 && pow <= 15) {
      block_size_pow2 = pow;
    }
  }

  /// Set look ahead bytes from 0 to 8
  void setLookahead(int value) {
    // reject negative values as well (the original only checked the upper bound)
    if (value >= 0 && value <= 8) {
      lookahead = value;
    }
  }

  /// Defines the noise shaping
  void setNoiseShaping(ADPCMNoiseShaping ns) { noise_shaping = (int)ns; }

  /// Computes the block / frame geometry; the encoding context itself is
  /// created lazily on the first full frame (see encode())
  bool begin() override {
    TRACEI();
    if (block_size_pow2)
      block_size = 1 << block_size_pow2;
    else
      block_size = 256 * info.channels *
                   (info.sample_rate < 11000 ? 1 : info.sample_rate / 11000);
    // (channels ^ 3) maps 1 -> 2 and 2 -> 1: samples encoded per byte
    samples_per_block =
        (block_size - info.channels * 4) * (info.channels ^ 3) + 1;
    pcm_block.resize(samples_per_block * info.channels);
    adpcm_block.resize(block_size);
    current_sample = 0;
    return true;
  }

  void end() override {
    TRACEI();
    if (adpcm_cnxt != nullptr) {
      adpcm_free_context(adpcm_cnxt);
      adpcm_cnxt = nullptr;
    }
    pcm_block.resize(0);
    adpcm_block.resize(0);
  }

  const char *mime() override { return "audio/adpcm"; }

  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  operator bool() override { return adpcm_cnxt != nullptr; }

  /// Accepts 16 bit PCM samples; a full block of samples triggers an encode
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", (int)len);
    int16_t *input_buffer = (int16_t *)data;
    pcm_block_size = samples_per_block * info.channels;
    for (int j = 0; j < len / 2; j++) {
      pcm_block[current_sample++] = input_buffer[j];
      if (current_sample == samples_per_block * info.channels) {
        encode();
        current_sample = 0;
      }
    }
    return len;
  }

 protected:
  int current_sample = 0;        // fill level of pcm_block
  void *adpcm_cnxt = nullptr;    // adpcm-xq encoding context (lazily created)
  Vector<int16_t> pcm_block;     // accumulates one block of PCM samples
  Vector<uint8_t> adpcm_block;   // encoded output of one block
  Print *p_print = nullptr;      // encoded output target
  int samples_per_block = 0, lookahead = DEFAULT_LOOKAHEAD,
      noise_shaping = (int)DEFAULT_NOISE_SHAPING,
      block_size_pow2 = DEFAULT_BLOCKSIZE_POW2, block_size = 0, pcm_block_size;
  bool is_first = true;

  /// Encodes one complete block of buffered PCM samples and writes the
  /// result to the output
  bool encode() {
    // if this is the first block, compute a decaying average (in reverse) so
    // that we can let the encoder know what kind of initial deltas to expect
    // (helps initializing index)
    if (adpcm_cnxt == nullptr) {
      is_first = false;
      int32_t average_deltas[2];
      average_deltas[0] = average_deltas[1] = 0;
      for (int i = samples_per_block * info.channels; i -= info.channels;) {
        average_deltas[0] -= average_deltas[0] >> 3;
        average_deltas[0] +=
            abs((int32_t)pcm_block[i] - pcm_block[i - info.channels]);
        if (info.channels == 2) {
          average_deltas[1] -= average_deltas[1] >> 3;
          average_deltas[1] +=
              abs((int32_t)pcm_block[i - 1] - pcm_block[i + 1]);
        }
      }
      average_deltas[0] >>= 3;
      average_deltas[1] >>= 3;
      adpcm_cnxt = adpcm_create_context(info.channels, lookahead, noise_shaping,
                                        average_deltas);
    }
    size_t num_bytes;
    adpcm_encode_block(adpcm_cnxt, adpcm_block.data(), &num_bytes,
                       pcm_block.data(), samples_per_block);
    // cast avoids a signed/unsigned comparison between size_t and int
    if ((int)num_bytes != block_size) {
      LOGE(
          "adpcm_encode_block() did not return expected value "
          "(expected %d, got %d)!\n",
          block_size, (int)num_bytes);
      return false;
    }
    p_print->write(adpcm_block.data(), block_size);
    return true;
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,342 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
#ifndef SYNCWORDH
#define SYNCWORDH 0xff
#define SYNCWORDL 0xf0
#endif
#define ERROR_FMT_CHANGE "- Invalid ADTS change: %s"
#define ERROR_FMT "- Invalid ADTS: %s (0x%x)"
/**
 * @brief Parser for ADTS frame headers: extracts all header fields and
 * validates them - both individually and against the first valid header
 * of the stream.
 */
class ADTSParser {
 public:
  /// Structure to hold ADTS header field values
  struct ADTSHeader {
    uint16_t syncword = 0;
    uint8_t id = 0;
    uint8_t layer = 0;
    uint8_t protection_absent = 0;
    uint8_t profile = 0;
    uint8_t sampling_freq_idx = 0;
    uint8_t private_bit = 0;
    uint8_t channel_cfg = 0;
    uint8_t original_copy = 0;
    uint8_t home = 0;
    uint8_t copyright_id_bit = 0;
    uint8_t copyright_id_start = 0;
    uint16_t frame_length = 0;
    /// 11 bit field: uint16_t so the value is not truncated (was uint8_t)
    uint16_t adts_buf_fullness = 0;
    uint8_t num_rawdata_blocks = 0;
  };

  /// Resets the parser state
  bool begin() {
    is_first = true;
    is_valid = true;
    return true;
  }

  /// Parses the 7 header bytes pointed to by hdr and validates the result
  bool parse(uint8_t *hdr) {
    header.syncword = (hdr[0] << 4) | (hdr[1] >> 4);
    // parse fixed header
    header.id = (hdr[1] >> 3) & 0b1;
    header.layer = (hdr[1] >> 1) & 0b11;
    header.protection_absent = (hdr[1]) & 0b1;
    header.profile = (hdr[2] >> 6) & 0b11;
    header.sampling_freq_idx = (hdr[2] >> 2) & 0b1111;
    header.private_bit = (hdr[2] >> 1) & 0b1;
    header.channel_cfg = ((hdr[2] & 0x01) << 2) | ((hdr[3] & 0xC0) >> 6);
    header.original_copy = (hdr[3] >> 5) & 0b1;
    header.home = (hdr[3] >> 4) & 0b1;
    // parse variable header
    header.copyright_id_bit = (hdr[3] >> 3) & 0b1;
    header.copyright_id_start = (hdr[3] >> 2) & 0b1;
    header.frame_length = ((((unsigned int)hdr[3] & 0x3)) << 11) |
                          (((unsigned int)hdr[4]) << 3) | (hdr[5] >> 5);
    header.adts_buf_fullness = ((hdr[5] & 0b11111) << 6) | (hdr[6] >> 2);
    header.num_rawdata_blocks = (hdr[6]) & 0b11;
    LOGD("id:%d layer:%d profile:%d freq:%d channel:%d frame_length:%d",
         header.id, header.layer, header.profile, getSampleRate(),
         header.channel_cfg, header.frame_length);
    // check
    is_valid = check();
    return is_valid;
  }

  /// Frame length in bytes (includes the 7 byte header)
  uint32_t getFrameLength() { return header.frame_length; };

  /// Logs the most relevant header fields
  void log() {
    LOGI("%s id:%d layer:%d profile:%d freq:%d channel:%d frame_length:%d",
         is_valid ? "+" : "-", header.id, header.layer, header.profile,
         getSampleRate(), header.channel_cfg, header.frame_length);
  }

  /// Sample rate in Hz resolved from the sampling frequency index
  int getSampleRate() {
    return header.sampling_freq_idx > 12
               ? header.sampling_freq_idx
               : (int)adtsSamplingRates[header.sampling_freq_idx];
  }

  /// Checks if buf starts with the 12 bit ADTS sync word
  bool isSyncWord(const uint8_t *buf) {
    return ((buf[0] & SYNCWORDH) == SYNCWORDH &&
            (buf[1] & SYNCWORDL) == SYNCWORDL);
  }

  /// Finds the position of the next byte-aligned sync word; -1 if not found
  int findSyncWord(const uint8_t *buf, int nBytes, int start = 0) {
    /* find byte-aligned syncword (12 bits = 0xFFF) */
    for (int i = start; i < nBytes - 1; i++) {
      if (isSyncWord(buf + i)) return i;
    }
    return -1;
  }

  /// Access to the parsed header fields
  ADTSHeader &data() { return header; }

 protected:
  const int adtsSamplingRates[13] = {96000, 88200, 64000, 48000, 44100,
                                     32000, 24000, 22050, 16000, 12000,
                                     11025, 8000,  7350};
  ADTSHeader header;      // most recently parsed header
  ADTSHeader header_ref;  // first valid header, used as reference
  bool is_first = true;
  bool is_valid = false;

  /// Validates the individual field values of the current header
  bool check() {
    // bug fix: start from a clean state - previously one invalid header
    // left is_valid false for all subsequent (valid) headers
    is_valid = true;
    if (header.syncword != 0b111111111111) {
      LOGW(ERROR_FMT, "sync", (int)header.syncword);
      is_valid = false;
    }
    if (header.id > 6) {
      LOGW(ERROR_FMT, "id", (int)header.id);
      is_valid = false;
    }
    if (header.sampling_freq_idx > 0xb) {
      LOGW(ERROR_FMT, "freq", (int)header.sampling_freq_idx);
      is_valid = false;
    }
    // valid value 0-7
    if (header.channel_cfg > 7) {
      LOGW(ERROR_FMT, "channels", (int)header.channel_cfg);
      is_valid = false;
    }
    if (header.frame_length > 8191) {  // typically <= 768
      LOGW(ERROR_FMT, "frame_length", (int)header.frame_length);
      is_valid = false;
    }
    // on subsequent checks we need to compare with the first header
    if (!is_first) {
      is_valid = checkRef();
    }
    if (is_valid) {
      is_first = false;
      header_ref = header;
    }
    return is_valid;
  }

  /// Compares the stream-invariant fields against the first valid header
  bool checkRef() {
    char msg[200] = "";
    bool is_valid = true;
    if (header.id != header_ref.id) {
      strcat(msg, "id ");
      is_valid = false;
    }
    if (header.layer != header_ref.layer) {
      strcat(msg, "layer ");
      is_valid = false;
    }
    if (header.profile != header_ref.profile) {
      strcat(msg, "profile ");
      is_valid = false;
    }
    if (header.sampling_freq_idx != header_ref.sampling_freq_idx) {
      strcat(msg, "freq ");
      is_valid = false;
    }
    if (header.channel_cfg != header_ref.channel_cfg) {
      strcat(msg, "channel ");
      is_valid = false;
    }
    // bug fix: adts_buf_fullness is NOT compared any more - the buffer
    // fullness legitimately changes from frame to frame, so the old
    // comparison was rejecting perfectly valid streams
    if (!is_valid) {
      LOGW(ERROR_FMT_CHANGE, msg);
    }
    return is_valid;
  }
};
/**
 * @brief Audio Data Transport Stream (ADTS) is a format similar to Audio Data
 * Interchange Format (ADIF), used by MPEG TS or Shoutcast to stream audio
 * defined in MPEG-2 Part 7, usually AAC. This parser extracts all valid ADTS
 * frames from the data stream ignoring other data.
 *
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class ADTSDecoder : public AudioDecoder {
 public:
  ADTSDecoder() = default;
  /// Constructor which forwards the extracted frames to the indicated decoder
  ADTSDecoder(AudioDecoder &dec) { p_dec = &dec; };

  bool begin() override {
    parser.begin();
    if (p_dec) p_dec->begin();
    return true;
  }

  void end() override {
    // flush any pending frames and the output buffer
    parseBuffer();
    writeData(out_buffer.data(), out_buffer.available());
    out_buffer.reset();
    buffer.resize(0);
    if (p_dec) p_dec->end();
  }

  /// Write AAC data to decoder
  size_t write(const uint8_t *data, size_t len) override {
    LOGI("AACDecoderADTS::write: %d", (int)len);
    parseBuffer();
    // write data to buffer
    size_t result = buffer.writeArray(data, len);
    LOGD("buffer size: %d", buffer.available());
    return result;
  }

  /// checks if the class is active
  operator bool() override { return true; }

  /// By default we write the parsed frames directly to the output:
  /// alternatively you can activate a buffer here
  void setOutputBufferSize(int size) { out_buffer.resize(size); }

  /// Defines the parse buffer size: default is 1024
  void setParseBufferSize(int size) { buffer.resize(size); }

  /// Defines where the decoded result is written to
  void setOutput(AudioStream &out_stream) override {
    if (p_dec) {
      p_dec->setOutput(out_stream);
    } else {
      AudioDecoder::setOutput(out_stream);
    }
  }

  /// Defines where the decoded result is written to
  void setOutput(AudioOutput &out_stream) override {
    if (p_dec) {
      p_dec->setOutput(out_stream);
    } else {
      AudioDecoder::setOutput(out_stream);
    }
  }

  /// Defines where the decoded result is written to
  void setOutput(Print &out_stream) override {
    if (p_dec) {
      p_dec->setOutput(out_stream);
    } else {
      AudioDecoder::setOutput(out_stream);
    }
  }

 protected:
  SingleBuffer<uint8_t> buffer{DEFAULT_BUFFER_SIZE};  // raw input data
  SingleBuffer<uint8_t> out_buffer;  // optional chunking output buffer
  ADTSParser parser;
  AudioDecoder *p_dec = nullptr;

  /// Extracts all complete ADTS frames from buffer and forwards them
  void parseBuffer() {
    TRACED();
    while (true) {
      // Need at least 7 bytes for a valid ADTS header (bug fix: the old
      // '<= 5' guard let parse() read past the available data)
      if (buffer.available() < 7) return;
      // Needs to contain sync word
      int syncPos = parser.findSyncWord(buffer.data(), buffer.available());
      if (syncPos < 0) {
        return;
      }
      // buffer needs to start with sync word
      if (syncPos > 0) {
        buffer.clearArray(syncPos);
        LOGI("Cleared %d bytes", syncPos);
        // re-check that a full header is still available
        if (buffer.available() < 7) return;
      }
      // Try to parse the header
      if (parser.parse(buffer.data())) {
        // Get the frame length which includes the header
        uint16_t frameLength = parser.getFrameLength();
        if (frameLength < 7) {
          // bug fix: a frame can never be shorter than its header; without
          // this check a 0 length would loop forever on the same sync word
          LOGI("Invalid ADTS frame length: %d", (int)frameLength);
          resyncBuffer();
        } else if (frameLength > buffer.available()) {
          // not enough data
          return;
        } else {
          // write data to decoder
          if (out_buffer.size() > 0) {
            writeDataBuffered(buffer.data(), frameLength);
          } else {
            writeData(buffer.data(), frameLength);
          }
          buffer.clearArray(frameLength);
        }
      } else {
        LOGI("Invalid ADTS header");
        resyncBuffer();
      }
    }
  }

  /// Drops buffered data up to the next sync word (or discards everything)
  void resyncBuffer() {
    int pos = parser.findSyncWord(buffer.data(), buffer.available(), 5);
    if (pos < 0) {
      // no more sync word found
      buffer.reset();
    } else {
      buffer.clearArray(pos);
    }
  }

  /// Writes the data via the chunking output buffer
  size_t writeDataBuffered(uint8_t *data, size_t size) {
    LOGI("writeDataBuffered: %d", (int)size);
    for (size_t j = 0; j < size; j++) {
      out_buffer.write(data[j]);
      if (out_buffer.isFull()) {
        writeData(out_buffer.data(), out_buffer.available());
        out_buffer.reset();
      }
    }
    return size;
  }

  /// Writes the data to the defined output; returns the bytes written
  size_t writeData(uint8_t *data, size_t size) {
    LOGI("writeData: %d", (int)size);
    if (p_print) {
      size_t len = audio_tools::writeData<uint8_t>(p_print, data, size);
      assert(len == size);
      return len;  // bug fix: previously returned the bool (len == size)
    }
    if (p_dec) {
      LOGI("write to decoder: %d", (int)size);
      size_t len =
          audio_tools::writeDataT<uint8_t, AudioDecoder>(p_dec, data, size);
      assert(len == size);
      return len;  // bug fix: previously returned the bool (len == size)
    }
    return 0;
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,383 @@
#pragma once
#include "ALAC.h" // https://github.com/pschatzmann/codec-alac
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/// Magic Cookie: raw byte buffer holding the ALAC codec configuration that
/// the encoder produces and the decoder consumes
class ALACBinaryConfig {
 public:
  /// Sizes the cookie buffer; more than 2 channels additionally needs the
  /// channel layout atom
  void setChannels(int inNumChannels) {
    int byte_count = sizeof(ALACSpecificConfig);
    if (inNumChannels > 2) {
      byte_count += kChannelAtomSize + sizeof(ALACAudioChannelLayout);
    }
    vector.resize(byte_count);
  }
  /// Number of bytes in the cookie
  uint32_t size() { return vector.size(); }
  /// Raw cookie bytes
  uint8_t* data() { return vector.data(); }

 protected:
  Vector<uint8_t> vector;
};
/**
 * @brief ALAC (Apple Lossless Audio Codec) decoder. This class depends on
 * https://github.com/pschatzmann/codec-alac. This implementation is based on
 * https://github.com/macosforge/alac
 * @note Please note that this codec usually needs a container (usually MP4):
 * The write() method expects a complete frame to be written!
 * The decoder also expects to get the config from the encoder, however we have
 * some fallback functionality that uses the AudioInfo and the frame size
 * defined in the constructor.
 * @ingroup codecs
 * @author Phil Schatzmann
 */
class DecoderALAC : public AudioDecoder {
 public:
  /// Default constructor: you can define your own optimized frame size
  DecoderALAC(int frameSize = kALACDefaultFrameSize) {
    // this is used when setCodecConfig() is not called with encoder info
    setFrameSize(frameSize);
    //setDefaultConfig();
  }

  // define ALACSpecificConfig
  bool setCodecConfig(ALACSpecificConfig config) {
    return setCodecConfig((uint8_t*)&config, sizeof(config));
  }

  /// write Magic Cookie (ALACSpecificConfig)
  // NOTE(review): the bool result of the delegated call is stored in a
  // size_t and cfg is taken by value (copy) - presumably harmless; confirm
  bool setCodecConfig(ALACBinaryConfig cfg) {
    size_t result = setCodecConfig(cfg.data(), cfg.size());
    is_init = true;
    return result;
  }

  /// write Magic Cookie (ALACSpecificConfig); initializes the decoder and
  /// publishes the resulting AudioInfo
  bool setCodecConfig(const uint8_t* data, size_t len) override {
    LOGI("DecoderALAC::setCodecConfig: %d", (int)len);
    // Call Init() to set up the decoder
    int32_t rc = dec.Init((void*)data, len);
    if (rc != 0) {
      LOGE("Init failed");
      return false;
    }
    LOGI("ALAC Decoder Setup - SR: %d, Channels: %d, Bits: %d, Frame Size: %d",
         (int)dec.mConfig.sampleRate, (int)dec.mConfig.numChannels,
         (int)dec.mConfig.bitDepth, (int)dec.mConfig.frameLength);
    // propagate the stream parameters from the parsed config
    AudioInfo tmp;
    tmp.bits_per_sample = dec.mConfig.bitDepth;
    tmp.channels = dec.mConfig.numChannels;
    tmp.sample_rate = dec.mConfig.sampleRate;
    setAudioInfo(tmp);
    is_init = true;
    return true;
  }

  /// Update the global decoder info
  void setAudioInfo(AudioInfo from) override {
    AudioDecoder::setAudioInfo(from);
    dec.mConfig.sampleRate = from.sample_rate;
    dec.mConfig.numChannels = from.channels;
    dec.mConfig.bitDepth = from.bits_per_sample;
  }

  /// we expect the write is called for a complete frame!
  size_t write(const uint8_t* encodedFrame, size_t encodedLen) override {
    LOGD("DecoderALAC::write: %d", (int)encodedLen);
    // Make sure we have a config: we can't do this in begin because the setConfig()
    // might be called after begin()
    if (!is_init) setDefaultConfig();
    // Make sure we have the output buffer set up
    if (result_buffer.size() != outputBufferSize()) {
      result_buffer.resize(outputBufferSize());
    }
    // Init bit buffer
    BitBufferInit(&bits, (uint8_t*)encodedFrame, encodedLen);
    // Decode
    uint32_t outNumSamples = 0;
    int32_t status =
        dec.Decode(&bits, result_buffer.data(), dec.mConfig.frameLength,
                   dec.mConfig.numChannels, &outNumSamples);
    if (status != 0) {
      LOGE("Decode failed with error: %d", status);
      return 0;
    }
    // Process result
    size_t outputSize =
        outNumSamples * dec.mConfig.numChannels * dec.mConfig.bitDepth / 8;
    LOGI("DecoderALAC::write-pcm: %d", (int)outputSize);
    // Output the result in chunks of 1k
    int open = outputSize;
    int processed = 0;
    while (open > 0) {
      int writeSize = MIN(1024, open);
      size_t written =
          p_print->write(result_buffer.data() + processed, writeSize);
      if (writeSize != written) {
        LOGE("write error: %d -> %d", (int)outputSize, (int)written);
      }
      open -= written;
      processed += written;
    }
    return encodedLen;
  }

  operator bool() { return true; }

  /// Set the default frame size: this will be overwritten if you call
  /// setCodecConfig()
  void setFrameSize(int frames) { dec.mConfig.frameLength = frames; }

  /// Provides the actual frame size
  int frameSize() { return dec.mConfig.frameLength; }

 protected:
  ALACDecoder dec;                // decoder from the codec-alac library
  Vector<uint8_t> result_buffer;  // holds one decoded PCM frame
  bool is_init = false;           // true once a codec config was applied
  struct BitBuffer bits;          // bit reader over the encoded input

  /// Builds a fallback ALACSpecificConfig from AudioInfo + frame size when
  /// the encoder never provided a magic cookie
  void setDefaultConfig() {
    // LOGW("Setting up default ALAC config")
    AudioInfo info = audioInfo();
    ALACSpecificConfig tmp;
    // Essential parameters for ALAC compression
    tmp.frameLength = frameSize();
    tmp.compatibleVersion = 0;
    tmp.bitDepth = info.bits_per_sample;
    tmp.pb = 40;  // Rice parameter limit
    tmp.mb = 10;  // Maximum prefix length for Rice coding
    tmp.kb = 14;  // History multiplier
    tmp.numChannels = info.channels;
    tmp.maxRun = 255;  // Maximum run length supported
    tmp.avgBitRate = 0;
    tmp.sampleRate = info.sample_rate;
    // Calculate max frame bytes - must account for:
    // 1. Uncompressed frame size
    // 2. ALAC frame headers
    // 3. Potential compression inefficiency
    uint32_t bytesPerSample = info.bits_per_sample / 8;
    uint32_t uncompressedFrameSize =
        frameSize() * info.channels * bytesPerSample;
    // Add safety margins:
    // - ALAC header (~50 bytes)
    // - Worst case compression overhead (50%)
    // - Alignment padding (64 bytes)
    tmp.maxFrameBytes =
        uncompressedFrameSize + (uncompressedFrameSize / 2) + 64 + 50;
    convertToNetworkFormat(tmp);
    setCodecConfig(tmp);
  }

  /// Calculate the output buffer size based on the current configuration
  int outputBufferSize() {
    return dec.mConfig.frameLength * dec.mConfig.numChannels *
           dec.mConfig.bitDepth / 8;
  }

  /// Convert to big endian so that we can use it in Init()
  void convertToNetworkFormat(ALACSpecificConfig& config) {
    config.frameLength = Swap32NtoB(config.frameLength);
    config.maxRun = Swap16NtoB((uint16_t)config.maxRun);
    config.maxFrameBytes = Swap32NtoB(config.maxFrameBytes);
    config.avgBitRate = Swap32NtoB(config.avgBitRate);
    config.sampleRate = Swap32NtoB(config.sampleRate);
  }
};
/**
 * @brief ALAC (Apple Lossless Audio Codec) encoder. This class is responsible
 * for encoding audio data into ALAC format.
 * The implementation is based on https://github.com/macosforge/alac
 * @ingroup codecs
 * @author Phil Schatzmann
 */
class EncoderALAC : public AudioEncoder {
 public:
  /// Default constructor: you can define your own optimized frame size
  EncoderALAC(int frameSize = kALACDefaultFrameSize) {
    setFrameSize(frameSize);
  }

  void setOutput(Print& out_stream) override { p_print = &out_stream; };

  /// Initializes the encoder and sizes the input/output buffers; fails
  /// when no output stream has been defined
  bool begin() override {
    if (p_print == nullptr) {
      LOGE("No output stream set");
      return false;
    }
    // define input format
    input_format = getInputFormat();
    out_format = getOutputFormat();
    // Setup Encoder
    enc.SetFrameSize(frame_size);
    int rc = enc.InitializeEncoder(out_format);
    // Calculate exact buffer sizes based on frame settings
    uint32_t bytesPerSample = info.bits_per_sample / 8;
    uint32_t inputBufferSize = frame_size * info.channels * bytesPerSample;
    // Calculate output buffer size
    uint32_t outputBufferSize = inputBufferSize * 2;  // Ensure enough space
    LOGI(
        "ALAC Encoder: frame_size=%d, inputBuf=%d, outputBuf=%d, channels=%d, "
        "bits=%d",
        frame_size, inputBufferSize, outputBufferSize, info.channels,
        info.bits_per_sample);
    in_buffer.resize(inputBufferSize);
    out_buffer.resize(outputBufferSize);
    is_started = rc == 0;
    return is_started;
  }

  void end() override {
    enc.Finish();
    is_started = false;
  }

  /// Encode the audio samples into ALAC format
  size_t write(const uint8_t* data, size_t len) override {
    if (!is_started) return 0;
    LOGD("EncoderALAC::write: %d", (int)len);
    for (int j = 0; j < len; j++) {
      in_buffer.write(data[j]);
      if (in_buffer.isFull()) {
        // provide available encoded data length
        int32_t ioNumBytes = in_buffer.size();
        // NOTE(review): the return code rc is not checked - confirm whether
        // Encode() can fail and whether ioNumBytes is valid in that case
        int rc = enc.Encode(input_format, out_format, (uint8_t*)in_buffer.data(),
                            out_buffer.data(), &ioNumBytes);
        // Output encoded data
        size_t written = p_print->write(out_buffer.data(), ioNumBytes);
        if (ioNumBytes != written) {
          LOGE("write error: %d -> %d", (int)ioNumBytes, (int)written);
        }
        in_buffer.reset();
      }
    }
    return len;
  }

  /// Provide the configuration of the encoder
  ALACSpecificConfig config() {
    enc.GetConfig(cfg);
    return cfg;
  }

  /// Provide the magic cookie for the decoder
  ALACBinaryConfig& binaryConfig() {
    bin.setChannels(info.channels);
    uint32_t size = bin.size();
    enc.GetMagicCookie(bin.data(), &size);
    return bin;
  }

  /// Check if the encoder is ready to encode
  operator bool() { return is_started && p_print != nullptr; }

  /// Mime type: returns audio/alac
  const char* mime() override { return "audio/alac"; }

  /// Defines if the encoder should use fast mode
  void setFastMode(bool fast) {
    enc.SetFastMode(fast);
  }

  /// Defines the frame size for the decoder: default is 4096 frames
  void setFrameSize(int frames) {
    if (is_started) {
      LOGE("Can't change frame size on started encoder")
      return;
    }
    frame_size = frames;
  }

  /// Determines the actually defined number of frames
  int frameSize() { return frame_size; }

 protected:
  int frame_size = kALACDefaultFrameSize;  // samples per ALAC frame
  ALACEncoder enc;                 // encoder from the codec-alac library
  SingleBuffer<uint8_t> in_buffer;  // collects one uncompressed frame
  Vector<uint8_t> out_buffer;      // receives the encoded frame
  AudioFormatDescription input_format;  // PCM input description
  AudioFormatDescription out_format;    // ALAC output description
  ALACSpecificConfig cfg;
  ALACBinaryConfig bin;
  Print* p_print = nullptr;
  bool is_started = false;

  /// Describes the PCM input: interleaved signed integer samples
  AudioFormatDescription getInputFormat() {
    AudioFormatDescription result;
    memset(&result, 0, sizeof(AudioFormatDescription));
    result.mSampleRate = info.sample_rate;
    result.mFormatID = kALACFormatLinearPCM;
    result.mFormatFlags =
        kALACFormatFlagIsSignedInteger |
        kALACFormatFlagIsPacked;  // Native endian, signed integer
    result.mBytesPerPacket = info.channels * (info.bits_per_sample / 8);
    result.mFramesPerPacket = 1;
    result.mBytesPerFrame = info.channels * (info.bits_per_sample / 8);
    result.mChannelsPerFrame = info.channels;
    result.mBitsPerChannel = info.bits_per_sample;
    return result;
  }

  /// Describes the compressed ALAC output format
  AudioFormatDescription getOutputFormat() {
    AudioFormatDescription result;
    memset(&result, 0, sizeof(AudioFormatDescription));
    result.mSampleRate = info.sample_rate;
    result.mFormatID = kALACCodecFormat;
    result.mFormatFlags = getOutputFormatFlags(info.bits_per_sample);  // or 0 ?
    result.mBytesPerPacket = 0;  // Variable for compressed format
    result.mFramesPerPacket = frame_size;  // Common ALAC frame size
    result.mBytesPerFrame = 0;  // Variable for compressed format
    result.mChannelsPerFrame = info.channels;
    result.mBitsPerChannel = info.bits_per_sample;
    return result;
  }

  // Adapted from CoreAudioTypes.h
  enum {
    kFormatFlag_16BitSourceData = 1,
    kFormatFlag_20BitSourceData = 2,
    kFormatFlag_24BitSourceData = 3,
    kFormatFlag_32BitSourceData = 4
  };

  /// Maps the bit depth to the corresponding ALAC source-data format flag
  uint32_t getOutputFormatFlags(uint32_t bits) {
    switch (bits) {
      case 16:
        return kFormatFlag_16BitSourceData;
      case 20:
        return kFormatFlag_20BitSourceData;
      case 24:
        return kFormatFlag_24BitSourceData;
      case 32:
        return kFormatFlag_32BitSourceData;
        break;
      default:
        LOGE("Unsupported bit depth: %d", bits);
        return 0;
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,174 @@
#pragma once
#include "AMRNB.h" // https://github.com/pschatzmann/codec-amr
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
 * @brief AMR Narrowband Decoder
 * See https://github.com/pschatzmann/codec-amr
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AMRNBDecoder : public AudioDecoder {
 public:
  /// Default Constructor with valid mode values:
  /// NB_475,NB_515,NB_59,NB_67,NB_74,NB_795,NB_102,NB_122 (e.g.
  /// AMRNB::Mode::NB_475)
  AMRNBDecoder(AMRNB::Mode mode) {
    setMode(mode);
    // AMR-NB is always mono at 8 kHz
    info.channels = 1;
    info.sample_rate = 8000;
  }
  ~AMRNBDecoder() override = default;

  /// Defines the bit rate mode of the codec
  void setMode(AMRNB::Mode mode) {
    this->mode = mode;
    amr.setMode(mode);
  }

  /// Sizes the input buffer to one encoded frame; fails without output
  bool begin() {
    notifyAudioChange(audioInfo());
    buffer.resize(amr.getEncodedFrameSizeBytes());
    return getOutput() != nullptr;
  }

  /// Only validates the values: AMR-NB requires 16 bit / 8000 Hz / mono
  void setAudioInfo(AudioInfo from) {
    if (from.bits_per_sample != 16) {
      LOGE("Invalid bits per sample: %d", from.bits_per_sample);
    }
    if (from.sample_rate != 8000) {
      LOGE("Invalid sample rate: %d", from.sample_rate);
    }
    if (from.channels != 1) {
      LOGE("Invalid channels: %d", from.channels);
    }
  }

  /// Collects encoded bytes and decodes one frame at a time to the output
  size_t write(const uint8_t *data, size_t len) override {
    for (size_t j = 0; j < len; j++) {
      buffer.write(data[j]);
      if (buffer.isFull()) {
        int result_samples = amr.getFrameSizeSamples();
        // variable length array (gcc/clang extension) holding one PCM frame
        int16_t result[result_samples];
        int size =
            amr.decode(buffer.data(), buffer.size(), result, result_samples);
        if (size > 0) {
          if (getOutput() != nullptr) {
            getOutput()->write((uint8_t *)result, size * sizeof(int16_t));
          }
        }
        buffer.clear();
      }
    }
    return len;
  }

  /// Provides the block size (size of encoded frame)
  // note: re-applies the mode on the codec before querying the size
  int blockSize() {
    amr.setMode(mode);
    return amr.getEncodedFrameSizeBytes();
  }

  /// Provides the frame size (size of decoded frame)
  int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }

  operator bool() override { return getOutput() != nullptr; }

 protected:
  AMRNB amr;                        // codec implementation
  AMRNB::Mode mode;                 // active bit rate mode
  SingleBuffer<uint8_t> buffer{0};  // collects one encoded frame
};
/**
* @brief AMR NB Encoder
* See https://github.com/pschatzmann/codec-amr
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AMRNBEncoder : public AudioEncoder {
public:
/// Default Constructor with valid mode values:
/// NB_475,NB_515,NB_59,NB_67,NB_74,NB_795,NB_102,NB_122 (e.g.
/// AMRNB::Mode::NB_475) AMRNBDecoder(AMRNB::Mode mode) {
AMRNBEncoder(AMRNB::Mode mode) {
setMode(mode);
info.channels = 1;
info.sample_rate = 8000;
}
~AMRNBEncoder() override = default;
void setMode(AMRNB::Mode mode) {
this->mode = mode;
amr.setMode(mode);
}
bool begin() {
buffer.resize(frameSize());
return getOutput() != nullptr;
}
void setAudioInfo(AudioInfo from) {
if (from.bits_per_sample != 16) {
LOGE("Invalid bits per sample: %d", from.bits_per_sample);
}
if (from.sample_rate != 8000) {
LOGE("Invalid sample rate: %d", from.sample_rate);
}
if (from.channels != 1) {
LOGE("Invalid channels: %d", from.channels);
}
}
size_t write(const uint8_t *data, size_t len) override {
for (size_t j = 0; j < len; j++) {
buffer.write(data[j]);
if (buffer.isFull()) {
int result_bytes = blockSize();
uint8_t result[result_bytes];
int size =
amr.encode((int16_t *)buffer.data(),
buffer.size() / sizeof(int16_t), result, result_bytes);
if (size > 0) {
if (getOutput() != nullptr) {
getOutput()->write(result, size);
}
}
buffer.clear();
}
}
return len;
}
/// Provides the block size (size of encoded frame)
int blockSize() {
amr.setMode(mode);
return amr.getEncodedFrameSizeBytes();
}
/// Provides the frame size (size of decoded frame)
int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }
const char *mime() { return "audio/amr"; }
void setOutput(Print &out_stream) override { p_print = &out_stream; }
Print *getOutput() { return p_print; }
protected:
AMRNB amr;
AMRNB::Mode mode;
SingleBuffer<uint8_t> buffer{0};
Print *p_print = nullptr;
};
} // namespace audio_tools

View File

@@ -0,0 +1,169 @@
#pragma once
#include "AMRWB.h" // https://github.com/pschatzmann/codec-amr
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
 * @brief AMR Wideband Decoder
 * See https://github.com/pschatzmann/codec-amr
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AMRWBDecoder : public AudioDecoder {
 public:
  /// Default constructor with valid mode values: WB_6_60,WB_8_85,WB_12_65,WB_14_25,
  /// WB_15_85,WB_18_25,WB_19_85,WB_23_05,WB_23_85 (e.g. AMRWB::Mode::WB_6_60)
  AMRWBDecoder(AMRWB::Mode mode) {
    setMode(mode);
    // AMR-WB is always mono at 16 kHz
    info.channels = 1;
    info.sample_rate = 16000;
  }
  ~AMRWBDecoder() override = default;

  /// Sizes the input buffer to one encoded frame; fails without output
  bool begin() {
    notifyAudioChange(audioInfo());
    buffer.resize(amr.getEncodedFrameSizeBytes());
    return getOutput() != nullptr;
  }

  /// Only validates the values: AMR-WB requires 16 bit / 16000 Hz / mono
  void setAudioInfo(AudioInfo from) {
    if (from.bits_per_sample != 16) {
      LOGE("Invalid bits per sample: %d", from.bits_per_sample);
    }
    // bug fix: AMR-WB uses a 16000 Hz sample rate (8000 is AMR-NB)
    if (from.sample_rate != 16000) {
      LOGE("Invalid sample rate: %d", from.sample_rate);
    }
    if (from.channels != 1) {
      LOGE("Invalid channels: %d", from.channels);
    }
  }

  /// Collects encoded bytes and decodes one frame at a time to the output
  size_t write(const uint8_t *data, size_t len) override {
    for (size_t j = 0; j < len; j++) {
      buffer.write(data[j]);
      if (buffer.isFull()) {
        int result_samples = amr.getFrameSizeSamples();
        int16_t result[result_samples];
        int size =
            amr.decode(buffer.data(), buffer.size(), result, result_samples);
        if (size > 0) {
          if (getOutput() != nullptr) {
            getOutput()->write((uint8_t *)result, size * sizeof(int16_t));
          }
        }
        buffer.clear();
      }
    }
    return len;
  }

  /// Provides the block size (size of encoded frame)
  int blockSize() { return amr.getEncodedFrameSizeBytes(); }

  /// @deprecated misspelled alias of blockSize() kept for backwards
  /// compatibility
  int blickSize() { return blockSize(); }

  /// Provides the frame size (size of decoded frame)
  int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }

  /// Defines the bit rate mode of the codec
  void setMode(AMRWB::Mode mode) {
    this->mode = mode;
    amr.setMode(mode);
  }

  operator bool() override { return getOutput() != nullptr; }

 protected:
  AMRWB amr;                        // codec implementation
  AMRWB::Mode mode;                 // active bit rate mode
  SingleBuffer<uint8_t> buffer{0};  // collects one encoded frame
};
/**
 * @brief AMR Wideband Encoder
 * See https://github.com/pschatzmann/codec-amr
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AMRWBEncoder : public AudioEncoder {
 public:
  /// Default constructor with valid mode values: WB_6_60,WB_8_85,WB_12_65,WB_14_25,
  /// WB_15_85,WB_18_25,WB_19_85,WB_23_05,WB_23_85 (e.g. AMRWB::Mode::WB_6_60)
  AMRWBEncoder(AMRWB::Mode mode) {
    setMode(mode);
    // AMR-WB is always mono at 16 kHz
    info.channels = 1;
    info.sample_rate = 16000;
  }
  ~AMRWBEncoder() override = default;

  /// Defines the bit rate mode of the codec
  void setMode(AMRWB::Mode mode) {
    this->mode = mode;
    amr.setMode(mode);
  }

  /// Sizes the input buffer to one PCM frame; fails without output
  bool begin() {
    buffer.resize(frameSize());
    return getOutput() != nullptr;
  }

  /// Only validates the values: AMR-WB requires 16 bit / 16000 Hz / mono
  void setAudioInfo(AudioInfo from) {
    if (from.bits_per_sample != 16) {
      LOGE("Invalid bits per sample: %d", from.bits_per_sample);
    }
    // bug fix: AMR-WB uses a 16000 Hz sample rate (8000 is AMR-NB)
    if (from.sample_rate != 16000) {
      LOGE("Invalid sample rate: %d", from.sample_rate);
    }
    if (from.channels != 1) {
      LOGE("Invalid channels: %d", from.channels);
    }
  }

  /// Collects PCM samples and encodes one full frame at a time
  size_t write(const uint8_t *data, size_t len) override {
    for (size_t j = 0; j < len; j++) {
      buffer.write(data[j]);
      if (buffer.isFull()) {
        int result_bytes = blockSize();
        uint8_t result[result_bytes];
        int size =
            amr.encode((int16_t *)buffer.data(),
                       buffer.size() / sizeof(int16_t), result, result_bytes);
        if (size > 0) {
          if (getOutput() != nullptr) {
            getOutput()->write(result, size);
          }
        }
        buffer.clear();
      }
    }
    return len;
  }

  /// Provides the block size (size of encoded frame)
  int blockSize() {
    amr.setMode(mode);
    return amr.getEncodedFrameSizeBytes();
  }

  /// Provides the frame size (size of decoded frame)
  int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }

  const char *mime() { return "audio/amr"; }

  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  Print *getOutput() { return p_print; }

 protected:
  AMRWB amr;                        // codec implementation
  AMRWB::Mode mode;                 // active bit rate mode
  SingleBuffer<uint8_t> buffer{0};  // collects one PCM frame
  Print *p_print = nullptr;         // output for the encoded data
};
} // namespace audio_tools

View File

@@ -0,0 +1,300 @@
/**
* @file CodecAptx.h
* @author Phil Schatzmann
* @brief Codec for aptx using https://github.com/pschatzmann/libopenaptx
* @version 0.1
* @date 2022-04-24
*
* @copyright Copyright (c) 2022
*
*/
#pragma once
#include "AudioToolsConfig.h"
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "openaptx.h"
namespace audio_tools {
/**
 * @brief Decoder for OpenAptx. Depends on
 * https://github.com/pschatzmann/libopenaptx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class APTXDecoder : public AudioDecoder {
 public:
  APTXDecoder(bool isHd = false) {
    is_hd = isHd;
    // aptX operates on 44.1 kHz stereo; HD carries 24 bit samples
    info.sample_rate = 44100;
    info.channels = 2;
    info.bits_per_sample = isHd ? 24 : 16;
  }

  bool begin() override {
    TRACEI();
    ctx = aptx_init(is_hd);
    is_first_write = true;
    notifyAudioChange(info);
    return ctx != nullptr;
  }

  void end() override {
    TRACEI();
    // bug fix: guard against end() without a (successful) begin()
    if (ctx == nullptr) return;
    aptx_decode_sync_finish(ctx);
    aptx_finish(ctx);
    ctx = nullptr;
  }

  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return ctx != nullptr; }

  /// Decodes the submitted aptX data and writes the resulting 16 bit PCM
  /// to the output; returns 0 when decoding failed
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGI("write: %d", (int)len);
    bool is_ok = true;
    size_t dropped;
    int synced;
    if (is_first_write) {
      is_first_write = false;
      // verify that the stream prefix matches the configured variant
      if (!checkPrefix(data, len)) {
        return 0;
      }
    }
    output_buffer.resize(len * 10);
    memset(output_buffer.data(), 0, output_buffer.size());
    processed = aptx_decode_sync(ctx, (const uint8_t *)data, len,
                                 output_buffer.data(), output_buffer.size(),
                                 &written, &synced, &dropped);
    checkSync(synced, dropped, is_ok);
    // If we have not decoded all supplied samples then decoding unrecoverable
    // failed
    if (processed != len) {
      LOGE("aptX decoding requested: %d eff: %d", (int)len, (int)processed);
      is_ok = false;
    }
    writeData(written, is_ok);
    return is_ok ? len : 0;
  }

 protected:
  struct aptx_context *ctx = nullptr;  // libopenaptx decoder state
  Print *p_print = nullptr;            // final PCM output
  bool is_first_write = true;
  Vector<uint8_t> output_buffer;       // decoded 24 bit samples
  bool is_hd;
  size_t processed;
  size_t written;
  // bug fix: was uninitialized which made the first sync check undefined
  bool syncing = false;

  /// Converts the data to 16 bit and writes it to final output
  void writeData(size_t written, bool &is_ok) {
    if (written > 0) {
      int samples = written / 3;
      LOGI("written: %d", (int)written);
      LOGI("samples: %d", samples);
      // shrink the 24 bit samples to 16 bit in place
      int24_t *p_int24 = (int24_t *)output_buffer.data();
      int16_t *p_int16 = (int16_t *)output_buffer.data();
      for (int j = 0; j < samples; j++) {
        p_int16[j] = p_int24[j].getAndScale16();
      }
      if (p_print->write((uint8_t *)output_buffer.data(), samples * 2) !=
          samples * 2) {
        LOGE("aptX decoding failed to write decoded data");
        is_ok = false;
      }
    }
  }

  /// Checks the synchronization state reported by aptx_decode_sync.
  /// (bug fix: dropped is a byte count, not a bool - the old signature
  /// truncated it so the logs always reported 1 dropped byte)
  void checkSync(bool synced, size_t dropped, bool &is_ok) {
    /* Check all possible states of synced, syncing and dropped status */
    if (!synced) {
      if (!syncing) {
        LOGE("aptX decoding failed, synchronizing");
        syncing = true;
        is_ok = false;
      }
      if (dropped) {
        LOGE("aptX synchronization successful, dropped %lu byte%s",
             (unsigned long)dropped, (dropped != 1) ? "s" : "");
        syncing = false;
        is_ok = true;
      }
      if (!syncing) {
        LOGE("aptX decoding failed, synchronizing");
        syncing = true;
        is_ok = false;
      }
    } else {
      if (dropped) {
        if (!syncing) LOGE("aptX decoding failed, synchronizing");
        LOGE("aptX synchronization successful, dropped %lu byte%s",
             (unsigned long)dropped, (dropped != 1) ? "s" : "");
        syncing = false;
        is_ok = false;
      } else if (syncing) {
        LOGI("aptX synchronization successful");
        syncing = false;
        is_ok = true;
      }
    }
  }

  /// Checks the prefix of the received data
  bool checkPrefix(const void *input_buffer, size_t length) {
    bool result = true;
    if (length >= 4 && memcmp(input_buffer, "\x4b\xbf\x4b\xbf", 4) == 0) {
      if (is_hd) {
        LOGE("aptX audio stream (not aptX HD)");
        result = false;
      }
    } else if (length >= 6 &&
               memcmp(input_buffer, "\x73\xbe\xff\x73\xbe\xff", 6) == 0) {
      if (!is_hd) {
        LOGE("aptX HD audio stream");
        result = false;
      }
    } else {
      if (length >= 4 && memcmp(input_buffer, "\x6b\xbf\x6b\xbf", 4) == 0) {
        LOGE("standard aptX audio stream - not supported");
        result = false;
      } else {
        LOGE("No aptX nor aptX HD audio stream");
        result = false;
      }
    }
    return result;
  }
};
/**
 * @brief Encoder for OpenAptx - Depends on
 * https://github.com/pschatzmann/libopenaptx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class APTXEncoder : public AudioEncoder {
 public:
  APTXEncoder(bool isHd = false) {
    is_hd = isHd;
    // aptX operates on 44.1 kHz stereo; HD carries 24 bit samples
    info.sample_rate = 44100;
    info.channels = 2;
    info.bits_per_sample = isHd ? 24 : 16;
  }

  bool begin() {
    TRACEI();
    // 4 samples per channel form one aptX encoding group
    input_buffer.resize(4 * 2);
    // room for 100 encoded groups (4 bytes each, 6 for HD)
    output_buffer.resize(100 * (is_hd ? 6 : 4));
    LOGI("input_buffer.size: %d", input_buffer.size());
    LOGI("output_buffer.size: %d", output_buffer.size());
    LOGI("is_hd: %s", is_hd ? "true" : "false");
    ctx = aptx_init(is_hd);
    return ctx!=nullptr;
  }

  virtual void end() {
    TRACEI();
    if (ctx != nullptr) {
      size_t output_written = 0;
      // flush the remaining samples out of the encoder
      aptx_encode_finish(ctx, output_buffer.data(), output_buffer.size(),
                         &output_written);
      if (output_written > 0) {
        // write result to final output
        int written = p_print->write((const uint8_t *)output_buffer.data(),
                                     output_written);
        if (written != output_written) {
          LOGE("write requested: %d eff: %d", output_written, written);
        }
      }
      aptx_finish(ctx);
      ctx = nullptr;
    }
  }

  virtual const char *mime() { return "audio/aptx"; }

  /// Derives the HD flag from the bit depth (16 -> aptX, 24 -> aptX HD)
  virtual void setAudioInfo(AudioInfo info) {
    AudioEncoder::setAudioInfo(info);
    switch (info.bits_per_sample) {
      case 16:
        is_hd = false;
        break;
      case 24:
        is_hd = true;
        break;
      default:
        LOGE("invalid bits_per_sample: %d", info.bits_per_sample);
    }
  }

  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return ctx != nullptr; }

  /// Encodes the submitted 16 bit PCM samples and writes the aptX result
  /// to the output in batches
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGI("write: %d", len);
    if (ctx == nullptr) return 0;
    size_t output_written = 0;
    // process all bytes
    int16_t *in_ptr16 = (int16_t *)data;
    int in_samples = len / 2;
    for (int j = 0; j < in_samples; j++) {
      // scale each 16 bit sample up to the 24 bit input the codec expects
      input_buffer[input_pos++].setAndScale16(in_ptr16[j]);
      // if input_buffer is full we encode
      if (input_pos >= input_buffer.size()) {
        size_t result = aptx_encode(
            ctx, (const uint8_t *)input_buffer.data(), input_buffer.size() * 3,
            output_buffer.data() + output_pos,
            output_buffer.size() - output_pos, &output_written);
        output_pos += output_written;
        if (result != input_buffer.size() * 3) {
          LOGW("encode requested: %d, eff: %d", input_buffer.size() * 3,
               result);
        }
        // if output buffer is full we write the result
        // NOTE(review): this flushes once output_pos reaches half the buffer
        // (output_pos + output_pos) - presumably the intent is to keep room
        // for the next encoded group; confirm the intended threshold
        if (output_pos + output_pos >= output_buffer.size()) {
          int written =
              p_print->write((const uint8_t *)output_buffer.data(), output_pos);
          if (written != output_pos) {
            LOGE("write requested: %d eff: %d", output_pos, written);
          }
          // restart at beginning of output buffer
          output_pos = 0;
        }
        // restart at beginning of input buffer
        input_pos = 0;
      }
    }
    return len;
  }

 protected:
  bool is_hd;                           // true for aptX HD (24 bit)
  Vector<int24_t> input_buffer{4 * 2};  // one encoding group (4 samples x 2 ch)
  Vector<uint8_t> output_buffer;        // collected encoded bytes
  int input_pos = 0;                    // fill position in input_buffer
  int output_pos = 0;                   // fill position in output_buffer
  Print *p_print = nullptr;             // final output
  struct aptx_context *ctx = nullptr;   // libopenaptx encoder state
};
} // namespace audio_tools

View File

@@ -0,0 +1,301 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/// Controls where the encoder inserts a newline ('\n') delimiter: never,
/// after each audio frame, or after each write() call.
/// NOTE(review): the name looks like a typo of "Base64Logic" - kept for API
/// compatibility.
enum Base46Logic { NoCR, CRforFrame, CRforWrite };

/// Standard Base64 alphabet used for encoding (RFC 4648)
static char encoding_table[] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};

/// Number of '=' padding characters for an input length % 3 of {0, 1, 2}
static int mod_table[] = {0, 2, 1};

/// Decoding table: maps an ASCII code to its 6-bit Base64 value. It also
/// tolerates the URL-safe alphabet ('-' and '_'); all unlisted characters
/// (and the remainder of the 256 entries) map to 0.
static const int B64index[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61, 0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,
    7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 0,  0,  0,  0,  63, 0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
/**
* @brief DecoderBase64 - Converts a Base64 encoded Stream into the original
* data stream. Decoding only gives a valid result if we start at a limit of 4
* bytes. We therefore use by default a newline to determine a valid start
* boundary.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class DecoderBase64 : public AudioDecoder {
 public:
  /**
   * @brief Constructor for a new DecoderBase64 object
   */
  DecoderBase64() { TRACED(); }

  /**
   * @brief Constructor for a new DecoderBase64 object
   *
   * @param out Output Stream to which we write the decoded result
   */
  DecoderBase64(Print &out) {
    TRACED();
    setOutput(out);
  }

  /// Defines the output Stream
  void setOutput(Print &out) override { p_print = &out; }

  /// We expect new lines to delimit the individual lines (see Base46Logic)
  void setNewLine(Base46Logic logic) { newline_logic = logic; }

  bool begin() override {
    TRACED();
    // without newline handling every position is a valid start; otherwise we
    // first need to synchronize on a '\n' before decoding may begin
    is_valid = newline_logic == NoCR;
    active = true;
    return true;
  }

  void end() override {
    TRACED();
    // decode remaining buffered bytes
    int len = buffer.available();
    uint8_t tmp[len];  // NOTE(review): VLA is a compiler extension - confirm toolchain support
    buffer.readArray(tmp, len);
    decodeLine(tmp, len);
    active = false;
    buffer.resize(0);
  }

  /// Collects the Base64 characters and decodes them in 4 byte groups
  size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr) return 0;
    TRACED();
    addToBuffer((uint8_t *)data, len);
    int decode_size = 4; // maybe we should increase this ?
    // Base64 can only be decoded on 4 character boundaries
    while (buffer.available() >= decode_size) {
      uint8_t tmp[decode_size];
      buffer.readArray(tmp, decode_size);
      decodeLine(tmp, decode_size);
    }
    return len;
  }

  operator bool() override { return active; }

 protected:
  bool active = false;
  bool is_valid = false;  // true once we are synchronized on a 4 char boundary
  Base46Logic newline_logic = CRforFrame;
  Vector<uint8_t> result;  // scratch buffer for one decoded chunk
  RingBuffer<uint8_t> buffer{1500};  // collects incoming Base64 characters
  AudioInfo info;  // NOTE(review): appears unused here - confirm before removal

  /// Decodes one chunk of Base64 characters and writes the binary result to
  /// the output (handles an optional padded final quadruple)
  void decodeLine(uint8_t *data, size_t byteCount) {
    LOGD("decode: %d", (int)byteCount);
    int len = byteCount;
    unsigned char *p = (unsigned char *)data;
    // pad is 1 when the chunk ends in '=' or is not a multiple of 4
    int pad = len > 0 && (len % 4 || p[len - 1] == '=');
    // L = number of input chars that form complete (un-padded) quadruples
    const size_t L = ((len + 3) / 4 - pad) * 4;
    result.resize(L / 4 * 3 + pad);
    memset(result.data(), 0, result.size());
    // each group of 4 characters yields 3 bytes
    for (size_t i = 0, j = 0; i < L; i += 4) {
      int32_t n = static_cast<int32_t>(B64index[p[i]]) << 18 | B64index[p[i + 1]] << 12 |
                  B64index[p[i + 2]] << 6 | B64index[p[i + 3]];
      result[j++] = n >> 16;
      result[j++] = n >> 8 & 0xFF;
      result[j++] = n & 0xFF;
    }
    // handle the final, padded quadruple (1 or 2 result bytes)
    if (pad) {
      int32_t n = static_cast<int32_t>(B64index[p[L]]) << 18 | B64index[p[L + 1]] << 12;
      result[result.size() - 1] = n >> 16;
      if (len > L + 2 && p[L + 2] != '=') {
        n |= B64index[p[L + 2]] << 6;
        result.push_back(n >> 8 & 0xFF);
      }
    }
    writeBlocking(p_print, result.data(), result.size());
  }

  /// Adds the data to the ring buffer, skipping any whitespace and using
  /// '\n' to (re)synchronize on a 4 character boundary
  void addToBuffer(uint8_t *data, size_t len) {
    TRACED();
    if (buffer.size() < len) {
      buffer.resize(len);
    }
    // synchronize to find a valid start position
    int start = 0;
    if (!is_valid) {
      for (int j = 0; j < len; j++) {
        if (data[j] == '\n') {
          start = j;
          is_valid = true;
          break;
        }
      }
    }
    if (is_valid) {
      // remove white space
      for (int j = start; j < len; j++) {
        if (!isspace(data[j])) {
          buffer.write(data[j]);
        } else if (data[j] == '\n') {
          // a new line marks a frame boundary: drop any partial quadruple
          int offset = buffer.available() % 4;
          if (offset > 0) {
            LOGW("Resync %d (-%d)...", buffer.available(), offset);
            uint8_t tmp[4];
            buffer.readArray(tmp, offset);
          }
        }
      }
    }
    LOGD("buffer: %d, is_valid: %s", buffer.available(),
         is_valid ? "true" : "false");
  }
};
/**
* @brief EncoderBase64s - Encodes the input data into a Base64 string.
* By default each audio frame is followed by a new line, so that we can
* easily resynchronize the reading of a data stream. The generation
* of the new line can be configured with the setNewLine() method.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class EncoderBase64 : public AudioEncoder {
public:
// Empty Conbuffeructor - the output buffeream must be provided with begin()
EncoderBase64() {}
// Conbuffeructor providing the output buffeream
EncoderBase64(Print &out) { p_print = &out; }
/// Defines the output Stream
void setOutput(Print &out_buffeream) override {
p_print = &out_buffeream;
}
/// Provides "text/base64"
const char *mime() override { return "text/base64"; }
/// We add a new line after each write
void setNewLine(Base46Logic flag) { newline_logic = flag; }
/// starts the processing using the actual RAWAudioInfo
virtual bool begin() override {
is_open = true;
frame_size = info.bits_per_sample * info.channels / 8;
if (newline_logic != NoCR) {
if (frame_size==0){
LOGW("AudioInfo not defined");
// assume frame size
frame_size = 4;
}
p_print->write('\n');
flush();
}
return true;
}
/// stops the processing
void end() override { is_open = false; }
/// Writes PCM data to be encoded as RAW
virtual size_t write(const uint8_t *data, size_t len) override {
LOGD("EncoderBase64::write: %d", (int)len);
switch (newline_logic) {
case NoCR:
case CRforWrite:
encodeLine(data, len);
break;
case CRforFrame: {
int frames = len / frame_size;
int open = len;
int offset = 0;
while (open > 0) {
int write_size = min(frame_size, open);
encodeLine(data + offset, write_size);
open -= write_size;
offset += write_size;
}
break;
}
}
return len;
}
operator bool() override { return is_open; }
bool isOpen() { return is_open; }
protected:
Print *p_print = nullptr;
bool is_open;
Base46Logic newline_logic = CRforFrame;
Vector<uint8_t> ret;
AudioInfo info;
int frame_size;
void flush() {
#if defined(ESP32)
# if ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(3, 3, 5)
p_print->flush();
# endif
#else
p_print->flush();
#endif
}
void encodeLine(const uint8_t *data, size_t input_length) {
LOGD("EncoderBase64::encodeLine: %d", (int)input_length);
int output_length = 4 * ((input_length + 2) / 3);
if (ret.size() < output_length + 1) {
ret.resize(output_length + 1);
}
for (int i = 0, j = 0; i < input_length;) {
uint32_t octet_a = i < input_length ? (unsigned char)data[i++] : 0;
uint32_t octet_b = i < input_length ? (unsigned char)data[i++] : 0;
uint32_t octet_c = i < input_length ? (unsigned char)data[i++] : 0;
uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
ret[j++] = encoding_table[(triple >> 3 * 6) & 0x3F];
ret[j++] = encoding_table[(triple >> 2 * 6) & 0x3F];
ret[j++] = encoding_table[(triple >> 1 * 6) & 0x3F];
ret[j++] = encoding_table[(triple >> 0 * 6) & 0x3F];
}
for (int i = 0; i < mod_table[input_length % 3]; i++)
ret[output_length - 1 - i] = '=';
// add a new line to the end
if (newline_logic != NoCR) {
ret[output_length] = '\n';
output_length++;
}
writeBlocking(p_print, ret.data(), output_length);
flush();
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,133 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/CodecG7xx.h"
namespace audio_tools {
/**
* @brief DecoderBasic - supports mime type audio/basic
* Requires https://github.com/pschatzmann/arduino-libg7xx
* The content of the "audio/basic" subtype is single channel audio
* encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class DecoderBasic : public AudioDecoder {
 public:
  /// Default constructor
  DecoderBasic() { TRACED(); }

  /// Constructor defining the output
  DecoderBasic(Print &out_stream, bool active = true) {
    TRACED();
    setOutput(out_stream);
  }

  /// Constructor defining the output and the notification target
  DecoderBasic(Print &out_stream, AudioInfoSupport &bi) {
    TRACED();
    setOutput(out_stream);
    addNotifyAudioChange(bi);
  }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override { decoder.setOutput(out_stream); }

  /// Registers the target for audio format change notifications
  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    decoder.addNotifyAudioChange(bi);
  }

  /// Reports the audio format of the decoded result
  AudioInfo audioInfo() override { return decoder.audioInfo(); }

  /// Starts the processing with the indicated audio format
  bool begin(AudioInfo info) {
    decoder.setAudioInfo(info);
    return decoder.begin();
  }

  /// Starts the processing
  bool begin() override {
    TRACED();
    return decoder.begin();
  }

  /// Stops the processing
  void end() override { decoder.end(); }

  /// Delegates the mu-law decoding to the G.711 decoder
  virtual size_t write(const uint8_t *data, size_t len) override {
    return decoder.write((uint8_t *)data, len);
  }

  virtual operator bool() override { return decoder; }

 protected:
  G711_ULAWDecoder decoder;  // audio/basic is 8 kHz 8 bit mu-law
};
/**
* @brief EncoderBasic - supports mime type audio/basic.
* The content of the "audio/basic" subtype is single channel audio
* encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz.
* Requires https://github.com/pschatzmann/arduino-libg7xx
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class EncoderBasic : public AudioEncoder {
 public:
  /// Empty constructor - the output stream must be provided with begin()
  EncoderBasic() {}

  /// Constructor providing the output stream
  EncoderBasic(Print &out) { setOutput(out); }

  /// Defines the output Stream
  void setOutput(Print &out) override { encoder.setOutput(out); }

  /// Provides "audio/basic"
  const char *mime() override { return "audio/basic"; }

  /// Forwards the audio format to the G.711 encoder
  virtual void setAudioInfo(AudioInfo from) override {
    AudioEncoder::setAudioInfo(from);
    encoder.setAudioInfo(from);
  }

  /// Starts the processing
  bool begin() override { return encoder.begin(); }

  /// Stops the processing
  void end() override { encoder.end(); }

  /// Delegates the mu-law encoding to the G.711 encoder
  virtual size_t write(const uint8_t *in_ptr, size_t in_size) override {
    return encoder.write((uint8_t *)in_ptr, in_size);
  }

  operator bool() override { return encoder; }

  bool isOpen() { return encoder; }

 protected:
  G711_ULAWEncoder encoder;  // audio/basic is 8 kHz 8 bit mu-law
};
} // namespace audio_tools

View File

@@ -0,0 +1,99 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/AudioEncoded.h"
namespace audio_tools {
/**
* @brief CodecChain - allows to chain multiple decoders and encoders together
* @ingroup codecs
* @ingroup decoder
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class CodecChain : public AudioDecoder, AudioEncoder {
 public:
  CodecChain() = default;
  /// Creates a chain with a single decoder
  CodecChain(AudioDecoder &decoder) { addDecoder(decoder); }
  /// Creates a chain with a single encoder
  CodecChain(AudioEncoder &encoder) { addEncoder(encoder); }

  /// Adds a decoder to the chain
  void addDecoder(AudioDecoder &decoder) {
    EncodedAudioStream stream;
    stream.setDecoder(&decoder);
    streams.push_back(stream);
    // bug fix: push_back may reallocate the vector and move all elements, so
    // the wiring of the WHOLE chain must be re-established (the previous code
    // only linked the last two elements and left dangling addresses behind)
    relink();
  }

  /// Adds an encoder to the chain
  void addEncoder(AudioEncoder &encoder) {
    EncodedAudioStream stream;
    stream.setEncoder(&encoder);
    streams.push_back(stream);
    relink();
  }

  /// Defines the final output of the chain
  void setOutput(Print &out_stream) override {
    p_print = &out_stream;
    if (streams.size() > 0) streams[streams.size() - 1].setOutput(out_stream);
  }
  void setOutput(AudioStream &out_stream) override {
    p_print = &out_stream;
    if (streams.size() > 0) streams[streams.size() - 1].setOutput(out_stream);
  }
  void setOutput(AudioOutput &out_stream) override {
    p_print = &out_stream;
    if (streams.size() > 0) streams[streams.size() - 1].setOutput(out_stream);
  }

  /// Propagates the audio format to all chain members
  void setAudioInfo(AudioInfo from) override {
    AudioDecoder::setAudioInfo(from);
    for (auto &stream : streams) {
      stream.setAudioInfo(from);
    }
  }

  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    for (auto &stream : streams) {
      stream.addNotifyAudioChange(bi);
    }
  }

  /// Feeds the data into the first member of the chain
  size_t write(const uint8_t *data, size_t len) override {
    if (streams.size() == 0) return 0;
    return streams[0].write(data, len);
  }

  operator bool() { return is_active; }

  bool begin() {
    is_active = true;
    for (auto &stream : streams) {
      stream.begin();
    }
    return true;
  }

  void end() override {
    is_active = false;
    for (auto &stream : streams) {
      stream.end();
    }
  }

  /// Returns nullptr
  const char *mime() { return nullptr; }

 protected:
  Vector<EncodedAudioStream> streams;
  bool is_active = false;

  /// Re-establishes the output wiring: each stream feeds the next one and
  /// the last one feeds the defined final output (if already set)
  void relink() {
    for (int i = 0; i < (int)streams.size() - 1; i++) {
      streams[i].setOutput(streams[i + 1]);
    }
    if (p_print != nullptr && streams.size() > 0) {
      streams[streams.size() - 1].setOutput(*p_print);
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,301 @@
/**
* @file CodecCodec2.h
* @author Phil Schatzmann
* @brief Codec2 Codec using https://github.com/pschatzmann/arduino-codec2
* The codec was developed by David Grant Rowe, with support and cooperation of
* other researchers (e.g., Jean-Marc Valin from Opus). Codec 2 consists of
* 3200, 2400, 1600, 1400, 1300, 1200, 700 and 450 bit/s codec modes. It
* outperforms most other low-bitrate speech codecs. For example, it uses half
* the bandwidth of Advanced Multi-Band Excitation to encode speech with similar
* quality. The speech codec uses 16-bit PCM sampled audio, and outputs packed
* digital bytes. When sent packed digital bytes, it outputs PCM sampled audio.
* The audio sample rate is fixed at 8 kHz.
*
* @version 0.1
* @date 2022-04-24
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "codec2.h"
namespace audio_tools {
/// Convert bits per sample to Codec2 mode
/// Convert bits per second to the matching Codec2 mode (-1 if unsupported).
/// Declared inline because this header may be included by multiple
/// translation units (avoids ODR / multiple definition link errors).
inline int getCodec2Mode(int bits_per_second) {
  switch (bits_per_second) {
    case 3200:
      return CODEC2_MODE_3200;
    case 2400:
      return CODEC2_MODE_2400;
    case 1600:
      return CODEC2_MODE_1600;
    case 1400:
      return CODEC2_MODE_1400;
    case 1300:
      return CODEC2_MODE_1300;
    case 1200:
      return CODEC2_MODE_1200;
    case 700:
      return CODEC2_MODE_700C;
    case 450:
      return CODEC2_MODE_450;
    default:
      // bug fix: the message wrongly referred to a "sample rate"
      LOGE(
          "Unsupported bit rate: use 3200, 2400, 1600, 1400, 1300, 1200, "
          "700 or 450");
      return -1;
  }
}
/**
* @brief Decoder for Codec2. Depends on
* https://github.com/pschatzmann/arduino-libcodec2.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Codec2Decoder : public AudioDecoder {
 public:
  /// @param bps bit rate: 3200, 2400, 1600, 1400, 1300, 1200, 700 or 450
  Codec2Decoder(int bps = 3200) {
    info.sample_rate = 8000;
    info.channels = 1;
    info.bits_per_sample = 16;
    setBitsPerSecond(bps);
  }

  /// sets bits per second: 3200, 2400, 1600, 1400, 1300, 1200, 700 and 450
  /// bit/s
  virtual void setBitsPerSecond(int bps) { bits_per_second = bps; }

  int bitsPerSecond() { return bits_per_second; }

  /// Validates the audio format and allocates the codec2 context and buffers
  virtual bool begin() {
    TRACEI();
    int mode = getCodec2Mode(bits_per_second);
    if (mode == -1) {
      LOGE("invalid bits_per_second")
      return false;
    }
    if (info.channels != 1) {
      LOGE("Only 1 channel supported")
      return false;
    }
    if (info.bits_per_sample != 16) {
      LOGE("Only 16 bps are supported")
      return false;
    }
    if (info.sample_rate != 8000) {
      LOGW("Sample rate should be 8000: %d", info.sample_rate);
    }
    // release the context of a prior begin() to avoid a leak
    if (p_codec2 != nullptr) {
      codec2_destroy(p_codec2);
      p_codec2 = nullptr;
    }
    p_codec2 = codec2_create(mode);
    if (p_codec2 == nullptr) {
      LOGE("codec2_create");
      return false;
    }
    // input: one encoded frame; result: one decoded PCM frame
    // (bug fix: the result buffer was sized with bytesCompressed() which is
    // too small for the decoded samples and overflowed in codec2_decode)
    input_buffer.resize(bytesCompressed());
    result_buffer.resize(bytesUncompressed());
    input_pos = 0;
    assert(input_buffer.size() > 0);
    assert(result_buffer.size() > 0);
    notifyAudioChange(info);
    LOGI("bytesCompressed:%d", bytesCompressed());
    LOGI("bytesUncompressed:%d", bytesUncompressed());
    is_active = true;
    return true;
  }

  /// Number of bytes of one encoded (compressed) frame
  int bytesCompressed() {
    return p_codec2 != nullptr ? codec2_bytes_per_frame(p_codec2) : 0;
  }

  /// Number of bytes of one decoded (PCM) frame
  int bytesUncompressed() {
    return p_codec2 != nullptr
               ? codec2_samples_per_frame(p_codec2) * sizeof(int16_t)
               : 0;
  }

  virtual void end() {
    TRACEI();
    // guard: end() may be called without a successful begin()
    if (p_codec2 != nullptr) {
      codec2_destroy(p_codec2);
      p_codec2 = nullptr;
    }
    is_active = false;
  }

  /// Defines the output Stream
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return is_active; }

  /// Collects the encoded bytes and decodes frame by frame
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", (int)len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    if (p_print == nullptr) {
      LOGE("output not defined");
      return 0;
    }
    uint8_t *p_byte = (uint8_t *)data;
    for (size_t j = 0; j < len; j++) {
      processByte(p_byte[j]);
    }
    return len;
  }

 protected:
  Print *p_print = nullptr;
  struct CODEC2 *p_codec2 = nullptr;  // bug fix: was uninitialized
  bool is_active = false;
  Vector<uint8_t> input_buffer;   // one compressed frame
  Vector<uint8_t> result_buffer;  // one decoded PCM frame
  int input_pos = 0;
  int bits_per_second = 0;

  /// Build decoding buffer and decode when frame is full
  void processByte(uint8_t byte) {
    // add byte to buffer
    input_buffer[input_pos++] = byte;
    // decode if buffer is full
    if (input_pos >= (int)input_buffer.size()) {
      codec2_decode(p_codec2, (short *)result_buffer.data(),
                    input_buffer.data());
      int written =
          p_print->write((uint8_t *)result_buffer.data(), result_buffer.size());
      if (written != (int)result_buffer.size()) {
        LOGE("write: %d written: %d", (int)result_buffer.size(), written);
      } else {
        LOGD("write: %d written: %d", (int)result_buffer.size(), written);
      }
      // give the output a chance to process the data
      delay(2);
      input_pos = 0;
    }
  }
};
/**
* @brief Encoder for Codec2 - Depends on
* https://github.com/pschatzmann/arduino-libcodec2.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Codec2Encoder : public AudioEncoder {
 public:
  /// @param bps bit rate: 3200, 2400, 1600, 1400, 1300, 1200, 700 or 450
  Codec2Encoder(int bps = 3200) {
    info.sample_rate = 8000;
    info.channels = 1;
    info.bits_per_sample = 16;
    setBitsPerSecond(bps);
  }

  /// sets bits per second: 3200, 2400, 1600, 1400, 1300, 1200, 700 and 450
  /// bit/s
  virtual void setBitsPerSecond(int bps) { bits_per_second = bps; }

  int bitsPerSecond() { return bits_per_second; }

  /// Number of bytes of one encoded (compressed) frame
  int bytesCompressed() {
    return p_codec2 != nullptr ? codec2_bytes_per_frame(p_codec2) : 0;
  }

  /// Number of bytes of one PCM (uncompressed) frame
  int bytesUncompressed() {
    return p_codec2 != nullptr
               ? codec2_samples_per_frame(p_codec2) * sizeof(int16_t)
               : 0;
  }

  /// Validates the audio format and allocates the codec2 context and buffers
  bool begin() {
    TRACEI();
    int mode = getCodec2Mode(bits_per_second);
    if (mode == -1) {
      LOGE("invalid bits_per_second")
      return false;
    }
    if (info.channels != 1) {
      LOGE("Only 1 channel supported")
      return false;
    }
    if (info.bits_per_sample != 16) {
      LOGE("Only 16 bps are supported")
      return false;
    }
    if (info.sample_rate != 8000) {
      LOGW("Sample rate should be 8000: %d", info.sample_rate);
    }
    // release the context of a prior begin() to avoid a leak
    if (p_codec2 != nullptr) {
      codec2_destroy(p_codec2);
      p_codec2 = nullptr;
    }
    p_codec2 = codec2_create(mode);
    if (p_codec2 == nullptr) {
      LOGE("codec2_create");
      return false;
    }
    // input: one PCM frame; result: one compressed frame
    // (bug fix: the buffer sizes were swapped, which broke the framing and
    // wrote uncompressed-sized chunks to the output)
    input_buffer.resize(bytesUncompressed());
    result_buffer.resize(bytesCompressed());
    buffer_pos = 0;
    assert(input_buffer.size() > 0);
    assert(result_buffer.size() > 0);
    LOGI("bytesCompressed:%d", bytesCompressed());
    LOGI("bytesUncompressed:%d", bytesUncompressed());
    is_active = true;
    return true;
  }

  virtual void end() {
    TRACEI();
    // guard: end() may be called without a successful begin()
    if (p_codec2 != nullptr) {
      codec2_destroy(p_codec2);
      p_codec2 = nullptr;
    }
    is_active = false;
  }

  virtual const char *mime() { return "audio/codec2"; }

  /// Defines the output Stream
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return is_active; }

  /// Collects the PCM bytes and encodes frame by frame
  size_t write(const uint8_t *in_ptr, size_t in_size) override {
    LOGD("write: %d", (int)in_size);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    if (p_print == nullptr) {
      LOGE("output not defined");
      return 0;
    }
    // encode bytes
    uint8_t *p_byte = (uint8_t *)in_ptr;
    for (size_t j = 0; j < in_size; j++) {
      processByte(p_byte[j]);
    }
    return in_size;
  }

 protected:
  Print *p_print = nullptr;
  struct CODEC2 *p_codec2 = nullptr;
  bool is_active = false;
  int buffer_pos = 0;
  Vector<uint8_t> input_buffer;   // one PCM frame
  Vector<uint8_t> result_buffer;  // one compressed frame
  int bits_per_second = 0;

  // add byte to encoding buffer and encode if buffer is full
  void processByte(uint8_t byte) {
    input_buffer[buffer_pos++] = byte;
    if (buffer_pos >= (int)input_buffer.size()) {
      // encode
      codec2_encode(p_codec2, result_buffer.data(),
                    (short *)input_buffer.data());
      int written = p_print->write(result_buffer.data(), result_buffer.size());
      if (written != (int)result_buffer.size()) {
        LOGE("write: %d written: %d", (int)result_buffer.size(), written);
      } else {
        LOGD("write: %d written: %d", (int)result_buffer.size(), written);
      }
      buffer_pos = 0;
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,113 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#if defined(ARDUINO) && !defined(IS_MIN_DESKTOP)
#include "Print.h"
#endif
namespace audio_tools {
/**
* @brief Dummy Decoder which just copies the provided data to the output.
* You can define if it is PCM data.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class CopyDecoder : public AudioDecoder {
 public:
  /// @brief Default constructor
  /// @param isPcm defines whether the copied data is raw PCM
  CopyDecoder(bool isPcm = false) { is_pcm = isPcm; }

  /// @brief Constructor defining the output
  CopyDecoder(Print &out_stream) {
    TRACED();
    pt_print = &out_stream;
  }

  /// @brief Constructor defining the output and an (unused) notification
  /// target
  CopyDecoder(Print &out_stream, AudioInfoSupport &bi) {
    pt_print = &out_stream;
  }

  ~CopyDecoder() {}

  /// Defines the output Stream
  virtual void setOutput(Print &out_stream) { pt_print = &out_stream; }

  bool begin() { return true; }

  void end() {}

  /// Forwards the data unchanged to the defined output
  size_t write(const uint8_t *data, size_t len) {
    TRACED();
    if (pt_print == nullptr) {
      LOGE("No output stream defined for CopyDecoder");
      return 0;
    }
    return pt_print->write((uint8_t *)data, len);
  }

  operator bool() { return true; }

  /// The result is encoded data - by default this is false
  virtual bool isResultPCM() { return is_pcm; }

  /// Defines that the source and therefor the result is also PCM data
  void setResultPCM(bool pcm) { is_pcm = pcm; }

 protected:
  Print *pt_print = nullptr;
  bool is_pcm = false;
};
/**
* @brief Dummy Encoder which just copies the provided data to the output
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class CopyEncoder : public AudioEncoder {
 public:
  /// Empty constructor - the output stream must be provided with setOutput()
  CopyEncoder() { TRACED(); }

  /// Constructor providing the output stream
  CopyEncoder(Print &out_stream) {
    TRACED();
    pt_print = &out_stream;
  }

  /// Constructor providing the output stream and an (unused) notification
  /// target
  CopyEncoder(Print &out_stream, AudioInfoSupport &bi) {
    pt_print = &out_stream;
  }

  ~CopyEncoder() {}

  /// Defines the output Stream
  virtual void setOutput(Print &out_stream) { pt_print = &out_stream; }

  bool begin() { return true; }

  void end() {}

  /// Copies the data unchanged to the defined output
  size_t write(const uint8_t *data, size_t len) {
    if (pt_print == nullptr) {
      // bug fix: the message wrongly referred to CopyDecoder
      LOGE("No output stream defined for CopyEncoder");
      return 0;
    }
    return pt_print->write((uint8_t *)data, len);
  }

  operator bool() { return true; }

  /// Provides the mime type of the encoded data
  const char *mime() { return mime_type; }

  /// Defines the mime type
  void setMime(const char *mime) { mime_type = mime; }

 protected:
  Print *pt_print = nullptr;
  const char *mime_type = "audio/pcm";
};
/// @brief Alias for CopyEncoder to handle PCM audio encoding (no actual encoding)
/// @ingroup codecs
using PCMEncoder = CopyEncoder;
/// @brief Alias for CopyDecoder to handle PCM audio decoding (no actual decoding)
/// @ingroup codecs
using PCMDecoder = CopyDecoder;
} // namespace audio_tools

View File

@@ -0,0 +1,654 @@
/**
* @file CodecDSF.h
* @brief DSF (DSD Stream File) format decoder implementation
* @author pschatzmann
* @copyright GPLv3
*
* This file contains the implementation of a DSF decoder that converts Direct
* Stream Digital (DSD) audio data to Pulse Code Modulation (PCM) format. The
* decoder supports the DSF file format which is commonly used for
* high-resolution audio distribution.
*
* Key features:
* - DSF file header parsing and validation
* - DSD bitstream to PCM conversion with configurable decimation
* - BiQuad low-pass filtering for anti-aliasing
* - Streaming-compatible operation for real-time processing
* - Support for stereo DSD files (DSD64 and higher sample rates)
*
*/
#pragma once
// #pragma GCC optimize("Ofast")
#pragma GCC optimize("O3")
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioFilter/Filter.h"
#include "AudioTools/CoreAudio/Buffers.h"
/**
* @defgroup dsd DSD Audio
* @ingroup codecs
* @brief Direct Stream Digital (DSD) audio format support
*/
/// Buffer size for DSD data processing - must accommodate decimation step
#define DSD_BUFFER_SIZE 1024 * 2
namespace audio_tools {
/**
* @brief Metadata structure for DSF (DSD Stream File) format
* @ingroup dsd
* @author pschatzmann
*
* Contains format information and metadata extracted from DSF file headers,
* including DSD sample rates, data sizes, and calculated PCM conversion
* parameters.
*/
struct DSFMetadata : public AudioInfo {
  DSFMetadata() = default;
  /// Convenience constructor defining the target PCM sample rate
  DSFMetadata(int rate) { sample_rate = rate; }
  uint32_t dsd_sample_rate =
      0; ///< DSD sample rate (e.g. 2822400 Hz for DSD64)
  uint64_t dsd_data_bytes = 0; ///< Size of DSD bitstream data in bytes
  uint8_t dsd_bits = 1; ///< BitSize always 1!
  uint64_t pcm_frames = 0; ///< Estimated number of PCM frames after conversion
  float duration_sec = 0; ///< Approximate audio duration in seconds
  uint32_t dsd_buffer_size =
      DSD_BUFFER_SIZE; ///< Internal buffer size for DSD processing
  float filter_q = 0.5f; ///< BiQuad low-pass Q factor (was 1.41f)
  float filter_cutoff = 0.4f; ///< Cutoff frequency as fraction of Nyquist
  int output_buffer_size = 1024; ///< Minimum size of the PCM output buffer in bytes
};
/**
* @brief Header structures for DSF (DSD Stream File) format
* @ingroup dsd
*
* These packed structures define the binary layout of DSF file headers,
* allowing direct parsing of the file format without manual byte manipulation.
*/
/// DSF file prefix containing file identification and basic information
// NOTE: per the DSF specification all multi-byte fields are little-endian
struct __attribute__((packed)) DSDPrefix {
  char id[4];              // "DSD "
  uint64_t chunkSize;      // 28
  uint64_t fileSize;       // total file size
  uint64_t metadataOffset; // offset to "ID3 " chunk (0 if none)
};

/// DSF format chunk containing audio format parameters
struct __attribute__((packed)) DSFFormat {
  char id[4];                   // "fmt "
  uint64_t chunkSize;           // 52
  uint32_t formatVersion;       // 1
  uint32_t formatID;            // 0
  uint32_t channelType;         // e.g., 2 for stereo
  uint32_t channelNum;          // number of channels
  uint32_t samplingFrequency;   // e.g., 2822400
  uint32_t bitsPerSample;       // 1
  uint64_t sampleCount;         // total samples per channel
  uint32_t blockSizePerChannel; // e.g., 4096
  uint32_t reserved;            // 0
};

/// DSF data chunk header containing audio data size information
struct __attribute__((packed)) DSFDataHeader {
  char id[4];         // "data"
  uint64_t chunkSize; // size of DSD data
  // followed by: uint8_t rawData[chunkSize];
};
/**
* @brief DSF (DSD Stream File) format decoder
* @ingroup dsd
* @author pschatzmann
*
* Decodes DSF files containing Direct Stream Digital (DSD) audio data and
* converts it to PCM format. DSF is a file format that stores DSD audio
* streams, commonly used for high-resolution audio. This decoder:
*
* - Parses DSF file headers to extract format information
* - Buffers incoming DSD bitstream data
* - Applies decimation and low-pass filtering for anti-aliasing
* - Outputs converted PCM audio samples
*
* The decoder uses BiQuad low-pass filters for high-quality anti-aliasing
* during the DSD to PCM conversion process, replacing traditional FIR filter
* implementations for better performance and modularity.
*
* @note Supports mono and stereo DSD files with sample rates >= 2.8224 MHz
* (DSD64)
*
*/
class DSFDecoder : public AudioDecoder {
public:
DSFDecoder() = default;
DSFDecoder(DSFMetadata metaData) { setMetaData(metaData); };
AudioInfo audioInfo() override { return meta; }
/// Can be used to set up alternative sample rate (default is 44100 Hz) and
/// bits
void setAudioInfo(AudioInfo from) override {
TRACED();
AudioDecoder::setAudioInfo(from);
meta.copyFrom(from);
if (isHeaderAvailable()){
// Ensure PCM buffer is allocated based on the new audio info
int buffer_size = getOutputBufferSize();
pcmBuffer.resize(buffer_size);
channelAccum.resize(meta.channels);
channelIntegrator.resize(meta.channels);
setupTargetPCMRate();
setupDecimationStep();
}
}
/**
* @brief Initialize the decoder
* @return true if initialization successful
*
* Sets up the decoder state, initializes buffers, and configures the low-pass
* filters with default parameters. The filters are initialized with a cutoff
* frequency of 40% of the Nyquist frequency to provide effective
* anti-aliasing.
*/
bool begin() {
TRACED();
dsdBuffer.resize(meta.dsd_buffer_size);
dsdBuffer.reset();
headerParsed = false;
headerSize = 0;
dataSize = 0;
filePos = 0;
decimationStep = 64;
max_value = 0;
// update decimaten step & filter parameters
isActive = true;
return true;
}
void end() override { isActive = false; }
/**
* @brief Get DSF file metadata
* @return Reference to DSFMetadata structure containing format information
*
* Returns metadata extracted from the DSF file header, including DSD sample
* rate, data size, estimated PCM frames, and calculated duration.
*/
const DSFMetadata getMetadata() { return meta; }
void setMetaData(DSFMetadata metaData) {
meta = metaData;
AudioDecoder::setAudioInfo(meta);
}
/**
* @brief Check if decoder is ready
* @return true if DSF header has been successfully parsed
*
* Indicates whether the decoder has successfully parsed the DSF file header
* and is ready to process audio data.
*/
bool isHeaderAvailable() { return headerParsed; }
operator bool() { return isActive; }
/**
* @brief Main entry point for processing incoming DSF data
* @param data Incoming DSF file data bytes
* @param len Number of bytes in data buffer
* @return Number of bytes consumed (always returns len for streaming
* compatibility)
*
* Processes incoming DSF file data in two phases:
* 1. Header parsing: Extracts format information from DSF file header
* 2. Audio processing: Buffers DSD data and converts to PCM output
*
* The method is designed for streaming operation and always reports full
* consumption of input data for compatibility with streaming frameworks.
*/
size_t write(const uint8_t* data, size_t len) {
LOGD("write: %u", (unsigned)len);
size_t i = 0;
// Phase 1: Parse DSF header to extract format information
i += processHeader(data, len, i);
// Phase 2: Process audio data (buffer DSD + convert to PCM)
if (headerParsed && i < len) {
i += processDSDData(data, len, i);
}
return len; // Always report full consumption for streaming compatibility
}
 protected:
  // Header parsing state
  size_t headerSize;  ///< Current size of accumulated header data
  bool headerParsed = false;  ///< Flag indicating if header parsing is complete
  bool isActive = false;  ///< Flag indicating if decoder is active and ready
  uint64_t dataSize;  ///< Size of audio data section in bytes
  size_t filePos;  ///< Current position in DSF file (bytes buffered so far)
  // Processing buffers and state
  SingleBuffer<uint8_t> pcmBuffer{0};  ///< Buffer for PCM output samples -
                                       ///< supports multi-channel up to 32-bit
  Vector<float> channelAccum;  ///< Accumulator for each channel during DSD to
                               ///< PCM conversion
  Vector<LowPassFilter<float>>
      channelFilters;  ///< Anti-aliasing filters for each channel
  RingBuffer<uint8_t> dsdBuffer{0};  ///< Ring buffer for DSD data
  uint32_t decimationStep;  ///< Decimation factor for DSD to PCM conversion
  Vector<float> channelIntegrator;  ///< Integrator state for each channel (for
                                    ///< better DSD conversion)
  // Metadata
  DSFMetadata meta;  ///< Extracted DSF file metadata
  float max_value = 0.0f;  ///< NOTE(review): never written in this view - appears unused
/// The buffer size is defined in the metadata: it must be at least 1 frame
int getOutputBufferSize() {
int frame_size = meta.bits_per_sample / 8 * meta.channels;
if (meta.bits_per_sample == 24) frame_size = 4 * meta.channels;
int buffer_size = frame_size;
if (meta.output_buffer_size > buffer_size)
buffer_size = meta.output_buffer_size;
return buffer_size;
}
/**
* @brief Process header data until header is complete or data is exhausted
* @param data Input data buffer
* @param len Length of input data
* @param startPos Starting position in input buffer
* @return Number of bytes processed for header parsing
*
* Accumulates header bytes and attempts to parse the DSF file header.
* When a complete and valid header is found, sets headerParsed flag and
* updates decimation parameters.
*/
size_t processHeader(const uint8_t* data, size_t len, size_t startPos) {
if (headerParsed) return 0;
LOGI("processHeader: %u (%u)", (unsigned)len, (unsigned)startPos);
// Check for DSD header magic
if (memcmp(data, "DSD ", 4) != 0) {
LOGE("Invalid DSF header magic");
return 0;
}
int dataPos = findTag("data", data, len);
int fmtPos = findTag("fmt ", data, len);
if (dataPos < 0 || fmtPos < 0) {
LOGE("DSF header not found in data (fmt: %d, data: %d)", fmtPos, dataPos);
return 0; // No valid header found
}
// parse the data
parseFMT(data + fmtPos, len - fmtPos);
parseData(data + dataPos, len - dataPos);
headerParsed = true;
// update audio info and initialize filters
setAudioInfo(meta);
return dataPos + sizeof(DSFDataHeader);
}
/**
* @brief Process DSD audio data: buffer it and convert to PCM when possible
* @param data Input data buffer containing DSD audio data
* @param len Length of input data
* @param startPos Starting position in input buffer
* @return Number of bytes processed for audio data
*
* Buffers incoming DSD data and triggers PCM conversion when sufficient
* data is available for processing.
*/
size_t processDSDData(const uint8_t* data, size_t len, size_t startPos) {
LOGD("processDSDData: %u (%u)", (unsigned)len, (unsigned)startPos);
size_t bytesProcessed = 0;
// Buffer as much DSD data as possible
bytesProcessed += bufferDSDData(data, len, startPos);
// Convert buffered DSD data to PCM output
convertDSDToPCM();
return bytesProcessed;
}
/**
* @brief Buffer incoming DSD data into ring buffer
* @param data Input data buffer
* @param len Length of input data
* @param startPos Starting position in input buffer
* @return Number of bytes successfully buffered
*
* Copies DSD data bytes into the internal ring buffer until either all
* data is consumed or the buffer becomes full.
*/
size_t bufferDSDData(const uint8_t* data, size_t len, size_t startPos) {
int write_len = len - startPos;
if (write_len > dsdBuffer.availableForWrite()) {
write_len = dsdBuffer.availableForWrite();
}
dsdBuffer.writeArray(data + startPos, write_len);
filePos += write_len;
return write_len;
}
  /**
   * @brief Convert buffered DSD data to PCM samples and output them
   *
   * Performs the core DSD to PCM conversion process using integrator-based
   * approach:
   * 1. Integrates DSD bits over the decimation period for each channel
   * 2. Converts DSD bits to analog values (-1 or +1) with proper delta-sigma
   * handling
   * 3. Applies low-pass filtering to remove high-frequency noise
   * 4. Converts filtered values to PCM samples
   * 5. Outputs PCM samples for all channels
   *
   * The conversion uses BiQuad low-pass filters for anti-aliasing, providing
   * better audio quality than simple decimation.
   *
   * DSF format uses byte interleaving: each byte contains 8 DSD samples for one
   * channel, and channels are interleaved at the byte level (not bit level).
   */
  void convertDSDToPCM() {
    while (hasEnoughData()) {
      // Initialize accumulators
      for (int ch = 0; ch < meta.channels; ch++) {
        channelAccum[ch] = 0.0f;
      }
      // Initialize integrator states (reset at each decimation step)
      for (int ch = 0; ch < meta.channels; ch++) {
        channelIntegrator[ch] = 0.0f;
      }
      // Accumulate DSD samples over decimation period
      // DSF uses byte interleaving: bytes alternate between channels
      int bytesPerDecimationStep = decimationStep / 8;
      int samplesProcessed = 0;
      for (int i = 0; i < bytesPerDecimationStep && !dsdBuffer.isEmpty(); i++) {
        for (int ch = 0; ch < meta.channels && !dsdBuffer.isEmpty(); ch++) {
          uint8_t dsdByte;
          if (dsdBuffer.read(dsdByte)) {
            // Each byte contains 8 DSD samples for the current channel
            // Use integrator-based approach for better DSD conversion
            for (int bit = 0; bit < 8; bit++) {
              int channelBit = (dsdByte >> (7 - bit)) & 1; // MSB first in DSF
              // Delta-sigma integration: accumulate the difference
              channelIntegrator[ch] += channelBit ? 1.0f : -1.0f;
              // Apply decay to prevent DC buildup
              channelIntegrator[ch] *= 0.9999f;
            }
            // Add integrated value to channel accumulator
            channelAccum[ch] += channelIntegrator[ch];
            samplesProcessed += 8;
          }
        }
      }
      // NOTE(review): samplesProcessed / meta.channels is an integer
      // division - any remainder is dropped before the float conversion
      float samplesPerChannel = samplesProcessed / meta.channels;
      if (samplesPerChannel > 0) {
        for (int ch = 0; ch < meta.channels; ch++) {
          // Normalize by sample count and apply scaling factor
          channelAccum[ch] = channelAccum[ch] / samplesPerChannel * 0.8f;
          if (meta.filter_cutoff > 0.0f &&
              meta.filter_q > 0.0f) { // Only apply filter if configured
            // Apply low-pass filter to remove high-frequency noise
            channelAccum[ch] = channelFilters[ch].process(channelAccum[ch]);
          }
          //Serial.print(channelAccum[ch]);
          //Serial.print(" ");
          // Convert to PCM sample and store in buffer
          writePCMSample(clip(channelAccum[ch]));
        }
      }
      //Serial.println();
      // Output the PCM samples for all channels once the buffer is filled
      if (pcmBuffer.isFull()) {
        size_t frameSize = pcmBuffer.available();
        size_t written =
            getOutput()->write((uint8_t*)pcmBuffer.data(), frameSize);
        if (written != frameSize) {
          LOGE(
              "Failed to write PCM samples: expected %zu bytes, wrote %zu "
              "bytes",
              frameSize, written);
        }
        pcmBuffer.reset();
      }
    }
  }
/**
* @brief Clips audio values to valid range
* @param value Input audio value
* @return Clipped value in range [-1.0, 1.0]
*
* Ensures that filtered audio values stay within the valid range to
* prevent clipping artifacts in the final PCM output.
*/
float clip(float value) {
if (value > 1.0f) return 1.0f;
if (value < -1.0f) return -1.0f;
return value;
}
/**
* @brief Set up low-pass filters for all channels
*
* Initializes anti-aliasing filters for each audio channel with appropriate
* cutoff frequency (40% of Nyquist frequency) for the current sample rate.
* This ensures proper anti-aliasing performance during DSD to PCM
* conversion.
*/
void setupTargetPCMRate() {
TRACEI();
// Initialize filters for the correct number of channels
if (meta.sample_rate > 0 && meta.channels > 0) {
float cutoffFreq =
meta.sample_rate * meta.filter_cutoff; // 40% of Nyquist frequency
channelFilters.resize(meta.channels);
for (int i = 0; i < meta.channels; i++) {
channelFilters[i].begin(cutoffFreq, meta.sample_rate, meta.filter_q);
}
}
}
/**
* @brief Calculate optimal decimation step for DSD to PCM conversion
*
* Calculates the decimation factor as the ratio of DSD sample rate to
* target PCM sample rate. Clamps the value between 64 and 512 to ensure
* reasonable processing efficiency and audio quality while maintaining good
* anti-aliasing performance.
*/
void setupDecimationStep() {
TRACEI();
if (meta.sample_rate == 0 || meta.dsd_sample_rate == 0) {
LOGE("Invalid sample rates: DSD=%u, PCM=%u",
(unsigned)meta.dsd_sample_rate, (unsigned)meta.sample_rate);
return;
}
decimationStep = meta.dsd_sample_rate / meta.sample_rate;
if (decimationStep < 64) {
LOGW("Decimation step %u too low, setting to 64",
(unsigned)decimationStep);
decimationStep = 64;
}
if (decimationStep > 512) {
LOGW("Decimation step %u too high, setting to 512",
(unsigned)decimationStep);
decimationStep = 512;
}
// Ensure decimation step is multiple of 8 for clean byte processing
decimationStep = (decimationStep / 8) * 8;
if (decimationStep < 64) decimationStep = 64;
LOGI("Decimation step set to %u for DSD rate %u and target PCM rate %u",
(unsigned)decimationStep, (unsigned)meta.dsd_sample_rate,
(unsigned)meta.sample_rate);
}
/**
* @brief Check if sufficient DSD data is available for conversion
* @return true if enough data is buffered for one decimation step
*
* Determines if the DSD buffer contains enough data to perform one
* decimation step of DSD to PCM conversion. For DSF format with byte
* interleaving, we need enough bytes for all channels over the decimation
* period.
*/
bool hasEnoughData() {
// DSF uses byte interleaving: each decimation step needs enough bytes
// to cover all channels. Each byte contains 8 DSD samples for one
// channel.
int bytesPerDecimationStep = (decimationStep / 8) * meta.channels;
if (bytesPerDecimationStep < meta.channels)
bytesPerDecimationStep = meta.channels;
return dsdBuffer.available() >= bytesPerDecimationStep;
}
  /**
   * @brief Convert a filtered DSD value to a PCM sample in the buffer
   * @param filteredValue The filtered DSD value (range -1.0 to 1.0)
   *
   * Appends one sample to pcmBuffer using the output bit depth from the
   * metadata; 24 bit samples are stored as int24_t containers.
   */
  void writePCMSample(float filteredValue) {
    switch (meta.bits_per_sample) {
      case 8: {
        int8_t buffer8 = static_cast<int8_t>(filteredValue * 127.0f);
        pcmBuffer.write(buffer8);
        break;
      }
      case 16: {
        int16_t buffer16 = static_cast<int16_t>(filteredValue * 32767.0f);
        pcmBuffer.writeArray((uint8_t*)&buffer16, sizeof(int16_t));
        break;
      }
      case 24: {
        int24_t buffer24 =
            static_cast<int24_t>(filteredValue * 8388607.0f); // 2^23 - 1
        pcmBuffer.writeArray((uint8_t*)&buffer24, sizeof(int24_t));
        break;
      }
      case 32: {
        int32_t buffer32 =
            static_cast<int32_t>(filteredValue * 2147483647.0f); // 2^31 - 1
        pcmBuffer.writeArray((uint8_t*)&buffer32, sizeof(int32_t));
        break;
      }
      default:
        LOGE("Unsupported bits per sample: %d", meta.bits_per_sample);
        break;
    }
  }
/**
* @brief Find a specific tag within binary data
* @param tag The tag string to search for (e.g., "fmt ", "data")
* @param data The binary data buffer to search in
* @param len The length of the data buffer
* @return The position of the tag if found, -1 if not found
*
* Searches for DSF chunk identifiers within the file data. Used to locate
* format and data sections within the DSF file structure.
*/
int findTag(const char* tag, const uint8_t* data, size_t len) {
int taglen = strlen(tag);
uint32_t* pt;
for (int j = 0; j < len - taglen; j++) {
if (memcmp(tag, data + j, taglen) == 0) {
return j; // Found the tag at position j
}
}
return -1;
}
/**
* @brief Parse DSF format chunk to extract audio parameters
* @param data Pointer to the fmt chunk data
* @param len Length of available data
* @return true if parsing was successful, false otherwise
*
* Extracts essential audio format information from the DSF format chunk,
* including channel count, DSD sample rate, and validates the parameters
* are within acceptable ranges for processing.
*/
bool parseFMT(const uint8_t* data, size_t len) {
TRACEI();
if (len < sizeof(DSFFormat)) {
LOGE("FMT section too short to parse DSF format header");
return false; // Not enough data to parse
}
DSFFormat* fmt = (DSFFormat*)data;
meta.channels = fmt->channelNum;
// Fallback to channel type if channels is 0
if (meta.channels == 0) meta.channels = fmt->channelType;
meta.dsd_sample_rate = fmt->samplingFrequency;
// Validate channel count
if (meta.channels == 0 || meta.channels > 8) {
LOGE("Invalid channel count: %u (must be 1-8)", (unsigned)meta.channels);
return false;
}
LOGI("channels: %u, DSD sample rate: %u", (unsigned)meta.channels,
(unsigned)meta.dsd_sample_rate);
return true;
}
/**
* @brief Parse DSF data chunk to extract audio data information
* @param data Pointer to the data chunk
* @param len Length of available data
* @return true if parsing was successful, false otherwise
*
* Extracts audio data size information and calculates estimated playback
* duration and total PCM frames that will be produced after DSD to PCM
* conversion is complete.
*/
bool parseData(const uint8_t* data, size_t len) {
TRACEI();
if (len < sizeof(DSFDataHeader)) {
LOGE("Data section too short to parse DSF data header");
return false; // Not enough data to parse
}
DSFDataHeader* header = (DSFDataHeader*)data;
dataSize = header->chunkSize;
meta.dsd_data_bytes = dataSize;
uint64_t totalBits = dataSize * 8;
uint64_t totalDSDSamples = totalBits / meta.channels;
uint64_t totalPCMFrames =
totalDSDSamples / (meta.dsd_sample_rate / meta.sample_rate);
meta.pcm_frames = totalPCMFrames;
meta.duration_sec = (float)totalPCMFrames / meta.sample_rate;
return true;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,444 @@
/**
* @file CodecFLAC.h
* @author Phil Schatzmann
* @brief FLAC Codec using https://github.com/pschatzmann/arduino-libflac
* @version 0.1
* @date 2022-04-24
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include "AudioTools/CoreAudio/AudioBasic/Net.h"
#include "flac.h"
#ifndef FLAC_READ_TIMEOUT_MS
#define FLAC_READ_TIMEOUT_MS 10000
#endif
#ifndef FLAC_BUFFER_SIZE
#define FLAC_BUFFER_SIZE (8 * 1024)
#endif
namespace audio_tools {
/**
 * @brief Decoder for FLAC. Depends on
 * https://github.com/pschatzmann/arduino-libflac. We support an efficient
 * streaming API and a very memory-intensive standard interface. So you should
 * prefer the streaming interface where you call setOutput() before the begin
 * and copy() in the loop. Validated with http://www.2l.no/hires/
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FLACDecoder : public StreamingDecoder {
 public:
  /// Default Constructor
  FLACDecoder(bool isOgg=false) {
    is_ogg = isOgg;
  }
  /// Destructor - calls end();
  ~FLACDecoder() { end(); }
  /// Defines the read timeout in ms: after this period of consecutive empty
  /// reads the input is treated as ended
  void setTimeout(uint64_t readTimeout=FLAC_READ_TIMEOUT_MS) {
    read_timeout_ms = readTimeout;
  }
  /// Defines whether the input is an ogg container (call before begin())
  void setOgg(bool isOgg) {
    is_ogg = isOgg;
  }
  /// Provides the audio format reported by the FLAC decoder; bits_per_sample
  /// is always 16 because the output is converted
  AudioInfo audioInfo() {
    AudioInfo info;
    info.sample_rate = FLAC__stream_decoder_get_sample_rate(decoder);
    info.channels = FLAC__stream_decoder_get_channels(decoder);
    info.bits_per_sample = 16; // only 16 is supported
    return info;
  }
  /// Allocates and initializes the libFLAC stream decoder with the
  /// read/write/error callbacks
  bool begin() {
    TRACEI();
    is_active = false;
    if (decoder == nullptr) {
      if ((decoder = FLAC__stream_decoder_new()) == NULL) {
        LOGE("ERROR: allocating decoder");
        is_active = false;
        return false;
      }
      LOGI("FLAC__stream_decoder_new");
    }
    // if it is already active we close it
    auto state = FLAC__stream_decoder_get_state(decoder);
    if (state != FLAC__STREAM_DECODER_UNINITIALIZED){
      FLAC__stream_decoder_finish(decoder);
    }
    // configure md5 checking (off by default, see setMD5)
    FLAC__stream_decoder_set_md5_checking(decoder, is_md5_checing);
    // init decoder: seek/tell/length/eof/metadata callbacks are not used
    if (is_ogg){
      init_status = FLAC__stream_decoder_init_ogg_stream( decoder, read_callback, nullptr, nullptr, nullptr, nullptr, write_callback, nullptr, error_callback, this);
    } else {
      init_status = FLAC__stream_decoder_init_stream( decoder, read_callback, nullptr, nullptr, nullptr, nullptr, write_callback, nullptr, error_callback, this);
    }
    if (init_status != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
      LOGE("ERROR: initializing decoder: %s", FLAC__StreamDecoderInitStatusString[init_status]);
      is_active = false;
      return false;
    }
    LOGI("FLAC is open");
    is_active = true;
    return true;
  }
  /// Flushes pending data and releases the libFLAC decoder
  void end() {
    TRACEI();
    if (decoder != nullptr){
      flush();
      FLAC__stream_decoder_delete(decoder);
      decoder = nullptr;
    }
    is_active = false;
  }
  /// Process all data in the buffer
  void flush() {
    while(FLAC__stream_decoder_process_single(decoder));
  }
  /// Returns true between begin() and end()
  operator bool() { return is_active; }
  /// Stream Interface: Process a single frame - only relevant when input stream has been defined
  bool copy() {
    LOGD("copy");
    if (!is_active) {
      LOGW("FLAC not active");
      return false;
    }
    if (p_input == nullptr) {
      LOGE("setInput was not called");
      return false;
    }
    if (!FLAC__stream_decoder_process_single(decoder)) {
      LOGE("FLAC__stream_decoder_process_single");
      return false;
    }
    return true;
  }
  /// Activate/deactivate md5 checking: call this before calling begin()
  void setMD5(bool flag){
    is_md5_checing = flag;
  }
  /// returns true of the stream is ogg
  bool isOgg() const { return is_ogg; }
  /// Provides "audio/flac" or "audio/ogg"
  const char *mime() override { return is_ogg ? "audio/ogg; codecs=flac" : "audio/flac"; }
 protected:
  bool is_active = false;      ///< set by begin()/end() and on EOF
  bool is_ogg = false;         ///< input is an ogg container
  bool is_md5_checing = false; ///< md5 verification flag for libFLAC
  AudioInfo info;              ///< last format notified to listeners
  FLAC__StreamDecoder *decoder = nullptr;
  FLAC__StreamDecoderInitStatus init_status;
  uint64_t time_last_read = 0; ///< ms timestamp of the last successful read
  uint64_t read_timeout_ms = FLAC_READ_TIMEOUT_MS;
  /// Check if input is directly from stream - instead of writes
  bool isInputFromStream() { return p_input != nullptr; }
  /// Error callback: just logs the libFLAC error status
  static void error_callback(const FLAC__StreamDecoder *decoder,
                             FLAC__StreamDecoderErrorStatus status,
                             void *client_data) {
    LOGE(FLAC__StreamDecoderErrorStatusString[status]);
  }
  /// Reads encoded bytes from the configured input stream
  size_t readBytes(uint8_t *data, size_t len) override {
    return p_input->readBytes(data, len);
  }
  /// Callback which reads from stream
  static FLAC__StreamDecoderReadStatus read_callback(const FLAC__StreamDecoder *decoder, FLAC__byte result_buffer[],size_t *bytes, void *client_data) {
    FLAC__StreamDecoderReadStatus result = FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
    LOGD("read_callback: %d", (int) *bytes);
    FLACDecoder *self = (FLACDecoder *)client_data;
    if (self == nullptr || !self->is_active) {
      return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
    }
    // get data directly from stream
    *bytes = self->readBytes(result_buffer, *bytes);
    LOGD("-> %d", (int) *bytes);
    if (self->isEof(*bytes)){
      result = FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
      self->is_active = false;
    }
    return result;
  }
  /// We return eof when we were subsequently getting 0 bytes for the timeout period.
  bool isEof(int bytes) {
    bool result = false;
    if (bytes==0){
      delay(5);
    } else {
      time_last_read=millis();
    }
    if (millis() - time_last_read >= read_timeout_ms){
      result = true;
    }
    return result;
  }
  /// Output decoded result to final output stream: converts any bit depth to
  /// 16 bit interleaved PCM and notifies listeners when the format changes
  static FLAC__StreamDecoderWriteStatus write_callback(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame,const FLAC__int32 *const buffer[], void *client_data) {
    LOGD("write_callback: %u", (unsigned)frame->header.blocksize);
    FLACDecoder *self = (FLACDecoder *)client_data;
    AudioInfo actual_info = self->audioInfo();
    if (self->info != actual_info){
      self->info = actual_info;
      self->info.logInfo();
      int bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
      if (bps!=16){
        LOGI("Converting from %d bits", bps);
      }
      self->info = actual_info;
      self->notifyAudioChange(self->info);
    }
    // write audio data: one interleaved frame at a time
    int bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
    int16_t result_frame[actual_info.channels];
    switch(bps){
      case 8:
        // scale 8 bit up to 16 bit
        for (int j = 0; j < frame->header.blocksize; j++) {
          for (int i = 0; i < actual_info.channels; i++) {
            //self->output_buffer[j*actual_info.channels + i] = buffer[i][j]<<8;
            result_frame[i] = buffer[i][j]<<8;
          }
          self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
        }
        break;
      case 16:
        for (int j = 0; j < frame->header.blocksize; j++) {
          for (int i = 0; i < actual_info.channels; i++) {
            result_frame[i] = buffer[i][j];
          }
          self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
        }
        break;
      case 24:
        // drop the lowest 8 bits
        for (int j = 0; j < frame->header.blocksize; j++) {
          for (int i = 0; i < actual_info.channels; i++) {
            result_frame[i] = buffer[i][j] >> 8;
          }
          self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
        }
        break;
      case 32:
        // drop the lowest 16 bits
        for (int j = 0; j < frame->header.blocksize; j++) {
          for (int i = 0; i < actual_info.channels; i++) {
            result_frame[i] = buffer[i][j] >> 16;
          }
          self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
        }
        break;
      default:
        LOGE("Unsupported bps: %d", bps);
    }
    return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
  }
};
/**
 * @brief FLACEncoder: encodes 16/24/32 bit interleaved PCM to FLAC (or ogg
 * FLAC) and writes the result to the defined output
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FLACEncoder : public AudioEncoder {
 public:
  /// Default Constructor
  FLACEncoder(bool isOgg = false) {
    setOgg(isOgg);
  }
  /// Destructor - calls end();
  ~FLACEncoder() { end(); }
  /// Defines whether the result is written as ogg container (call before
  /// begin())
  void setOgg(bool isOgg) {
    is_ogg = isOgg;
  }
  /// Returns true if the result is written as ogg container
  bool isOgg() {return is_ogg;}
  /// Defines the FLAC block size (call before begin())
  void setBlockSize(int size){
    flac_block_size = size;
  }
  /// Provides the configured FLAC block size
  int blockSize() {return flac_block_size; }
  /// Defines the FLAC compression level (call before begin())
  void setCompressionLevel(int level){
    flac_compression_level = level;
  }
  /// Provides the configured compression level
  int compressionLevel() {return flac_compression_level;}
  /// Defines the output Stream
  void setOutput(Print &out_stream) override { p_print = &out_stream; }
  /// Provides "audio/flac"
  const char *mime() override { return "audio/flac"; }
  /// We update the audio information which will be used in the begin method
  virtual void setAudioInfo(AudioInfo from) override {
    cfg = from;
    cfg.logInfo();
  }
  /// starts the processing using the actual AudioInfo
  virtual bool begin() override {
    TRACED();
    // allocate the encoder only once
    if (p_encoder==nullptr){
      p_encoder = FLAC__stream_encoder_new();
      if (p_encoder==nullptr){
        LOGE("FLAC__stream_encoder_new");
        return false;
      }
    }
    is_open = false;
    // configure the encoder - must happen before the init call
    FLAC__stream_encoder_set_channels(p_encoder, cfg.channels);
    FLAC__stream_encoder_set_bits_per_sample(p_encoder, cfg.bits_per_sample);
    FLAC__stream_encoder_set_sample_rate(p_encoder, cfg.sample_rate);
    FLAC__stream_encoder_set_blocksize(p_encoder, flac_block_size);
    FLAC__stream_encoder_set_compression_level(p_encoder, flac_compression_level);
    // setup stream
    FLAC__StreamEncoderInitStatus status;
    if (is_ogg){
      status = FLAC__stream_encoder_init_ogg_stream(p_encoder, nullptr, write_callback, nullptr, nullptr, nullptr, this);
    } else {
      status = FLAC__stream_encoder_init_stream(p_encoder, write_callback, nullptr, nullptr, nullptr, this);
    }
    if (status != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
      LOGE("ERROR: initializing decoder: %s", FLAC__StreamEncoderInitStatusString[status]);
      if (status==FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR){
        LOGE(" -> %s", FLAC__StreamEncoderStateString[FLAC__stream_encoder_get_state(p_encoder)]);
      }
      return false;
    }
    is_open = true;
    return true;
  }
  /// starts the processing
  bool begin(Print &out) {
    p_print = &out;
    return begin();
  }
  /// stops the processing
  void end() override {
    TRACED();
    if (p_encoder != nullptr) {
      FLAC__stream_encoder_delete(p_encoder);
      p_encoder = nullptr;
      is_open = false;
    }
  }
  /// Writes FLAC Packet: accepts interleaved 16 bit samples (converted to 32
  /// bit internally) or 24/32 bit samples already stored in int32 containers
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (!is_open || p_print == nullptr) return 0;
    LOGD("write: %zu", len);
    size_t result = 0;
    int samples=0;
    int frames=0;
    int32_t *data32=nullptr;
    switch(cfg.bits_per_sample){
      case 16:
        // libFLAC expects int32 samples -> expand via the helper buffer
        samples = len / sizeof(int16_t);
        frames = samples / cfg.channels;
        writeBuffer((int16_t*)data, samples);
        data32 = buffer.data();
        break;
      case 24:
      case 32:
        // already in int32 containers: use the input directly
        samples = len / sizeof(int32_t);
        frames = samples / cfg.channels;
        data32 = (int32_t*) data;
        break;
      default:
        LOGE("bits_per_sample not supported: %d", (int) cfg.bits_per_sample);
        break;
    }
    if (frames>0){
      if (FLAC__stream_encoder_process_interleaved(p_encoder, data32, frames)){
        result = len;
      } else {
        LOGE("FLAC__stream_encoder_process_interleaved");
      }
    }
    return result;
  }
  operator bool() override { return is_open; }
  /// Returns true between a successful begin() and end()
  bool isOpen() { return is_open; }
 protected:
  AudioInfo cfg;               ///< format used by begin()
  Vector<FLAC__int32> buffer;  ///< 16 -> 32 bit conversion buffer
  Print *p_print = nullptr;    ///< output destination
  FLAC__StreamEncoder *p_encoder=nullptr;
  bool is_open = false;
  bool is_ogg = false;
  int flac_block_size = 512; // small value to minimize allocated memory
  int flac_compression_level = 8;
  /// Callback which forwards the encoded bytes to the output Print
  static FLAC__StreamEncoderWriteStatus write_callback(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, uint32_t samples, uint32_t current_frame, void *client_data){
    FLACEncoder *self = (FLACEncoder *)client_data;
    if (self->p_print!=nullptr){
      size_t written = self->p_print->write((uint8_t*)buffer, bytes);
      if (written!=bytes){
        LOGE("write_callback %zu -> %zu", bytes, written);
        return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR;
      }
    }
    return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
  }
  /// Expands 16 bit samples into the 32 bit buffer expected by libFLAC
  void writeBuffer(int16_t * data, size_t samples) {
    buffer.resize(samples);
    for (int j=0;j<samples;j++){
      buffer[j] = data[j];
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,203 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include "foxen-flac.h"
namespace audio_tools {
#define FOXEN_IN_BUFFER_SIZE 1024 * 2
#define FOXEN_OUT_BUFFER_SIZE 1024 * 4
/**
 * @brief Foxen FLAC Decoder using https://github.com/astoeckel/libfoxenflac
 * Unlike FLACDecoder which is a streaming decoder, this is a simple
 * AudioDecoder implementation.
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FLACDecoderFoxen : public AudioDecoder {
 public:
  /// Default Constructor
  FLACDecoderFoxen() = default;
  /// Constructor defining buffer dimensions and conversion options
  FLACDecoderFoxen(int maxBlockSize, int maxChannels,
                   bool convertTo16Bits = true, bool releaseOnEnd = false) {
    is_convert_to_16 = convertTo16Bits;
    max_block_size = maxBlockSize;
    max_channels = maxChannels;
    is_release_memory_on_end = releaseOnEnd;
  };
  /// Destructor - calls end();
  ~FLACDecoderFoxen() { end(); }
  /// Allocates the foxen decoder state and the working buffers
  bool begin() {
    TRACEI();
    is_active = false;
    // the foxen decoder lives in one caller-provided memory block
    size_t foxen_size = fx_flac_size(max_block_size, max_channels);
    foxen_data.resize(foxen_size);
    flac = fx_flac_init(foxen_data.data(), max_block_size, max_channels);
    if (flac != nullptr) {
      is_active = true;
      write_buffer.resize(in_buffer_size);
      out.resize(out_buffer_size);
    } else {
      LOGE("not enough memory");
      if (is_stop_on_error) stop();
    }
    return is_active;
  }
  /// Flushes pending data and optionally releases the buffers
  void end() {
    TRACEI();
    flush();
    if (flac != nullptr && is_release_memory_on_end) {
      foxen_data.resize(0);
      write_buffer.resize(0);
      out.resize(0);
      // NOTE(review): flac still points into the released foxen_data;
      // begin() must be called again before any further decoding
    }
    is_active = false;
  }
  /// Buffers the encoded input and decodes as much as possible
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", len);
    // no processing if not active
    if (!is_active) return 0;
    size_t result = write_buffer.writeArray(data, len);
    LOGD("write_buffer availabe: %d", write_buffer.available());
    while (write_buffer.available() > 0) {
      if (!decode()) break;
    }
    // if the buffer is full we could not decode anything
    if (write_buffer.available() == write_buffer.size()) {
      LOGE("Decoder did not consume any data");
      if (is_stop_on_error) stop();
    }
    LOGD("write: %d -> %d", len, result);
    return result;
  }
  /// Processes any remaining buffered data
  void flush() { decode(); }
  operator bool() override { return is_active; }
  /// Defines the input buffer size (default is 2k)
  void setInBufferSize(int size) { in_buffer_size = size; }
  /// Defines the number of 32 bit samples for providing the result (default is
  /// 4k)
  void setOutBufferSize(int size) { out_buffer_size = size; }
  /// Defines the maximum FLAC blocksize: drives the buffer allocation
  void setMaxBlockSize(int size) { max_block_size = size; }
  /// Defines the maximum number of channels: drives the buffer allocation
  void setMaxChannels(int ch) { max_channels = ch; }
  /// Select between 16 and 32 bit output: the default is 16 bits
  void set32Bit(bool flag) { is_convert_to_16 = !flag; }
 protected:
  fx_flac_t *flac = nullptr;         ///< foxen decoder handle (into foxen_data)
  SingleBuffer<uint8_t> write_buffer{0};  ///< buffered encoded input
  Vector<int32_t> out;               ///< decoded sample output buffer
  Vector<uint8_t> foxen_data{0};     ///< memory block backing the decoder
  bool is_active = false;
  bool is_convert_to_16 = true;      ///< convert 32 bit output to 16 bit
  bool is_stop_on_error = true;
  bool is_release_memory_on_end = false;
  int bits_eff = 0;                  ///< effective bits from the stream info
  int max_block_size = 5 * 1024;
  int max_channels = 2;
  int in_buffer_size = FOXEN_IN_BUFFER_SIZE;
  int out_buffer_size = FOXEN_OUT_BUFFER_SIZE;
  /// Decodes one chunk from write_buffer; returns true if any progress was
  /// made (bytes consumed or samples produced)
  bool decode() {
    TRACED();
    if (!is_active) return false;
    uint32_t out_len = out.size();
    uint32_t buf_len = write_buffer.available();
    uint32_t buf_len_result = buf_len;
    int rc = fx_flac_process(flac, write_buffer.data(), &buf_len_result,
                             out.data(), &out_len);
    // assert(out_len <= FOXEN_OUT_BUFFER_SIZE);
    switch (rc) {
      case FLAC_END_OF_METADATA: {
        processMetadata();
      } break;
      case FLAC_ERR: {
        LOGE("FLAC decoder in error state!");
        if (is_stop_on_error) stop();
      } break;
      default: {
        if (out_len > 0) {
          LOGD("Providing data: %d samples", out_len);
          if (is_convert_to_16) {
            write16BitData(out_len);
          } else {
            write32BitData(out_len);
          }
        }
      } break;
    }
    LOGD("processed: %d bytes of %d -> %d samples", buf_len_result, buf_len,
         out_len);
    // removed processed bytes from buffer
    write_buffer.clearArray(buf_len_result);
    return buf_len_result > 0 || out_len > 0;
  }
  /// Writes the decoded 32 bit samples unchanged to the output
  void write32BitData(int out_len) {
    TRACED();
    // write the result to the output destination
    writeBlocking(p_print, (uint8_t *)out.data(), out_len * sizeof(int32_t));
  }
  /// Converts the decoded 32 bit samples in place to 16 bit and writes them
  void write16BitData(int out_len) {
    TRACED();
    // in place convert to 16 bits
    int16_t *out16 = (int16_t *)out.data();
    for (int j = 0; j < out_len; j++) {
      out16[j] = out.data()[j] >> 16; // 65538;
    }
    // write the result to the output destination
    LOGI("writeBlocking: %d", out_len * sizeof(int16_t));
    writeBlocking(p_print, (uint8_t *)out.data(), out_len * sizeof(int16_t));
  }
  /// Reads the stream info, validates it against the configured limits and
  /// notifies the listeners about the audio format
  void processMetadata() {
    bits_eff = fx_flac_get_streaminfo(flac, FLAC_KEY_SAMPLE_SIZE);
    int info_blocksize = fx_flac_get_streaminfo(flac, FLAC_KEY_MAX_BLOCK_SIZE);
    LOGI("bits: %d", bits_eff);
    LOGI("blocksize: %d", info_blocksize);
    // assert(bits_eff == 32);
    info.sample_rate = fx_flac_get_streaminfo(flac, FLAC_KEY_SAMPLE_RATE);
    info.channels = fx_flac_get_streaminfo(flac, FLAC_KEY_N_CHANNELS);
    info.bits_per_sample = is_convert_to_16 ? 16 : bits_eff;
    info.logInfo();
    if (info.channels > max_channels) {
      LOGE("max channels too low: %d -> %d", max_channels, info.channels);
      if (is_stop_on_error) stop();
    }
    if (info_blocksize > max_block_size) {
      LOGE("max channels too low: %d -> %d", max_block_size, info_blocksize);
      if (is_stop_on_error) stop();
    }
    notifyAudioChange(info);
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,64 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
 * @brief Factory for creating new decoders based on the mime type or id
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 */
class CodecFactory {
 public:
  /// Registers a decoder factory callback under the indicated id
  /// @return true if the entry was added, false for invalid arguments
  bool addDecoder(const char* id, AudioDecoder* (*cb)()) {
    if (id == nullptr || cb == nullptr) return false;
    DecoderFactoryLine line;
    line.id = id;
    line.cb = cb;
    decoders.push_back(line);
    return true;
  }
  /// Registers an encoder factory callback under the indicated id
  /// @return true if the entry was added, false for invalid arguments
  bool addEncoder(const char* id, AudioEncoder* (*cb)()) {
    if (id == nullptr || cb == nullptr) return false;
    EncoderFactoryLine line;
    line.id = id;
    line.cb = cb;
    encoders.push_back(line);
    return true;
  }
  /// create a new decoder instance: returns nullptr if the id is unknown
  AudioDecoder* createDecoder(const char* str) {
    // guard against a nullptr lookup key
    if (str == nullptr) return nullptr;
    for (auto& line : decoders) {
      if (line.id.equals(str)) {
        return line.cb();
      }
    }
    return nullptr;
  }
  /// create a new encoder instance: returns nullptr if the id is unknown
  AudioEncoder* createEncoder(const char* str) {
    // guard against a nullptr lookup key
    if (str == nullptr) return nullptr;
    for (auto& line : encoders) {
      if (line.id.equals(str)) {
        return line.cb();
      }
    }
    return nullptr;
  }
 protected:
  /// Registry entry associating an id with a decoder factory callback
  struct DecoderFactoryLine {
    Str id;
    AudioDecoder* (*cb)() = nullptr;
  };
  /// Registry entry associating an id with an encoder factory callback
  struct EncoderFactoryLine {
    Str id;
    AudioEncoder* (*cb)() = nullptr;
  };
  Vector<DecoderFactoryLine> decoders;
  Vector<EncoderFactoryLine> encoders;
};
} // namespace audio_tools

View File

@@ -0,0 +1,142 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
 * @brief DecoderFloat - Converts Stream of floats into 2 byte integers
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class DecoderFloat : public AudioDecoder {
 public:
  /// Empty Constructor
  DecoderFloat() = default;
  /**
   * @brief Construct a new DecoderFloat object
   *
   * @param out_stream Output Stream to which we write the decoded result
   * @param active unused, kept for API compatibility
   */
  DecoderFloat(Print &out_stream, bool active=true){
    TRACED();
    p_print = &out_stream;
    (void)active;  // not used
  }
  /**
   * @brief Construct a new DecoderFloat object
   *
   * @param out_stream Output Stream to which we write the decoded result
   * @param bi Object that will be notified about the Audio Format (Changes)
   */
  DecoderFloat(Print &out_stream, AudioInfoSupport &bi){
    TRACED();
    p_print = &out_stream;
    addNotifyAudioChange(bi);
  }
  /// Defines the output Stream
  void setOutput(Print &out_stream) override {
    p_print = &out_stream;
  }
  /// Converts data from float to int16_t; out-of-range input is clipped
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (p_print==nullptr) return 0;
    int samples = len/sizeof(float);
    buffer.resize(samples);
    float* p_float = (float*) data;
    for (int j=0;j<samples;j++){
      // clamp to [-1.0, 1.0]: the float -> int16_t conversion is
      // undefined behavior for out of range values
      float value = p_float[j];
      if (value > 1.0f) value = 1.0f;
      if (value < -1.0f) value = -1.0f;
      buffer[j] = value*32767;
    }
    // report consumed input bytes: each written int16 (2 bytes)
    // corresponds to one input float (4 bytes)
    return p_print->write((uint8_t*)buffer.data(), samples*sizeof(int16_t)) * 2;
  }
  virtual operator bool() override {
    return p_print!=nullptr;
  }
 protected:
  Print *p_print=nullptr;   ///< output destination
  Vector<int16_t> buffer;   ///< conversion buffer
};
/**
 * @brief EncoderFloats - Encodes 16 bit PCM data stream to floats
 * data.
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class EncoderFloat : public AudioEncoder {
 public:
  /// Empty Constructor
  EncoderFloat() = default;

  /// Constructor providing the output stream
  EncoderFloat(Print &out) {
    p_print = &out;
  }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override {
    p_print = &out_stream;
  }

  /// Provides "audio/pcm"
  const char* mime() override {
    return mime_pcm;
  }

  /// starts the processing using the actual RAWAudioInfo
  virtual bool begin() override {
    is_open = true;
    return true;
  }

  /// starts the processing with the indicated output
  bool begin(Print &out) {
    p_print = &out;
    return begin();
  }

  /// stops the processing
  void end() override {
    is_open = false;
  }

  /// Converts data from int16_t to float
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr) return 0;
    int16_t *pt16 = (int16_t*) data;
    size_t samples = len / sizeof(int16_t);
    buffer.resize(samples);
    for (size_t j = 0; j < samples; j++) {
      // normalize to the range [-1.0, 1.0)
      buffer[j] = static_cast<float>(pt16[j]) / 32768.0;
    }
    return p_print->write((uint8_t*) buffer.data(), samples * sizeof(float));
  }

  operator bool() override {
    return is_open;
  }

  bool isOpen() {
    return is_open;
  }

 protected:
  Print* p_print = nullptr;
  // fix: was declared uninitialized, so operator bool()/isOpen() returned
  // an indeterminate value before begin() was called
  volatile bool is_open = false;
  Vector<float> buffer;  // conversion scratch buffer, reused across calls
};
}

View File

@@ -0,0 +1,197 @@
/**
* @file CodecG.722.h
* @author Phil Schatzmann
* @brief G.722 Codec using https://github.com/pschatzmann/arduino-libg722
* @version 0.1
* @date 2022-04-24
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "g722_codec.h"
// size in bytes
#define G722_PCM_SIZE 80
#define G722_ENC_SIZE 40
namespace audio_tools {
/**
 * @brief Decoder for G.722. Depends on
 * https://github.com/pschatzmann/arduino-libg722.
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G722Decoder : public AudioDecoder {
 public:
  G722Decoder() = default;

  /// Defines the options for the G.722 Codec: G722_SAMPLE_RATE_8000,G722_PACKED
  void setOptions(int options){
    this->options = options;
  }

  /// Allocates the decoder context and the working buffers
  virtual bool begin() {
    TRACEI();
    input_buffer.resize(10);
    result_buffer.resize(40);
    g722_dctx = g722_decoder_new(info.sample_rate, options);
    if (g722_dctx == nullptr) {
      LOGE("g722_decoder_new");
      return false;
    }
    notifyAudioChange(info);
    is_active = true;
    return true;
  }

  /// Releases the decoder context
  virtual void end() {
    TRACEI();
    g722_decoder_destroy(g722_dctx);
    is_active = false;
  }

  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return is_active; }

  /// Consumes encoded G.722 bytes; returns the number of bytes accepted
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    uint8_t *p_byte = (uint8_t *)data;
    for (int j = 0; j < len; j++) {
      processByte(p_byte[j]);
    }
    return len;
  }

 protected:
  Print *p_print = nullptr;
  G722_DEC_CTX *g722_dctx = nullptr;
  Vector<uint8_t> input_buffer;   // collects encoded bytes until a frame is full
  Vector<uint8_t> result_buffer;  // receives decoded 16-bit PCM
  int options = G722_SAMPLE_RATE_8000;
  int input_pos = 0;
  bool is_active = false;

  /// Build decoding buffer and decode when frame is full
  void processByte(uint8_t byte) {
    // add byte to buffer
    input_buffer[input_pos++] = byte;
    // decode if buffer is full
    if (input_pos >= input_buffer.size()) {
      // g722_decode returns the number of int16_t samples produced
      int result_samples = g722_decode(g722_dctx, input_buffer.data(), input_buffer.size(),
                      (int16_t *)result_buffer.data());
      if (result_samples*2>result_buffer.size()){
        LOGE("Decoder:Result buffer too small: %d -> %d",result_buffer.size(),result_samples*2);
      }
      // fix: forward all decoded PCM bytes (2 bytes per sample); the original
      // wrote only result_samples bytes and so dropped half of every frame
      p_print->write(result_buffer.data(), result_samples * sizeof(int16_t));
      input_pos = 0;
    }
  }
};
/**
* @brief Encoder for G.722 - Depends on
* https://github.com/pschatzmann/arduino-libg722.
* Inspired by g722enc.c
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class G722Encoder : public AudioEncoder {
 public:
  G722Encoder() = default;
  /// Defines the options for the G.722 Codec: G722_SAMPLE_RATE_8000,G722_PACKED
  void setOptions(int options){
    this->options = options;
  }
  /// Allocates the encoder context and the PCM/encoded working buffers
  bool begin() {
    TRACEI();
    // G.722 is a mono codec; extra channels are not rejected, only warned about
    if (info.channels != 1) {
      LOGW("1 channel expected, was: %d", info.channels);
    }
    g722_ectx = g722_encoder_new(info.sample_rate, options);
    if (g722_ectx == NULL) {
      LOGE("g722_encoder_new");
      return false;
    }
    input_buffer.resize(G722_PCM_SIZE);
    result_buffer.resize(G722_ENC_SIZE);
    is_active = true;
    return true;
  }
  /// Releases the encoder context
  virtual void end() {
    TRACEI();
    g722_encoder_destroy(g722_ectx);
    is_active = false;
  }
  virtual const char *mime() { return "audio/g722"; }
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
  operator bool() { return is_active; }
  /// Consumes raw 16-bit PCM bytes; returns the number of bytes accepted
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    // encode bytes
    uint8_t *p_byte = (uint8_t *)data;
    for (int j = 0; j < len; j++) {
      processByte(p_byte[j]);
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  G722_ENC_CTX *g722_ectx = nullptr;
  Vector<uint8_t> input_buffer;   // collects raw PCM bytes (G722_PCM_SIZE)
  Vector<uint8_t> result_buffer;  // receives encoded output (G722_ENC_SIZE)
  int options = G722_SAMPLE_RATE_8000;
  int buffer_pos = 0;             // current fill position in input_buffer
  bool is_active = false;
  // add byte to decoding buffer and decode if buffer is full
  void processByte(uint8_t byte) {
    input_buffer[buffer_pos++] = byte;
    if (buffer_pos >= input_buffer.size()) {
      // convert for little endian
      int samples = input_buffer.size() / 2;
      // encode one full PCM frame; g722_encode returns the encoded byte count
      int result_len = g722_encode(g722_ectx,(const int16_t*) input_buffer.data(), samples,
                      result_buffer.data());
      if (result_len>result_buffer.size()){
        LOGE("Encoder:Result buffer too small: %d -> %d",result_buffer.size(),result_len);
      }
      p_print->write(result_buffer.data(), result_len);
      buffer_pos = 0;
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,411 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
extern "C"{
#include "g72x.h"
}
namespace audio_tools {
/**
* @brief Supported codecs by G7xxDecoder and G7xxEncoder
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
enum G7xxCODEC_e {g723_24, g721, g723_40, others};
/**
* @brief g723_24, g721, g723_40 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class G7xxDecoder : public AudioDecoder {
 public:
  /// Selects the decode routine for the requested codec variant.
  /// The audio format is fixed to mono / 8000 Hz / 16 bits per sample.
  /// For the 'others' value no routine is assigned: subclasses (e.g. G711)
  /// override write() and provide their own conversion.
  G7xxDecoder(G7xxCODEC_e codec) {
    info.channels = 1;
    info.sample_rate = 8000;
    info.bits_per_sample = 16;
    switch(codec){
      case g723_24:
        dec_routine = g723_24_decoder;
        dec_bits = 3;
        break;
      case g721:
        dec_routine = g721_decoder;
        dec_bits = 4;
        break;
      case g723_40:
        dec_routine = g723_40_decoder;
        dec_bits = 5;
        break;
    }
  }
  /// Accepts the audio info only when it matches the fixed G.7xx format;
  /// otherwise logs the mismatch and leaves the current info unchanged
  void setAudioInfo(AudioInfo info) override {
    bool ok = true;
    if (info.channels!=1){
      LOGE("channels must be 1 instead of %d", info.channels);
      ok = false;
    }
    if (info.sample_rate!=8000){
      LOGE("sample_rate must be 8000 instead of %d", info.sample_rate);
      ok = false;
    }
    if (info.bits_per_sample!=16){
      LOGE("bits_per_sample must be 16 instead of %d", info.bits_per_sample);
      ok = false;
    }
    if (ok) AudioDecoder::setAudioInfo(info);
  }
  /// Resets the codec state and activates the decoder
  bool begin() override {
    TRACEI();
    in_buffer = 0;
    in_bits = 0;
    out_size = sizeof(int16_t);
    g72x_init_state(&state);
    is_active = true;
    return true;
  }
  void end() override {
    TRACEI();
    is_active = false;
  }
  void setOutput(Print &out_stream) override { p_print = &out_stream; }
  operator bool() { return is_active; }
  /// Decodes one 16-bit PCM sample per encoded input byte and forwards it
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    uint8_t *p_byte = (uint8_t *)data;
    for (int j = 0; j < len; j++) {
      sample = (*dec_routine)(p_byte[j], AUDIO_ENCODING_LINEAR, &state);
      p_print->write((uint8_t*)&sample, out_size);
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  int input_pos = 0;
  bool is_active = false;
  int16_t sample;            // most recently decoded PCM sample
  unsigned char code;
  int n;
  struct g72x_state state;   // codec state shared across write() calls
  int out_size;              // output bytes per sample (set in begin())
  // decode routine selected by the constructor (g721/g723_24/g723_40)
  int (*dec_routine)(int code, int out_coding, struct g72x_state* state_ptr);
  int dec_bits;              // encoded bits per sample for the chosen codec
  unsigned int in_buffer = 0;
  int in_bits = 0;
};
/**
* @brief g723_24, g721, g723_40 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class G7xxEncoder : public AudioEncoder {
 public:
  /// Selects the encode routine and mime type for the requested codec
  /// variant. The audio format is fixed to mono / 8000 Hz / 16 bits.
  /// For the 'others' value nothing is assigned: subclasses (e.g. G711)
  /// override write() and provide their own conversion.
  G7xxEncoder(G7xxCODEC_e codec) {
    info.channels = 1;
    info.sample_rate = 8000;
    info.bits_per_sample = 16;
    switch(codec){
      case g721:
        enc_routine = g721_encoder;
        enc_bits = 4;
        p_mime = "audio/g721";
        break;
      case g723_24:
        enc_routine = g723_24_encoder;
        enc_bits = 3;
        p_mime = "audio/g723_24";
        break;
      case g723_40:
        enc_routine = g723_40_encoder;
        enc_bits = 5;
        p_mime = "audio/g723_40";
        break;
    }
  }
  /// Resets the codec state and activates the encoder
  bool begin() override {
    TRACEI();
    g72x_init_state(&state);
    out_buffer = 0;
    out_bits = 0;
    is_active = true;
    return true;
  }
  void end() override {
    TRACEI();
    is_active = false;
  }
  /// Returns the mime type selected in the constructor
  const char *mime() override { return p_mime; }
  /// Accepts the audio info only when it matches the fixed G.7xx format;
  /// otherwise logs the mismatch and leaves the current info unchanged
  virtual void setAudioInfo(AudioInfo info) {
    bool ok = true;
    if (info.channels!=1){
      LOGE("channels must be 1 instead of %d", info.channels);
      ok = false;
    }
    if (info.sample_rate!=8000){
      LOGE("sample_rate must be 8000 instead of %d", info.sample_rate);
      ok = false;
    }
    if (info.bits_per_sample!=16){
      LOGE("bits_per_sample must be 16 instead of %d", info.bits_per_sample);
      ok = false;
    }
    if (ok) AudioEncoder::setAudioInfo(info);
  }
  void setOutput(Print &out_stream) override { p_print = &out_stream; }
  operator bool() { return is_active; }
  /// Encodes 16-bit PCM input: one output byte per input sample
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    // encode bytes
    int16_t *p_16 = (int16_t *)data;
    int samples = len / sizeof(int16_t);
    for (int j = 0; j < samples; j++) {
      code = (*enc_routine)(p_16[j], AUDIO_ENCODING_LINEAR, &state);
      p_print->write(&code, 1);
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  bool is_active = false;
  const char *p_mime = nullptr;  // set by the constructor for known codecs
  int resid;
  struct g72x_state state;       // codec state shared across write() calls
  unsigned char sample_char;
  int16_t sample_int16;
  unsigned char code;            // most recently encoded byte
  // encode routine selected by the constructor (g721/g723_24/g723_40)
  int (*enc_routine)(int sample, int in_coding, struct g72x_state* state_ptr);
  int enc_bits;                  // encoded bits per sample for the chosen codec
  unsigned int out_buffer = 0;
  int out_bits = 0;
};
/**
 * @brief 32Kbps G721 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G721Decoder : public G7xxDecoder {
 public:
  /// Selects the 32 kbit/s G.721 variant
  G721Decoder() : G7xxDecoder(g721) {}
};
/**
 * @brief 32Kbps G721 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G721Encoder : public G7xxEncoder {
 public:
  /// Selects the 32 kbit/s G.721 variant
  G721Encoder() : G7xxEncoder(g721) {}
};
/**
 * @brief 24Kbps G723 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_24Decoder : public G7xxDecoder {
 public:
  /// Selects the 24 kbit/s G.723 variant
  G723_24Decoder() : G7xxDecoder(g723_24) {}
};
/**
 * @brief 24Kbps G723 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_24Encoder : public G7xxEncoder {
 public:
  /// Selects the 24 kbit/s G.723 variant
  G723_24Encoder() : G7xxEncoder(g723_24) {}
};
/**
 * @brief 40Kbps G723 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_40Decoder : public G7xxDecoder {
 public:
  /// Selects the 40 kbit/s G.723 variant
  G723_40Decoder() : G7xxDecoder(g723_40) {}
};
/**
 * @brief 40Kbps G723 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_40Encoder : public G7xxEncoder {
 public:
  /// Selects the 40 kbit/s G.723 variant
  G723_40Encoder() : G7xxEncoder(g723_40) {}
};
/**
 * @brief 64 kbit/s g711 ULOW Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * Supported encoder parameters: linear2alaw2, linear2ulaw
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G711Encoder : public G7xxEncoder {
 public:
  /// @param enc companding function (e.g. linear2alaw / linear2ulaw)
  G711Encoder(uint8_t(*enc)(int)) : G7xxEncoder(others) {
    this->enc = enc;
    assert(this->enc!=nullptr);
  };

  /// Encodes 16-bit PCM input: one companded output byte per input sample
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    // encode bytes
    int samples = len/2;
    int16_t *p_16 = (int16_t *)data;
    // fix: encode in fixed-size chunks instead of a variable length array
    // (VLAs are non-standard C++ and large writes could overflow the stack)
    uint8_t buffer[64];
    int pos = 0;
    while (pos < samples) {
      int count = 0;
      while (count < (int)sizeof(buffer) && pos < samples) {
        buffer[count++] = enc(p_16[pos++]);
      }
      p_print->write(buffer, count);
    }
    return len;
  }

 protected:
  uint8_t(*enc)(int)=nullptr;  // companding function supplied by subclass
};
/**
 * @brief 64 kbit/s g711 ULOW Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * Supported decoder parameters: alaw2linear, ulaw2linear
 * @author Phil Schatzmann
 * @ingroup codecs
 * @ingroup encoder
 * @copyright GPLv3
 */
class G711Decoder : public G7xxDecoder {
 public:
  /// @param dec expansion function (e.g. alaw2linear / ulaw2linear)
  G711Decoder(int (*dec)(uint8_t a_val)) : G7xxDecoder(others) {
    this->dec = dec;
    assert(this->dec != nullptr);
  };

  /// Expands each encoded byte to one 16-bit PCM sample
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    // decode bytes
    size_t idx = 0;
    while (idx < len) {
      int16_t pcm = dec(data[idx]);
      p_print->write((uint8_t *)&pcm, sizeof(int16_t));
      idx++;
    }
    return len;
  }

 protected:
  int (*dec)(uint8_t a_val) = nullptr;  // expansion function supplied by subclass
};
/**
 * @brief 64 kbit/s g711 ALOW Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G711_ALAWEncoder : public G711Encoder {
 public:
  /// Uses the A-law companding function
  G711_ALAWEncoder() : G711Encoder(linear2alaw) {}
};
/**
 * @brief 64 kbit/s g711 ALOW Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G711_ALAWDecoder : public G711Decoder {
 public:
  /// Uses the A-law expansion function
  G711_ALAWDecoder() : G711Decoder(alaw2linear) {}
};
/**
 * @brief 64 kbit/s g711 ULOW Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G711_ULAWEncoder : public G711Encoder {
 public:
  /// Uses the µ-law companding function
  G711_ULAWEncoder() : G711Encoder(linear2ulaw) {}
};
/**
 * @brief 64 kbit/s g711 ULOW Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G711_ULAWDecoder : public G711Decoder {
 public:
  /// Uses the µ-law expansion function
  G711_ULAWDecoder() : G711Decoder(ulaw2linear) {}
};
} // namespace audio_tools

View File

@@ -0,0 +1,236 @@
/**
* @file CodecGSM.h
* @author Phil Schatzmann
* @brief GSM Codec using https://github.com/pschatzmann/arduino-libgsm
* @version 0.1
* @date 2022-04-24
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "gsm.h"
namespace audio_tools {
/**
* @brief Decoder for GSM. Depends on
* https://github.com/pschatzmann/arduino-libgsm.
* Inspired by gsmdec.c
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class GSMDecoder : public AudioDecoder {
 public:
  /// GSM full rate is defined for mono at 8000 Hz
  GSMDecoder() {
    info.sample_rate = 8000;
    info.channels = 1;
  }
  /// Allocates the gsm context and the frame buffers
  virtual bool begin() {
    TRACEI();
    // 160 13-bit samples
    result_buffer.resize(160 * sizeof(int16_t));
    // gsm_frame of 33 bytes
    input_buffer.resize(33);
    v_gsm = gsm_create();
    notifyAudioChange(info);
    is_active = true;
    return true;
  }
  /// Releases the gsm context
  virtual void end() {
    TRACEI();
    gsm_destroy(v_gsm);
    is_active = false;
  }
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
  operator bool() { return is_active; }
  /// Consumes encoded GSM bytes; returns the number of bytes accepted
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    for (int j = 0; j < len; j++) {
      processByte(data[j]);
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  gsm v_gsm;                      // gsm library context (created in begin())
  bool is_active = false;
  Vector<uint8_t> input_buffer;   // collects one 33-byte gsm frame
  Vector<uint8_t> result_buffer;  // receives 160 decoded 16-bit samples
  int input_pos = 0;              // fill position in input_buffer
  /// Build decoding buffer and decode when frame is full
  void processByte(uint8_t byte) {
    // add byte to buffer
    input_buffer[input_pos++] = byte;
    // decode if buffer is full
    if (input_pos >= input_buffer.size()) {
      if (gsm_decode(v_gsm, input_buffer.data(), (gsm_signal*)result_buffer.data())!=0){
        LOGE("gsm_decode");
      }
      //fromBigEndian(result_buffer);
      // scale to 13 to 16-bit samples
      scale(result_buffer);
      p_print->write(result_buffer.data(), result_buffer.size());
      input_pos = 0;
    }
  }
  /// Expands the decoded 13-bit range to 16 bits: values within ±4095 are
  /// multiplied by 8, anything larger is clamped to ±32767
  void scale(Vector<uint8_t> &vector){
    int16_t *pt16 = (int16_t *)vector.data();
    for (int j = 0; j < vector.size() / 2; j++) {
      if (abs(pt16[j])<=4095){
        pt16[j] = pt16[j] * 8;
      } else if(pt16[j]<0){
        pt16[j] = -32767;
      } else if(pt16[j]>0){
        pt16[j] = 32767;
      }
    }
  }
  /// Converts the 16-bit samples from network to host byte order in place
  /// (currently unused: the call above is commented out)
  void fromBigEndian(Vector<uint8_t> &vector){
    int size = vector.size() / 2;
    int16_t *data16 = (int16_t*) vector.data();
    for (int i=0; i<size; i++){
      data16[i] = ntohs(data16[i]);
    }
  }
};
/**
* @brief Encoder for GSM - Depends on
* https://github.com/pschatzmann/arduino-libgsm.
* Inspired by gsmenc.c
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class GSMEncoder : public AudioEncoder {
 public:
  /// @param scaling when true the 16-bit input is divided by 8 to fit the
  /// 13-bit GSM range; when false values are clipped to ±4095 instead
  GSMEncoder(bool scaling=true) {
    info.sample_rate = 8000;
    info.channels = 1;
    scaling_active = scaling;
  }
  /// Allocates the gsm context and the frame buffers
  bool begin() {
    TRACEI();
    if (info.sample_rate != 8000) {
      LOGW("Sample rate is supposed to be 8000 - it was %d", info.sample_rate);
    }
    if (info.channels != 1) {
      LOGW("channels is supposed to be 1 - it was %d", info.channels);
    }
    v_gsm = gsm_create();
    // 160 13-bit samples
    input_buffer.resize(160 * sizeof(int16_t));
    // gsm_frame of 33 bytes
    result_buffer.resize(33);
    is_active = true;
    return true;
  }
  /// Releases the gsm context
  virtual void end() {
    TRACEI();
    gsm_destroy(v_gsm);
    is_active = false;
  }
  virtual const char *mime() { return "audio/gsm"; }
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
  operator bool() { return is_active; }
  /// Consumes raw 16-bit PCM bytes; returns the number of bytes accepted
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    // encode bytes
    for (int j = 0; j < len; j++) {
      processByte(data[j]);
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  gsm v_gsm;                      // gsm library context (created in begin())
  bool is_active = false;
  int buffer_pos = 0;             // fill position in input_buffer
  bool scaling_active;            // see constructor
  Vector<uint8_t> input_buffer;   // collects one PCM frame (160 samples)
  Vector<uint8_t> result_buffer;  // receives the 33-byte gsm frame
  // add byte to decoding buffer and decode if buffer is full
  void processByte(uint8_t byte) {
    input_buffer[buffer_pos++] = byte;
    if (buffer_pos >= input_buffer.size()) {
      scaleValues(input_buffer);
      // toBigEndian(input_buffer);
      // encode
      gsm_encode(v_gsm, (gsm_signal*)input_buffer.data(), result_buffer.data());
      size_t written = p_print->write(result_buffer.data(), result_buffer.size());
      assert(written == result_buffer.size());
      buffer_pos = 0;
    }
  }
  /// Converts the 16-bit samples from host to network byte order in place
  /// (currently unused: the call above is commented out)
  void toBigEndian(Vector<uint8_t> &vector){
    int size = vector.size() / 2;
    int16_t *data16 = (int16_t*) vector.data();
    for (int i=0; i<size; i++){
      data16[i] = htons(data16[i]);
    }
  }
  /// Reduces the 16-bit input to the 13-bit GSM range, either by dividing
  /// by 8 (scaling_active) or by clipping to ±4095
  void scaleValues(Vector<uint8_t> &vector) {
    int16_t *pt16 = (int16_t *)vector.data();
    int size = vector.size() / 2;
    if (scaling_active){
      // scale to 16 to 13-bit samples
      for (int j = 0; j < size; j++) {
        pt16[j] = pt16[j] / 8;
      }
    } else {
      // clip value to 13-bits
      for (int j = 0; j < size; j++) {
        if ( pt16[j]>4095){
          pt16[j] = 4095;
        }
        if ( pt16[j]<-4095){
          pt16[j] = -4095;
        }
      }
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,37 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/CodecAACHelix.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/AudioCodecs/CodecWAV.h"
#include "AudioTools/AudioCodecs/MultiDecoder.h"
namespace audio_tools {
/**
* @brief MP3 and AAC Decoder using libhelix:
* https://github.com/pschatzmann/arduino-libhelix. We dynamically create a MP3
* or AAC decoder dependent on the provided audio format. In addition WAV files
* are also supported
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class DecoderHelix : public MultiDecoder {
 public:
  DecoderHelix() {
    // register supported codecs with their mime type so that the
    // MultiDecoder base class can pick the matching implementation
    addDecoder(mp3, "audio/mpeg");
    addDecoder(aac, "audio/aac");
    addDecoder(wav, "audio/vnd.wave");
  }
 protected:
  // concrete decoder instances owned by this object
  MP3DecoderHelix mp3;
  AACDecoderHelix aac;
  WAVDecoder wav;
};
} // namespace audio_tools

View File

@@ -0,0 +1,192 @@
/**
* @file CodecILBC.h
* @author Phil Schatzmann
* @brief Codec for ilbc using https://github.com/pschatzmann/libilbc
* @version 0.1
* @date 2022-04-24
*
* @copyright Copyright (c) 2022
*
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "iLBC.h"
namespace audio_tools {
/**
* @brief Decoder for iLBC. Depends on
* https://github.com/pschatzmann/libilbc
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ILBCDecoder : public AudioDecoder {
 public:
  /// iLBC operates on mono 8000 Hz 16-bit audio
  ILBCDecoder(EnumLBCFrameSize frameSize = ms30, bool useEnhancer = true) {
    info.sample_rate = 8000;
    info.channels = 1;
    info.bits_per_sample = 16;
    frame_size = frameSize;
    use_enhancer = useEnhancer;
  }
  ~ILBCDecoder(){
    end();
  }
  /// Allocates the iLBC decoder and sizes the frame buffers
  virtual bool begin() {
    TRACEI();
    if (p_print==nullptr){
      LOGE("Output not defined");
      return false;
    }
    if (p_ilbc==nullptr){
      p_ilbc = new iLBCDecode(frame_size, use_enhancer);
    }
    // setup buffer
    decoded_buffer.resize(p_ilbc->getSamples());
    encoded_buffer.resize(p_ilbc->getEncodedBytes());
    // update audio information
    notifyAudioChange(info);
    return true;
  }
  virtual void end() {
    TRACEI();
    delete p_ilbc;
    p_ilbc = nullptr;
  }
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
  operator bool() { return p_ilbc != nullptr; }
  /// Collects encoded data until a full frame is available, then decodes it
  /// and forwards the PCM result.
  /// NOTE(review): the encoded input is consumed as int16_t values and then
  /// narrowed into a uint8_t buffer — this halves the consumed byte count and
  /// truncates each value, while ILBCEncoder emits plain bytes; verify the
  /// expected input framing against the encoder before relying on this.
  virtual size_t write(const uint8_t *data, size_t len) {
    if (p_ilbc==nullptr) return 0;
    LOGI("write: %d", len);
    int samples = len / sizeof(int16_t);
    int16_t *p_samples = (int16_t *)data;
    for (int j=0;j<samples;j++){
      encoded_buffer[encoded_buffer_pos++]=p_samples[j];
      if (encoded_buffer_pos>=encoded_buffer.size()){
        memset(decoded_buffer.data(),0,decoded_buffer.size()*sizeof(int16_t));
        p_ilbc->decode(encoded_buffer.data(), decoded_buffer.data());
        if (p_print!=nullptr){
          p_print->write((uint8_t*)decoded_buffer.data(), decoded_buffer.size()*sizeof(int16_t));
          // NOTE(review): presumably gives the output device time to drain;
          // confirm this pause is really required
          delay(2);
        }
        encoded_buffer_pos = 0;
      }
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  iLBCDecode *p_ilbc = nullptr;
  Vector<int16_t> decoded_buffer{0};  // PCM output of one decoded frame
  Vector<uint8_t> encoded_buffer{0};  // collects one encoded frame
  int16_t encoded_buffer_pos = 0;     // fill position in encoded_buffer
  EnumLBCFrameSize frame_size;
  bool use_enhancer;
};
/**
* @brief Encoder for iLBC - Depends on
* https://github.com/pschatzmann/libopenilbc
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ILBCEncoder : public AudioEncoder {
 public:
  /// iLBC operates on mono 8000 Hz 16-bit audio
  ILBCEncoder(EnumLBCFrameSize frameSize = ms30) {
    info.sample_rate = 8000;
    info.channels = 1;
    info.bits_per_sample = 16;
    frame_size = frameSize;
  }
  ~ILBCEncoder(){
    end();
  }
  /// Validates the audio format and allocates the iLBC encoder and buffers
  bool begin() {
    TRACEI();
    if (p_print==nullptr){
      LOGE("Output not defined");
      return false;
    }
    if (info.bits_per_sample!=16){
      LOGE("bits_per_sample must be 16: %d",info.bits_per_sample);
      return false;
    }
    if (info.sample_rate!=8000){
      LOGW("The sample rate should be 8000: %d", info.sample_rate);
    }
    if (info.channels!=1){
      LOGW("channels should be 1: %d", info.channels);
    }
    if (p_ilbc==nullptr){
      p_ilbc = new iLBCEncode(frame_size);
    }
    decoded_buffer.resize(p_ilbc->getSamples());
    encoded_buffer.resize(p_ilbc->getEncodedBytes());
    decoded_buffer_pos = 0;
    return true;
  }
  virtual void end() {
    TRACEI();
    if (p_ilbc != nullptr) {
      delete p_ilbc;
      p_ilbc = nullptr;
    }
  }
  virtual const char *mime() { return "audio/ilbc"; }
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
  operator bool() { return p_ilbc != nullptr; }
  /// Collects PCM samples (converted to float for the iLBC reference
  /// implementation) until a full frame is available, then encodes and
  /// forwards the encoded bytes
  virtual size_t write(const uint8_t *data, size_t len) {
    if (p_ilbc==nullptr) return 0;
    LOGI("write: %d", len);
    int samples = len / sizeof(int16_t);
    int16_t *p_samples = (int16_t *)data;
    for (int j=0;j<samples;j++){
      decoded_buffer[decoded_buffer_pos++]=p_samples[j];
      if (decoded_buffer_pos>=decoded_buffer.size()){
        memset(encoded_buffer.data(),0,encoded_buffer.size());
        p_ilbc->encode(decoded_buffer.data(), encoded_buffer.data());
        if (p_print!=nullptr){
          p_print->write(encoded_buffer.data(), encoded_buffer.size());
        }
        decoded_buffer_pos = 0;
      }
    }
    return len;
  }
 protected:
  Print *p_print = nullptr;
  iLBCEncode *p_ilbc = nullptr;
  Vector<float> decoded_buffer{0};  // collects one PCM frame as floats
  Vector<uint8_t> encoded_buffer{0};// receives one encoded frame
  int16_t decoded_buffer_pos = 0;   // fill position in decoded_buffer
  EnumLBCFrameSize frame_size;
};
} // namespace audio_tools

View File

@@ -0,0 +1,123 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
 * @brief DecoderL16 - Converts an 16 Bit Stream into 16Bits network byte order.
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class DecoderL16 : public AudioDecoder {
 public:
  /**
   * @brief Construct a new DecoderL16 object
   */
  DecoderL16() { TRACED(); }

  /**
   * @brief Construct a new DecoderL16 object
   *
   * @param out_stream Output Stream to which we write the decoded result
   */
  DecoderL16(Print &out_stream, bool active = true) {
    TRACED();
    p_print = &out_stream;
  }

  /**
   * @brief Construct a new DecoderL16 object
   *
   * @param out_stream Output Stream to which we write the decoded result
   * @param bi Object that will be notified about the Audio Formt (Changes)
   */
  DecoderL16(Print &out_stream, AudioInfoSupport &bi) {
    TRACED();
    setOutput(out_stream);
    addNotifyAudioChange(bi);
  }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  /// Converts the samples from network to host byte order and forwards them
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr)
      return 0;
    // fix: swap on a local copy - 'data' is const and the previous in-place
    // ntohs() was silently mutating the caller's buffer
    uint8_t tmp[256];
    size_t processed = 0;
    while (processed < len) {
      size_t chunk = len - processed;
      if (chunk > sizeof(tmp)) chunk = sizeof(tmp);
      for (size_t i = 0; i < chunk; i++) tmp[i] = data[processed + i];
      int16_t *data16 = (int16_t *)tmp;
      for (size_t j = 0; j < chunk / 2; j++) {
        data16[j] = ntohs(data16[j]);
      }
      if (p_print->write(tmp, chunk) != chunk) break;
      processed += chunk;
    }
    return processed;
  }

  virtual operator bool() override { return p_print!=nullptr; }

 protected:
  Print *p_print = nullptr;
};
/**
 * @brief EncoderL16 - Converts 16 bit PCM data into network byte order.
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class EncoderL16 : public AudioEncoder {
 public:
  // Empty Constructor - the output stream must be provided with begin()
  EncoderL16() {}

  // Constructor providing the output stream
  EncoderL16(Print &out) { p_print = &out; }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  /// Provides "audio/l16"
  const char *mime() override { return "audio/l16"; }

  /// starts the processing using the actual RAWAudioInfo
  virtual bool begin() override { is_open = true; return true;}

  /// starts the processing
  bool begin(Print &out) {
    p_print = &out;
    return begin();
  }

  /// stops the processing
  void end() override { is_open = false; }

  /// Converts the samples to network byte order and forwards them
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr)
      return 0;
    // fix: swap on a local copy - 'data' is const and the previous in-place
    // htons() was silently mutating the caller's buffer
    uint8_t tmp[256];
    size_t processed = 0;
    while (processed < len) {
      size_t chunk = len - processed;
      if (chunk > sizeof(tmp)) chunk = sizeof(tmp);
      for (size_t i = 0; i < chunk; i++) tmp[i] = data[processed + i];
      int16_t *data16 = (int16_t *)tmp;
      for (size_t j = 0; j < chunk / 2; j++) {
        data16[j] = htons(data16[j]);
      }
      if (p_print->write(tmp, chunk) != chunk) break;
      processed += chunk;
    }
    return processed;
  }

  operator bool() override { return is_open; }

  bool isOpen() { return is_open; }

 protected:
  Print *p_print = nullptr;
  // fix: was declared uninitialized, so operator bool()/isOpen() returned
  // an indeterminate value before begin() was called
  bool is_open = false;
};
} // namespace audio_tools

View File

@@ -0,0 +1,193 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
namespace audio_tools {
/**
* @brief DecoderL8 - Converts an 8 Bit Stream into 16Bits
* Most microcontrollers can not output 8 bit data directly. 8 bit data however
* is very memory efficient and helps if you need to store audio on constrained
* resources. This decoder translates 8bit data into 16bit data.
* By default the encoded data is represented as uint8_t, so the values are from
* 0 to 255.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class DecoderL8 : public AudioDecoder {
 public:
  /**
   * @brief Construct a new DecoderL8 object
   * @param isSigned true when the 8-bit input is signed (-128..127),
   * false when it is unsigned (0..255)
   */
  DecoderL8(bool isSigned = false) {
    TRACED();
    setSigned(isSigned);
  }
  /**
   * @brief Construct a new DecoderL8 object
   *
   * @param out_stream Output Stream to which we write the decoded result
   */
  DecoderL8(Print &out_stream, bool active = true) {
    TRACED();
    p_print = &out_stream;
  }
  /**
   * @brief Construct a new DecoderL8 object
   *
   * @param out_stream Output Stream to which we write the decoded result
   * @param bi Object that will be notified about the Audio Formt (Changes)
   */
  DecoderL8(Print &out_stream, AudioInfoSupport &bi) {
    TRACED();
    setOutput(out_stream);
    addNotifyAudioChange(bi);
  }
  /// By default the encoded values are unsigned, but you can change them to
  /// signed
  void setSigned(bool isSigned) { is_signed = isSigned; }
  /// for most decoders this is not needed
  virtual void setAudioInfo(AudioInfo from) override {
    TRACED();
    // the decoder always produces 16-bit output, whatever was requested
    if (from.bits_per_sample!=16){
      LOGE("Bits per sample not supported: %d", from.bits_per_sample);
    }
    from.bits_per_sample = 16;
    if (info != from) {
      notifyAudioChange(from);
    }
    info = from;
  }
  /// Expands each 8-bit input byte to one 16-bit sample and forwards the
  /// result; returns the number of 16-bit values actually written
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr) return 0;
    buffer.resize(len);
    memset(buffer.data(), 0, len * 2);
    if (is_signed) {
      int8_t *pt8 = (int8_t *)data;
      for (size_t j = 0; j < len; j++) {
        buffer[j] = convertSample(pt8[j]);
      }
    } else {
      uint8_t *pt8 = (uint8_t *)data;
      for (size_t j = 0; j < len; j++) {
        buffer[j] = convertSample(pt8[j]);
      }
    }
    int write_byte_count = len * sizeof(int16_t);
    size_t result = p_print->write((uint8_t *)buffer.data(), write_byte_count);
    LOGD("DecoderL8 %d -> %d -> %d", (int)len, write_byte_count, (int)result);
    return result / sizeof(int16_t);
  }
  /// Maps one 8-bit value to the 16-bit range: unsigned input is first
  /// re-centered around zero (-129), then both variants are scaled by 258
  /// and clipped to the int16_t range
  int16_t convertSample(int16_t in) {
    int32_t tmp = in;
    if (!is_signed) {
      tmp -= 129;
    }
    return NumberConverter::clipT<int16_t>(tmp * 258);
  }
  virtual operator bool() override { return p_print!=nullptr; }
 protected:
  bool is_signed = false;   // interpretation of the 8-bit input
  Vector<int16_t> buffer;   // conversion scratch buffer, reused across calls
};
/**
 * @brief EncoderL8s - Condenses 16 bit PCM data stream to 8 bits
 * data.
 * Most microcontrollers can not process 8 bit audio data directly. 8 bit data
 * however is very memory efficient and helps if you need to store audio on
 * constrained resources. This encoder translates 16bit data into 8bit data.
 * By default the encoded data is represented as uint8_t, so the values are from
 * 0 to 255.
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class EncoderL8 : public AudioEncoder {
 public:
  // Empty Constructor - the output stream must be provided with begin()
  EncoderL8(bool isSigned = false) {
    TRACED();
    setSigned(isSigned);
  }

  // Constructor providing the output stream
  EncoderL8(Print &out) { p_print = &out; }

  /// By default the encoded values are unsigned, but can change them to signed
  void setSigned(bool isSigned) { is_signed = isSigned; }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  /// Provides "audio/l8"
  const char *mime() override { return "audio/l8"; }

  /// starts the processing using the actual RAWAudioInfo
  bool begin() override { is_open = true; return true;}

  /// starts the processing
  bool begin(Print &out) {
    p_print = &out;
    return begin();
  }

  /// stops the processing
  void end() override { is_open = false; }

  /// Condenses each 16-bit sample to 8 bits and forwards the result;
  /// returns the consumed input bytes (written bytes * 2)
  size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr) return 0;
    int16_t *pt16 = (int16_t *)data;
    size_t samples = len / 2;
    buffer.resize(samples);
    memset(buffer.data(), 0, samples);
    for (size_t j = 0; j < samples; j++) {
      buffer[j] = convertSample(pt16[j]);
    }
    size_t result = p_print->write((uint8_t *)buffer.data(), samples);
    LOGD("EncoderL8 %d -> %d -> %d", (int)len,(int) samples, (int)result);
    return result * sizeof(int16_t);
  }

  operator bool() override { return is_open; }

  /// Maps one 16-bit sample to 8 bits: scaled down by 258 and clipped to the
  /// int8_t range; for unsigned output it is then shifted by +129 and clamped
  /// to 0..255
  int16_t convertSample(int16_t sample) {
    int16_t tmp = NumberConverter::clipT<int8_t>(sample / 258);
    if (!is_signed) {
      tmp += 129;
      // clip to range
      if (tmp < 0) {
        tmp = 0;
      } else if (tmp > 255) {
        tmp = 255;
      }
    }
    return tmp;
  }

  bool isOpen() { return is_open; }

 protected:
  Print *p_print = nullptr;
  // fix: was declared uninitialized, so operator bool()/isOpen() returned
  // an indeterminate value before begin() was called
  bool is_open = false;
  bool is_signed = false;
  Vector<int8_t> buffer;  // conversion scratch buffer, reused across calls
};
} // namespace audio_tools

View File

@@ -0,0 +1,318 @@
/**
* @file CodecLC3.h
* @author Phil Schatzmann
* @brief Codec for lc3 using https://github.com/pschatzmann/arduino-liblc3
* @version 0.1
* @date 2022-04-24
*
* @copyright Copyright (c) 2022
*
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "lc3.h"
namespace audio_tools {
// 20 to 400
#define DEFAULT_BYTE_COUNT 40
// 7500 or 10000
#define LC3_DEFAULT_DT_US 7500
/**
* @brief Decoder for LC3. Depends on
* https://github.com/pschatzmann/arduino-liblc3
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class LC3Decoder : public AudioDecoder {
 public:
  /// Constructor defining the audio info, frame duration and encoded frame size
  LC3Decoder(AudioInfo info, int dt_us = LC3_DEFAULT_DT_US,
             uint16_t inputByteCount = DEFAULT_BYTE_COUNT) {
    this->dt_us = dt_us;
    this->info = info;
    this->input_byte_count = inputByteCount;
  }

  /// Default constructor: 32 kHz, 16 bits, mono
  LC3Decoder(int dt_us = LC3_DEFAULT_DT_US,
             uint16_t inputByteCount = DEFAULT_BYTE_COUNT) {
    this->dt_us = dt_us;
    this->input_byte_count = inputByteCount;
    info.sample_rate = 32000;
    info.bits_per_sample = 16;
    info.channels = 1;
  }

  /// Starts the processing: allocates the buffers and sets up the lc3 decoder
  virtual bool begin() {
    TRACEI();
    // Return the number of PCM samples in a frame (-1 on invalid parameters)
    num_frames = lc3_frame_samples(dt_us, info.sample_rate);
    dec_size = lc3_decoder_size(dt_us, info.sample_rate);
    LOGI("channels: %d", info.channels);
    LOGI("sample_rate: %d", info.sample_rate);
    LOGI("input_byte_count: %d", input_byte_count);
    LOGI("dt_us: %d", dt_us);
    LOGI("num_frames: %d", num_frames);
    LOGI("dec_size: %d", dec_size);
    if (!checkValues()) {
      LOGE("Invalid Parameters");
      return false;
    }
    // setup memory
    input_buffer.resize(input_byte_count);
    // NOTE: sized in uint16_t elements, so this allocates twice the bytes
    // that one frame of 16 bit samples needs - kept for compatibility
    output_buffer.resize(num_frames * 2);
    lc3_decoder_memory.resize(dec_size);
    // setup decoder
    lc3_decoder = lc3_setup_decoder(dt_us, info.sample_rate, 0,
                                    (void *)lc3_decoder_memory.data());
    // FIX: lc3_setup_decoder returns NULL on invalid parameters
    if (lc3_decoder == nullptr) {
      LOGE("lc3_setup_decoder failed");
      return false;
    }
    notifyAudioChange(info);
    input_pos = 0;
    active = true;
    return true;
  }

  /// Stops the processing
  virtual void end() {
    TRACEI();
    active = false;
  }

  /// Defines the output Stream
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return active; }

  /// Collects input_byte_count encoded bytes and decodes each complete frame
  virtual size_t write(const uint8_t *data, size_t len) {
    if (!active) return 0;
    LOGD("write %u", (unsigned)len);
    uint8_t *p_ptr8 = (uint8_t *)data;
    for (size_t j = 0; j < len; j++) {
      input_buffer[input_pos++] = p_ptr8[j];
      if (input_pos >= input_buffer.size()) {
        // a full encoded frame is available -> decode it
        if (lc3_decode(lc3_decoder, input_buffer.data(), input_buffer.size(),
                       pcm_format, (int16_t *)output_buffer.data(), 1) != 0) {
          LOGE("lc3_decode");
        }
        // write all data to final output (requested is in bytes)
        int requested = output_buffer.size();
        int written =
            p_print->write((const uint8_t *)output_buffer.data(), requested);
        if (written != requested) {
          LOGE("Decoder Bytes requested: %d - written: %d", requested, written);
        }
        input_pos = 0;
      }
    }
    return len;
  }

 protected:
  Print *p_print = nullptr;
  lc3_decoder_t lc3_decoder = nullptr;
  lc3_pcm_format pcm_format;
  Vector<uint8_t> lc3_decoder_memory;
  Vector<uint16_t> output_buffer;
  Vector<uint8_t> input_buffer;
  size_t input_pos = 0;
  int dt_us;
  uint16_t input_byte_count = 20;  // up to 400
  // FIX: was uint16_t, so the num_frames == -1 error check could never fire
  int num_frames = 0;
  unsigned dec_size = 0;
  bool active = false;

  /// Validates the configured parameters; also determines the pcm_format
  bool checkValues() {
    if (p_print == nullptr) {
      LOGE("Output is not defined");
      return false;
    }
    if (!LC3_CHECK_DT_US(dt_us)) {
      LOGE("dt_us: %d", dt_us);
      return false;
    }
    if (!LC3_CHECK_SR_HZ(info.sample_rate)) {
      LOGE("sample_rate: %d", info.sample_rate);
      return false;
    }
    // only warn: the decoder is still driven with a single channel
    if (info.channels != 1) {
      LOGE("channels: %d", info.channels);
    }
    if (num_frames == -1) {
      LOGE("num_frames could not be determined");
      return false;
    }
    if (dec_size == 0) {
      LOGE("dec_size");
      return false;
    }
    switch (info.bits_per_sample) {
      case 16:
        pcm_format = LC3_PCM_FORMAT_S16;
        break;
      case 24:
        pcm_format = LC3_PCM_FORMAT_S24;
        break;
      default:
        LOGE("Bits per sample not supported: %d", info.bits_per_sample);
        return false;
    }
    return true;
  }
};
/**
* @brief Encoder for LC3 - Depends on
* https://github.com/pschatzmann/arduino-liblc3
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class LC3Encoder : public AudioEncoder {
public:
LC3Encoder(int dt_us = LC3_DEFAULT_DT_US,
uint16_t outputByteCount = DEFAULT_BYTE_COUNT) {
this->dt_us = dt_us;
info.sample_rate = 32000;
info.bits_per_sample = 16;
info.channels = 1;
output_byte_count = outputByteCount;
}
bool begin() {
TRACEI();
unsigned enc_size = lc3_encoder_size(dt_us, info.sample_rate);
num_frames = lc3_frame_samples(dt_us, info.sample_rate);
LOGI("sample_rate: %d", info.sample_rate);
LOGI("channels: %d", info.channels);
LOGI("dt_us: %d", dt_us);
LOGI("output_byte_count: %d", output_byte_count);
LOGI("enc_size: %d", enc_size);
LOGI("num_frames: %d", num_frames);
if (!checkValues()) {
return false;
}
// setup memory
lc3_encoder_memory.resize(enc_size);
input_buffer.resize(num_frames * 2);
output_buffer.resize(output_byte_count);
// setup encoder
lc3_encoder = lc3_setup_encoder(dt_us, info.sample_rate, 0,
lc3_encoder_memory.data());
input_pos = 0;
active = true;
return true;
}
virtual void end() {
TRACEI();
active = false;
}
virtual const char *mime() { return "audio/lc3"; }
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
operator bool() { return lc3_encoder != nullptr; }
virtual size_t write(const uint8_t *data, size_t len) {
if (!active) return 0;
LOGD("write %u", len);
uint8_t *p_ptr8 = (uint8_t *)data;
for (int j = 0; j < len; j++) {
input_buffer[input_pos++] = p_ptr8[j];
if (input_pos >= num_frames * 2) {
if (lc3_encode(lc3_encoder, pcm_format,
(const int16_t *)input_buffer.data(), 1,
output_buffer.size(), output_buffer.data()) != 0) {
LOGE("lc3_encode");
}
// write all data to final output
int requested = output_buffer.size();
int written = p_print->write(output_buffer.data(), requested);
if (written != requested) {
LOGE("Encoder Bytes requested: %d - written: %d", requested, written);
}
input_pos = 0;
}
}
return len;
}
protected:
Print *p_print = nullptr;
unsigned dt_us = 1000;
uint16_t num_frames;
lc3_encoder_t lc3_encoder = nullptr;
lc3_pcm_format pcm_format;
uint16_t output_byte_count = 20;
Vector<uint8_t> lc3_encoder_memory;
Vector<uint8_t> output_buffer;
Vector<uint8_t> input_buffer;
int input_pos = 0;
bool active = false;
bool checkValues() {
if (p_print == nullptr) {
LOGE("Output is not defined");
return false;
}
if (!LC3_CHECK_DT_US(dt_us)) {
LOGE("dt_us: %d", dt_us);
return false;
}
if (!LC3_CHECK_SR_HZ(info.sample_rate)) {
LOGE("sample_rate: %d", info.sample_rate);
return false;
}
if (info.channels!=1){
LOGE("channels: %d", info.channels);
}
if (num_frames == -1) {
LOGE("Invalid num_frames");
return false;
}
switch (info.bits_per_sample) {
case 16:
pcm_format = LC3_PCM_FORMAT_S16;
break;
case 24:
pcm_format = LC3_PCM_FORMAT_S24;
break;
default:
LOGE("Bits per sample not supported: %d", info.bits_per_sample);
return false;
}
return true;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,164 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#ifndef HELIX_PRINT
#define HELIX_PRINT
#endif
#include "MP3DecoderHelix.h"
namespace audio_tools {
/**
* @brief MP3 Decoder using libhelix:
* https://github.com/pschatzmann/arduino-libhelix This is basically just a
* simple wrapper to provide AudioInfo and AudioInfoSupport
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MP3DecoderHelix : public AudioDecoder {
public:
MP3DecoderHelix() {
TRACED();
mp3 = new libhelix::MP3DecoderHelix();
if (mp3 != nullptr) {
mp3->setReference(this);
} else {
LOGE("Not enough memory for libhelix");
}
}
/**
* @brief Construct a new MP3DecoderMini object
*
* @param out_stream
*/
MP3DecoderHelix(Print &out_stream) {
TRACED();
mp3 = new libhelix::MP3DecoderHelix();
if (mp3 != nullptr) {
mp3->setReference(this);
} else {
LOGE("Not enough memory for libhelix");
}
setOutput(out_stream);
}
/**
* @brief Construct a new MP3DecoderMini object. The decoded output will go to
* the print object.
*
* @param out_stream
* @param bi
*/
MP3DecoderHelix(Print &out_stream, AudioInfoSupport &bi) {
TRACED();
mp3 = new libhelix::MP3DecoderHelix();
if (mp3 != nullptr) {
mp3->setReference(this);
} else {
LOGE("Not enough memory for libhelix");
}
setOutput(out_stream);
addNotifyAudioChange(bi);
}
/**
* @brief Destroy the MP3DecoderMini object
*
*/
~MP3DecoderHelix() {
if (mp3 != nullptr) delete mp3;
}
/// Defines the output Stream
void setOutput(Print &outStream) override {
AudioDecoder::setOutput(outStream);
if (mp3 != nullptr) mp3->setOutput(outStream);
}
/// Starts the processing
bool begin() override {
TRACEI();
if (mp3 == nullptr) {
LOGE("Not enough memory for libhelix");
return false;
}
mp3->begin();
return true;
}
/// Releases the reserved memory
void end() override {
TRACED();
if (mp3 != nullptr) mp3->end();
}
MP3FrameInfo audioInfoEx() { return mp3->audioInfo(); }
AudioInfo audioInfo() override {
AudioInfo baseInfo;
MP3FrameInfo i = audioInfoEx();
if (i.nChans != 0 && i.samprate != 0 && i.bitsPerSample != 0) {
baseInfo.channels = i.nChans;
baseInfo.sample_rate = i.samprate;
baseInfo.bits_per_sample = i.bitsPerSample;
}
return baseInfo;
}
/// Write mp3 data to decoder
size_t write(const uint8_t *data, size_t len) override {
LOGD("%s: %zu", LOG_METHOD, len);
if (mp3 == nullptr) return 0;
return mp3->write((uint8_t *)data, len);
}
/// checks if the class is active
operator bool() override { return mp3 != nullptr && (bool)*mp3; }
libhelix::MP3DecoderHelix *driver() { return mp3; }
/// Defines the callback object to which the Audio information change is
/// provided
void addNotifyAudioChange(AudioInfoSupport &bi) override {
TRACED();
AudioDecoder::addNotifyAudioChange(bi);
if (mp3 != nullptr) mp3->setInfoCallback(infoCallback, this);
}
/// notifies the subscriber about a change
static void infoCallback(MP3FrameInfo &i, void *ref) {
MP3DecoderHelix *p_helix = (MP3DecoderHelix *)ref;
if (p_helix != nullptr) {
TRACED();
AudioInfo baseInfo;
baseInfo.channels = i.nChans;
baseInfo.sample_rate = i.samprate;
baseInfo.bits_per_sample = i.bitsPerSample;
baseInfo.logInfo("MP3DecoderHelix");
p_helix->notifyAudioChange(baseInfo);
} else {
LOGE("Wrong Libhelix Version");
}
}
/// Provides the maximum frame size - this is allocated on the heap and you
/// can reduce the heap size my minimizing this value
size_t maxFrameSize() { return mp3->maxFrameSize(); }
/// Define your optimized maximum frame size
void setMaxFrameSize(size_t len) { mp3->setMaxFrameSize(len); }
/// Provides the maximum pwm buffer size - this is allocated on the heap and
/// you can reduce the heap size my minimizing this value
size_t maxPCMSize() { return mp3->maxPCMSize(); }
/// Define your optimized maximum pwm buffer size
void setMaxPCMSize(size_t len) { mp3->setMaxPCMSize(len); }
protected:
libhelix::MP3DecoderHelix *mp3 = nullptr;
};
} // namespace audio_tools

View File

@@ -0,0 +1,150 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "MP3EncoderLAME.h"
namespace audio_tools {
/**
* @brief LAME parameters
* @author Phil Schatzmann
* @copyright GPLv3
*/
struct AudioInfoLAME : public liblame::AudioInfo {
  // Defaults: 44.1 kHz, stereo, 16 bits per sample
  AudioInfoLAME () {
      sample_rate = 44100;
      channels = 2;
      bits_per_sample = 16;
  };
  AudioInfoLAME (const AudioInfoLAME &) = default;

  // LAME encoding quality
  int quality = 7; // 0..9.  0=best (very slow).  9=worst.
};
/**
* @brief Encodes PCM data to the MP3 format and writes the result to a stream
* This is basically just a wrapper using https://github.com/pschatzmann/arduino-liblame
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MP3EncoderLAME : public AudioEncoder {
 public:
  /// Default constructor: the output must be defined with setOutput()
  MP3EncoderLAME() { TRACED(); }

  /// Constructor defining the output stream
  MP3EncoderLAME(Print &out_stream) {
    TRACED();
    p_print = &out_stream;
  }

  ~MP3EncoderLAME() {
    TRACED();
    end();
  }

  /// Defines the output stream
  void setOutput(Print &out_stream) {
    TRACED();
    p_print = &out_stream;
    if (enc != nullptr) {
      enc->setOutput(out_stream);
    }
  }

  /// Defines the Audio Info
  void setAudioInfo(AudioInfo from) {
    TRACED();
    AudioEncoder::setAudioInfo(from);
    lame_info.channels = from.channels;
    lame_info.sample_rate = from.sample_rate;
    lame_info.bits_per_sample = from.bits_per_sample;
    // FIX: forward the change to an already created encoder, otherwise
    // info updates after begin() were silently ignored
    if (enc != nullptr) enc->setAudioInfo(lame_info);
  }

  /// Defines the LAME specific Audio Info (incl. quality)
  void setAudioInfo(AudioInfoLAME from) {
    TRACED();
    lame_info = from;
    // FIX: forward the change to an already created encoder
    if (enc != nullptr) enc->setAudioInfo(lame_info);
  }

  /// Starts the processing with the indicated parameters
  bool begin(AudioInfoLAME from) {
    setAudioInfo(from);
    return begin();
  }

  /// Starts the processing: creates the encoder (e.g. in PSRAM) on demand
  bool begin() {
    createEnc();
    if (enc == nullptr) return false;
    enc->begin();
    return true;
  }

  /// Provides the (mutable) LAME specific audio info
  AudioInfoLAME &audioInfoExt() { return lame_info; }

  /// Provides a default configuration
  AudioInfoLAME defaultConfig() {
    AudioInfoLAME def;
    return def;
  }

  /// Converts PCM data into MP3
  size_t write(const uint8_t *data, size_t len) {
    if (enc == nullptr) return 0;
    LOGD("write %d bytes", (int)len);
    return enc->write((uint8_t *)data, len);
  }

  /// Releases the resources
  void end() {
    TRACED();
    if (enc != nullptr) {
      enc->end();
      delete enc;
      enc = nullptr;
    }
  }

  /// Provides access to the underlying liblame driver
  liblame::MP3EncoderLAME *driver() { return enc; }

  /// Provides the mime type "audio/mp3"
  const char *mime() { return "audio/mp3"; }

  virtual operator bool() { return enc != nullptr && (bool)(*enc); }

 protected:
  liblame::MP3EncoderLAME *enc = nullptr;
  AudioInfoLAME lame_info;
  Print *p_print = nullptr;

  /// Create enc only at begin so that we can use psram
  void createEnc() {
    TRACED();
    if (enc == nullptr) {
      enc = new liblame::MP3EncoderLAME();
      if (p_print != nullptr) {
        setOutput(*p_print);
      } else {
        LOGE("Output undefined");
      }
      LOGI("LibLAME channels: %d", lame_info.channels);
      LOGI("LibLAME sample_rate: %d", lame_info.sample_rate);
      LOGI("LibLAME bits_per_sample: %d", lame_info.bits_per_sample);
      LOGI("LibLAME quality: %d", lame_info.quality);
      enc->setAudioInfo(lame_info);
    }
  }
};
}

View File

@@ -0,0 +1,140 @@
#pragma once
#define MINIMP3_IMPLEMENTATION
#define MINIMP3_NO_STDIO
#define LOGGING_ACTIVE true
#include "Stream.h"
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "MP3DecoderMAD.h"
namespace audio_tools {

// forward audio changes
// NOTE(review): file-scope static in a header - every translation unit that
// includes this file gets its own copy, and only a single listener can be
// registered at a time; confirm this is acceptable for the intended use.
static AudioInfoSupport *audioChangeMAD=nullptr;
/**
* @brief MP3 Decoder using https://github.com/pschatzmann/arduino-libmad
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MP3DecoderMAD : public AudioDecoder {
 public:
  /// Default constructor: allocates the libmad driver; output must be
  /// defined with setOutput()
  MP3DecoderMAD(){
    TRACED();
    mad = new libmad::MP3DecoderMAD();
  }

  /// Constructor providing the callback which receives the decoded data
  /// (and optionally the info change callback)
  MP3DecoderMAD(libmad::MP3DataCallback dataCallback, libmad::MP3InfoCallback infoCB=nullptr){
    TRACED();
    mad = new libmad::MP3DecoderMAD(dataCallback, infoCB);
  }

  /// Constructor providing the output stream (and optionally the info
  /// change callback)
  MP3DecoderMAD(Print &mad_output_streamput, libmad::MP3InfoCallback infoCB = nullptr){
    TRACED();
    mad = new libmad::MP3DecoderMAD(mad_output_streamput, infoCB);
  }

  // NOTE(review): this class owns mad via a raw pointer but does not delete
  // its copy constructor - copying would double-delete; confirm callers
  // never copy instances.
  ~MP3DecoderMAD(){
    TRACED();
    delete mad;
  }

  /// Defines the output stream which receives the decoded PCM data
  void setOutput(Print &out) override {
    TRACED();
    mad->setOutput(out);
  }

  /// Defines the callback which receives the decoded data
  void setAudioDataCallback(libmad::MP3DataCallback cb){
    TRACED();
    mad->setDataCallback(cb);
  }

  /// Defines the callback which receives the Info changes
  void setInfoCallback(libmad::MP3InfoCallback cb){
    TRACED();
    mad->setInfoCallback(cb);
  }

  /// Starts the processing
  bool begin() override {
    TRACED();
    mad->begin();
    return true;
  }

  /// Releases the reserved memory
  void end() override{
    TRACED();
    mad->end();
  }

  /// Provides the last valid audio information (libmad specific struct)
  libmad::MadAudioInfo audioInfoEx(){
    TRACED();
    return mad->audioInfo();
  }

  /// Provides the last valid audio information converted to AudioInfo
  AudioInfo audioInfo() override {
    TRACED();
    libmad::MadAudioInfo info = audioInfoEx();
    AudioInfo base;
    base.channels = info.channels;
    base.sample_rate = info.sample_rate;
    base.bits_per_sample = info.bits_per_sample;
    return base;
  }

  /// Makes the mp3 data available for decoding: however we recommend to provide the data via a callback or input stream
  size_t write(const uint8_t *data, size_t len) override {
    TRACED();
    return mad->write(data,len);
  }

  /// Makes the mp3 data available for decoding: however we recommend to provide the data via a callback or input stream
  size_t write(void *data, size_t len){
    TRACED();
    return mad->write(data,len);
  }

  /// Returns true as long as we are processing data
  operator bool() override{
    return (bool)*mad;
  }

  /// Provides access to the underlying libmad driver
  libmad::MP3DecoderMAD *driver() {
    return mad;
  }

  /// Static bridge from the libmad info callback to the registered
  /// AudioInfoSupport listener (via the file-scope audioChangeMAD pointer)
  static void audioChangeCallback(libmad::MadAudioInfo &info){
    if (audioChangeMAD!=nullptr){
      TRACED();
      AudioInfo base;
      base.channels = info.channels;
      base.sample_rate = info.sample_rate;
      base.bits_per_sample = info.bits_per_sample;
      // notify audio change
      audioChangeMAD->setAudioInfo(base);
    }
  }

  /// Registers the object to be notified on audio changes.
  /// NOTE(review): stores the listener in a file-scope static, so only one
  /// listener (across all instances) is supported at a time.
  virtual void addNotifyAudioChange(AudioInfoSupport &bi) override {
    TRACED();
    audioChangeMAD = &bi;
    // register audio change handler
    mad->setInfoCallback(audioChangeCallback);
  }

 protected:
  // owned libmad driver (allocated in the constructors)
  libmad::MP3DecoderMAD *mad;
};
} // namespace

View File

@@ -0,0 +1,177 @@
#pragma once
#define MINIMP3_NO_STDIO
//#define MINIMP3_NO_SIMD
//#define MINIMP3_IMPLEMENTATION
//#define MINIMP3_ONLY_MP3
//#define MINIMP3_FLOAT_OUTPUT
#ifndef MINIMP3_MAX_SAMPLE_RATE
#define MINIMP3_MAX_SAMPLE_RATE 44100
#endif
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "minimp3.h"
namespace audio_tools {
/**
* @brief MP3 Decoder using https://github.com/pschatzmann/minimp3.
* This decoder does not provide any good results and it is not suited to decode any audio above 32000 on an ESP32. So the
* sample rate is limited by the MINIMP3_MAX_SAMPLE_RATE variable.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MP3DecoderMini : public AudioDecoder {
 public:
  MP3DecoderMini() = default;

  /// Destroy the MP3DecoderMini object
  ~MP3DecoderMini() {
    if (active) {
      end();
    }
  }

  /// Defines the size of the internal mp3 input buffer (call before begin)
  void setBufferLength(int len) { buffer_size = len; }

  /// Starts the processing
  bool begin() {
    TRACED();
    // esp_task_wdt_delete(nullptr);
    ::mp3dec_init(&mp3d);
    buffer.resize(buffer_size);
    pcm.resize(MINIMP3_MAX_SAMPLES_PER_FRAME);
    buffer_pos = 0;
    active = true;
    return true;
  }

  /// Releases the reserved memory
  void end() {
    TRACED();
    flush();
    active = false;
  }

  /// Defines the output Stream
  void setOutput(Print &outStream) { this->out = &outStream; }

  /// Write mp3 data to decoder
  size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %zu", len);
    if (active) {
      // decode first if the new data would not fit into the buffer
      if (buffer_pos + len >= buffer.size()) {
        decode(len);
      }
      assert(buffer_pos + len < buffer.size());
      memcpy(buffer.data() + buffer_pos, data, len);
      buffer_pos += len;
    }
    return len;
  }

  /// Decodes the last outstanding data
  void flush() {
    // decode the full buffer
    decode(0);
    buffer_pos = 0;
  }

  /// checks if the class is active
  virtual operator bool() { return active; }

  /// Limits the reported sample rate (workaround for slow targets)
  void setSampleRateLimit(int limit) { sample_rate_limit = limit; }

 protected:
  Print *out = nullptr;
  mp3dec_t mp3d;
  mp3dec_frame_info_t mp3dec_info;
  size_t buffer_size = 5 * 1024;
  size_t buffer_pos = 0;
  Vector<uint8_t> buffer;
  Vector<mp3d_sample_t> pcm;
#ifdef MINIMP3_FLOAT_OUTPUT
  Vector<int16_t> pcm16;
#endif
  // FIX: was uninitialized - the destructor read it before begin() ran
  bool active = false;
  int sample_rate_limit = MINIMP3_MAX_SAMPLE_RATE;  // 32000;

  /// Decodes the buffered data until there is enough space for write_len
  /// additional bytes
  void decode(int write_len) {
    LOGD("decode: %zd ", buffer_pos);
    int open = buffer_pos;
    int processed = 0;
    int samples;
    do {
      // decode data
      samples = ::mp3dec_decode_frame(&mp3d, buffer.data() + processed, open,
                                      pcm.data(), &mp3dec_info);
      LOGD("frame_offset: %d - frame_bytes: %d -> samples %d",
           mp3dec_info.frame_offset, mp3dec_info.frame_bytes, samples);
      // FIX: when no frame could be consumed we would loop forever below
      if (samples == 0 && mp3dec_info.frame_bytes == 0) break;
      open -= mp3dec_info.frame_bytes;
      processed += mp3dec_info.frame_bytes;
      // output decoding result
      if (samples > 0) {
        provideResult(samples);
      }
      // process until we have space for the next write
    } while (processed < write_len);
    // save unprocessed data
    buffer_pos = open;
    memmove(buffer.data(), buffer.data() + processed, open);
  }

  /// Provides Metadata and PCM data
  void provideResult(int samples) {
    LOGD("provideResult: %d samples", samples);
    AudioInfo tmp;
    tmp.sample_rate =
        mp3dec_info.hz > sample_rate_limit ? sample_rate_limit : mp3dec_info.hz;
    tmp.channels = mp3dec_info.channels;
    tmp.bits_per_sample = 16;
    // notify about audio changes
    if (tmp != info) {
      tmp.logInfo();
      notifyAudioChange(tmp);
    }
    // FIX: was 'info = info;' so the change above was re-notified on every
    // frame - store the last info so that we can detect any changes
    info = tmp;
    // provide result pcm data
    // NOTE(review): 'samples' is used directly as the per-write sample count;
    // confirm against the minimp3 API whether it is per channel or total
    if (out != nullptr) {
#ifdef MINIMP3_FLOAT_OUTPUT
      pcm16.resize(samples);
      f32_to_s16(pcm.data(), pcm16.data(), samples);
      out->write((uint8_t *)pcm16.data(), samples * sizeof(int16_t));
#else
      out->write((uint8_t *)pcm.data(), samples * sizeof(mp3d_sample_t));
#endif
    }
  }

  /// Converts float samples to int16 (rounding away from zero, as in minimp3)
  void f32_to_s16(float *in, int16_t *out, int num_samples) {
    int i = 0;
    for (; i < num_samples; i++) {
      float sample = in[i] * 32768.0f;
      if (sample >= 32766.5f)
        out[i] = (int16_t)32767;
      else if (sample <= -32767.5f)
        out[i] = (int16_t)-32768;
      else {
        int16_t s = (int16_t)(sample + .5f);
        s -= (s < 0); /* away from zero, to be compliant */
        out[i] = s;
      }
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,492 @@
#pragma once
#define TS_PACKET_SIZE 188
#ifndef MTS_WRITE_BUFFER_SIZE
#define MTS_WRITE_BUFFER_SIZE 2000
#endif
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioTypes.h"
#include "AudioToolsConfig.h"
#include "stdlib.h"
namespace audio_tools {
/**
* @brief PMT Program Element Stream Types
* @ingroup basic
*/
enum class MTSStreamType {
  // ISO/IEC 13818-1 defined stream types
  VIDEO = 0x01,
  VIDEO_H262 = 0x02,
  AUDIO_MP3 = 0x03,
  AUDIO_MP3_LOW_BITRATE = 0x04,
  PRV_SECTIONS = 0x05,
  PES_PRV = 0x06,
  MHEG = 0x07,
  H222_0_DSM_CC = 0x08,
  H222_1 = 0x09,
  A = 0x0A,
  B = 0x0B,
  C = 0x0C,
  D = 0x0D,
  H222_0_AUX = 0x0E,
  AUDIO_AAC = 0x0F,          // AAC with ADTS framing
  VISUAL = 0x10,
  AUDIO_AAC_LATM = 0x11,     // AAC with LATM/LOAS framing
  SL_PES = 0x12,
  SL_SECTIONS = 0x13,
  SYNC_DOWNLOAD = 0x14,
  PES_METADATA = 0x15,
  METDATA_SECTIONS = 0x16,
  METADATA_DATA_CAROUSEL = 0x17,
  METADATA_OBJ_CAROUSEL = 0x18,
  METADATA_SYNC_DOWNLOAD = 0x19,
  IPMP = 0x1A,
  VIDEO_AVC = 0X1B,
  VIDEO_H222_0 = 0x1C,
  // user private / ATSC / SCTE / DVB assigned stream types
  DCII_VIDEO = 0x80,
  AUDIO_A53 = 0x81,
  SCTE_STD_SUBTITLE = 0x82,
  SCTE_ISOCH_DATA = 0x83,
  ATSC_PROG_ID = 0x85,
  SCTE_25 = 0x86,
  AUDIO_EAC3 = 0x87,
  AUDIO_DTS_HD = 0x88,
  DVB_MPE_FEC = 0x90,
  ULE = 0x91,
  VEI = 0x92,
  ATSC_DATA_SERVICE_TABLE = 0x95,
  SCTE_IP_DATA = 0xA0,
  DCII_TEXT = 0xC0,
  ATSC_SYNC_DATA = 0xC2,
  SCTE_AYSNC_DATA = 0xC3,
  ATSC_USER_PRIV_PROG_ELEMENTS = 0xC4,
  VC1 = 0xEA,
  ATSC_USER_PRIV = 0xEB,
};
// enum class AACProfile : uint8_t {
// MAIN = 0, // AAC Main (High complexity, rarely used)
// LC = 1, // AAC Low Complexity (Most common)
// SSR = 2, // AAC Scalable Sample Rate (Rare)
// LTP = 3 // AAC Long Term Prediction (Not widely supported)
// };
/**
* @brief MPEG-TS (MTS) decoder. Extracts (demuxes) the indicated audio/video
* data from a MPEG-TS (MTS) data stream. You can define the relevant stream
* types via the API: addStreamType(MTSStreamType). By default, the
* decoder selects the AUDIO_AAC, AUDIO_AAC_LATM stream types.
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
**/
class MTSDecoder : public AudioDecoder {
 public:
  /// Default constructor
  MTSDecoder() = default;

  /// Provide the AAC decoder (or MP3 Decoder) to receive the extracted content
  MTSDecoder(AudioDecoder &dec) { p_dec = &dec; };

  /// Start the prcessor
  bool begin() override {
    TRACED();

    // reset the parser state
    pmt_pid = 0xFFFF;  // undefined
    pes_count = 0;
    is_adts_missing = false;
    open_pes_data_size = 0;
    frame_length = 0;

    // default supported stream types
    if (stream_types.empty()) {
      addStreamType(MTSStreamType::AUDIO_AAC);
      addStreamType(MTSStreamType::AUDIO_AAC_LATM);
    }

    // automatically close when called multiple times
    if (is_active) {
      end();
    }

    if (p_dec) p_dec->begin();

    is_active = true;
    return true;
  }

  /// Stops the processing
  void end() override {
    TRACED();
    if (p_dec) p_dec->end();
    is_active = false;
  }

  virtual operator bool() override { return is_active; }

  /// Provides the mime type: "video/MP2T";
  const char *mime() { return "video/MP2T"; }

  /// Buffers the incoming data and demuxes whole TS packets.
  /// Returns 0 (backpressure) when the internal buffer cannot take len bytes.
  size_t write(const uint8_t *data, size_t len) override {
    // only process when open
    if (!is_active) {
      TRACEE();
      return 0;
    }

    // wait until we have enough data
    if (buffer.availableForWrite() < len) {
      LOGI("MTSDecoder::write: Buffer full");
      demux();
      return 0;
    }

    LOGI("MTSDecoder::write: %d", (int)len);
    size_t result = buffer.writeArray((uint8_t *)data, len);
    demux();
    return result;
  }

  /// Set a new write buffer size (default is 2000)
  void resizeBuffer(int size) { buffer.resize(size); }

  /// Clears the stream type filter
  void clearStreamTypes() {
    TRACED();
    stream_types.clear();
  }

  /// Defines the stream type that should be extracted
  void addStreamType(MTSStreamType type) {
    TRACED();
    stream_types.push_back(type);
  }

  /// Checks if the stream type is active
  bool isStreamTypeActive(MTSStreamType type) {
    for (int j = 0; j < stream_types.size(); j++) {
      if (stream_types[j] == type) return true;
    }
    return false;
  }

  /// Defines where the decoded result is written to
  void setOutput(AudioStream &out_stream) override {
    if (p_dec) {
      p_dec->setOutput(out_stream);
    } else {
      AudioDecoder::setOutput(out_stream);
    }
  }

  /// Defines where the decoded result is written to
  void setOutput(AudioOutput &out_stream) override {
    if (p_dec) {
      p_dec->setOutput(out_stream);
    } else {
      AudioDecoder::setOutput(out_stream);
    }
  }

  /// Defines where the decoded result is written to
  void setOutput(Print &out_stream) override {
    if (p_dec) {
      p_dec->setOutput(out_stream);
    } else {
      AudioDecoder::setOutput(out_stream);
    }
  }

 protected:
  bool is_active = false;
  // staging buffer for incoming (possibly fragmented) TS packets
  SingleBuffer<uint8_t> buffer{MTS_WRITE_BUFFER_SIZE};
  // stream types that should be extracted
  Vector<MTSStreamType> stream_types;
  // elementary stream PIDs selected from the PMT
  Vector<int> pids{0};
  AudioDecoder *p_dec = nullptr;
  uint16_t pmt_pid = 0xFFFF;
  // AACProfile aac_profile = AACProfile::LC;
  MTSStreamType selected_stream_type;
  // remaining payload bytes of the currently open PES packet
  int open_pes_data_size = 0;
  int frame_length = 0;
  bool is_adts_missing = false;
  size_t pes_count = 0;

  /// Add the PID for which we want to extract the audio data from the PES
  /// packets
  void addPID(uint16_t pid) {
    if (pid == 0) return;
    // avoid duplicates
    for (int j = 0; j < pids.size(); j++) {
      if (pids[j] == pid) return;
    }
    LOGI("-> PMT PID: 0x%04X(%d)", pid, pid);
    pids.push_back(pid);
  }

  /// demux the available data
  void demux() {
    TRACED();
    int count = 0;
    while (parse()) {
      LOGI("demux: step #%d with PES #%d", ++count, (int)pes_count);
    }
    LOGI("Number of demux calls: %d", count);
  }

  /// Find the position of the next sync byte: Usually on position 0
  int syncPos() {
    int len = buffer.available();
    if (len < TS_PACKET_SIZE) return -1;
    for (int j = 0; j < len; j++) {
      // 0x47 is the TS packet sync byte
      if (buffer.data()[j] == 0x47) {
        return j;
      }
    }
    return -1;
  }

  /// Parse a single packet and remove the processed data
  bool parse() {
    int pos = syncPos();
    if (pos < 0) return false;
    if (pos != 0) {
      LOGW("Sync byte not found at position 0. Skipping %d bytes", pos);
      buffer.clearArray(pos);
    }

    // parse data
    uint8_t *packet = buffer.data();
    int pid = ((packet[1] & 0x1F) << 8) | (packet[2] & 0xFF);
    LOGI("PID: 0x%04X(%d)", pid, pid);

    // PES contains the audio data
    if (!is_adts_missing && pids.contains(pid)) {
      parsePES(packet, pid);
    } else {
      parsePacket(packet, pid);
    }

    // remove processed data
    buffer.clearArray(TS_PACKET_SIZE);
    return true;
  }

  /// Detailed processing for parsing a single packet (PAT / PMT / other)
  void parsePacket(uint8_t *packet, int pid) {
    TRACEI();
    bool payloadUnitStartIndicator = false;
    int payloadStart =
        getPayloadStart(packet, false, payloadUnitStartIndicator);
    int len = TS_PACKET_SIZE - payloadStart;

    // if we are at the beginning we start with a pat
    if (pid == 0 && payloadUnitStartIndicator) {
      pids.clear();
    }

    // PID 0 is for PAT
    if (pid == 0) {
      parsePAT(&packet[payloadStart], len);
    } else if (pid == pmt_pid && packet[payloadStart] == 0x02) {
      // table_id 0x02 identifies a PMT section
      parsePMT(&packet[payloadStart], len);
    } else {
      LOGE("-> Packet ignored for PID 0x%x", pid);
    }
  }

  /// Determines the offset of the payload within the 188 byte TS packet,
  /// skipping the adaptation field and (for sections) the pointer field
  int getPayloadStart(uint8_t *packet, bool isPES,
                      bool &payloadUnitStartIndicator) {
    uint8_t adaptionField = (packet[3] & 0x30) >> 4;
    int adaptationSize = 0;
    int offset = 4;  // Start after TS header (4 bytes)

    // Check for adaptation field
    // 00 (0) → Invalid (should never happen).
    // 01 (1) → Payload only (no adaptation field).
    // 10 (2) → Adaptation field only (no payload).
    // 11 (3) → Adaptation field + payload.
    if (adaptionField == 0b11) {  // Adaptation field exists
      adaptationSize = packet[4] + 1;
      offset += adaptationSize;
    }

    // If PUSI is set, there's a pointer field (skip it)
    if (packet[1] & 0x40) {
      if (!isPES) offset += packet[offset] + 1;
      payloadUnitStartIndicator = true;
    }
    LOGI("Payload Unit Start Indicator (PUSI): %d", payloadUnitStartIndicator);
    LOGI("Adaption Field Control: 0x%x / size: %d", adaptionField,
         adaptationSize);
    return offset;
  }

  /// Parses the Program Association Table to find the PMT PID
  void parsePAT(uint8_t *pat, int len) {
    TRACEI();
    assert(pat[0] == 0);  // Program Association section
    int startOfProgramNums = 8;
    int lengthOfPATValue = 4;
    int sectionLength = ((pat[1] & 0x0F) << 8) | (pat[2] & 0xFF);
    LOGI("PAT Section Length: %d", sectionLength);
    if (sectionLength >= len) {
      LOGE("Unexpected PAT Section Length: %d", sectionLength);
      sectionLength = len;
    }
    int indexOfPids = 0;
    for (int i = startOfProgramNums; i <= sectionLength;
         i += lengthOfPATValue) {
      int program_number = ((pat[i] & 0xFF) << 8) | (pat[i + 1] & 0xFF);
      int pid = ((pat[i + 2] & 0x1F) << 8) | (pat[i + 3] & 0xFF);
      LOGI("Program Num: 0x%04X(%d) / PID: 0x%04X(%d) ", program_number,
           program_number, pid, pid);
      // take the first PID in the valid elementary PID range
      if (pmt_pid == 0xFFFF && pid >= 0x0020 && pid <= 0x1FFE) {
        pmt_pid = pid;
      }
    }
    LOGI("Using PMT PID: 0x%04X(%d)", pmt_pid, pmt_pid);
  }

  /// Parses the Program Map Table and selects the elementary PIDs whose
  /// stream type matches the configured filter
  void parsePMT(uint8_t *pmt, int len) {
    TRACEI();
    assert(pmt[0] == 0x02);  // Program Association section
    int staticLengthOfPMT = 12;
    int sectionLength = ((pmt[1] & 0x0F) << 8) | (pmt[2] & 0xFF);
    LOGI("- PMT Section Length: %d", sectionLength);
    int programInfoLength = ((pmt[10] & 0x0F) << 8) | (pmt[11] & 0xFF);
    LOGI("- PMT Program Info Length: %d", programInfoLength);

    int cursor = staticLengthOfPMT + programInfoLength;
    while (cursor < sectionLength - 1) {
      MTSStreamType streamType = static_cast<MTSStreamType>(pmt[cursor] & 0xFF);
      int elementaryPID =
          ((pmt[cursor + 1] & 0x1F) << 8) | (pmt[cursor + 2] & 0xFF);
      LOGI("-- Stream Type: 0x%02X(%d) [%s] for Elementary PID: 0x%04X(%d)",
           (int)streamType, (int)streamType, toStr(streamType), elementaryPID,
           elementaryPID);
      if (isStreamTypeActive(streamType)) {
        selected_stream_type = streamType;
        addPID(elementaryPID);
      }
      int esInfoLength =
          ((pmt[cursor + 3] & 0x0F) << 8) | (pmt[cursor + 4] & 0xFF);
      LOGI("-- ES Info Length: 0x%04X(%d)", esInfoLength, esInfoLength);
      // advance to the next program element (5 byte header + descriptors)
      cursor += 5 + esInfoLength;
    }
  }

  /// Extracts the audio payload from a PES packet and forwards it to the
  /// configured output / decoder
  void parsePES(uint8_t *packet, int pid) {
    LOGI("parsePES: %d", pid);
    ++pes_count;
    // calculate payload start
    bool payloadUnitStartIndicator = false;
    int payloadStart = getPayloadStart(packet, true, payloadUnitStartIndicator);

    // PES
    uint8_t *pes = packet + payloadStart;
    int len = TS_PACKET_SIZE - payloadStart;

    // PES (AAC) data
    uint8_t *pesData = nullptr;
    int pesDataSize = 0;

    if (payloadUnitStartIndicator) {
      assert(len >= 6);
      // PES header is not alligned correctly
      if (!isPESStartCodeValid(pes)) {
        LOGE("PES header not aligned correctly");
        return;
      }

      int pesPacketLength =
          (static_cast<int>(pes[4]) << 8) | static_cast<int>(pes[5]);

      // PES Header size is at least 6 bytes, but can be larger with optional
      // fields
      int pesHeaderSize = 6;
      if ((pes[6] & 0xC0) != 0) {  // Check for PTS/DTS flags
        pesHeaderSize += 3 + ((pes[7] & 0xC0) == 0xC0 ? 5 : 0);
        pesHeaderSize += pes[8];  // PES header stuffing size
      }
      LOGI("- PES Header Size: %d", pesHeaderSize);
      pesData = pes + pesHeaderSize;
      pesDataSize = len - pesHeaderSize;
      assert(pesHeaderSize < len);
      assert(pesDataSize > 0);

      /// Check for ADTS
      if (pes_count == 1 && selected_stream_type == MTSStreamType::AUDIO_AAC) {
        is_adts_missing = findSyncWord(pesData, pesDataSize) == -1;
      }

      open_pes_data_size = pesPacketLength;

    } else {
      // continuation packet: the whole payload belongs to the open PES packet
      pesData = pes;
      pesDataSize = len;
    }

    // Recalculate the open data
    open_pes_data_size -= pesDataSize;
    if (open_pes_data_size < 0) {
      return;
    }

    /// Write the data
    LOGI("- writing %d bytes (open: %d)", pesDataSize, open_pes_data_size);
    if (p_print) {
      size_t result = writeData<uint8_t>(p_print, pesData, pesDataSize);
      assert(result == pesDataSize);
    }
    if (p_dec) {
      size_t result =
          writeDataT<uint8_t, AudioDecoder>(p_dec, pesData, pesDataSize);
      assert(result == pesDataSize);
    }
  }

  /// check for PES packet start code prefix
  bool isPESStartCodeValid(uint8_t *pes) {
    if (pes[0] != 0) return false;
    if (pes[1] != 0) return false;
    if (pes[2] != 0x1) return false;
    return true;
  }

  /// Convert the relevant MTSStreamType to a string
  const char *toStr(MTSStreamType type) {
    switch (type) {
      case MTSStreamType::AUDIO_MP3:
        return "AUDIO_MP3";
      case MTSStreamType::AUDIO_MP3_LOW_BITRATE:
        return "AUDIO_MP3_LOW_BITRATE";
      case MTSStreamType::AUDIO_AAC:
        return "AUDIO_AAC";
      case MTSStreamType::AUDIO_AAC_LATM:
        return "AUDIO_AAC_LATM";
      default:
        return "UNKNOWN";
    }
  }

  /// Finds the mp3/aac sync word
  /// NOTE(review): with nBytes == 0 the loop bound nBytes - 1 underflows
  /// (size_t); callers currently assert pesDataSize > 0 before calling
  int findSyncWord(const uint8_t *buf, size_t nBytes, uint8_t synch = 0xFF,
                   uint8_t syncl = 0xF0) {
    for (int i = 0; i < nBytes - 1; i++) {
      if ((buf[i + 0] & synch) == synch && (buf[i + 1] & syncl) == syncl)
        return i;
    }
    return -1;
  }
};
/// @brief Legacy alias for MPEG Transport Stream decoder
/// @ingroup codecs
using MPEG_TSDecoder = MTSDecoder;
} // namespace audio_tools

View File

@@ -0,0 +1,138 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioBasic/Net.h"
#if defined(ARDUINO) && !defined(IS_MIN_DESKTOP)
#include "Print.h"
#endif
namespace audio_tools {
/**
* @brief PCM decoder which converts from network format to the host format.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class DecoderNetworkFormat : public AudioDecoder {
 public:
  DecoderNetworkFormat() = default;
  /// @brief Constructor which defines the output stream
  DecoderNetworkFormat(Print &out_stream) {
    TRACED();
    pt_print = &out_stream;
  }
  /// @brief Constructor with output stream; the AudioInfoSupport parameter is
  /// accepted for API compatibility but not used
  DecoderNetworkFormat(Print &out_stream, AudioInfoSupport &bi) {
    pt_print = &out_stream;
  }
  ~DecoderNetworkFormat() {}
  /// Defines the output stream for the converted data
  virtual void setOutput(Print &out_stream) { pt_print = &out_stream; }
  bool begin() { return true; }
  void end() {}
  /// Converts the samples from (big endian) network byte order to host byte
  /// order and writes the result to the output. Unlike the previous
  /// implementation the (const) input buffer is NOT modified.
  size_t write(const uint8_t *data, size_t len) {
    TRACED();
    if (pt_print == nullptr) return 0;
    switch (audioInfo().bits_per_sample) {
      case 8:
        // single bytes: no byte order to convert
        break;
      case 16:
        return writeConverted16(data, len);
      case 24:
      case 32:
        // 24 bit samples are expected to be stored in 4 bytes
        return writeConverted32(data, len);
      default:
        LOGE("bits_per_sample not supported: %d",
             (int)audioInfo().bits_per_sample);
        break;
    }
    return pt_print->write(data, len);
  }
  operator bool() { return true; }
  /// The result is decoded PCM data
  virtual bool isResultPCM() { return true; }

 protected:
  Print *pt_print = nullptr;
  // number of samples converted per chunk via a small stack buffer
  static const int chunk_samples = 64;

  /// Converts 16 bit samples chunk-wise w/o mutating the const input buffer
  size_t writeConverted16(const uint8_t *data, size_t len) {
    int16_t buffer[chunk_samples];
    const int16_t *source = (const int16_t *)data;
    size_t sample_count = len / sizeof(int16_t);
    size_t result = 0;
    for (size_t pos = 0; pos < sample_count;) {
      size_t n = sample_count - pos;
      if (n > (size_t)chunk_samples) n = chunk_samples;
      for (size_t j = 0; j < n; j++) buffer[j] = ntohs(source[pos + j]);
      result += pt_print->write((const uint8_t *)buffer, n * sizeof(int16_t));
      pos += n;
    }
    // forward a trailing partial sample unchanged
    size_t tail = len - sample_count * sizeof(int16_t);
    if (tail > 0) result += pt_print->write(data + len - tail, tail);
    return result;
  }

  /// Converts 32 bit samples chunk-wise w/o mutating the const input buffer
  size_t writeConverted32(const uint8_t *data, size_t len) {
    int32_t buffer[chunk_samples];
    const int32_t *source = (const int32_t *)data;
    size_t sample_count = len / sizeof(int32_t);
    size_t result = 0;
    for (size_t pos = 0; pos < sample_count;) {
      size_t n = sample_count - pos;
      if (n > (size_t)chunk_samples) n = chunk_samples;
      for (size_t j = 0; j < n; j++) buffer[j] = ntohl(source[pos + j]);
      result += pt_print->write((const uint8_t *)buffer, n * sizeof(int32_t));
      pos += n;
    }
    // forward a trailing partial sample unchanged
    size_t tail = len - sample_count * sizeof(int32_t);
    if (tail > 0) result += pt_print->write(data + len - tail, tail);
    return result;
  }
};
/**
* @brief Encoder which converts from the host format to the network format.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class EncoderNetworkFormat : public AudioEncoder {
 public:
  EncoderNetworkFormat() { TRACED(); }
  /// @brief Constructor which defines the output stream
  EncoderNetworkFormat(Print &out_stream) {
    TRACED();
    pt_print = &out_stream;
  }
  /// @brief Constructor with output stream; the AudioInfoSupport parameter is
  /// accepted for API compatibility but not used
  EncoderNetworkFormat(Print &out_stream, AudioInfoSupport &bi) {
    pt_print = &out_stream;
  }
  ~EncoderNetworkFormat() {}
  /// Defines the output stream for the converted data
  virtual void setOutput(Print &out_stream) { pt_print = &out_stream; }
  bool begin() { return true; }
  void end() {}
  /// Converts the samples from host byte order to the (big endian) network
  /// byte order and writes the result to the output. Unlike the previous
  /// implementation the (const) input buffer is NOT modified.
  size_t write(const uint8_t *data, size_t len) {
    TRACED();
    if (pt_print == nullptr) return 0;
    switch (audioInfo().bits_per_sample) {
      case 8:
        // single bytes: no byte order to convert
        break;
      case 16:
        return writeConverted16(data, len);
      case 24:
      case 32:
        // 24 bit samples are expected to be stored in 4 bytes
        return writeConverted32(data, len);
      default:
        LOGE("bits_per_sample not supported: %d",
             (int)audioInfo().bits_per_sample);
        break;
    }
    return pt_print->write(data, len);
  }
  operator bool() { return true; }
  const char *mime() { return "audio/pcm"; }

 protected:
  Print *pt_print = nullptr;
  // number of samples converted per chunk via a small stack buffer
  static const int chunk_samples = 64;

  /// Converts 16 bit samples chunk-wise w/o mutating the const input buffer
  size_t writeConverted16(const uint8_t *data, size_t len) {
    int16_t buffer[chunk_samples];
    const int16_t *source = (const int16_t *)data;
    size_t sample_count = len / sizeof(int16_t);
    size_t result = 0;
    for (size_t pos = 0; pos < sample_count;) {
      size_t n = sample_count - pos;
      if (n > (size_t)chunk_samples) n = chunk_samples;
      for (size_t j = 0; j < n; j++) buffer[j] = htons(source[pos + j]);
      result += pt_print->write((const uint8_t *)buffer, n * sizeof(int16_t));
      pos += n;
    }
    // forward a trailing partial sample unchanged
    size_t tail = len - sample_count * sizeof(int16_t);
    if (tail > 0) result += pt_print->write(data + len - tail, tail);
    return result;
  }

  /// Converts 32 bit samples chunk-wise w/o mutating the const input buffer
  size_t writeConverted32(const uint8_t *data, size_t len) {
    int32_t buffer[chunk_samples];
    const int32_t *source = (const int32_t *)data;
    size_t sample_count = len / sizeof(int32_t);
    size_t result = 0;
    for (size_t pos = 0; pos < sample_count;) {
      size_t n = sample_count - pos;
      if (n > (size_t)chunk_samples) n = chunk_samples;
      for (size_t j = 0; j < n; j++) buffer[j] = htonl(source[pos + j]);
      result += pt_print->write((const uint8_t *)buffer, n * sizeof(int32_t));
      pos += n;
    }
    // forward a trailing partial sample unchanged
    size_t tail = len - sample_count * sizeof(int32_t);
    if (tail > 0) result += pt_print->write(data + len - tail, tail);
    return result;
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,478 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "Print.h"
#include "opus.h"
#ifndef OPUS_ENC_MAX_BUFFER_SIZE
#define OPUS_ENC_MAX_BUFFER_SIZE 2048
#endif
#ifndef OPUS_DEC_MAX_BUFFER_SIZE
#define OPUS_DEC_MAX_BUFFER_SIZE 4 * 1024
#endif
namespace audio_tools {
/**
* @brief Setting for Opus Decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
struct OpusSettings : public AudioInfo {
  OpusSettings() {
    // Opus processes 16 bit samples only
    bits_per_sample = 16;
    // mono or stereo
    channels = 2;
    // supported rates: 8000, 12000, 16000, 24000, 48000
    sample_rate = 48000;
  }
  /// maximum size of the decoded (PCM) result buffer
  int max_buffer_size = OPUS_DEC_MAX_BUFFER_SIZE;
  /// maximum number of bytes submitted per output write call
  int max_buffer_write_size = 512;
};
/**
* @brief Setting for Opus Encoder where the following values are valid:
* -1 indicates that the default value should be used and that this codec is not
setting the value.
*
int channels[2] = {1, 2};<br>
int applications[3] = {OPUS_APPLICATION_AUDIO, OPUS_APPLICATION_VOIP,
OPUS_APPLICATION_RESTRICTED_LOWDELAY};<br>
int sample_rates[] = {8000,12000,16000 ,24000,48000}<br>
int bitrates[11] = {6000, 12000, 16000, 24000, 32000, 48000,
64000, 96000, 510000, OPUS_AUTO, OPUS_BITRATE_MAX};<br>
int force_channels[4] = {OPUS_AUTO, OPUS_AUTO, 1, 2};<br>
int use_vbr[3] = {0, 1, 1};<br>
int vbr_constraints[3] = {0, 1, 1};<br>
int complexities[11] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};<br>
int max_bandwidths[6] = {
OPUS_BANDWIDTH_NARROWBAND, OPUS_BANDWIDTH_MEDIUMBAND,
OPUS_BANDWIDTH_WIDEBAND, OPUS_BANDWIDTH_SUPERWIDEBAND,
OPUS_BANDWIDTH_FULLBAND, OPUS_BANDWIDTH_FULLBAND};<br>
int signals[4] = {OPUS_AUTO, OPUS_AUTO, OPUS_SIGNAL_VOICE,
OPUS_SIGNAL_MUSIC};<br> int inband_fecs[3] = {0, 0, 1};<br> int
packet_loss_perc[4] = {0, 1, 2, 5};<br> int lsb_depths[2] = {8, 24};<br> int
prediction_disabled[3] = {0, 0, 1};<br> int use_dtx[2] = {0, 1};<br> int
frame_sizes_ms_x2[9] =
{OPUS_FRAMESIZE_2_5_MS,OPUS_FRAMESIZE_5_MS,OPUS_FRAMESIZE_10_MS,OPUS_FRAMESIZE_20_MS,OPUS_FRAMESIZE_40_MS,OPUS_FRAMESIZE_60_MS,OPUS_FRAMESIZE_80_MS,OPUS_FRAMESIZE_100_MS,OPUS_FRAMESIZE_120_MS}
x2 to avoid 2.5 ms <br>
* @author Phil Schatzmann
* @copyright GPLv3
**/
struct OpusEncoderSettings : public OpusSettings {
  OpusEncoderSettings() : OpusSettings() {
    /// Default is 5760
    max_buffer_size = OPUS_ENC_MAX_BUFFER_SIZE;
  }
  // For all values below -1 means: keep the opus library default
  /// OPUS_APPLICATION_AUDIO, OPUS_APPLICATION_VOIP,
  /// OPUS_APPLICATION_RESTRICTED_LOWDELAY
  int application = OPUS_APPLICATION_AUDIO;
  /// 6000, 12000, 16000, 24000, 32000, 48000, 64000, 96000, 510000,
  /// OPUS_AUTO, OPUS_BITRATE_MAX
  int bitrate = -1;
  /// OPUS_AUTO, OPUS_AUTO, 1, 2
  int force_channel = -1;
  /// 0, 1
  int vbr = -1;
  /// 0, 1
  int vbr_constraint = -1;
  /// 0 to 10 (higher = better quality, more CPU)
  int complexity = -1;
  /// OPUS_BANDWIDTH_NARROWBAND,
  /// OPUS_BANDWIDTH_MEDIUMBAND,OPUS_BANDWIDTH_WIDEBAND,
  /// OPUS_BANDWIDTH_SUPERWIDEBAND, OPUS_BANDWIDTH_FULLBAND,
  /// OPUS_BANDWIDTH_FULLBAND
  int max_bandwidth = -1;
  /// OPUS_AUTO, OPUS_SIGNAL_VOICE, OPUS_SIGNAL_MUSIC
  int signal = -1;
  /// 0, 1 (in-band forward error correction)
  int inband_fec = -1;
  /// 0, 1, 2, 5 (expected packet loss percentage)
  int packet_loss_perc = -1;
  /// 8, 24
  int lsb_depth = -1;
  /// 0, 1
  int prediction_disabled = -1;
  /// 0, 1 (discontinuous transmission)
  int use_dtx = -1;
  /// OPUS_FRAMESIZE_2_5_MS,OPUS_FRAMESIZE_5_MS,OPUS_FRAMESIZE_10_MS,OPUS_FRAMESIZE_20_MS,OPUS_FRAMESIZE_40_MS,OPUS_FRAMESIZE_60_MS,OPUS_FRAMESIZE_80_MS,OPUS_FRAMESIZE_100_MS,OPUS_FRAMESIZE_120_MS
  int frame_sizes_ms_x2 = -1; /* x2 to avoid 2.5 ms */
};
/**
* @brief Decoder for the Opus audio format.
* Each Opus frame must be provided with one write() call. Therefore, Opus
* is usually encapsulated in a container format (e.g., Ogg) that splits
* the stream into frames.
*
* Depends on https://github.com/pschatzmann/arduino-libopus.git
*
* @author Phil Schatzmann
* @ingroup codecs
* @ingroup decoder
* @copyright GPLv3
*/
class OpusAudioDecoder : public AudioDecoder {
 public:
  /**
   * @brief Construct a new OpusDecoder object
   * @param releaseOnEnd if true the work buffers are released in end()
   */
  OpusAudioDecoder(bool releaseOnEnd = false) : release_on_end(releaseOnEnd) {}
  /**
   * @brief Construct a new OpusDecoder object
   *
   * @param out_stream Output Stream to which we write the decoded result
   */
  OpusAudioDecoder(Print &out_stream) {
    TRACED();
    setOutput(out_stream);
  }
  /// Defines the output Stream
  void setOutput(Print &out_stream) override { p_print = &out_stream; }
  /// Provides the current audio parameters
  AudioInfo audioInfo() override { return cfg; }
  /// Provides access to the configuration
  OpusSettings &config() { return cfg; }
  OpusSettings &defaultConfig() { return cfg; }
  /// Starts the processing with the provided settings
  bool begin(OpusSettings settings) {
    TRACED();
    AudioDecoder::setAudioInfo(settings);
    cfg = settings;
    notifyAudioChange(cfg);
    return begin();
  }
  /// Starts the processing: allocates the output and decoder buffers and
  /// initializes the opus decoder in place
  bool begin() override {
    TRACED();
    if (!isValidRate(cfg.sample_rate)) {
      LOGE("Sample rate not supported: %d", cfg.sample_rate);
      return false;
    }
    outbuf.resize(cfg.max_buffer_size);
    assert(outbuf.data() != nullptr);
    // allocate decoder
    // the decoder state is placed into decbuf, so no opus-side heap
    // allocation is needed (and no opus_decoder_destroy in end())
    size_t size = opus_decoder_get_size(cfg.channels);
    decbuf.resize(size);
    assert(decbuf.data() != nullptr);
    dec = (OpusDecoder *)decbuf.data();
    int err = opus_decoder_init(dec, cfg.sample_rate, cfg.channels);
    if (err != OPUS_OK) {
      LOGE("opus_decoder_create: %s for sample_rate: %d, channels:%d",
           opus_strerror(err), cfg.sample_rate, cfg.channels);
      return false;
    }
    active = true;
    return true;
  }
  /// Stops the processing; releases the buffers only when release_on_end
  /// was requested
  void end() override {
    TRACED();
    // dec points into decbuf: just drop the pointer, no destroy needed
    dec = nullptr;
    if (release_on_end) {
      outbuf.resize(0);
      decbuf.resize(0);
    }
    active = false;
  }
  /// Updates sample rate, channels and bits from the provided info
  void setAudioInfo(AudioInfo from) override {
    AudioDecoder::setAudioInfo(from);
    info = from;
    cfg.sample_rate = from.sample_rate;
    cfg.channels = from.channels;
    cfg.bits_per_sample = from.bits_per_sample;
  }
  /// write one full opus frame
  size_t write(const uint8_t *data, size_t len) override {
    if (!active || p_print == nullptr) return 0;
    // decode data
    LOGD("OpusAudioDecoder::write: %d", (int)len);
    int in_band_forward_error_correction = 0;
    // maximum number of samples (per channel) that fit into the out buffer
    int frame_count = cfg.max_buffer_size / cfg.channels / sizeof(opus_int16);
    int out_samples =
        opus_decode(dec, (uint8_t *)data, len, (opus_int16 *)outbuf.data(),
                    frame_count, in_band_forward_error_correction);
    if (out_samples < 0) {
      LOGW("opus-decode: %s", opus_strerror(out_samples));
    } else if (out_samples > 0) {
      // write data to final destination
      int out_bytes = out_samples * cfg.channels * sizeof(int16_t);
      LOGD("opus-decode: %d", out_bytes);
      // split the output into chunks of max_buffer_write_size bytes
      // NOTE(review): assumes p_print->write() makes progress; a sink that
      // permanently returns 0 would spin here - verify against callers
      int open = out_bytes;
      int processed = 0;
      while (open > 0) {
        int to_write = std::min(open, cfg.max_buffer_write_size);
        int written = p_print->write(outbuf.data() + processed, to_write);
        open -= written;
        processed += written;
      }
    }
    return len;
  }
  operator bool() override { return active; }
  /// Defines if the resources should be released when the stream is closed
  /// (default: false)
  void setReleaseOnEnd(bool flag) { release_on_end = flag; }

 protected:
  Print *p_print = nullptr;
  // decoder state: points into decbuf (not separately allocated)
  OpusDecoder *dec = nullptr;
  OpusSettings cfg;
  bool active = false;
  // PCM result buffer
  Vector<uint8_t> outbuf{0};
  // storage for the opus decoder state
  Vector<uint8_t> decbuf{0};
  // sample rates supported by opus
  const uint32_t valid_rates[5] = {8000, 12000, 16000, 24000, 48000};
  bool release_on_end = false;

  /// Checks if the rate is one of the rates supported by opus
  bool isValidRate(int rate) {
    for (auto &valid : valid_rates) {
      if (valid == rate) return true;
    }
    return false;
  }
};
/**
* @brief Encode for Opus audio.
*
* Depends on https://github.com/pschatzmann/arduino-libopus.git
* Please note that each fully encoded frame is written to the output stream.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OpusAudioEncoder : public AudioEncoder {
public:
// Empty Constructor - the output stream must be provided with begin()
OpusAudioEncoder() = default;
// Constructor providing the output stream
OpusAudioEncoder(Print &out) { setOutput(out); }
/// Defines the output Stream
void setOutput(Print &out_stream) override { p_print = &out_stream; }
/// Provides "audio/pcm"
const char *mime() override { return "audio/opus"; }
/// We actually do nothing with this
void setAudioInfo(AudioInfo from) override {
AudioEncoder::setAudioInfo(from);
cfg.sample_rate = from.sample_rate;
cfg.channels = from.channels;
cfg.bits_per_sample = from.bits_per_sample;
}
/// starts the processing using the actual OpusAudioInfo
bool begin() override {
int err;
int size = getFrameSizeSamples(cfg.sample_rate) * 2;
frame.resize(size);
assert(frame.data() != nullptr);
enc = opus_encoder_create(cfg.sample_rate, cfg.channels, cfg.application,
&err);
if (err != OPUS_OK) {
LOGE("opus_encoder_create: %s for sample_rate: %d, channels:%d",
opus_strerror(err), cfg.sample_rate, cfg.channels);
return false;
}
is_open = settings();
return true;
}
/// Provides access to the configuration
OpusEncoderSettings &config() { return cfg; }
OpusEncoderSettings &defaultConfig() { return cfg; }
bool begin(OpusEncoderSettings settings) {
cfg = settings;
return begin();
}
/// stops the processing
void end() override {
// flush buffered data
encodeFrame();
// release memory
opus_encoder_destroy(enc);
is_open = false;
}
/// Writes PCM data to be encoded as Opus
size_t write(const uint8_t *data, size_t len) override {
if (!is_open || p_print == nullptr) return 0;
LOGD("OpusAudioEncoder::write: %d", (int)len);
// fill frame
for (int j = 0; j < len; j++) {
encodeByte(data[j]);
}
return len;
}
operator bool() override { return is_open; }
bool isOpen() { return is_open; }
protected:
Print *p_print = nullptr;
OpusEncoder *enc = nullptr;
OpusEncoderSettings cfg;
bool is_open = false;
Vector<uint8_t> frame{0};
int frame_pos = 0;
void encodeByte(uint8_t data) {
// add byte to frame
frame[frame_pos++] = data;
// if frame is complete -> encode
if (frame_pos >= frame.size()) {
encodeFrame();
frame_pos = 0;
}
}
void encodeFrame() {
if (frame.size() > 0) {
// allocate temp buffer on stack
int packet_len =
OPUS_ENC_MAX_BUFFER_SIZE > 0 ? OPUS_ENC_MAX_BUFFER_SIZE : 512;
uint8_t packet[packet_len];
int frames = frame.size() / cfg.channels / sizeof(int16_t);
LOGD("opus_encode - frame_size: %d", frames);
int len = opus_encode(enc, (opus_int16 *)frame.data(), frames, packet,
packet_len);
if (len < 0) {
LOGE("opus_encode: %s", opus_strerror(len));
} else if (len > 0) {
LOGD("opus-encode: %d", len);
int eff = p_print->write(packet, len);
if (eff != len) {
LOGE("encodeFrame data lost: %d->%d", len, eff);
}
}
}
}
/// Returns the frame size in samples
int getFrameSizeSamples(int sampling_rate) {
switch (cfg.frame_sizes_ms_x2) {
case OPUS_FRAMESIZE_2_5_MS:
return sampling_rate / 400;
case OPUS_FRAMESIZE_5_MS:
return sampling_rate / 200;
case OPUS_FRAMESIZE_10_MS:
return sampling_rate / 100;
case OPUS_FRAMESIZE_20_MS:
return sampling_rate / 50;
case OPUS_FRAMESIZE_40_MS:
return sampling_rate / 25;
case OPUS_FRAMESIZE_60_MS:
return 3 * sampling_rate / 50;
case OPUS_FRAMESIZE_80_MS:
return 4 * sampling_rate / 50;
case OPUS_FRAMESIZE_100_MS:
return 5 * sampling_rate / 50;
case OPUS_FRAMESIZE_120_MS:
return 6 * sampling_rate / 50;
}
return sampling_rate / 100;
}
bool settings() {
bool ok = true;
if (cfg.bitrate >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_BITRATE(cfg.bitrate)) != OPUS_OK) {
LOGE("invalid bitrate: %d", cfg.bitrate);
ok = false;
}
if (cfg.force_channel >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(cfg.force_channel)) !=
OPUS_OK) {
LOGE("invalid force_channel: %d", cfg.force_channel);
ok = false;
};
if (cfg.vbr >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_VBR(cfg.vbr)) != OPUS_OK) {
LOGE("invalid vbr: %d", cfg.vbr);
ok = false;
}
if (cfg.vbr_constraint >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_VBR_CONSTRAINT(cfg.vbr_constraint)) !=
OPUS_OK) {
LOGE("invalid vbr_constraint: %d", cfg.vbr_constraint);
ok = false;
}
if (cfg.complexity >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(cfg.complexity)) != OPUS_OK) {
LOGE("invalid complexity: %d", cfg.complexity);
ok = false;
}
if (cfg.max_bandwidth >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_MAX_BANDWIDTH(cfg.max_bandwidth)) !=
OPUS_OK) {
LOGE("invalid max_bandwidth: %d", cfg.max_bandwidth);
ok = false;
}
if (cfg.signal >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_SIGNAL(cfg.signal)) != OPUS_OK) {
LOGE("invalid signal: %d", cfg.signal);
ok = false;
}
if (cfg.inband_fec >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(cfg.inband_fec)) != OPUS_OK) {
LOGE("invalid inband_fec: %d", cfg.inband_fec);
ok = false;
}
if (cfg.packet_loss_perc >= 0 &&
opus_encoder_ctl(
enc, OPUS_SET_PACKET_LOSS_PERC(cfg.packet_loss_perc)) != OPUS_OK) {
LOGE("invalid pkt_loss: %d", cfg.packet_loss_perc);
ok = false;
}
if (cfg.lsb_depth >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(cfg.lsb_depth)) != OPUS_OK) {
LOGE("invalid lsb_depth: %d", cfg.lsb_depth);
ok = false;
}
if (cfg.prediction_disabled >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_PREDICTION_DISABLED(
cfg.prediction_disabled)) != OPUS_OK) {
LOGE("invalid pred_disabled: %d", cfg.prediction_disabled);
ok = false;
}
if (cfg.use_dtx >= 0 &&
opus_encoder_ctl(enc, OPUS_SET_DTX(cfg.use_dtx)) != OPUS_OK) {
LOGE("invalid use_dtx: %d", cfg.use_dtx);
ok = false;
}
if (cfg.frame_sizes_ms_x2 > 0 &&
opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(
cfg.frame_sizes_ms_x2)) != OPUS_OK) {
LOGE("invalid frame_sizes_ms_x2: %d", cfg.frame_sizes_ms_x2);
ok = false;
}
return ok;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,178 @@
#pragma once
#include "AudioTools/AudioCodecs/CodecOpus.h"
#include "AudioTools/AudioCodecs/ContainerOgg.h"
namespace audio_tools {
/// Opus header
struct __attribute__((packed)) OpusOggHeader {
  // magic signature identifying the Opus identification header packet
  char signature[8] = {'O', 'p', 'u', 's', 'H', 'e', 'a', 'd'};
  uint8_t version = 1;
  uint8_t channelCount = 0;
  // pre-skip in 48 kHz samples (3840 = 80 ms, the RFC 7845 recommendation)
  uint16_t preSkip = 3840;
  // original input sample rate (informational per RFC 7845)
  uint32_t sampleRate = 0;
  // output gain in Q7.8 dB
  int16_t outputGain = 0;
  // 0 = mono/stereo mapping family
  uint8_t channelMappingFamily = 0;
};
/// Simplified header w/o comments
struct __attribute__((packed)) OpusOggCommentHeader {
  // magic signature identifying the Opus comment header packet
  char signature[8] = {'O', 'p', 'u', 's', 'T', 'a', 'g', 's'};
  // NOTE(review): 8 counts "Arduino" plus its terminating 0; RFC 7845 counts
  // only the string bytes (7) - verify against strict parsers
  uint32_t vendorStringLength = 8;
  char vendor[8] = "Arduino";
  // no user comments are written
  uint32_t userCommentListLength = 0;
};
/**
* @brief Opus Decoder which uses the Ogg Container. See
* https://datatracker.ietf.org/doc/html/rfc7845. The audio data is transmitted
* in frames and the header information contains the sampler rate, channels and
* other critical info.
* Dependency: https://github.com/pschatzmann/arduino-libopus
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OpusOggDecoder : public OggContainerDecoder {
public:
OpusOggDecoder() {
p_codec = &dec; // OpusAudioDecoder
out.setDecoder(p_codec);
};
/// Provides access to the Opus configuration
OpusSettings &config() { return dec.config(); }
bool begin(OpusSettings settings) {
OggContainerDecoder::begin();
return dec.begin(settings);
}
bool begin() override {
TRACED();
OggContainerDecoder::begin();
return dec.begin();
}
void end() override {
TRACED();
OggContainerDecoder::end();
dec.end();
}
protected:
OpusOggHeader header;
OpusAudioDecoder dec;
virtual void beginOfSegment(ogg_packet *op) override {
LOGD("bos");
if (op->packet == nullptr) return;
if (strncmp("OpusHead", (char *)op->packet, 8) == 0) {
memmove(&header, (char *)op->packet, sizeof(header));
AudioInfo info = audioInfo();
info.sample_rate = header.sampleRate;
info.channels = header.channelCount;
info.bits_per_sample = 16;
info.logInfo();
setAudioInfo(info);
} else if (strncmp("OpusTags", (char *)op->packet, 8) == 0) {
// not processed
}
}
};
/// Ogg output that writes the Opus specific header packets (see RFC 7845)
class OpusOggWriter : public OggContainerOutput {
 protected:
  // OpusHead identification header
  OpusOggHeader header;
  // OpusTags comment header
  OpusOggCommentHeader comment;
  // separate ogg packet for the comment header
  ogg_packet oh1;

  /// Writes the Opus identification header (OpusHead) followed by the
  /// comment header (OpusTags); uses oh, packetno, cfg and writePacket()
  /// inherited from OggContainerOutput
  bool writeHeader() override {
    LOGI("writeHeader");
    bool result = true;
    header.sampleRate = cfg.sample_rate;
    header.channelCount = cfg.channels;
    // write header
    oh.packet = (uint8_t *)&header;
    oh.bytes = sizeof(header);
    oh.granulepos = 0;
    oh.packetno = packetno++;
    oh.b_o_s = true;
    oh.e_o_s = false;
    if (!writePacket(oh)) {
      result = false;
      LOGE("writePacket-header");
    }
    // write comment header
    oh1.packet = (uint8_t *)&comment;
    oh1.bytes = sizeof(comment);
    oh1.granulepos = 0;
    oh1.packetno = packetno++;
    // NOTE(review): b_o_s is set on the second packet as well - the ogg
    // begin-of-stream flag usually applies only to the first packet; verify
    oh1.b_o_s = true;
    oh1.e_o_s = false;
    if (!writePacket(oh1, OGGZ_FLUSH_AFTER)) {
      result = false;
      LOGE("writePacket-header1");
    }
    TRACED();
    return result;
  }
};
/**
* @brief Opus Encoder which uses the Ogg Container: see
* https://datatracker.ietf.org/doc/html/rfc7845
* Dependency: https://github.com/pschatzmann/arduino-libopus
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OpusOggEncoder : public OggContainerEncoder {
public:
OpusOggEncoder() {
setOggOutput(&ogg_writer);
setEncoder(&enc);
}
/// Provides "audio/opus"
const char *mime() override { return "audio/ogg;codecs=opus"; }
/// Provides access to the Opus config
OpusEncoderSettings &config() { return enc.config(); }
/// provides the frame duration in us (e.g. for RTSP)
uint32_t frameDurationUs() override {
// Get frame duration from encoder settings
int frameDurationMs = config().frame_sizes_ms_x2;
uint32_t frameDurationUs = 20000;
switch (frameDurationMs) {
case OPUS_FRAMESIZE_2_5_MS:
frameDurationUs = 2500;
break;
case OPUS_FRAMESIZE_5_MS:
frameDurationUs = 5000;
break;
case OPUS_FRAMESIZE_10_MS:
frameDurationUs = 10000;
break;
case OPUS_FRAMESIZE_20_MS:
frameDurationUs = 20000;
break;
}
return frameDurationUs;
}
protected:
// use custom writer
OpusOggWriter ogg_writer;
// use opus encoder
OpusAudioEncoder enc;
};
#include "AudioTools/Communication/RTSP/RTSPFormat.h"
} // namespace audio_tools

View File

@@ -0,0 +1,400 @@
/**
* @file CodecSBC.h
* @author Phil Schatzmann
* @brief SBC Codec using https://github.com/pschatzmann/arduino-libsbc
* @version 0.1
* @date 2022-04-24
*/
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "sbc.h"
#include "sbc/formats.h"
namespace audio_tools {
/**
* @brief Decoder for SBC. Depends on
* https://github.com/pschatzmann/arduino-libsbc.
* Inspired by sbcdec.c
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class SBCDecoder : public AudioDecoder {
 public:
  /// @param bufferSize size of the PCM result buffer in bytes
  SBCDecoder(int bufferSize = 8192) {
    result_buffer = new uint8_t[bufferSize];
    result_buffer_size = bufferSize;
  }
  ~SBCDecoder() {
    if (result_buffer != nullptr) delete[] result_buffer;
    if (input_buffer != nullptr) delete[] input_buffer;
  }
  // the class owns raw buffers: prevent double delete through copies
  SBCDecoder(const SBCDecoder &) = delete;
  SBCDecoder &operator=(const SBCDecoder &) = delete;

  virtual bool begin() {
    TRACEI();
    is_first = true;
    is_active = true;
    input_pos = 0;
    sbc_init(&sbc, 0L);
    return true;
  }
  virtual void end() {
    TRACEI();
    sbc_finish(&sbc);
    is_active = false;
  }
  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
  operator bool() { return is_active; }

  /// Decodes the provided SBC data; the first write determines the frame
  /// length and the audio parameters
  virtual size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", (int)len);
    if (!is_active) {
      LOGE("inactive");
      return 0;
    }
    if (p_print == nullptr) {
      LOGE("output not defined");
      return 0;
    }
    uint8_t *start = (uint8_t *)data;
    int count = len;
    if (is_first) {
      framelen = firstWrite(data, len);
      LOGI("framelen: %d", framelen);
      // check if we have a valid frame length
      if (isValidFrameLen(framelen)) {
        start = start + framelen;
        count = len - framelen;
        is_first = false;
      }
    }
    if (!is_first) {
      for (int j = 0; j < count; j++) {
        processByte(start[j]);
      }
    }
    return len;
  }
  /// Provides the uncompressed length (of the PCM data) in bytes
  int bytesUncompressed() { return codeSize(); }
  /// Provides the compressed length in bytes (after encoding)
  int bytesCompressed() { return frameLength(); }

 protected:
  Print *p_print = nullptr;
  sbc_t sbc;
  bool is_first = true;
  bool is_active = false;
  uint8_t *result_buffer = nullptr;
  int result_buffer_size;
  int framelen;
  uint8_t *input_buffer = nullptr;
  int input_pos = 0;

  /// Provides the compressed length in bytes (after encoding)
  int frameLength() { return sbc_get_frame_length(&sbc); }
  /// Provides the uncompressed length (of the PCM data) in bytes
  int codeSize() { return sbc_get_codesize(&sbc); }

  /// Determines channels and sample rate from the parsed sbc state and
  /// notifies the listeners
  void setupAudioInfo() {
    info.bits_per_sample = 16;
    info.channels = sbc.mode == SBC_MODE_MONO ? 1 : 2;
    LOGI("channels: %d", info.channels);
    switch (sbc.frequency) {
      case SBC_FREQ_16000:
        info.sample_rate = 16000;
        break;
      case SBC_FREQ_32000:
        info.sample_rate = 32000;
        break;
      case SBC_FREQ_44100:
        info.sample_rate = 44100;
        break;
      case SBC_FREQ_48000:
        info.sample_rate = 48000;
        break;
      default:
        LOGE("Unsupported sample rate");
        info.sample_rate = 0;
        break;
    }
    LOGI("sample_rate: %d", info.sample_rate);
    notifyAudioChange(info);
  }

  bool isValidFrameLen(int len) { return len > 0 && len < 256; }

  /// Determines the framelen by parsing the first frame header
  int firstWrite(const void *data, size_t length) {
    int frame_len = sbc_parse(&sbc, data, length);
    if (isValidFrameLen(frame_len)) {
      // setup audio info
      setupAudioInfo();
      // setup input buffer for subsequent decoding steps
      setupInputBuffer(frame_len);
    }
    return frame_len;
  }

  void setupInputBuffer(int len) {
    LOGI("input_buffer: %d", len);
    if (input_buffer != nullptr) delete[] input_buffer;
    input_buffer = new uint8_t[len];
  }

  /// Build decoding buffer and decode when frame is full
  void processByte(uint8_t byte) {
    // add byte to buffer
    input_buffer[input_pos++] = byte;
    // decode if buffer is full
    if (input_pos >= framelen) {
      size_t result_len = 0;
      sbc_decode(&sbc, input_buffer, framelen, result_buffer,
                 result_buffer_size, &result_len);
      if (result_len > 0) {
        p_print->write(result_buffer, result_len);
      }
      input_pos = 0;
    }
  }
};
/**
* @brief Encoder for SBC - Depends on
* https://github.com/pschatzmann/arduino-libsbc.
* Inspired by sbcenc.c
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class SBCEncoder : public AudioEncoder {
public:
SBCEncoder(int subbands = 8, int blocks = 16, int bitpool = 32,
int allocation_method = SBC_AM_LOUDNESS) {
setSubbands(subbands);
setBlocks(blocks);
setBitpool(bitpool);
setAllocationMethod(allocation_method);
}
/// Defines the subbands: Use 4 or 8
void setSubbands(int subbands) {
if (subbands == 8 || subbands == 4) {
this->subbands = subbands;
} else {
LOGE("Invalid subbands: %d - using 8", subbands);
this->subbands = 8;
}
}
/// Defines the number of blocks: valid values (4,8,12,16)
void setBlocks(int blocks) {
if (blocks == 16 || blocks == 12 || blocks == 8 || blocks == 4) {
this->blocks = blocks;
} else {
LOGE("Invalid blocks: %d - using 16", blocks);
this->blocks = 16;
}
}
/// Defines the bitpool (2-86?)
void setBitpool(int bitpool) { this->bitpool = bitpool; }
/// Defines the allocation method: Use SBC_AM_LOUDNESS, SBC_AM_SNR
void setAllocationMethod(int allocation_method) {
if (allocation_method == SBC_AM_LOUDNESS || allocation_method == SBC_AM_SNR) {
this->allocation_method = allocation_method;
} else {
LOGE("Invalid allocation Method: %d - using SBC_AM_LOUDNESS", allocation_method);
this->allocation_method = SBC_AM_LOUDNESS;
}
}
/// Restarts the processing
bool begin() {
TRACEI();
is_first = true;
is_active = setup();
current_codesize = codeSize();
buffer.resize(current_codesize);
result_buffer.resize(frameLength());
return true;
}
/// Ends the processing
virtual void end() {
TRACEI();
sbc_finish(&sbc);
is_active = false;
}
virtual const char *mime() { return "audio/sbc"; }
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
operator bool() { return is_active; }
virtual size_t write(const uint8_t *data, size_t len) {
LOGD("write: %d", len);
if (!is_active) {
LOGE("inactive");
return 0;
}
if (p_print==nullptr){
LOGE("output not defined");
return 0;
}
// encode bytes
for (int j = 0; j < len; j++) {
processByte(data[j]);
}
return len;
}
int bytesUncompressed() {
return codeSize();
}
int bytesCompressed() {
return frameLength();
}
protected:
Print *p_print = nullptr;
sbc_t sbc;
bool is_first = true;
bool is_active = false;
int current_codesize = 0;
int buffer_pos = 0;
Vector<uint8_t> buffer{0};
Vector<uint8_t> result_buffer{0};
int subbands = 4;
int blocks = 4;
int bitpool = 32;
int allocation_method;
/// Provides the compressed length in bytes (after encoding)
int frameLength() { return sbc_get_frame_length(&sbc); }
/// Provides the uncompressed length (of the PCM data) in bytes
int codeSize() { return sbc_get_codesize(&sbc); }
/// Determines audio information and calls sbc_init;
bool setup() {
sbc_init(&sbc, 0L);
if (info.bits_per_sample!=16){
LOGE("Invalid bits_per_sample: %d", info.bits_per_sample);
return false;
}
switch (info.sample_rate) {
case 16000:
sbc.frequency = SBC_FREQ_16000;
break;
case 32000:
sbc.frequency = SBC_FREQ_32000;
break;
case 44100:
sbc.frequency = SBC_FREQ_44100;
break;
case 48000:
sbc.frequency = SBC_FREQ_48000;
break;
default:
LOGE("Invalid sample_rate: %d", info.sample_rate);
return false;
}
switch (info.channels) {
case 1:
sbc.mode = SBC_MODE_MONO;
break;
case 2:
sbc.mode = SBC_MODE_STEREO;
break;
default:
LOGE("Invalid channels: %d", info.channels);
return false;
}
switch (subbands) {
case 4:
sbc.subbands = SBC_SB_4;
break;
case 8:
sbc.subbands = SBC_SB_8;
break;
default:
LOGE("Invalid subbands: %d", subbands);
return false;
}
switch (blocks) {
case 4:
sbc.blocks = SBC_BLK_4;
break;
case 8:
sbc.blocks = SBC_BLK_8;
break;
case 12:
sbc.blocks = SBC_BLK_12;
break;
case 16:
sbc.blocks = SBC_BLK_16;
break;
default:
LOGE("Invalid blocks: %d", blocks);
return false;
}
sbc.bitpool = bitpool;
sbc.allocation = allocation_method;
return true;
}
// add byte to decoding buffer and decode if buffer is full
void processByte(uint8_t byte) {
buffer[buffer_pos++] = byte;
if (buffer_pos >= current_codesize) {
ssize_t written;
// Encodes ONE input block into ONE output block */
// ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len,
// void *output, size_t output_len, ssize_t *written);
sbc_encode(&sbc, &buffer[0], current_codesize, &result_buffer[0],
result_buffer.size(), &written);
LOGD("sbc_encode: %d -> %d (buffer: %d))", current_codesize, written,
result_buffer.size());
p_print->write(&result_buffer[0], written);
buffer_pos = 0;
}
}
};
} // namespace audio_tools

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,297 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioToolsConfig.h"
#include "ogg.h"
#include "vorbis-tremor.h"
// #include "AudioTools/AudioCodecs/ContainerOgg.h"
// #include "ivorbiscodec.h"
// #include "ivorbisfile.h"
namespace audio_tools {
#ifndef VARBIS_MAX_READ_SIZE
#define VARBIS_MAX_READ_SIZE 1024
#endif
#define VORBIS_HEADER_OPEN_LIMIT 1024
/**
* @brief Vorbis Streaming Decoder using
* https://github.com/pschatzmann/arduino-libvorbis-tremor
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class VorbisDecoder : public StreamingDecoder {
public:
VorbisDecoder() = default;
/// Destroy the VorbisDecoder object
~VorbisDecoder() {
if (active) {
end();
}
}
/// Starts the processing
bool begin() override {
LOGI("begin");
// Ensure we start with clean state
if (active) {
LOGW("Decoder already active, calling end() first");
end();
}
callbacks.read_func = read_func;
callbacks.seek_func = seek_func;
callbacks.close_func = nullptr;
callbacks.tell_func = tell_func;
assert(p_input != nullptr);
if (p_input->available() < VORBIS_HEADER_OPEN_LIMIT) {
delay(delay_wait_for_data_ms);
}
LOGI("available: %d", p_input->available());
is_ov_open = ovOpen();
LOGI("ovOpen result: %d", is_ov_open);
active = is_ov_open;
return is_ov_open;
}
/// Releases the reserved memory
void end() override {
LOGI("end");
if (is_ov_open && active) {
ov_clear(&file);
LOGI("ov_clear completed");
}
is_ov_open = false;
is_first = true;
active = false;
pcm.clear(); // Free the PCM buffer
}
/// Provides the last available MP3FrameInfo
AudioInfo audioInfo() override { return cfg; }
/// checks if the class is active
virtual operator bool() override { return active; }
virtual bool copy() override {
TRACED();
// open if not already done
if (!is_ov_open) {
if (!ovOpen()) {
LOGE("Failed to open Vorbis stream");
return false;
}
}
// Defensive checks before calling Vorbis functions
if (pcm.data() == nullptr) {
LOGE("PCM buffer is null - memory allocation failed");
return false;
}
if (pcm.size() == 0) {
LOGE("PCM buffer size is 0");
return false;
}
// Additional sanity check for the file structure
if (!active) {
LOGE("Decoder is not active");
return false;
}
LOGD("ov_read: buffer size %d", pcm.size());
bitstream = 0;
// Call ov_read with additional error checking
long result = ov_read(&file, (char *)pcm.data(), pcm.size(), &bitstream);
LOGI("copy result: %d", (int)result);
if (result > 0) {
AudioInfo current = currentInfo();
if (current != cfg) {
cfg = current;
cfg.logInfo();
notifyAudioChange(cfg);
}
if (p_print != nullptr) {
p_print->write(pcm.data(), result);
} else {
LOGE("Output stream is null");
return false;
}
delay(1);
return true;
} else {
if (result == 0 || result == -3) {
// data interruption
LOGD("copy: %d - %s", (int)result, readError(result));
} else {
LOGE("copy: %d - %s", (int)result, readError(result));
}
delay(delay_on_no_data_ms);
return false;
}
}
/// Provides the mime type: "audio/vorbis+ogg"
const char *mime() override { return "audio/vorbis+ogg"; }
/// Defines the delay (ms) that is applied when there is no data to decode
void setDelayOnNoData(size_t delay) { delay_on_no_data_ms = delay; }
/// Defines the delay (ms) to wait if there is not enough data to open the decoder
void setWaitForData(size_t wait) { delay_wait_for_data_ms = wait; }
/// Defines the default read size (size of the PCM working buffer)
void setReadSize(size_t size) {
  max_read_size = size;
  // Warn about oversized buffers that may strain memory-constrained targets
  if (size > 8192) {
    LOGW("Read size %zu is very large, consider smaller buffer", max_read_size);
  }
}
protected:
AudioInfo cfg;                        // last reported audio format
Vector<uint8_t> pcm{0};               // buffer receiving decoded PCM samples
OggVorbis_File file;                  // vorbisfile handle
ov_callbacks callbacks;               // I/O callbacks passed to ov_open_callbacks
int bitstream = 0;                    // logical bitstream index used by ov_read
size_t delay_on_no_data_ms = 100;     // wait time when no data could be decoded
size_t delay_wait_for_data_ms = 500;  // wait time before trying to open the decoder
size_t max_read_size = VARBIS_MAX_READ_SIZE;  // size of the pcm buffer
bool active = false;                  // true after a successful begin()
bool is_first = true;                 // true until the first data was processed
bool is_ov_open = false;              // true when ov_open_callbacks succeeded
/// Allocates the PCM buffer and opens the Vorbis stream via the registered
/// callbacks. Returns true when ov_open_callbacks succeeded.
bool ovOpen() {
  pcm.resize(max_read_size);
  checkMemory(true);
  const int rc = ov_open_callbacks(this, &file, nullptr, 0, callbacks);
  if (rc >= 0) {
    is_ov_open = true;
    LOGI("ov_open_callbacks succeeded");
  } else {
    LOGE("ov_open_callbacks failed with error %d: %s", rc, getOpenError(rc));
  }
  checkMemory(true);
  return is_ov_open;
}
/// Queries the current stream parameters from libvorbis.
/// @return the AudioInfo of the open stream; a default (zeroed) AudioInfo
///         when the stream is not open or ov_info fails.
AudioInfo currentInfo() {
  AudioInfo result;
  if (!is_ov_open) {
    LOGE("Cannot get audio info - stream not open");
    return result;
  }
  vorbis_info *info = ov_info(&file, -1);
  if (info == nullptr) {
    LOGE("ov_info returned null pointer");
    return result;
  }
  result.sample_rate = info->rate;
  result.channels = info->channels;
  // Vorbis is always decoded to 16 bit samples
  result.bits_per_sample = 16;
  // cast: vorbis_info::rate is a long, which does not match %d on all
  // platforms
  LOGD("Audio info - rate: %d, channels: %d", (int)info->rate, info->channels);
  return result;
}
/// Reads up to max_read_size bytes from the configured input stream;
/// used by the static vorbis read callback (read_func).
virtual size_t readBytes(uint8_t *data, size_t len) override {
  size_t read_size = min(len, (size_t)max_read_size);
  size_t result = p_input->readBytes((uint8_t *)data, read_size);
  LOGD("readBytes: %zu", result);
  return result;
}
/// Vorbis read callback: forwards the read request to the VorbisDecoder
/// instance that was registered as datasource in ov_open_callbacks.
static size_t read_func(void *ptr, size_t size, size_t nmemb,
                        void *datasource) {
  // validate before using the pointer
  assert(datasource != nullptr);
  VorbisDecoder *self = (VorbisDecoder *)datasource;
  size_t result = self->readBytes((uint8_t *)ptr, size * nmemb);
  // casts: size_t values do not match %d on all platforms
  LOGD("read_func: %d -> %d", (int)(size * nmemb), (int)result);
  return result;
}
/// Vorbis seek callback: seeking is not supported on a streaming source,
/// so we always report failure (-1).
static int seek_func(void *datasource, ogg_int64_t offset, int whence) {
  (void)datasource;  // unused: no seek state is kept
  (void)offset;
  (void)whence;
  return -1;
}
/// Vorbis tell callback: the stream position is unknown for a non-seekable
/// source, so we always report -1.
static long tell_func(void *datasource) {
  (void)datasource;  // unused: removed the pointless cast to VorbisDecoder
  return -1;
}
// static int close_func(void *datasource) {
// VorbisDecoder *self = (VorbisDecoder *)datasource;
// self->end();
// return 0;
// }
/// Translates an ov_read result code into a human readable message.
/// Non-negative values are byte counts and therefore "OK".
const char *readError(long error) {
  if (error >= 0) {
    return "OK";
  }
  switch (error) {
    case OV_HOLE:
      return "Interruption in the data";
    case OV_EBADLINK:
      return "Invalid stream section";
    case OV_EREAD:
      return "Read error";
    case OV_EFAULT:
      return "Internal fault";
    case OV_EIMPL:
      return "Unimplemented feature";
    case OV_EINVAL:
      return "Invalid argument";
    case OV_ENOTVORBIS:
      return "Not a Vorbis file";
    case OV_EBADHEADER:
      return "Invalid Vorbis header";
    case OV_EVERSION:
      return "Vorbis version mismatch";
    case OV_ENOSEEK:
      return "Stream not seekable";
    default:
      return "Unknown error";
  }
}
/// Translates an ov_open_callbacks result code into a human readable message.
const char *getOpenError(int error) {
  switch (error) {
    case 0:
      return "Success";
    case OV_EREAD:
      return "Read from media error";
    case OV_ENOTVORBIS:
      return "Not Vorbis data";
    case OV_EVERSION:
      return "Vorbis version mismatch";
    case OV_EBADHEADER:
      return "Invalid Vorbis bitstream header";
    case OV_EFAULT:
      return "Internal logic fault";
    default:
      return "Unknown open error";
  }
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,687 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/AudioEncoded.h"
#include "AudioTools/AudioCodecs/AudioFormat.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
#define READ_BUFFER_SIZE 512
#define MAX_WAV_HEADER_LEN 200
namespace audio_tools {
/**
* @brief Sound information which is available in the WAV header
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
struct WAVAudioInfo : AudioInfo {
  WAVAudioInfo() = default;
  // Implicit conversion from the basic AudioInfo: copies the common fields
  WAVAudioInfo(const AudioInfo &from) {
    sample_rate = from.sample_rate;
    channels = from.channels;
    bits_per_sample = from.bits_per_sample;
  }
  AudioFormat format = AudioFormat::PCM;  // format tag from the 'fmt ' chunk
  int byte_rate = 0;         // bytes per second
  int block_align = 0;       // size of one sample frame in bytes
  bool is_streamed = true;   // true when the data length is unknown/unbounded
  bool is_valid = false;     // true when the parsed header was valid
  uint32_t data_length = 0;  // length of the 'data' chunk in bytes
  uint32_t file_size = 0;    // total size reported by the RIFF chunk
  int offset = 0;            // optional padding bytes written after the header
};
// MIME type reported by both the WAV decoder and the WAV encoder
static const char *wav_mime = "audio/wav";
/**
* @brief Parser for Wav header data
* for details see https://de.wikipedia.org/wiki/RIFF_WAVE
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
class WAVHeader {
 public:
  WAVHeader() = default;

  /// Adds data to the 44 byte wav header data buffer and make it available for
  /// parsing
  int write(uint8_t *data, size_t data_len) {
    return buffer.writeArray(data, data_len);
  }

  /// Call begin when header data is complete to parse the data.
  /// Scans for the RIFF/WAVE/fmt /data tags and fills headerInfo.
  bool parse() {
    LOGI("WAVHeader::begin: %u", (unsigned)buffer.available());
    this->data_pos = 0l;
    memset((void *)&headerInfo, 0, sizeof(WAVAudioInfo));
    if (!setPos("RIFF")) return false;
    headerInfo.file_size = read_int32();
    if (!setPos("WAVE")) return false;
    if (!setPos("fmt ")) return false;
    // NOTE(review): fmt_length is read to advance the cursor but not checked
    int fmt_length = read_int32();
    headerInfo.format = (AudioFormat)read_int16();
    headerInfo.channels = read_int16();
    headerInfo.sample_rate = read_int32();
    headerInfo.byte_rate = read_int32();
    headerInfo.block_align = read_int16();
    headerInfo.bits_per_sample = read_int16();
    if (!setPos("data")) return false;
    headerInfo.data_length = read_int32();
    // a zero or implausibly huge data length marks a live/endless stream
    if (headerInfo.data_length == 0 || headerInfo.data_length >= 0x7fff0000) {
      headerInfo.is_streamed = true;
      headerInfo.data_length = ~0;
    }
    logInfo();
    buffer.clear();
    return true;
  }

  /// Returns true if the header is complete (containd data tag)
  bool isDataComplete() {
    int pos = getDataPos();
    return pos > 0 && buffer.available() >= pos;
  }

  /// number of bytes available in the header buffer
  size_t available() { return buffer.available(); }

  /// Determines the data start position using the data tag
  /// (offset of "data" + 4 tag bytes + 4 length bytes)
  int getDataPos() {
    int pos =
        StrView((char *)buffer.data(), MAX_WAV_HEADER_LEN, buffer.available())
            .indexOf("data");
    return pos > 0 ? pos + 8 : 0;
  }

  /// provides the info from the header
  WAVAudioInfo &audioInfo() { return headerInfo; }

  /// Sets the info in the header
  void setAudioInfo(WAVAudioInfo info) { headerInfo = info; }

  /// Writes a wav header (RIFF + fmt + data chunks) to the indicated output;
  /// returns the number of bytes written
  int writeHeader(Print *out) {
    writeRiffHeader(buffer);
    writeFMT(buffer);
    writeDataHeader(buffer);
    int len = buffer.available();
    out->write(buffer.data(), buffer.available());
    return len;
  }

  /// Reset internal stored header information and buffer
  void clear() {
    data_pos = 0;
    WAVAudioInfo empty;
    empty.sample_rate = 0;
    empty.channels = 0;
    empty.bits_per_sample = 0;
    headerInfo = empty;
    buffer.setClearWithZero(true);
    buffer.reset();
  }

  /// Debug helper: dumps the buffered header bytes as printable characters
  /// (non-alphabetic bytes are shown as '.')
  void dumpHeader() {
    char msg[buffer.available() + 1];
    memset(msg, 0, buffer.available() + 1);
    for (int j = 0; j < buffer.available(); j++) {
      char c = (char)buffer.data()[j];
      if (!isalpha(c)) {
        c = '.';
      }
      msg[j] = c;
    }
    LOGI("Header: %s", msg);
  }

 protected:
  struct WAVAudioInfo headerInfo;  // parsed header values
  SingleBuffer<uint8_t> buffer{MAX_WAV_HEADER_LEN};  // raw header bytes
  size_t data_pos = 0;  // read cursor into buffer

  /// Positions the read cursor directly after the given tag; false if missing
  bool setPos(const char *id) {
    int id_len = strlen(id);
    int pos = indexOf(id);
    if (pos < 0) return false;
    data_pos = pos + id_len;
    return true;
  }

  int indexOf(const char *str) {
    return StrView((char *)buffer.data(), MAX_WAV_HEADER_LEN,
                   buffer.available())
        .indexOf(str);
  }

  /// Reads a 4 character tag (big endian byte order)
  uint32_t read_tag() {
    uint32_t tag = 0;
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    return tag;
  }

  uint32_t getChar32() { return getChar(); }

  /// Reads a little endian 32 bit value
  uint32_t read_int32() {
    uint32_t value = 0;
    value |= getChar32() << 0;
    value |= getChar32() << 8;
    value |= getChar32() << 16;
    value |= getChar32() << 24;
    return value;
  }

  /// Reads a little endian 16 bit value
  uint16_t read_int16() {
    uint16_t value = 0;
    value |= getChar() << 0;
    value |= getChar() << 8;
    return value;
  }

  /// Advances the read cursor by n bytes
  void skip(int n) {
    int i;
    for (i = 0; i < n; i++) getChar();
  }

  /// Returns the next byte or -1 at the end of the buffer
  int getChar() {
    if (data_pos < buffer.size())
      return buffer.data()[data_pos++];
    else
      return -1;
  }

  void seek(long int offset, int origin) {
    if (origin == SEEK_SET) {
      data_pos = offset;
    } else if (origin == SEEK_CUR) {
      data_pos += offset;
    }
  }

  size_t tell() { return data_pos; }

  bool eof() { return data_pos >= buffer.size() - 1; }

  void logInfo() {
    LOGI("WAVHeader sound_pos: %d", getDataPos());
    LOGI("WAVHeader channels: %d ", headerInfo.channels);
    LOGI("WAVHeader bits_per_sample: %d", headerInfo.bits_per_sample);
    LOGI("WAVHeader sample_rate: %d ", (int)headerInfo.sample_rate);
    LOGI("WAVHeader format: %d", (int)headerInfo.format);
  }

  void writeRiffHeader(BaseBuffer<uint8_t> &buffer) {
    buffer.writeArray((uint8_t *)"RIFF", 4);
    write32(buffer, headerInfo.file_size - 8);
    buffer.writeArray((uint8_t *)"WAVE", 4);
  }

  void writeFMT(BaseBuffer<uint8_t> &buffer) {
    uint16_t fmt_len = 16;
    buffer.writeArray((uint8_t *)"fmt ", 4);
    write32(buffer, fmt_len);
    write16(buffer, (uint16_t)headerInfo.format);  // PCM
    write16(buffer, headerInfo.channels);
    write32(buffer, headerInfo.sample_rate);
    write32(buffer, headerInfo.byte_rate);
    write16(buffer, headerInfo.block_align);  // frame size
    write16(buffer, headerInfo.bits_per_sample);
  }

  /// Writes the low 4 bytes of value (little endian on the supported targets)
  void write32(BaseBuffer<uint8_t> &buffer, uint64_t value) {
    buffer.writeArray((uint8_t *)&value, 4);
  }

  void write16(BaseBuffer<uint8_t> &buffer, uint16_t value) {
    buffer.writeArray((uint8_t *)&value, 2);
  }

  void writeDataHeader(BaseBuffer<uint8_t> &buffer) {
    buffer.writeArray((uint8_t *)"data", 4);
    // NOTE(review): this writes file_size as the data chunk length —
    // presumably intentional for streaming, but verify against data_length
    write32(buffer, headerInfo.file_size);
    int offset = headerInfo.offset;
    if (offset > 0) {
      uint8_t empty[offset];
      memset(empty, 0, offset);
      buffer.writeArray(empty, offset);  // resolve issue with wrong aligment
    }
  }
};
/**
* @brief A simple WAVDecoder: We parse the header data on the first record to
* determine the format. If no AudioDecoderExt is specified we just write the
* PCM data to the output that is defined by calling setOutput(). You can define
* a ADPCM decoder to decode WAV files that contain ADPCM data.
*
* Optionally, if the input WAV file contains 8-bit PCM data, you can enable automatic
* conversion to 16-bit PCM output by calling setConvert8to16(true). This will convert
* unsigned 8-bit samples to signed 16-bit samples before writing to the output stream,
* and the reported bits_per_sample in audioInfo() will be 16 when conversion is active.
* The same is valid for the 24 bit conversion which converts 24 bit (3 byte) to 32 bit
* (4 byte).
*
* Please note that you need to call begin() everytime you process a new file to let the decoder
* know that we start with a new header.
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class WAVDecoder : public AudioDecoder {
 public:
  /**
   * @brief Construct a new WAVDecoder object for PCM data
   */
  WAVDecoder() = default;

  /**
   * @brief Construct a new WAVDecoder object for ADPCM data
   *
   */
  WAVDecoder(AudioDecoderExt &dec, AudioFormat fmt) { setDecoder(dec, fmt); }

  /// Defines an optional decoder if the format is not PCM
  void setDecoder(AudioDecoderExt &dec, AudioFormat fmt) {
    TRACED();
    decoder_format = fmt;
    p_decoder = &dec;
  }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override { this->p_print = &out_stream; }

  /// Prepare decoder for a new WAV stream
  bool begin() override {
    TRACED();
    header.clear();
    setupEncodedAudio();
    byte_buffer.reset();
    buffer24.reset();
    isFirst = true;
    active = true;
    return true;
  }

  /// Finish decoding and release temporary buffers
  void end() override {
    TRACED();
    byte_buffer.reset();
    buffer24.reset();
    active = false;
  }

  /// Provides MIME type "audio/wav"
  const char *mime() { return wav_mime; }

  /// Extended WAV specific info (original header values)
  WAVAudioInfo &audioInfoEx() { return header.audioInfo(); }

  /// Exposed AudioInfo: reflects the active conversion flags, so 8 bit input
  /// is reported as 16 bit and 24 bit input as 32 bit
  AudioInfo audioInfo() override {
    WAVAudioInfo info = header.audioInfo();
    if (convert8to16 && info.format == AudioFormat::PCM &&
        info.bits_per_sample == 8) {
      info.bits_per_sample = 16;
    }
    // 32 bits gives better result
    if (convert24 && info.format == AudioFormat::PCM &&
        info.bits_per_sample == 24) {
      info.bits_per_sample = 32;
    }
    return info;
  }

  /// Write incoming WAV data (header + PCM) into output.
  /// The first call(s) consume the header; subsequent data is converted and
  /// forwarded.
  virtual size_t write(const uint8_t *data, size_t len) override {
    TRACED();
    size_t result = 0;
    if (active) {
      if (isFirst) {
        int data_start = decodeHeader((uint8_t *)data, len);
        // we do not have the complete header yet: need more data
        if (data_start == 0) return len;
        // process the outstanding data
        result = data_start +
                 write_out((uint8_t *)data + data_start, len - data_start);
      } else if (isValid) {
        result = write_out((uint8_t *)data, len);
      }
    }
    return result;
  }

  /// Check if the decoder is active
  virtual operator bool() override { return active; }

  /// Convert 8 bit to 16 bit PCM data (default: enabled)
  void setConvert8Bit(bool enable) { convert8to16 = enable; }

  /// Convert 24 bit (3 byte) to 32 bit (4 byte) PCM data (default: enabled)
  void setConvert24Bit(bool enable) { convert24 = enable; }

 protected:
  WAVHeader header;            // header parser
  bool isFirst = true;         // true until the header was processed
  bool isValid = true;         // false when the format is not supported
  bool active = false;         // true between begin() and end()
  AudioFormat decoder_format = AudioFormat::PCM;  // format handled by p_decoder
  AudioDecoderExt *p_decoder = nullptr;  // optional non-PCM decoder
  EncodedAudioOutput dec_out;  // output pipeline used with p_decoder
  SingleBuffer<uint8_t> byte_buffer{0};  // collects the 3 bytes of one int24
  SingleBuffer<int32_t> buffer24{0};     // batches converted int32 samples
  bool convert8to16 = true;  // Optional conversion flag
  bool convert24 = true;     // Optional conversion flag
  const size_t batch_size = 256;  // samples converted per write batch

  /// Selects the effective output: direct print or the ADPCM pipeline
  Print &out() { return p_decoder == nullptr ? *p_print : dec_out; }

  /// Writes decoded data applying the configured bit-depth conversions
  virtual size_t write_out(const uint8_t *in_ptr, size_t in_size) {
    // check if we need to convert int24 data from 3 bytes to 4 bytes
    size_t result = 0;
    if (convert24 && header.audioInfo().format == AudioFormat::PCM &&
        header.audioInfo().bits_per_sample == 24 && sizeof(int24_t) == 4) {
      write_out_24(in_ptr, in_size);
      result = in_size;
    } else if (convert8to16 && header.audioInfo().format == AudioFormat::PCM &&
               header.audioInfo().bits_per_sample == 8) {
      result = write_out_8to16(in_ptr, in_size);
    } else {
      result = out().write(in_ptr, in_size);
    }
    return result;
  }

  /// Convert 8-bit PCM to 16-bit PCM and write out
  size_t write_out_8to16(const uint8_t *in_ptr, size_t in_size) {
    size_t total_written = 0;  // NOTE(review): currently unused
    size_t samples_remaining = in_size;
    size_t offset = 0;
    int16_t out_buf[batch_size];
    while (samples_remaining > 0) {
      size_t current_batch =
          samples_remaining > batch_size ? batch_size : samples_remaining;
      for (size_t i = 0; i < current_batch; ++i) {
        // unsigned 8 bit (0..255) -> signed 16 bit centered at 0
        out_buf[i] = ((int16_t)in_ptr[offset + i] - 128) << 8;
      }
      writeDataT<int16_t>(&out(), out_buf, current_batch);
      offset += current_batch;
      samples_remaining -= current_batch;
    }
    return in_size;
  }

  /// convert 3 byte int24 to 4 byte int32
  size_t write_out_24(const uint8_t *in_ptr, size_t in_size) {
    // store 1 sample
    buffer24.resize(batch_size);
    byte_buffer.resize(3);
    for (size_t i = 0; i < in_size; i++) {
      // Add byte to buffer
      byte_buffer.write(in_ptr[i]);
      // Process complete sample when buffer is full
      if (byte_buffer.isFull()) {
        int24_3bytes_t sample24{byte_buffer.data()};
        int32_t converted_sample = sample24.scale32();
        buffer24.write(converted_sample);
        if (buffer24.isFull()) {
          writeDataT<int32_t>(&out(), buffer24.data(), buffer24.available());
          buffer24.reset();
        }
        byte_buffer.reset();
      }
    }
    return in_size;
  }

  /// Decodes the header data: Returns the start pos of the data
  /// (0 when more header bytes are needed or parsing failed)
  int decodeHeader(uint8_t *in_ptr, size_t in_size) {
    int result = in_size;
    // we expect at least the full header
    int written = header.write(in_ptr, in_size);
    if (!header.isDataComplete()) {
      LOGW("WAV header misses 'data' section in len: %d",
           (int)header.available());
      header.dumpHeader();
      return 0;
    }
    // parse header
    if (!header.parse()) {
      LOGE("WAV header parsing failed");
      return 0;
    }
    isFirst = false;
    isValid = header.audioInfo().is_valid;
    LOGI("WAV sample_rate: %d", (int)header.audioInfo().sample_rate);
    LOGI("WAV data_length: %u", (unsigned)header.audioInfo().data_length);
    LOGI("WAV is_streamed: %d", header.audioInfo().is_streamed);
    LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");
    // check format
    AudioFormat format = header.audioInfo().format;
    isValid = format == decoder_format;
    if (isValid) {
      // update blocksize
      if (p_decoder != nullptr) {
        int block_size = header.audioInfo().block_align;
        p_decoder->setBlockSize(block_size);
      }
      // update sampling rate if the target supports it
      AudioInfo bi = audioInfo();
      notifyAudioChange(bi);
    } else {
      LOGE("WAV format not supported: %d", (int)format);
    }
    return header.getDataPos();
  }

  /// Wires the optional external decoder into the output pipeline
  void setupEncodedAudio() {
    if (p_decoder != nullptr) {
      assert(p_print != nullptr);
      dec_out.setOutput(p_print);
      dec_out.setDecoder(p_decoder);
      dec_out.begin(info);
    }
  }
};
/**
* @brief A simple WAV file encoder. If no AudioEncoderExt is specified the WAV
* file contains PCM data, otherwise it is encoded as ADPCM. The WAV header is
* written with the first writing of audio data. Calling begin() is making sure
* that the header is written again.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class WAVEncoder : public AudioEncoder {
 public:
  /**
   * @brief Construct a new WAVEncoder object for PCM data
   */
  WAVEncoder() = default;
  /**
   * @brief Construct a new WAVEncoder object for ADPCM data
   */
  WAVEncoder(AudioEncoderExt &enc, AudioFormat fmt) { setEncoder(enc, fmt); };

  /// Associates an external encoder for non-PCM formats
  void setEncoder(AudioEncoderExt &enc, AudioFormat fmt) {
    TRACED();
    wav_info.format = fmt;
    p_encoder = &enc;
  }

  /// Defines the otuput stream
  void setOutput(Print &out) override {
    TRACED();
    p_print = &out;
  }

  /// Provides "audio/wav"
  const char *mime() override { return wav_mime; }

  /// Provides the default configuration
  WAVAudioInfo defaultConfig() {
    WAVAudioInfo info;
    info.format = AudioFormat::PCM;
    info.sample_rate = DEFAULT_SAMPLE_RATE;
    info.bits_per_sample = DEFAULT_BITS_PER_SAMPLE;
    info.channels = DEFAULT_CHANNELS;
    info.is_streamed = true;
    info.is_valid = true;
    info.data_length = 0x7fff0000;
    info.file_size = info.data_length + 36;
    return info;
  }

  /// Update actual WAVAudioInfo
  virtual void setAudioInfo(AudioInfo from) override {
    wav_info.sample_rate = from.sample_rate;
    wav_info.channels = from.channels;
    wav_info.bits_per_sample = from.bits_per_sample;
    // recalculate byte rate, block align...
    setAudioInfo(wav_info);
  }

  /// Defines the WAVAudioInfo and derives byte_rate / block_align from it
  virtual void setAudioInfo(WAVAudioInfo ai) {
    AudioEncoder::setAudioInfo(ai);
    if (p_encoder) p_encoder->setAudioInfo(ai);
    wav_info = ai;
    LOGI("sample_rate: %d", (int)wav_info.sample_rate);
    LOGI("channels: %d", wav_info.channels);
    // bytes per second
    wav_info.byte_rate =
        wav_info.sample_rate * wav_info.channels * wav_info.bits_per_sample / 8;
    if (wav_info.format == AudioFormat::PCM) {
      wav_info.block_align = wav_info.bits_per_sample / 8 * wav_info.channels;
    }
    if (wav_info.is_streamed || wav_info.data_length == 0 ||
        wav_info.data_length >= 0x7fff0000) {
      LOGI("is_streamed! because length is %u", (unsigned)wav_info.data_length);
      wav_info.is_streamed = true;
      wav_info.data_length = ~0;
    } else {
      size_limit = wav_info.data_length;
      LOGI("size_limit is %d", (int)size_limit);
    }
  }

  /// starts the processing
  bool begin(WAVAudioInfo ai) {
    header.clear();
    setAudioInfo(ai);
    return begin();
  }

  /// starts the processing using the actual WAVAudioInfo
  virtual bool begin() override {
    TRACED();
    setupEncodedAudio();
    header_written = false;
    is_open = true;
    return true;
  }

  /// stops the processing
  void end() override { is_open = false; }

  /// Writes PCM data to be encoded as WAV; emits the WAV header on the first
  /// call. Returns the number of payload bytes written.
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (!is_open) {
      LOGE("The WAVEncoder is not open - please call begin()");
      return 0;
    }
    if (p_print == nullptr) {
      LOGE("No output stream was provided");
      return 0;
    }
    if (!header_written) {
      LOGI("Writing Header");
      header.setAudioInfo(wav_info);
      // renamed from 'len' to avoid shadowing the parameter
      int header_len = header.writeHeader(p_print);
      wav_info.file_size -= header_len;
      header_written = true;
    }
    int32_t result = 0;
    Print *p_out = p_encoder == nullptr ? p_print : &enc_out;
    if (wav_info.is_streamed) {
      result = p_out->write((uint8_t *)data, len);
    } else if (size_limit > 0) {
      // limited length: clip to the remaining budget and close when exhausted
      size_t write_size = min((size_t)len, (size_t)size_limit);
      result = p_out->write((uint8_t *)data, write_size);
      size_limit -= result;
      if (size_limit <= 0) {
        LOGI("The defined size was written - so we close the WAVEncoder now");
        is_open = false;
      }
    }
    return result;
  }

  /// Check if encoder is active and ready to write
  operator bool() override { return is_open; }

  /// Check if encoder is open
  bool isOpen() { return is_open; }

  /// Adds n empty bytes at the beginning of the data
  void setDataOffset(uint16_t offset) { wav_info.offset = offset; }

 protected:
  WAVHeader header;                      // builds the 44 byte header
  Print *p_print = nullptr;              // final output stream
  AudioEncoderExt *p_encoder = nullptr;  // optional non-PCM encoder
  EncodedAudioOutput enc_out;            // pipeline used with p_encoder
  WAVAudioInfo wav_info = defaultConfig();
  int64_t size_limit = 0;       // remaining bytes for non-streamed output
  bool header_written = false;  // header is emitted once per begin()
  volatile bool is_open = false;

  /// Wires the optional external encoder into the output pipeline
  void setupEncodedAudio() {
    if (p_encoder != nullptr) {
      assert(p_print != nullptr);
      enc_out.setOutput(p_print);
      enc_out.setEncoder(p_encoder);
      enc_out.setAudioInfo(wav_info);
      enc_out.begin();
      // block size only available after begin(): update block size
      wav_info.block_align = p_encoder->blockSize();
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,502 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#define WAVE_FORMAT_IMA_ADPCM 0x0011
#define TAG(a, b, c, d) ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(b) << 16) | (static_cast<uint32_t>(c) << 8) | (d))
#define READ_BUFFER_SIZE 512
namespace audio_tools {
// IMA ADPCM step-index adjustment table, indexed by the 4 bit sample nibble
const int16_t ima_index_table[16] {
  -1, -1, -1, -1, 2, 4, 6, 8,
  -1, -1, -1, -1, 2, 4, 6, 8
};
// IMA ADPCM quantizer step sizes, indexed by the current step index (0..88)
const int32_t ima_step_table[89] {
  7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
  19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
  50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
  130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
  337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
  876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
  2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
  5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
  15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
};
/**
* @brief Sound information which is available in the WAV header - adjusted for IMA ADPCM
* @author Phil Schatzmann
* @author Norman Ritz
* @copyright GPLv3
*
*/
struct WavIMAAudioInfo : AudioInfo {
  WavIMAAudioInfo() = default;
  // Implicit conversion from the basic AudioInfo: copies the common fields
  WavIMAAudioInfo(const AudioInfo& from) {
    sample_rate = from.sample_rate;
    channels = from.channels;
    bits_per_sample = from.bits_per_sample;
  }
  int format = WAVE_FORMAT_IMA_ADPCM;  // format tag from the 'fmt ' chunk
  int byte_rate = 0;          // bytes per second
  int block_align = 0;        // size of one encoded block in bytes
  int frames_per_block = 0;   // decoded sample frames per encoded block
  int num_samples = 0;        // sample count from the 'fact' chunk (unreliable)
  bool is_valid = false;      // true when the parsed header was valid
  uint32_t data_length = 0;   // length of the 'data' chunk in bytes
  uint32_t file_size = 0;     // total file size from the RIFF chunk
};
// Per-channel prediction state for the IMA ADPCM decoder
struct IMAState {
  int32_t predictor = 0;  // last predicted (decoded) sample value
  int step_index = 0;     // current index into ima_step_table
};
// MIME type reported by the WavIMADecoder
const char* wav_ima_mime = "audio/x-wav";
/**
* @brief Parser for Wav header data adjusted for IMA ADPCM format - partially based on CodecWAV.h
* for details see https://de.wikipedia.org/wiki/RIFF_WAVE
* @author Phil Schatzmann
* @author Norman Ritz
* @copyright GPLv3
*/
// Result codes returned by WavIMAHeader::parseChunk()
typedef enum {
  IMA_ERR_INVALID_CHUNK = -2,  // chunk too short or content not supported
  IMA_ERR_INVALID_CONTAINER,   // -1: RIFF container is not a WAVE file
  IMA_CHUNK_OK,                // 0: chunk recognized and parsed
  IMA_CHUNK_UNKNOWN            // 1: unknown chunk tag (will be skipped)
} chunk_result;
class WavIMAHeader {
 public:
  WavIMAHeader() {
    clearHeader();
  };

  /// Resets all parsing state so that a new file can be processed
  void clearHeader() {
    data_pos = 0;
    memset((void*)&headerInfo, 0, sizeof(WavIMAAudioInfo));
    headerInfo.is_valid = false;
    header_complete = false;
    chunk_len = 0;
    max_chunk_len = 8;
    skip_len = 0;
    isFirstChunk = true;
  }

  /// Parses the chunk currently stored in chunk_buffer and updates headerInfo
  chunk_result parseChunk() {
    data_pos = 0;
    bool chunkUnknown = false;
    uint32_t tag = read_tag();
    uint32_t length = read_int32();
    if (length < 4) {
      return IMA_ERR_INVALID_CHUNK;
    }
    if (tag == TAG('R', 'I', 'F', 'F')) {
      uint32_t container_type = read_tag();
      if (container_type != TAG('W', 'A', 'V', 'E')) {
        return IMA_ERR_INVALID_CONTAINER;
      }
    }
    else if (tag == TAG('f', 'm', 't', ' ')) {
      if (length < 20) {
        // Insufficient data for 'fmt '
        return IMA_ERR_INVALID_CHUNK;
      }
      headerInfo.format = read_int16();
      headerInfo.channels = read_int16();
      headerInfo.sample_rate = read_int32();
      headerInfo.byte_rate = read_int32();
      headerInfo.block_align = read_int16();
      headerInfo.bits_per_sample = read_int16();
      // Skip the size parameter for extra information as for IMA ADPCM the following data should always be 2 bytes.
      skip(2);
      headerInfo.frames_per_block = read_int16();
      if (headerInfo.format != WAVE_FORMAT_IMA_ADPCM || headerInfo.channels > 2) {
        // Insufficient or invalid data for waveformatex
        LOGE("Format not supported: %d, %d\n", headerInfo.format, headerInfo.channels);
        return IMA_ERR_INVALID_CHUNK;
      } else {
        headerInfo.is_valid = true; // At this point we know that the format information is valid
      }
    } else if (tag == TAG('f', 'a', 'c', 't')) {
      /* In the context of ADPCM the fact chunk should contain the total number of mono or stereo samples
         however we shouldn't rely on this as some programs (e.g. Audacity) write an incorrect value in some cases. This value is currently not used by the decoder.
      */
      headerInfo.num_samples = read_int32();
    } else if (tag == TAG('d', 'a', 't', 'a')) {
      // Size of the data chunk.
      headerInfo.data_length = length;
    } else {
      chunkUnknown = true;
    }
    // Skip any remaining data that exceeds the buffer
    if (tag != TAG('R', 'I', 'F', 'F') && length > 20) skip_len = length - 20;
    return chunkUnknown ? IMA_CHUNK_UNKNOWN : IMA_CHUNK_OK;
  }

  /* Adds data to the header data buffer
     Because the header isn't necessarily uniform, we go through each chunk individually
     and only copy the ones we need. This could probably still be optimized.
     Returns the number of consumed bytes, or a negative chunk_result on error. */
  int write(uint8_t* data, size_t data_len) {
    int write_len;
    int data_offset = 0;
    while (data_len > 0 && !header_complete) {
      if (skip_len > 0) {
        /* Used to skip any unknown chunks or chunks that are longer than expected.
           Some encoders like ffmpeg write meta information before the "data" chunk by default. */
        write_len = min(skip_len, data_len);
        skip_len -= write_len;
        data_offset += write_len;
        data_len -= write_len;
      }
      else {
        // Search / Wait for the individual chunks and write them to the temporary buffer.
        write_len = min(data_len, max_chunk_len - chunk_len);
        memmove(chunk_buffer + chunk_len, data + data_offset, write_len);
        chunk_len += write_len;
        data_offset += write_len;
        data_len -= write_len;
        if (chunk_len == max_chunk_len) {
          data_pos = 0;
          if (max_chunk_len == 8) {
            // only the 8 byte chunk preamble (tag + length) is buffered so far
            uint32_t chunk_tag = read_tag();
            uint32_t chunk_size = read_int32();
            if (isFirstChunk && chunk_tag != TAG('R', 'I', 'F', 'F')) {
              headerInfo.is_valid = false;
              return IMA_ERR_INVALID_CONTAINER;
            }
            isFirstChunk = false;
            if (chunk_tag == TAG('R', 'I', 'F', 'F')) chunk_size = 4;
            else if (chunk_tag == TAG('d', 'a', 't', 'a')) {
              parseChunk();
              header_complete = true;
              logInfo();
              break;
            }
            /* Wait for the rest of the data before processing the chunk.
               The largest chunk we expect is the "fmt " chunk which is 20 bytes long in this case. */
            write_len = min((size_t)chunk_size, (size_t)20);
            max_chunk_len += write_len;
            continue;
          }
          else {
            chunk_result result = parseChunk();
            switch (result) {
              // Abort processing the header if the RIFF container or a required chunk is not valid
              case IMA_ERR_INVALID_CONTAINER:
              case IMA_ERR_INVALID_CHUNK:
                headerInfo.is_valid = false;
                return result;
                break;
            }
            chunk_len = 0;
            max_chunk_len = 8;
          }
        }
      }
    }
    return data_offset;
  }

  /// Returns true if the header is complete (data chunk has been found)
  bool isDataComplete() {
    return header_complete;
  }

  // provides the AudioInfo
  WavIMAAudioInfo &audioInfo() {
    return headerInfo;
  }

 protected:
  struct WavIMAAudioInfo headerInfo;  // parsed header values
  uint8_t chunk_buffer[28];  // 8 byte preamble + up to 20 bytes chunk payload
  size_t chunk_len = 0;      // bytes currently buffered in chunk_buffer
  size_t max_chunk_len = 8;  // bytes needed before the chunk can be processed
  size_t skip_len = 0;       // remaining bytes of a chunk to be skipped
  size_t data_pos = 0;       // read cursor into chunk_buffer
  bool header_complete = false;
  bool isFirstChunk = true;  // the very first chunk must be RIFF

  /// Reads a 4 character tag (big endian byte order)
  uint32_t read_tag() {
    uint32_t tag = getChar();
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    return tag;
  }

  /// Reads a little endian 32 bit value
  uint32_t read_int32() {
    uint32_t value = (uint32_t)getChar();
    value |= (uint32_t)getChar() << 8;
    value |= (uint32_t)getChar() << 16;
    value |= (uint32_t)getChar() << 24;
    return value;
  }

  /// Reads a little endian 16 bit value
  uint16_t read_int16() {
    uint16_t value = getChar();
    value |= getChar() << 8;
    return value;
  }

  /// Advances the read cursor by n bytes (clamped to the buffered length)
  void skip(int n) {
    n = min((size_t)n, chunk_len - data_pos);
    for (int i=0; i<n; i++) if (data_pos < chunk_len) data_pos++;
    return;
  }

  /// Returns the next buffered byte or -1 past the end
  int getChar() {
    if (data_pos < chunk_len) return chunk_buffer[data_pos++];
    else return -1;
  }

  void logInfo() {
    LOGI("WavIMAHeader format: %d", headerInfo.format);
    LOGI("WavIMAHeader channels: %d", headerInfo.channels);
    LOGI("WavIMAHeader sample_rate: %d", headerInfo.sample_rate);
    LOGI("WavIMAHeader block align: %d", headerInfo.block_align);
    LOGI("WavIMAHeader bits_per_sample: %d", headerInfo.bits_per_sample);
  }
};
/**
* @brief Obsolete: WavIMADecoder - based on WAVDecoder - We parse the header data as we receive it
* and send the sound data to the stream which was indicated in the constructor.
* Only WAV files with WAVE_FORMAT_IMA_ADPCM are supported by this codec!
*
* We recommend using the WAVDecoder with a corresponding ADPCMDecoder instead.
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @author Norman Ritz
* @copyright GPLv3
*/
class WavIMADecoder : public AudioDecoder {
public:
/**
* @brief Construct a new WavIMADecoder object
*/
WavIMADecoder() {
TRACED();
}
/**
* @brief Construct a new WavIMADecoder object
*
* @param out_stream Output Stream to which we write the decoded result
*/
WavIMADecoder(Print &out_stream, bool active=true) {
TRACED();
this->out = &out_stream;
this->active = active;
}
/**
* @brief Construct a new WavIMADecoder object
*
* @param out_stream Output Stream to which we write the decoded result
* @param bi Object that will be notified about the Audio Formt (Changes)
*/
WavIMADecoder(Print &out_stream, AudioInfoSupport &bi) {
TRACED();
this->out = &out_stream;
addNotifyAudioChange(bi);
}
~WavIMADecoder() {
if (input_buffer != nullptr) delete[] input_buffer;
if (output_buffer != nullptr) delete[] output_buffer;
}
/// Defines the output Stream
void setOutput(Print &out_stream) {
this->out = &out_stream;
}
bool begin() {
TRACED();
ima_states[0].predictor = 0;
ima_states[0].step_index = 0;
ima_states[1].predictor = 0;
ima_states[1].step_index = 0;
isFirst = true;
active = true;
header.clearHeader();
return true;
}
void end() {
TRACED();
active = false;
}
const char* mime() {
return wav_ima_mime;
}
WavIMAAudioInfo &audioInfoEx() {
return header.audioInfo();
}
AudioInfo audioInfo() override {
return header.audioInfo();
}
virtual size_t write(const uint8_t *data, size_t len) {
TRACED();
if (active) {
if (isFirst) {
// we expect at least the full header
int written = header.write((uint8_t*)data, len);
if (written == IMA_ERR_INVALID_CONTAINER || written == IMA_ERR_INVALID_CHUNK) {
isValid = false;
isFirst = false;
LOGE("File is not valid");
return len;
}
if (!header.isDataComplete()) {
return len;
}
size_t len_open = len - written;
uint8_t *sound_ptr = (uint8_t *) data + written;
isFirst = false;
isValid = header.audioInfo().is_valid;
LOGI("WAV sample_rate: %d", header.audioInfo().sample_rate);
LOGI("WAV data_length: %u", (unsigned) header.audioInfo().data_length);
LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");
isValid = header.audioInfo().is_valid;
if (isValid) {
if (input_buffer != nullptr) delete[] input_buffer;
if (output_buffer != nullptr) delete[] output_buffer;
bytes_per_encoded_block = header.audioInfo().block_align;
bytes_per_decoded_block = header.audioInfo().frames_per_block * header.audioInfo().channels * 2;
samples_per_decoded_block = bytes_per_decoded_block >> 1;
input_buffer = new uint8_t[bytes_per_encoded_block];
output_buffer = new int16_t[samples_per_decoded_block];
// update sampling rate if the target supports it
AudioInfo bi;
bi.sample_rate = header.audioInfo().sample_rate;
bi.channels = header.audioInfo().channels;
bi.bits_per_sample = 16;
remaining_bytes = header.audioInfo().data_length;
notifyAudioChange(bi);
// write prm data from first record
LOGI("WavIMADecoder writing first sound data");
processInput(sound_ptr, len_open);
}
} else if (isValid) {
processInput((uint8_t*)data, len);
}
}
return len;
}
/// Alternative pull API: reads a chunk from the input stream and feeds it
/// into write()
int readStream(Stream &in) {
  TRACED();
  uint8_t tmp[READ_BUFFER_SIZE];
  size_t count = in.readBytes(tmp, READ_BUFFER_SIZE);
  return write(tmp, count);
}
/// Returns true while the decoder is active (begin() called, data not done)
virtual operator bool() {
  return active;
}
protected:
  WavIMAHeader header;                   // parses and stores the WAV/IMA header
  Print *out;                            // destination for the decoded PCM data
  bool isFirst = true;                   // true until the header has been processed
  bool isValid = true;                   // false once the file was rejected
  bool active;                           // decoding in progress
  uint8_t *input_buffer = nullptr;       // collects one encoded ADPCM block
  int32_t input_pos = 0;                 // current write position in input_buffer
  size_t remaining_bytes = 0;            // encoded bytes still expected (from data_length)
  size_t bytes_per_encoded_block = 0;    // block_align from the header
  int16_t *output_buffer = nullptr;      // holds one decoded PCM block
  size_t bytes_per_decoded_block = 0;    // size of output_buffer in bytes
  size_t samples_per_decoded_block = 0;  // size of output_buffer in samples
  IMAState ima_states[2];                // per-channel decoder state (max 2 channels)
/// Decodes a single 4-bit IMA ADPCM nibble into a 16 bit PCM sample and
/// updates the state (predictor + step index) of the indicated channel.
int16_t decodeSample(uint8_t sample, int channel = 0) {
  auto &state = ima_states[channel];
  // look up the step with the old index, then advance the index (clamped)
  int32_t step = ima_step_table[state.step_index];
  int idx = state.step_index + ima_index_table[sample];
  if (idx < 0) idx = 0;
  if (idx > 88) idx = 88;
  state.step_index = idx;
  // reconstruct the magnitude: diff = step/8 + optional step, step/2, step/4
  int32_t diff = step >> 3;
  if (sample & 4) diff += step;
  if (sample & 2) diff += step >> 1;
  if (sample & 1) diff += step >> 2;
  // bit 3 is the sign; clamp the new predictor to the 16 bit range
  int32_t predicted = state.predictor + ((sample & 8) ? -diff : diff);
  if (predicted < -32768) predicted = -32768;
  if (predicted > 32767) predicted = 32767;
  state.predictor = predicted;
  return (int16_t)predicted;
}
// Decodes one complete IMA ADPCM block from input_buffer into output_buffer.
// Each block starts with a 4 byte preamble per channel (little endian initial
// predictor, step index, one reserved byte) followed by packed 4-bit nibbles.
void decodeBlock(int channels) {
  if (channels == 0 || channels > 2) return;
  input_pos = 4;
  int output_pos = 1;
  // channel 0 preamble: initial predictor (LE int16) + step index
  ima_states[0].predictor = (int16_t)((input_buffer[1] << 8) + input_buffer[0]);
  ima_states[0].step_index = input_buffer[2];
  output_buffer[0] = ima_states[0].predictor;
  if (channels == 2) {
    // channel 1 preamble (bytes 4..7)
    ima_states[1].predictor = (int16_t)(input_buffer[5] << 8) + input_buffer[4];
    ima_states[1].step_index = input_buffer[6];
    output_buffer[1] = ima_states[1].predictor;
    input_pos = 8;
    output_pos = 2;
  }
  for (int i=0; i<samples_per_decoded_block-channels; i++) {
    // low nibble first, then the high nibble of the same byte
    uint8_t sample = (i & 1) ? input_buffer[input_pos++] >> 4 : input_buffer[input_pos] & 15;
    if (channels == 1) output_buffer[output_pos++] = decodeSample(sample);
    else {
      // stereo: 4 bytes (8 nibbles) per channel alternate, so the channel
      // toggles every 8 samples; output_pos is rewound to interleave L/R
      // samples in the PCM output
      output_buffer[output_pos] = decodeSample(sample, (i >> 3) & 1);
      output_pos += 2;
      if ((i & 15) == 7) output_pos -= 15;
      else if ((i & 15) == 15) output_pos--;
    }
  }
}
/// Collects the incoming bytes into input_buffer and decodes + emits a PCM
/// block whenever a full encoded block has been gathered.
void processInput(const uint8_t* data, size_t size) {
  // never consume more than the still expected payload bytes
  int limit = min(size, remaining_bytes);
  for (int k = 0; k < limit; k++) {
    input_buffer[input_pos++] = data[k];
    if (input_pos == bytes_per_encoded_block) {
      // a full encoded block was collected -> decode and write it out
      decodeBlock(header.audioInfo().channels);
      input_pos = 0;
      out->write((uint8_t*)output_buffer, bytes_per_decoded_block);
    }
  }
  remaining_bytes -= limit;
  // all payload processed -> decoding is finished
  if (remaining_bytes == 0) active = false;
}
};
}

View File

@@ -0,0 +1,720 @@
#pragma once
#include <string.h>
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioBasic/Str.h"
#include "AudioTools/AudioCodecs/AudioFormat.h"
#include "AudioTools/Video/Video.h"
#include "AudioTools/CoreAudio/Buffers.h"
#define LIST_HEADER_SIZE 12
#define CHUNK_HEADER_SIZE 8
namespace audio_tools {
/**
* @brief We try to keep the necessary buffer for parsing as small as possible,
* The data() method provides the start of the actual data and with consume
* we remove the processed data from the buffer to make space again.
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ParseBuffer {
 public:
  /// Appends data, limited by the free space; returns the stored byte count
  size_t writeArray(uint8_t *data, size_t len) {
    size_t to_write = min(availableToWrite(), (size_t)len);
    // the source is external data, so the regions cannot overlap
    memcpy(vector.data() + available_byte_count, data, to_write);
    available_byte_count += to_write;
    return to_write;
  }
  /// Removes the indicated number of processed bytes from the front
  void consume(int size) {
    if (size <= 0) return;
    // clamp to the available data: prevents a size_t underflow of
    // available_byte_count (and a huge memmove) for oversized requests
    if ((size_t)size > available_byte_count) size = available_byte_count;
    memmove(vector.data(), &vector[size], available_byte_count - size);
    available_byte_count -= size;
  }
  /// Defines the buffer capacity (a few spare bytes are added)
  void resize(int size) { vector.resize(size + 4); }
  /// Start of the (unconsumed) data
  uint8_t *data() { return vector.data(); }
  size_t availableToWrite() { return size() - available_byte_count; }
  size_t available() { return available_byte_count; }
  void clear() {
    available_byte_count = 0;
    memset(vector.data(), 0, vector.size());
  }
  bool isEmpty() { return available_byte_count == 0; }
  size_t size() { return vector.size(); }
  /// Position of the indicated string in the buffered data or -1
  long indexOf(const char *str) {
    uint8_t *ptr = (uint8_t *)memmem(vector.data(), available_byte_count, str,
                                     strlen(str));
    return ptr == nullptr ? -1l : ptr - vector.data();
  }

 protected:
  Vector<uint8_t> vector{0};       // backing storage
  size_t available_byte_count = 0; // number of valid bytes in vector
};
/// @brief Four-character code identifier for AVI format
/// @ingroup codecs
using FOURCC = char[4];
/// @brief Payload of the avih chunk (without the leading FOURCC and size)
struct AVIMainHeader {
  // FOURCC fcc;
  // uint32_t cb;
  uint32_t dwMicroSecPerFrame;     // display period of one video frame in us
  uint32_t dwMaxBytesPerSec;       // approximate maximum data rate
  uint32_t dwPaddingGranularity;   // data alignment in bytes
  uint32_t dwFlags;                // AVIF_* flags
  uint32_t dwTotalFrames;          // total number of frames
  uint32_t dwInitialFrames;        // frames before the actual start
  uint32_t dwStreams;              // number of streams (e.g. audio + video = 2)
  uint32_t dwSuggestedBufferSize;  // suggested read buffer size
  uint32_t dwWidth;                // video width in pixels
  uint32_t dwHeight;               // video height in pixels
  uint32_t dwReserved[4];
};
/// @brief Simple width/height pair as used in AVIStreamHeader
/// NOTE(review): differs from the Win32 RECT (left/top/right/bottom) layout
struct RECT {
  uint32_t dwWidth;
  uint32_t dwHeight;
};
/// @brief Payload of the strh chunk: describes one audio or video stream
struct AVIStreamHeader {
  FOURCC fccType;     // stream type: "auds" (audio) or "vids" (video)
  FOURCC fccHandler;  // codec FOURCC (copied to video_format for video)
  uint32_t dwFlags;
  uint16_t wPriority;
  uint16_t wLanguage;
  uint32_t dwInitialFrames;
  uint32_t dwScale;   // dwRate / dwScale = samples (or frames) per second
  uint32_t dwRate;
  uint32_t dwStart;
  uint32_t dwLength;  // stream length in units of dwRate/dwScale
  uint32_t dwSuggestedBufferSize;
  uint32_t dwQuality;
  uint32_t dwSampleSize;
  RECT rcFrame;
};
/// @brief Video format payload of the strf chunk
/// NOTE(review): in the Win32 BITMAPINFOHEADER the width/height and
/// pels-per-meter fields are 32 bit (LONG); the 64 bit fields used here change
/// the struct layout - verify against the on-disk format before memcpy use
struct BitmapInfoHeader {
  uint32_t biSize;
  uint64_t biWidth;
  uint64_t biHeight;
  uint16_t biPlanes;
  uint16_t biBitCount;
  uint32_t biCompression;
  uint32_t biSizeImage;
  uint64_t biXPelsPerMeter;
  uint64_t biYPelsPerMeter;
  uint32_t biClrUsed;
  uint32_t biClrImportant;
};
/// @brief Audio format payload of the strf chunk (WAVEFORMATEX style)
/// NOTE(review): WAVEFORMATEX uses a 16 bit format tag - verify that
/// sizeof(AudioFormat) matches the on-disk layout
struct WAVFormatX {
  AudioFormat wFormatTag;   // encoding id, exposed via audioFormat()
  uint16_t nChannels;
  uint32_t nSamplesPerSec;
  uint32_t nAvgBytesPerSec;
  uint16_t nBlockAlign;
  uint16_t wBitsPerSample;
  uint16_t cbSize;          // size of optional extra format data
};
// struct WAVFormat {
// uint16_t wFormatTag;
// uint16_t nChannels;
// uint32_t nSamplesPerSec;
// uint32_t nAvgBytesPerSec;
// uint16_t nBlockAlign;
// };
/// @brief Content type of an AVI stream
enum StreamContentType { Audio, Video };
/// @brief Kind of parsed object: a LIST, a chunk or a stream data record
enum ParseObjectType { AVIList, AVIChunk, AVIStreamData };
/// @brief States of the AVIDecoder parser state machine
enum ParseState {
  ParseHeader,       // expecting the RIFF/AVI file header
  ParseHdrl,         // expecting the hdrl LIST
  ParseAvih,         // expecting the avih chunk
  ParseStrl,         // expecting a strl LIST with its strh chunk
  SubChunkContinue,  // streaming the payload of the current subchunk
  SubChunk,          // expecting the next data subchunk
  ParseRec,          // expecting a rec LIST
  ParseStrf,         // expecting the strf chunk
  AfterStrf,         // skipping data until the next LIST
  ParseMovi,         // expecting the movi LIST
  ParseIgnore,       // discard all remaining data
};
/**
* @brief Represents a LIST or a CHUNK: The ParseObject represents the
* current parsing result. We just keep position information and ids
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ParseObject {
 public:
  /// Convenience overload which accepts the id as StrView
  void set(size_t currentPos, StrView id, size_t size, ParseObjectType type) {
    set(currentPos, id.c_str(), size, type);
  }
  /// Records position, FOURCC id and (word aligned) size of a LIST or chunk
  void set(size_t currentPos, const char *id, size_t size,
           ParseObjectType type) {
    object_type = type;
    data_size = size;
    start_pos = currentPos;
    // allign on word
    if (size % 2 != 0) {
      data_size++;
    }
    // NOTE(review): the +4 presumably accounts for header bytes which are not
    // part of data_size - confirm against the consume() calls in AVIDecoder
    end_pos = currentPos + data_size + 4;
    // save FOURCC
    if (id != nullptr) {
      memcpy(chunk_id, id, 4);
      chunk_id[4] = 0;
    }
    open = data_size;
  }
  const char *id() { return chunk_id; }
  size_t size() { return data_size; }
  ParseObjectType type() { return object_type; }
  /// A chunk is only considered valid when it has open data (open > 0);
  /// stream data must carry a known audio/video FOURCC; lists always pass
  bool isValid() {
    switch (object_type) {
      case AVIStreamData:
        return isAudio() || isVideo();
      case AVIChunk:
        return open > 0;
      case AVIList:
        return true;
    }
    return false;
  }
  // for Chunk
  AVIMainHeader *asAVIMainHeader(void *ptr) { return (AVIMainHeader *)ptr; }
  AVIStreamHeader *asAVIStreamHeader(void *ptr) {
    return (AVIStreamHeader *)ptr;
  }
  WAVFormatX *asAVIAudioFormat(void *ptr) { return (WAVFormatX *)ptr; }
  BitmapInfoHeader *asAVIVideoFormat(void *ptr) {
    return (BitmapInfoHeader *)ptr;
  }
  size_t open;       // payload bytes not yet processed
  size_t end_pos;    // absolute position right after the object
  size_t start_pos;  // absolute position where the object starts
  size_t data_size;  // payload size (word aligned)
  // for AVIStreamData
  // NOTE(review): combines the two raw FOURCC digit characters without
  // converting from ASCII (e.g. "01" -> 0x3130) - verify this matches the
  // callers' expectations
  int streamNumber() {
    return object_type == AVIStreamData ? (chunk_id[1] << 8) | chunk_id[0] : 0;
  }
  // "##wb" -> audio data chunk
  bool isAudio() {
    return object_type == AVIStreamData
               ? chunk_id[2] == 'w' && chunk_id[3] == 'b'
               : false;
  }
  // "##db" -> uncompressed video frame
  bool isVideoUncompressed() {
    return object_type == AVIStreamData
               ? chunk_id[2] == 'd' && chunk_id[3] == 'b'
               : false;
  }
  // "##dc" -> compressed video frame
  bool isVideoCompressed() {
    return object_type == AVIStreamData
               ? chunk_id[2] == 'd' && chunk_id[3] == 'c'
               : false;
  }
  bool isVideo() { return isVideoCompressed() || isVideoUncompressed(); }

 protected:
  // ParseBuffer data_buffer;
  char chunk_id[5] = {};  // FOURCC id as null terminated string
  ParseObjectType object_type;
};
/**
* @brief AVI Container Decoder which can be fed with small chunks of data. The
* minimum length must be bigger then the header size! The file structure is
* documented at
* https://learn.microsoft.com/en-us/windows/win32/directshow/avi-riff-file-reference
* @ingroup codecs
* @ingroup decoder
* @ingroup video
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AVIDecoder : public ContainerDecoder {
 public:
  /// Default constructor: the audio data is forwarded unchanged (CopyDecoder)
  AVIDecoder(int bufferSize = 1024) {
    parse_buffer.resize(bufferSize);
    p_decoder = &copy_decoder;
    p_output_audio = new EncodedAudioOutput(&copy_decoder);
  }
  /// Constructor with a dedicated audio decoder and an optional video output
  AVIDecoder(AudioDecoder *audioDecoder, VideoOutput *videoOut = nullptr,
             int bufferSize = 1024) {
    parse_buffer.resize(bufferSize);
    p_decoder = audioDecoder;
    p_output_audio = new EncodedAudioOutput(audioDecoder);
    if (videoOut != nullptr) {
      setOutputVideoStream(*videoOut);
    }
  }
  ~AVIDecoder() {
    if (p_output_audio != nullptr)
      delete p_output_audio;
  }
  /// Resets the parser state so that a new file can be processed
  bool begin() override {
    parse_state = ParseHeader;
    header_is_avi = false;
    is_parsing_active = true;
    current_pos = 0;
    header_is_avi = false;
    stream_header_idx = -1;
    is_metadata_ready = false;
    return true;
  }
  /// Defines the audio output stream - usually called by EncodedAudioStream
  virtual void setOutput(Print &out_stream) override {
    // p_output_audio = &out_stream;
    p_output_audio->setOutput(&out_stream);
  }
  /// Suppresses the audio output when set to true
  void setMute(bool mute) { is_mute = mute; }
  /// Defines the output for the decoded video frames
  virtual void setOutputVideoStream(VideoOutput &out_stream) {
    p_output_video = &out_stream;
  }
  /// Buffers the data and drives the parser state machine
  virtual size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", (int)len);
    int result = parse_buffer.writeArray((uint8_t *)data, len);
    if (is_parsing_active) {
      // we expect the first parse to succeed
      if (parse()) {
        // if so we process the parse_buffer
        while (parse_buffer.available() > 4) {
          if (!parse())
            break;
        }
      } else {
        LOGD("Parse Error");
        parse_buffer.clear();
        result = len;
        is_parsing_active = false;
      }
    }
    return result;
  }
  operator bool() override { return is_parsing_active; }
  void end() override { is_parsing_active = false; };
  /// Provides the information from the main header chunk
  AVIMainHeader mainHeader() { return main_header; }
  /// Provides the information from the stream header chunks
  AVIStreamHeader streamHeader(int idx) { return stream_header[idx]; }
  /// Provides the video information
  BitmapInfoHeader aviVideoInfo() { return video_info; };
  /// Provides the video FOURCC as a null terminated string
  const char *videoFormat() { return video_format; }
  /// Provides the audio information
  WAVFormatX aviAudioInfo() { return audio_info; }
  /// Provides the audio_info.wFormatTag
  AudioFormat audioFormat() { return audio_info.wFormatTag; }
  /// Returns true if all metadata has been parsed and is available
  bool isMetadataReady() { return is_metadata_ready; }
  /// Register a validation callback which is called after parsing just before
  /// playing the audio
  void setValidationCallback(bool (*cb)(AVIDecoder &avi)) {
    validation_cb = cb;
  }
  /// Provide the length of the video in seconds
  int videoSeconds() { return video_seconds; }
  /// Replace the synchronization logic with your implementation
  void setVideoAudioSync(VideoAudioSync *yourSync) { p_synch = yourSync; }

 protected:
  // NOTE(review): only the local variable in parseHeader() is ever set to
  // true, this member stays false - confirm whether that is intended
  bool header_is_avi = false;
  bool is_parsing_active = true;
  ParseState parse_state = ParseHeader;
  ParseBuffer parse_buffer;
  AVIMainHeader main_header;
  int stream_header_idx = -1;
  Vector<AVIStreamHeader> stream_header;
  BitmapInfoHeader video_info;
  WAVFormatX audio_info;
  Vector<StreamContentType> content_types;  // content type per stream index
  Stack<ParseObject> object_stack;          // currently open LISTs/chunks
  ParseObject current_stream_data;
  EncodedAudioOutput *p_output_audio = nullptr;
  VideoOutput *p_output_video = nullptr;
  long open_subchunk_len = 0;  // bytes still missing for the current subchunk
  long current_pos = 0;        // absolute position in the parsed stream
  long movi_end_pos = 0;       // absolute end of the movi LIST
  Str spaces;                  // indentation for the stack logging
  Str str;                     // scratch string used by getStr()
  char video_format[5] = {0};
  bool is_metadata_ready = false;
  bool (*validation_cb)(AVIDecoder &avi) = nullptr;
  bool is_mute = false;
  CopyDecoder copy_decoder;
  AudioDecoder *p_decoder = nullptr;
  int video_seconds = 0;
  VideoAudioSync defaultSynch;
  VideoAudioSync *p_synch = &defaultSynch;
  bool isCurrentStreamAudio() {
    return strncmp(stream_header[stream_header_idx].fccType, "auds", 4) == 0;
  }
  bool isCurrentStreamVideo() {
    return strncmp(stream_header[stream_header_idx].fccType, "vids", 4) == 0;
  }
  // we return true if at least one parse step was successful
  bool parse() {
    bool result = true;
    switch (parse_state) {
      case ParseHeader: {
        result = parseHeader();
        if (result)
          parse_state = ParseHdrl;
      } break;
      case ParseHdrl: {
        ParseObject hdrl = parseList("hdrl");
        result = hdrl.isValid();
        if (result) {
          parse_state = ParseAvih;
        }
      } break;
      case ParseAvih: {
        ParseObject avih = parseChunk("avih");
        result = avih.isValid();
        if (result) {
          main_header = *(avih.asAVIMainHeader(parse_buffer.data()));
          stream_header.resize(main_header.dwStreams);
          consume(avih.size());
          parse_state = ParseStrl;
        }
      } break;
      case ParseStrl: {
        ParseObject strl = parseList("strl");
        ParseObject strh = parseChunk("strh");
        stream_header[++stream_header_idx] =
            *(strh.asAVIStreamHeader(parse_buffer.data()));
        consume(strh.size());
        parse_state = ParseStrf;
      } break;
      case ParseStrf: {
        // the strf content depends on the stream type of the current strh
        ParseObject strf = parseChunk("strf");
        if (isCurrentStreamAudio()) {
          audio_info = *(strf.asAVIAudioFormat(parse_buffer.data()));
          setupAudioInfo();
          LOGI("audioFormat: %d (%x)", (int)audioFormat(),(int)audioFormat());
          content_types.push_back(Audio);
          consume(strf.size());
        } else if (isCurrentStreamVideo()) {
          video_info = *(strf.asAVIVideoFormat(parse_buffer.data()));
          setupVideoInfo();
          LOGI("videoFormat: %s", videoFormat());
          content_types.push_back(Video);
          video_format[4] = 0;
          consume(strf.size());
        } else {
          result = false;
        }
        parse_state = AfterStrf;
      } break;
      case AfterStrf: {
        // ignore all data until we find a new List
        int pos = parse_buffer.indexOf("LIST");
        if (pos >= 0) {
          consume(pos);
          ParseObject tmp = tryParseList();
          if (StrView(tmp.id()).equals("strl")) {
            parse_state = ParseStrl;
          } else if (StrView(tmp.id()).equals("movi")) {
            parse_state = ParseMovi;
          } else {
            // e.g. ignore info
            consume(tmp.size() + LIST_HEADER_SIZE);
          }
        } else {
          // no valid data, so throw it away, we keep the last 4 digits in case
          // if it contains the beginning of a LIST
          cleanupStack();
          consume(parse_buffer.available() - 4);
        }
      } break;
      case ParseMovi: {
        ParseObject movi = tryParseList();
        if (StrView(movi.id()).equals("movi")) {
          consume(LIST_HEADER_SIZE);
          is_metadata_ready = true;
          if (validation_cb)
            is_parsing_active = (validation_cb(*this));
          processStack(movi);
          movi_end_pos = movi.end_pos;
          parse_state = SubChunk;
          // trigger new write
          result = false;
        }
      } break;
      case SubChunk: {
        // rec is optional
        ParseObject hdrl = tryParseList();
        if (StrView(hdrl.id()).equals("rec")) {
          consume(CHUNK_HEADER_SIZE);
          processStack(hdrl);
        }
        current_stream_data = parseAVIStreamData();
        parse_state = SubChunkContinue;
        open_subchunk_len = current_stream_data.open;
        if (current_stream_data.isVideo()) {
          LOGI("video:[%d]->[%d]", (int)current_stream_data.start_pos,
               (int)current_stream_data.end_pos);
          if (p_output_video != nullptr)
            p_output_video->beginFrame(current_stream_data.open);
        } else if (current_stream_data.isAudio()) {
          LOGI("audio:[%d]->[%d]", (int)current_stream_data.start_pos,
               (int)current_stream_data.end_pos);
        } else {
          LOGW("unknown subchunk at %d", (int)current_pos);
        }
      } break;
      case SubChunkContinue: {
        writeData();
        if (open_subchunk_len == 0) {
          if (current_stream_data.isVideo() && p_output_video != nullptr) {
            uint32_t time_used_ms = p_output_video->endFrame();
            p_synch->delayVideoFrame(main_header.dwMicroSecPerFrame, time_used_ms);
          }
          // NOTE(review): the AVI index FOURCC is usually "idx1" and
          // tryParseChunk() reports zero sized chunks as invalid - verify
          // that this branch can actually trigger
          if (tryParseChunk("idx").isValid()) {
            parse_state = ParseIgnore;
          } else if (tryParseList("rec").isValid()) {
            parse_state = ParseRec;
          } else {
            if (current_pos >= movi_end_pos) {
              parse_state = ParseIgnore;
            } else {
              parse_state = SubChunk;
            }
          }
        }
      } break;
      case ParseIgnore: {
        LOGD("ParseIgnore");
        parse_buffer.clear();
      } break;
      default:
        result = false;
        break;
    }
    return result;
  }
  /// Publishes the parsed audio format to the decoder and the listeners
  void setupAudioInfo() {
    info.channels = audio_info.nChannels;
    info.bits_per_sample = audio_info.wBitsPerSample;
    info.sample_rate = audio_info.nSamplesPerSec;
    info.logInfo();
    // adjust the audio info if necessary
    if (p_decoder != nullptr) {
      p_decoder->setAudioInfo(info);
      info = p_decoder->audioInfo();
    }
    notifyAudioChange(info);
  }
  /// Determines the video FOURCC and the video length in seconds
  void setupVideoInfo() {
    memcpy(video_format, stream_header[stream_header_idx].fccHandler, 4);
    AVIStreamHeader *vh = &stream_header[stream_header_idx];
    if (vh->dwScale <= 0) {
      vh->dwScale = 1;
    }
    int rate = vh->dwRate / vh->dwScale;
    video_seconds = rate <= 0 ? 0 : vh->dwLength / rate;
    LOGI("videoSeconds: %d seconds", video_seconds);
  }
  /// Forwards the buffered payload of the current subchunk to audio or video
  void writeData() {
    long to_write = min((long)parse_buffer.available(), open_subchunk_len);
    if (current_stream_data.isAudio()) {
      LOGD("audio %d", (int)to_write);
      if (!is_mute){
        p_synch->writeAudio(p_output_audio, parse_buffer.data(), to_write);
      }
      open_subchunk_len -= to_write;
      cleanupStack();
      consume(to_write);
    } else if (current_stream_data.isVideo()) {
      LOGD("video %d", (int)to_write);
      if (p_output_video != nullptr)
        p_output_video->write(parse_buffer.data(), to_write);
      open_subchunk_len -= to_write;
      cleanupStack();
      consume(to_write);
    }
  }
  // 'RIFF' fileSize fileType (data)
  bool parseHeader() {
    // NOTE(review): this local shadows the header_is_avi member, so the
    // member is never updated here - confirm whether that is intended
    bool header_is_avi = false;
    int headerSize = 12;
    if (getStr(0, 4).equals("RIFF")) {
      ParseObject result;
      uint32_t header_file_size = getInt(4);
      header_is_avi = getStr(8, 4).equals("AVI ");
      result.set(current_pos, "AVI ", header_file_size, AVIChunk);
      processStack(result);
      consume(headerSize);
    } else {
      LOGE("parseHeader");
    }
    return header_is_avi;
  }
  /// We parse a chunk and provide the FOURCC id and size: No content data is
  /// stored
  ParseObject tryParseChunk() {
    ParseObject result;
    result.set(current_pos, getStr(0, 4), 0, AVIChunk);
    return result;
  }
  /// We try to parse the indicated chunk and determine the size: No content
  /// data is stored
  ParseObject tryParseChunk(const char *id) {
    ParseObject result;
    if (getStr(0, 4).equals(id)) {
      result.set(current_pos, id, 0, AVIChunk);
    }
    return result;
  }
  /// We try to parse the indicated LIST: No content data is stored
  ParseObject tryParseList(const char *id) {
    ParseObject result;
    StrView &list_id = getStr(8, 4);
    // NOTE(review): getStr(0, 3) only yields "LIS" which is compared against
    // "LIST" - this looks like it should be getStr(0, 4); verify against the
    // StrView::equals semantics
    if (list_id.equals(id) && getStr(0, 3).equals("LIST")) {
      result.set(current_pos, getStr(8, 4), getInt(4), AVIList);
    }
    return result;
  }
  /// We try to parse the actual state for any list
  ParseObject tryParseList() {
    ParseObject result;
    if (getStr(0, 4).equals("LIST")) {
      result.set(current_pos, getStr(8, 4), getInt(4), AVIList);
    }
    return result;
  }
  /// We load the indicated chunk from the current data
  ParseObject parseChunk(const char *id) {
    ParseObject result;
    int chunk_size = getInt(4);
    if (getStr(0, 4).equals(id) && parse_buffer.size() >= chunk_size) {
      result.set(current_pos, id, chunk_size, AVIChunk);
      processStack(result);
      consume(CHUNK_HEADER_SIZE);
    }
    return result;
  }
  /// We load the indicated list from the current data
  ParseObject parseList(const char *id) {
    ParseObject result;
    if (getStr(0, 4).equals("LIST") && getStr(8, 4).equals(id)) {
      int size = getInt(4);
      result.set(current_pos, id, size, AVIList);
      processStack(result);
      consume(LIST_HEADER_SIZE);
    }
    return result;
  }
  /// Parses a stream data record (e.g. "01wb" or "00dc") with its size
  ParseObject parseAVIStreamData() {
    ParseObject result;
    int size = getInt(4);
    result.set(current_pos, getStr(0, 4), size, AVIStreamData);
    if (result.isValid()) {
      processStack(result);
      consume(8);
    }
    return result;
  }
  /// Registers an open object on the stack and logs it indented by stack depth
  void processStack(ParseObject &result) {
    cleanupStack();
    object_stack.push(result);
    spaces.setChars(' ', object_stack.size());
    LOGD("%s - %s (%d-%d) size:%d", spaces.c_str(), result.id(),
         (int)result.start_pos, (int)result.end_pos, (int)result.data_size);
  }
  /// Pops all objects from the stack which end before the current position
  void cleanupStack() {
    ParseObject current;
    // make sure that we remove the object from the stack if we passed the end
    object_stack.peek(current);
    while (current.end_pos <= current_pos) {
      object_stack.pop(current);
      object_stack.peek(current);
    }
  }
  /// Provides the string at the indicated byte offset with the indicated length
  StrView &getStr(int offset, int len) {
    str.setCapacity(len + 1);
    const char *data = (const char *)parse_buffer.data();
    str.copyFrom((data + offset), len, 5);
    return str;
  }
  /// Provides the int32 at the indicated byte offset
  uint32_t getInt(int offset) {
    uint32_t *result = (uint32_t *)(parse_buffer.data() + offset);
    return *result;
  }
  /// We remove the processed bytes from the beginning of the buffer
  void consume(int len) {
    current_pos += len;
    parse_buffer.consume(len);
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,407 @@
/**
* @file ContainerBinary.h
* @author Phil Schatzmann
* @brief A lean and efficient container format which provides Header records
* with audio info, Audio records with the audio and Meta which
* can contain any additional information. This can be used together with a
* codec which does not transmit the audio information or has variable frame
* lengths. We expect that a single write() is providing full frames.
*
* @version 0.1
* @date 2022-05-04
*
* @copyright Copyright (c) 2022
*
*/
#pragma once
#include <string.h>
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
namespace audio_tools {
/// Record types used by the binary container format
enum class ContainerType : uint8_t {
  Header = 1,    // AudioInfo configuration record
  Audio = 2,     // encoded audio payload record
  Meta = 3,      // application defined meta data record
  Undefined = 0
};
/// Common prefix of every record: "\r\n" separator, record type, total length
/// and a checksum of the payload
struct CommonHeader {
  CommonHeader() = default;
  CommonHeader(ContainerType type, uint16_t len) {
    this->type = type;
    this->len = len;
  }
  char header[2] = {'\r','\n'};  // separator used by the decoder to resync
  ContainerType type;
  uint16_t len;      // record length (incl. header for audio/meta records)
  uint8_t checksum = 0;  // XOR checksum of the payload (0 for non-audio)
};
/// Config record: common header followed by the AudioInfo
/// NOTE(review): here len excludes the header size, unlike the audio/meta
/// records - the decoder reads sizeof(config) directly, so verify consistency
struct SimpleContainerConfig {
  SimpleContainerConfig() = default;
  CommonHeader common{ContainerType::Header, sizeof(AudioInfo)};
  AudioInfo info;
};
/// Header prefix written before each encoded audio record
struct SimpleContainerDataHeader {
  CommonHeader common{ContainerType::Audio, 0};
};
/// Header prefix written before each meta data record
struct SimpleContainerMetaDataHeader {
  CommonHeader common{ContainerType::Meta, 0};
};
// struct ProcessedResult {
// ContainerType type = ContainerType::Undefined;
// // total length incl header
// int total_len = 0;
// // processed bytes incl header of last step
// int processed = 0;
// // still (total) open
// int open = 0;
// };
/// @brief Calculates the XOR checksum over len bytes (0 for an empty input)
/// @param data start of the buffer (must not be null when len > 0)
/// @param len number of bytes to include
/// @return XOR of all bytes
static uint8_t checkSum(const uint8_t *data, size_t len) {
  uint8_t result = 0;
  // size_t index avoids the signed/unsigned comparison of the int version
  for (size_t j = 0; j < len; j++) {
    result ^= data[j];
  }
  return result;
}
/// @brief Error types reported to the decoder's error handler callback
enum BinaryContainerEncoderError { InvalidHeader, InvalidChecksum, DataMissing};
/**
* @brief Wraps the encoded data into Config, Data, and Meta segments so that we
* can recover the audio configuration and orignial segments if this is
* relevant. We assume that a full segment is written with each call of write();
* The segments are separated with a new line character.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class BinaryContainerEncoder : public AudioEncoder {
 public:
  BinaryContainerEncoder() = default;
  BinaryContainerEncoder(AudioEncoder &encoder) { p_codec = &encoder; }
  BinaryContainerEncoder(AudioEncoder *encoder) { p_codec = encoder; }
  void setEncoder(AudioEncoder *encoder) { p_codec = encoder; }
  void setOutput(Print &outStream) {
    LOGD("BinaryContainerEncoder::setOutput");
    p_out = &outStream;
  }
  /// Starts the wrapped codec; the config record itself is written lazily by
  /// the first write() call
  bool begin() override {
    TRACED();
    // target.begin();
    bool rc = p_codec->begin();
    p_codec->setAudioInfo(cfg.info);
    is_beginning = true;
    return rc;
  }
  void setAudioInfo(AudioInfo info) override {
    TRACED();
    if (info != audioInfo()) {
      cfg.info = info;
    }
  }
  AudioInfo audioInfo() override { return cfg.info; }
  /// Adds meta data segment
  size_t writeMeta(const uint8_t *data, size_t len) {
    LOGD("BinaryContainerEncoder::writeMeta: %d", (int)len);
    meta.common.len = len + sizeof(SimpleContainerMetaDataHeader);
    // NOTE(review): variable length array (compiler extension, not standard
    // C++) on the stack - large meta data may overflow the stack
    uint8_t tmp_array[meta.common.len];
    memcpy(tmp_array, &meta, sizeof(meta));
    memcpy(tmp_array + sizeof(meta), data, len);
    output(tmp_array, meta.common.len);
    return len;
  }
  /// Add data segment. On first write we also add a AudioInfo header
  size_t write(const uint8_t *data, size_t len) {
    LOGD("BinaryContainerEncoder::write: %d", (int)len);
    if (is_beginning) {
      writeHeader();
      is_beginning = false;
    }
    writeAudio((uint8_t *)data, len);
    return len;
  }
  void end() { p_codec->end(); }
  operator bool() { return true; };
  virtual const char *mime() { return "audio/binary"; };

 protected:
  uint64_t packet_count = 0;
  bool is_beginning = true;   // true until the config record was written
  int repeat_header;          // NOTE(review): never initialized or used here
  SimpleContainerConfig cfg;           // config record (written first)
  SimpleContainerDataHeader dh;        // header prefix for audio records
  SimpleContainerMetaDataHeader meta;  // header prefix for meta records
  AudioEncoder *p_codec = nullptr;
  Print *p_out = nullptr;
  /// Encodes the data with the wrapped codec and writes an audio record:
  /// header (with total length + checksum) followed by the encoded payload
  void writeAudio(const uint8_t *data, size_t len) {
    LOGD("writeAudio: %d", (int)len);
    // encode data
    SingleBuffer<uint8_t> tmp_buffer{(int)len};
    QueueStream<uint8_t> tmp{tmp_buffer};
    tmp.begin();
    p_codec->setOutput(tmp);
    p_codec->write(data, len);
    // output of audio data header
    dh.common.len = tmp.available() + sizeof(CommonHeader);
    dh.common.checksum = checkSum(tmp_buffer.data(), tmp_buffer.available());
    output((uint8_t *)&dh, sizeof(dh));
    // output of data
    output(tmp_buffer.data(), tmp_buffer.available());
  }
  /// Writes the config record with the current AudioInfo
  void writeHeader() {
    LOGD("writeHeader");
    output((uint8_t *)&cfg, sizeof(cfg));
  }
  /// Writes to the defined output; always reports len as written
  size_t output(const uint8_t *data, size_t len) {
    if (p_out != nullptr) {
      int written = p_out->write((uint8_t *)data, len);
      LOGD("output: %d -> %d", (int)len, written);
    } else
      LOGW("output not defined");
    return len;
  }
};
/**
* @brief Decodes the provided data from the DAT and CFG segments
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class BinaryContainerDecoder : public ContainerDecoder {
 public:
  BinaryContainerDecoder() = default;
  BinaryContainerDecoder(AudioDecoder &decoder) { p_codec = &decoder; }
  BinaryContainerDecoder(AudioDecoder *decoder) { p_codec = decoder; }
  void setDecoder(AudioDecoder *decoder){
    p_codec = decoder;
  }
  // Defines the output: this method is called 2 times: first to define
  // output defined in the EncodedAudioStream and then to define the
  // real output in the output chain.
  void setOutput(Print &outStream) {
    LOGD("BinaryContainerDecoder::setOutput")
    p_out = &outStream;
  }
  /// Registers a callback which receives the payload of meta records
  void setMetaCallback(void (*callback)(uint8_t*, int, void*)) {
    meta_callback = callback;
  }
  bool begin() {
    TRACED();
    is_first = true;
    return true;
  }
  void end() { TRACED(); }
  /// Buffers the data and processes all complete records
  size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", (int)len);
    uint8_t *data8 = (uint8_t *)data;
    // grow the buffer on demand so that complete records fit
    if (buffer.size() < len) {
      buffer.resize(
          std::max(static_cast<int>(DEFAULT_BUFFER_SIZE + header_size),
                   static_cast<int>(len * 4 + header_size)));
    }
    size_t result = buffer.writeArray(data8, len);
    while (parseBuffer())
      ;
    return ignore_write_errors ? len : result;
  }
  operator bool() { return true; };
  void addErrorHandler(void (*error_handler)(BinaryContainerEncoderError error, BinaryContainerDecoder* source, void* ref)){
    this->error_handler = error_handler;
  }
  /// If set to true we do not expect a retry to write the missing data but continue just with the next. (Default is true);
  void setIgnoreWriteErrors(bool flag){
    ignore_write_errors = flag;
  }
  /// Provide additional information for callback
  void setReference(void* ref){
    reference = ref;
  }

 protected:
  bool is_first = true;
  CommonHeader header;  // header of the record currently being processed
  const size_t header_size = sizeof(header);
  AudioDecoder *p_codec = nullptr;
  SingleBuffer<uint8_t> buffer{0};
  Print *p_out = nullptr;
  void (*meta_callback)(uint8_t* data, int len, void* ref) = nullptr;
  void (*error_handler)(BinaryContainerEncoderError error, BinaryContainerDecoder* source, void* ref) = nullptr;
  bool ignore_write_errors = true;
  void * reference = nullptr;
  /// Locates the next record (separated by "\r\n") and processes it when it
  /// is complete; returns true as long as further records may follow
  bool parseBuffer() {
    LOGD("parseBuffer");
    bool result = false;
    // NOTE(review): StrView over the raw buffer relies on the data being
    // null terminated - verify that SingleBuffer guarantees this
    StrView str{(const char *)buffer.data()};
    int start = str.indexOf("\r\n");
    LOGD("start: %d", start);
    if (start < 0) {
      return false;
    }
    // get next record
    if (buffer.available() - start > sizeof(header)) {
      // determine header
      memmove((uint8_t *)&header, buffer.data() + start, sizeof(header));
      // check header
      if (!isValidHeader()) {
        LOGW("invalid header: %d", header.type);
        if (error_handler) error_handler(InvalidHeader, this, reference);
        nextRecord();
        return false;
      };
      if (buffer.available() - start >= header.len) {
        // move to start of frame
        buffer.clearArray(start);
        // process frame
        result = processData();
      } else {
        LOGD("not enough data - available %d / req: %d", buffer.available(),
             header.len);
        if (error_handler) error_handler(DataMissing, this, reference);
      }
    } else {
      LOGD("not enough data for header: %d", buffer.available());
      if (error_handler) error_handler(DataMissing, this, reference);
    }
    return result;
  }
  // processes the completed data from the buffer: e.g. writes it
  bool processData() {
    LOGD("processData");
    bool rc = false;
    switch (header.type) {
      case ContainerType::Header: {
        LOGD("Header");
        // config record: restore AudioInfo and (re)start the codec
        SimpleContainerConfig config;
        buffer.readArray((uint8_t *)&config, sizeof(config));
        info = config.info;
        notifyAudioChange(info);
        info.logInfo();
        p_codec->setAudioInfo(info);
        p_codec->begin();
        rc = true;
      } break;
      case ContainerType::Audio: {
        LOGD("Audio");
        buffer.clearArray(sizeof(header));
        int data_len = header.len - header_size;
        uint8_t crc = checkSum(buffer.data(), data_len);
        if (header.checksum == crc) {
          // decode
          SingleBuffer<uint8_t> tmp_buffer{data_len * 5};
          QueueStream<uint8_t> tmp{tmp_buffer};
          tmp.begin();
          p_codec->setOutput(tmp);
          p_codec->write(buffer.data(), data_len);
          // output decoded data
          output(tmp_buffer.data(), tmp_buffer.available());
          buffer.clearArray(data_len);
        } else {
          LOGW("invalid checksum");
          if (error_handler) error_handler(InvalidChecksum, this, reference);
          // move to next record
          nextRecord();
          return false;
        }
        rc = true;
      } break;
      case ContainerType::Meta: {
        LOGD("Meta");
        buffer.clearArray(sizeof(header));
        int data_len = header.len - header_size;
        if (meta_callback != nullptr) {
          meta_callback(buffer.data(), data_len, reference);
        }
        buffer.clearArray(data_len);
        rc = true;
      } break;
    }
    return rc;
  }
  /// A header is accepted when the type is known; config and meta records are
  /// expected to carry a zero checksum
  bool isValidHeader() {
    switch (header.type) {
      case ContainerType::Header:
        return header.checksum == 0;
      case ContainerType::Audio:
        return true;
      case ContainerType::Meta:
        return header.checksum == 0;
    }
    return false;
  }
  /// Returns the next byte without removing it (0 when the buffer is empty)
  uint8_t peekBufferValue(){
    uint8_t byte_value=0;
    buffer.peek(byte_value);
    return byte_value;
  }
  /// Skips buffered data up to the '\n' of the next record separator
  void nextRecord() {
    TRACED();
    uint8_t byte_value;
    while (buffer.available() && peekBufferValue() != '\n')
      buffer.read(byte_value);
  }
  // writes the data to the decoder which forwards it to the output; if there
  // is no codec we write to the output instead
  size_t output(uint8_t *data, size_t len) {
    LOGD("output: %d", (int)len);
    if (p_out != nullptr)
      p_out->write((uint8_t *)data, len);
    else
      LOGW("output not defined");
    return len;
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,174 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/M4AAudioDemuxer.h"
#include "AudioTools/AudioCodecs/MultiDecoder.h"
namespace audio_tools {
/**
* @brief M4A Demuxer that extracts audio from M4A/MP4 containers.
* The audio is decoded into pcm with the help of the provided decoder.
* format.
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class ContainerM4A : public ContainerDecoder {
public:
/**
* @brief Default constructor: If no decoder is provided, the
* raw audio data is provided to the defined output.
*/
ContainerM4A() {
  // register ourselves so the static demuxer callback can reach this instance
  demux.setReference(this);
  demux.setCallback(decodeAudio);
};
/**
* @brief Constructor with decoder. Sets up the demuxer and decoder
* notification.
* @param decoder Reference to a MultiDecoder for PCM output.
*/
ContainerM4A(MultiDecoder& decoder) : ContainerM4A() { setDecoder(decoder); }  // delegate, then attach the decoder
/**
* @brief Set the output stream for decoded or raw audio.
* @param out_stream Output AudioStream.
*/
void setOutput(Print& out_stream) override {
  // propagate the output to the decoder, avoiding a redundant re-assignment
  if (p_decoder != nullptr && p_decoder->getOutput()!=&out_stream) {
    p_decoder->setOutput(out_stream);
  }
  ContainerDecoder::setOutput(out_stream);
}
/**
 * @brief Returns true if the result is PCM (decoder is present).
 * @return true if PCM output, false otherwise.
 */
bool isResultPCM() override { return p_decoder != nullptr; }
/**
* @brief Initialize the demuxer and decoder.
* @return true on success.
*/
bool begin() override {
  demux.begin();
  // the decoder is optional: without it the raw frames are forwarded
  if (p_decoder) p_decoder->begin();
  is_active = true;
  return true;
}
/**
* @brief End the demuxer and decoder, releasing resources.
*/
void end() override {
TRACED();
is_active = false;
is_magic_cookie_processed = false;
if (p_decoder) p_decoder->end();
}
/**
* @brief Feed data to the demuxer for parsing.
* @param data Pointer to input data.
* @param len Length of input data.
* @return Number of bytes processed (always len).
*/
size_t write(const uint8_t* data, size_t len) override {
if (is_active == false) return len;
demux.write(data, len);
return len;
}
/**
* @brief Returns true if the demuxer is active.
* @return true if active, false otherwise.
*/
operator bool() override { return is_active; }
/**
* @brief Sets the buffer to use for sample sizes.
* You can use this to provide a custom buffer that
* does not rely on RAM (e.g a file based buffer or
* one using Redis)
* @param buffer Reference to the buffer to use.
*/
virtual void setSampleSizesBuffer(BaseBuffer<stsz_sample_size_t>& buffer) {
demux.setSampleSizesBuffer(buffer);
}
/**
* @brief Sets the buffer to use for sample sizes. This is currently
* not used!
* @param buffer Reference to the buffer to use.
*/
virtual void setChunkOffsetsBuffer(BaseBuffer<uint32_t>& buffer) {
demux.setChunkOffsetsBuffer(buffer);
}
/**
* @brief Sets the decoder to use for audio frames.
* @param decoder Reference to a MultiDecoder for PCM output.
* @return true if set successfully, false otherwise.
*/
bool setDecoder(MultiDecoder& decoder) {
p_decoder = &decoder;
p_decoder->addNotifyAudioChange(*this);
return true;
}
M4AAudioDemuxer& getDemuxer() {
return demux;
}
protected:
bool is_active = false; ///< True if demuxer is active.
bool is_magic_cookie_processed =
false; ///< True if ALAC magic cookie has been processed.
MultiDecoder* p_decoder = nullptr; ///< Pointer to the MultiDecoder.
M4AAudioDemuxer demux; ///< Internal demuxer instance.
/**
* @brief Static callback for demuxed audio frames.
* Handles decoder selection and magic cookie for ALAC.
* @param frame The demuxed audio frame.
* @param ref Reference to the ContainerM4A instance.
*/
static void decodeAudio(const M4AAudioDemuxer::Frame& frame, void* ref) {
ContainerM4A* self = static_cast<ContainerM4A*>(ref);
if (self->p_decoder == nullptr) {
self->p_print->write(frame.data, frame.size);
return;
}
MultiDecoder& dec = *(self->p_decoder);
const char* old_mime = dec.selectedMime();
// select decoder based on mime type
if (!dec.selectDecoder(frame.mime)) {
const char* mime = frame.mime ? frame.mime : "(nullptr)";
LOGE("No decoder found for mime type: %s", mime);
return;
}
// for ALAC only: process magic cookie if not done yet
if (StrView(frame.mime) == "audio/alac" &&
!self->is_magic_cookie_processed) {
auto& magic_cookie = self->demux.getALACMagicCookie();
if (magic_cookie.size() > 0) {
if (!dec.setCodecConfig(magic_cookie.data(), magic_cookie.size())) {
LOGE("Failed to set ALAC magic cookie for decoder: %s",
dec.selectedMime());
}
}
self->is_magic_cookie_processed = true;
}
// write encoded data to decoder
dec.write(frame.data, frame.size);
// restore previous decoder
dec.selectDecoder(old_mime);
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,331 @@
/**
* @file ContainerOSC.h
* @author Phil Schatzmann
* @brief A simple container format which uses OSC messages to
 * transmit Header records with audio info and Audio records with the audio
* data.
*
* @version 0.1
* @date 2025-05-20
*
* @copyright Copyright (c) 2022
*
*/
#pragma once
#include <string.h>
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/MultiDecoder.h"
#include "AudioTools/Communication/OSCData.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
namespace audio_tools {
/**
* @brief Wraps the encoded data into OSC info and data segments so that the
 * receiver can recover the audio configuration and original segments.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OSCContainerEncoder : public AudioEncoder {
public:
OSCContainerEncoder() = default;
OSCContainerEncoder(AudioEncoder &encoder) { p_codec = &encoder; }
void setEncoder(AudioEncoder *encoder) { p_codec = encoder; }
void setOutput(Print &outStream) { p_out = &outStream; }
bool begin() override {
TRACED();
if (p_codec == nullptr) return false;
osc_out.setOutput(*p_out);
osc_out.begin();
p_codec->setOutput(osc_out);
p_codec->setAudioInfo(audioInfo());
is_active = p_codec->begin();
writeAudioInfo(audioInfo(), p_codec->mime());
return is_active;
}
void setAudioInfo(AudioInfo info) override {
TRACED();
if (is_active) writeAudioInfo(audioInfo(), p_codec->mime());
AudioWriter::setAudioInfo(info);
}
/// Add data segment. On first write we also add a AudioInfo header
size_t write(const uint8_t *data, size_t len) {
LOGD("OSCContainerEncoder::write: %d", (int)len);
if ((repeat_info > 0) && (packet_count % repeat_info == 0)) {
writeAudioInfo(audioInfo(), p_codec->mime());
}
p_codec->write(data, len);
packet_count++;
return len;
}
void end() {
p_codec->end();
is_active = false;
}
operator bool() { return is_active; };
virtual const char *mime() { return "audio/OSC"; };
/// Activate/deactivate the sending of the audio info
void setInfoActive(bool flag) { is_send_info_active = flag; }
/// Automatically resend audio info ever nth write.
void setRepeatInfoEvery(int packet_count) {
this->repeat_info = packet_count;
}
/// Returns the sequence number of the next packet
uint64_t getSequenceNumber() { return osc_out.getSequenceNumber(); }
/// Define a reference object to be provided by the callback
void setReference(void *ref) { osc_out.setReference(ref); }
/// Get informed about the encoded packages
void setEncodedWriteCallback(void (*write_callback)(uint8_t *data, size_t len,
uint64_t seq,
void *ref)) {
osc_out.setEncodedWriteCallback(write_callback);
}
/// Resend the encoded data
size_t resendEncodedData(uint8_t *data, size_t len, uint64_t seq) {
return osc_out.write(data, len, seq);
}
protected:
uint64_t packet_count = 0;
int repeat_info = 0;
bool is_active = false;
bool is_send_info_active = true;
AudioEncoder *p_codec = nullptr;
Print *p_out = nullptr;
/// Output Encoded Audio via OSC
class OSCOutput : public AudioOutput {
public:
void setReference(void *ref) { this->ref = ref; }
void setOutput(Print &outStream) { p_out = &outStream; }
void setEncodedWriteCallback(void (*write_callback)(
uint8_t *data, size_t len, uint64_t seq, void *ref)) {
this->encoded_write_callback = write_callback;
}
uint64_t getSequenceNumber() { return sequence_number; }
bool begin() {
sequence_number = 0;
return true;
}
size_t write(const uint8_t *data, size_t len) override {
size_t result = write(data, len);
sequence_number++;
return result;
}
size_t write(const uint8_t *data, size_t len, uint64_t seq) {
LOGD("writeAudio: %d", (int)len);
if (encoded_write_callback != nullptr) {
encoded_write_callback((uint8_t *)data, len, sequence_number, ref);
}
uint8_t osc_data[len + 20]; // 20 is guess to cover address & fmt
OSCData osc{osc_data, sizeof(osc_data)};
osc.setAddress("/audio/data");
osc.setFormat("ttb");
osc.write((uint64_t)millis());
// we use a uint64_t for a sequence number
osc.write(sequence_number);
osc.write(data, len);
p_out->write(osc_data, osc.size());
return len;
}
protected:
void (*encoded_write_callback)(uint8_t *data, size_t len, uint64_t seq,
void *ref) = nullptr;
Print *p_out = nullptr;
uint64_t sequence_number = 0;
void *ref = nullptr;
} osc_out;
/// OUtput AudioInfo via OSC
void writeAudioInfo(AudioInfo info, const char *mime) {
if (is_send_info_active) {
LOGD("writeAudioInfo");
uint8_t osc_data[100];
OSCData osc{osc_data, sizeof(osc_data)};
osc.setAddress("/audio/info");
osc.setFormat("iiis");
osc.write((int32_t)info.sample_rate);
osc.write((int32_t)info.channels);
osc.write((int32_t)info.bits_per_sample);
osc.write(mime);
p_out->write(osc_data, osc.size());
}
}
};
/**
* @brief Decodes the provided data from the OSC segments. I recommend to
 * assign a MultiDecoder so that we can support multiple audio types.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OSCContainerDecoder : public ContainerDecoder {
public:
OSCContainerDecoder() = default;
OSCContainerDecoder(AudioDecoder &decoder) {
setDecoder(decoder);
}
OSCContainerDecoder(MultiDecoder &decoder) {
setDecoder(decoder);
}
/// Defines the decoder to be used
void setDecoder(AudioDecoder &decoder) { p_codec = &decoder; }
/// Defines the decoder to be used: special logic for multidecoder
void setDecoder(MultiDecoder &decoder) {
p_codec = &decoder;
is_multi_decoder = true;
}
/// Optionally define you own OSCData object
void setOSCData(OSCData &osc) { p_osc = &osc; }
void setOutput(Print &outStream) {
LOGD("OSCContainerDecoder::setOutput")
p_out = &outStream;
}
bool begin() {
TRACED();
if (p_codec == nullptr || p_osc == nullptr) return false;
p_osc->setReference(this);
p_osc->addCallback("/audio/info", parseInfo, OSCCompare::StartsWith);
p_osc->addCallback("/audio/data", parseData, OSCCompare::StartsWith);
is_active = true;
return true;
}
void end() { is_active = false; }
size_t write(const uint8_t *data, size_t len) {
if (!is_active) return 0;
LOGD("write: %d", (int)len);
if (!p_osc->parse((uint8_t *)data, len)) {
return 0;
}
return len;
}
operator bool() { return is_active; };
/// Provides the mime type from the encoder
const char *mime() { return mime_str.c_str(); };
/// Provides the sequence number of the last packet
uint64_t getSequenceNumber() { return seq_no; }
/// Adds an new parser callback for a specific address matching string
bool addParserCallback(const char *address,
bool (*callback)(OSCData &data, void *ref),
OSCCompare compare = OSCCompare::Matches) {
if (p_osc == nullptr) return false;
p_osc->addCallback(address, callback, compare);
return true;
}
/// Replace the write to the decoder with a callback:
void setWriteCallback(bool (*write_callback)(uint64_t time, uint64_t seq,
uint8_t *data, size_t len,
void *ref)) {
this->write_callback = write_callback;
}
/// Callback to be called when data is missing
void setMissingDataCallback(void (*missing_data_callback)(uint64_t from_seq,
uint64_t to_seq,
void *ref)) {
this->missing_data_callback = missing_data_callback;
}
/// Provide a reference object to the callback
void setReference(void *ref) { this->ref = ref; }
protected:
bool is_active = false;
bool is_multi_decoder = false;
AudioDecoder *p_codec = nullptr;
SingleBuffer<uint8_t> buffer{0};
Print *p_out = nullptr;
OSCData osc_default;
OSCData *p_osc = &osc_default;
Str mime_str;
uint64_t seq_no = 0;
/// Return false to complete the processing w/o writing to the decoder
bool (*write_callback)(uint64_t time, uint64_t seq, uint8_t *data, size_t len,
void *ref) = nullptr;
void (*missing_data_callback)(uint64_t from_seq, uint64_t to_seq,
void *ref) = missingDataCallback;
void *ref = nullptr;
/// Default callback for missing data: just log the missing range
static void missingDataCallback(uint64_t from_seq, uint64_t to_seq,
void *ref) {
LOGW("Missing sequence numbers %d - %d", from_seq, to_seq);
}
static bool parseData(OSCData &osc, void *ref) {
uint64_t time = osc.readTime();
uint64_t seq = osc.readTime();
OSCBinaryData data = osc.readData();
OSCContainerDecoder *self = static_cast<OSCContainerDecoder *>(ref);
// Check for missing sequence numbers
if (self->seq_no + 1 != seq) {
self->missing_data_callback(self->seq_no + 1, seq - 1, self->ref);
}
// store the actual sequence number
self->seq_no = seq;
// call write callbak if defined
if (self->write_callback != nullptr) {
bool ok = self->write_callback(time, seq, data.data, data.len, ref);
if (!ok) return true;
}
// output to decoder
if (self->p_codec != nullptr) {
self->p_codec->write(data.data, data.len);
}
return true;
}
static bool parseInfo(OSCData &osc, void *ref) {
AudioInfo info;
info.sample_rate = osc.readInt32();
info.channels = osc.readInt32();
info.bits_per_sample = osc.readInt32();
const char *mime = osc.readString();
OSCContainerDecoder *self = static_cast<OSCContainerDecoder *>(ref);
if (self != nullptr) {
self->setAudioInfo(info);
self->mime_str = mime;
LOGI("mime: %s", mime);
// select the right decoder based on the mime type
if (self->is_multi_decoder)
static_cast<MultiDecoder*>(self->p_codec)->selectDecoder(mime);
}
return true;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,421 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/CodecOpus.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include "oggz.h"
#define OGG_READ_SIZE (1024)
#define OGG_DEFAULT_BUFFER_SIZE (OGG_READ_SIZE)
// #define OGG_DEFAULT_BUFFER_SIZE (246)
// #define OGG_READ_SIZE (512)
namespace audio_tools {
/**
* @brief Decoder for Ogg Container. Decodes a packet from an Ogg
* container. The Ogg begin segment contains the AudioInfo structure. You can
* subclass and overwrite the beginOfSegment() method to implement your own
* headers
* Dependency: https://github.com/pschatzmann/arduino-libopus
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OggContainerDecoder : public ContainerDecoder {
 public:
  /**
   * @brief Construct a new OggContainerDecoder object: without an explicit
   * decoder the demuxed packets are just copied to the output.
   */
  OggContainerDecoder() {
    p_codec = &dec_copy;
    out.setDecoder(p_codec);
  }

  OggContainerDecoder(AudioDecoder *decoder) { setDecoder(decoder); }
  OggContainerDecoder(AudioDecoder &decoder) { setDecoder(&decoder); }

  /// Defines the decoder that is applied to the demuxed packets
  void setDecoder(AudioDecoder *decoder) {
    p_codec = decoder;
    out.setDecoder(p_codec);
  }

  /// Defines the output Stream
  void setOutput(Print &print) override { out.setOutput(&print); }

  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    out.addNotifyAudioChange(bi);
    ContainerDecoder::addNotifyAudioChange(bi);
  }

  AudioInfo audioInfo() override { return out.audioInfo(); }

  bool begin(AudioInfo info) override {
    TRACED();
    this->info = info;
    return begin();
  }

  bool begin() override {
    TRACED();
    out.setAudioInfo(info);
    out.begin();
    if (p_oggz == nullptr) {
      p_oggz = oggz_new(OGGZ_READ | OGGZ_AUTO);  // OGGZ_NONSTRICT
      is_open = true;
      // Callback to Replace standard IO
      if (oggz_io_set_read(p_oggz, ogg_io_read, this) != 0) {
        LOGE("oggz_io_set_read");
        is_open = false;
      }
      // Callback to process full packets
      if (oggz_set_read_callback(p_oggz, -1, read_packet, this) != 0) {
        LOGE("oggz_set_read_callback");
        is_open = false;
      }
      if (oggz_set_read_page(p_oggz, -1, read_page, this) != 0) {
        LOGE("oggz_set_read_page");
        is_open = false;
      }
    }
    return is_open;
  }

  void end() override {
    TRACED();
    // guard: end() might be called without a prior (successful) begin()
    if (p_oggz != nullptr) {
      flush();
      oggz_close(p_oggz);
      p_oggz = nullptr;
    }
    out.end();
    is_open = false;
  }

  /// Drives oggz to consume all buffered bytes (invokes the read callbacks)
  void flush() {
    if (p_oggz == nullptr) return;
    LOGD("oggz_read...");
    while ((oggz_read(p_oggz, OGG_READ_SIZE)) > 0)
      ;
  }

  virtual size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", (int)len);
    // fill buffer
    size_t size_consumed = buffer.writeArray((uint8_t *)data, len);
    if (buffer.availableForWrite() == 0) {
      // Read all bytes into oggz, calling any read callbacks on the fly.
      flush();
    }
    // write remaining bytes
    if (size_consumed < len) {
      size_consumed += buffer.writeArray((uint8_t *)data + size_consumed,
                                         len - size_consumed);
      flush();
    }
    return size_consumed;
  }

  virtual operator bool() override { return is_open; }

 protected:
  EncodedAudioOutput out;
  CopyDecoder dec_copy;
  AudioDecoder *p_codec = nullptr;
  RingBuffer<uint8_t> buffer{OGG_DEFAULT_BUFFER_SIZE};
  OGGZ *p_oggz = nullptr;
  bool is_open = false;
  long pos = 0;  ///< number of bytes delivered to oggz so far

  // Final Stream Callback -> provide data to ogg
  static size_t ogg_io_read(void *user_handle, void *buf, size_t n) {
    LOGD("ogg_io_read: %d", (int)n);
    size_t result = 0;
    OggContainerDecoder *self = (OggContainerDecoder *)user_handle;
    // only deliver data when the complete request can be satisfied
    if (self->buffer.available() >= n) {
      result = self->buffer.readArray((uint8_t *)buf, n);
      self->pos += result;
    }
    return result;
  }

  // Process full packet
  static int read_packet(OGGZ *oggz, oggz_packet *zp, long serialno,
                         void *user_data) {
    LOGD("read_packet: %d", (int)zp->op.bytes);
    OggContainerDecoder *self = (OggContainerDecoder *)user_data;
    ogg_packet *op = &zp->op;
    int result = op->bytes;
    if (op->b_o_s) {
      self->beginOfSegment(op);
    } else if (op->e_o_s) {
      self->endOfSegment(op);
    } else {
      // guard the memcmp: the packet must hold at least 8 bytes
      if (op->bytes >= 8 && memcmp(op->packet, "OpusTags", 8) == 0) {
        self->beginOfSegment(op);
      } else {
        LOGD("process audio packet");
        int eff = self->out.write(op->packet, op->bytes);
        if (eff != result) {
          LOGE("Incomplete write");
        }
      }
    }
    // 0 = success
    return 0;
  }

  static int read_page(OGGZ *oggz, const ogg_page *og, long serialno,
                       void *user_data) {
    LOGD("read_page: %d", (int)og->body_len);
    // 0 = success
    return 0;
  }

  virtual void beginOfSegment(ogg_packet *op) {
    LOGD("bos");
    if (op->bytes == sizeof(AudioInfo)) {
      AudioInfo cfg(*(AudioInfo *)op->packet);
      cfg.logInfo();
      if (cfg.bits_per_sample == 16 || cfg.bits_per_sample == 24 ||
          cfg.bits_per_sample == 32) {
        setAudioInfo(cfg);
      } else {
        LOGE("Invalid AudioInfo")
      }
    } else {
      LOGE("Invalid Header")
    }
  }

  virtual void endOfSegment(ogg_packet *op) {
    // end segment not supported
    LOGW("e_o_s");
  }
};
/**
* @brief Output class for the OggContainerEncoder. Each
* write is ending up as container entry
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OggContainerOutput : public AudioOutput {
 public:
  // Empty Constructor - the output stream must be provided with begin()
  OggContainerOutput() = default;

  /// Defines the output Stream
  void setOutput(Print &print) { p_out = &print; }

  /// starts the processing using the actual AudioInfo
  virtual bool begin() override {
    TRACED();
    assert(cfg.channels != 0);
    assert(cfg.sample_rate != 0);
    is_open = true;
    if (p_oggz == nullptr) {
      p_oggz = oggz_new(OGGZ_WRITE | OGGZ_NONSTRICT | OGGZ_AUTO);
      serialno = oggz_serialno_new(p_oggz);
      oggz_io_set_write(p_oggz, ogg_io_write, this);
      packetno = 0;
      granulepos = 0;
      if (!writeHeader()) {
        is_open = false;
        LOGE("writeHeader");
      }
    }
    return is_open;
  }

  /// stops the processing
  void end() override {
    TRACED();
    // guard: end() might be called without a prior (successful) begin()
    if (p_oggz != nullptr) {
      writeFooter();
      oggz_close(p_oggz);
      p_oggz = nullptr;
    }
    is_open = false;
  }

  /// Writes raw data to be encoded and packaged
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (data == nullptr) return 0;
    LOGD("OggContainerOutput::write: %d", (int)len);
    assert(cfg.channels != 0);
    // bits_per_sample must be defined: prevents a division by zero below
    assert(cfg.bits_per_sample != 0);
    // encode the data
    op.packet = (uint8_t *)data;
    op.bytes = len;
    if (op.bytes > 0) {
      int bytes_per_sample = cfg.bits_per_sample / 8;
      // NOTE(review): this advances granulepos by samples across ALL
      // channels (not per-channel frames) - verify against the reader
      granulepos += op.bytes / bytes_per_sample;  // sample
      op.granulepos = granulepos;
      op.b_o_s = false;
      op.e_o_s = false;
      op.packetno = packetno++;
      is_audio = true;
      if (!writePacket(op, OGGZ_FLUSH_AFTER)) {
        return 0;
      }
    }
    // trigger physical write
    while ((oggz_write(p_oggz, len)) > 0)
      ;
    return len;
  }

  bool isOpen() { return is_open; }

 protected:
  Print *p_out = nullptr;
  bool is_open = false;
  OGGZ *p_oggz = nullptr;
  ogg_packet op;         ///< reused packet for audio data and footer
  ogg_packet oh;         ///< packet used for the header
  size_t granulepos = 0; ///< running granule position
  size_t packetno = 0;   ///< running packet number
  long serialno = -1;    ///< ogg logical stream serial number
  bool is_audio = false;

  virtual bool writePacket(ogg_packet &op, int flag = 0) {
    LOGD("writePacket: %d", (int)op.bytes);
    long result = oggz_write_feed(p_oggz, &op, serialno, flag, NULL);
    if (result < 0 && result != OGGZ_ERR_OUT_OF_MEMORY) {
      LOGE("oggz_write_feed: %d", (int)result);
      return false;
    }
    return true;
  }

  virtual bool writeHeader() {
    TRACED();
    // the begin-of-stream packet carries the serialized AudioInfo
    oh.packet = (uint8_t *)&cfg;
    oh.bytes = sizeof(AudioInfo);
    oh.granulepos = 0;
    oh.packetno = packetno++;
    oh.b_o_s = true;
    oh.e_o_s = false;
    is_audio = false;
    return writePacket(oh);
  }

  virtual bool writeFooter() {
    TRACED();
    // empty end-of-stream packet
    op.packet = (uint8_t *)nullptr;
    op.bytes = 0;
    op.granulepos = granulepos;
    op.packetno = packetno++;
    op.b_o_s = false;
    op.e_o_s = true;
    is_audio = false;
    return writePacket(op, OGGZ_FLUSH_AFTER);
  }

  // Final Stream Callback
  static size_t ogg_io_write(void *user_handle, void *buf, size_t n) {
    LOGD("ogg_io_write: %d", (int)n);
    OggContainerOutput *self = (OggContainerOutput *)user_handle;
    if (self == nullptr) {
      LOGE("self is null");
      return 0;
    }
    writeData<uint8_t>(self->p_out, (uint8_t *)buf, n);
    // 0 = continue
    return 0;
  }
};
/**
* @brief Encoder for Ogg Container. Encodes a packet for an Ogg
* container. The Ogg begin segment contains the AudioInfo structure. You can
 * subclass and overwrite the writeHeader() method to implement your own header
* logic. When an optional encoder is specified in the constructor we package
* the encoded data.
* Dependency: https://github.com/pschatzmann/arduino-libopus
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class OggContainerEncoder : public AudioEncoder {
 public:
  // Empty Constructor - the output stream must be provided with begin()
  OggContainerEncoder() = default;
  OggContainerEncoder(AudioEncoder *encoder) { setEncoder(encoder); }
  OggContainerEncoder(AudioEncoder &encoder) { setEncoder(&encoder); }

  /// Defines the output Stream
  void setOutput(Print &print) override { p_ogg->setOutput(print); }

  /// Provides "audio/pcm"
  const char *mime() override { return mime_pcm; }

  /// Forwards the AudioInfo to the container and the optional codec
  virtual void setAudioInfo(AudioInfo info) override {
    AudioEncoder::setAudioInfo(info);
    p_ogg->setAudioInfo(info);
    if (p_codec != nullptr) p_codec->setAudioInfo(info);
  }

  virtual bool begin(AudioInfo from) override {
    setAudioInfo(from);
    return begin();
  }

  /// starts the processing using the actual AudioInfo
  virtual bool begin() override {
    TRACED();
    bool rc = p_ogg->begin();
    // without a codec the raw input is packaged directly (see write()),
    // so report the container result instead of failing
    if (p_codec == nullptr) return rc;
    p_codec->setOutput(*p_ogg);
    bool rc_codec = p_codec->begin(p_ogg->audioInfo());
    return rc && rc_codec;
  }

  /// stops the processing
  void end() override {
    TRACED();
    if (p_codec != nullptr) p_codec->end();
    p_ogg->end();
  }

  /// Writes raw data to be encoded and packaged
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (!p_ogg->isOpen() || data == nullptr) return 0;
    LOGD("OggContainerEncoder::write: %d", (int)len);
    size_t result = 0;
    if (p_codec == nullptr) {
      // no codec: package the raw data
      result = p_ogg->write((const uint8_t *)data, len);
    } else {
      result = p_codec->write(data, len);
    }
    return result;
  }

  operator bool() override { return p_ogg->isOpen(); }

  bool isOpen() { return p_ogg->isOpen(); }

 protected:
  AudioEncoder *p_codec = nullptr;
  OggContainerOutput ogg;
  OggContainerOutput *p_ogg = &ogg;

  void setEncoder(AudioEncoder *enc) { p_codec = enc; }

  /// Replace the ogg output class
  void setOggOutput(OggContainerOutput *out) { p_ogg = out; }
};
} // namespace audio_tools

View File

@@ -0,0 +1,51 @@
#pragma once
#include "AudioTools/AudioCodecs/CodecADTS.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
namespace audio_tools {
/**
* @brief AAC header parser to check if the data is a valid ADTS aac which
* can extract some relevant audio information.
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class HeaderParserAAC {
 public:
  /// Parses the header bytes and reports whether the data looks like a
  /// valid AAC ADTS stream (header parses and, when enough data is
  /// available, a second frame follows at the expected offset).
  bool isValid(const uint8_t* data, int len) {
    // a complete ADTS header needs at least 7 bytes
    if (len < 7) return false;
    parser.begin();
    if (!parser.parse((uint8_t*)data)) return false;
    // when data beyond the first frame is present, require a 2nd sync word
    const int frame_len = getFrameLength();
    if (len <= frame_len) return true;
    return findSyncWord(data, len, frame_len) != -1;
  }

  /// Sample rate of the parsed header
  int getSampleRate() { return parser.getSampleRate(); }

  /// Channel configuration of the parsed header
  uint8_t getChannels() { return parser.data().channel_cfg; }

  /// Determines the frame length
  int getFrameLength() { return parser.getFrameLength(); }

  /// Finds the mp3/aac sync word
  int findSyncWord(const uint8_t* buf, int nBytes, int start = 0) {
    return parser.findSyncWord(buf, nBytes, start);
  }

  /// Provides the parsed ADTS header fields
  ADTSParser::ADTSHeader getHeader() { return parser.data(); }

 protected:
  ADTSParser parser;
};
} // namespace audio_tools

View File

@@ -0,0 +1,722 @@
#pragma once
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
namespace audio_tools {
/**
* @brief MP3 header parser that processes MP3 data incrementally and
* extracts complete MP3 frames. Can validate MP3 data and extract audio
* information. When used with a Print output, it splits incoming data into
* complete MP3 frames and writes them to the output stream.
*
* Features:
* - Incremental processing of MP3 data in small chunks
* - Frame synchronization and validation
* - Extraction of audio information (sample rate, bit rate, etc.)
* - Output of complete MP3 frames only
* - Support for all MPEG versions (1, 2, 2.5) and layers
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class HeaderParserMP3 {
/// @brief MPEG audio frame header fields parsed from 4 serialized bytes
struct FrameHeader {
  // number of bytes a serialized header occupies in the stream
  static const unsigned int SERIALIZED_SIZE = 4;

  /// MPEG version (2 bits of byte 1)
  enum class MPEGVersionID : unsigned {
    MPEG_2_5 = 0b00,
    INVALID = 0b01,  // reserved
    MPEG_2 = 0b10,
    MPEG_1 = 0b11,
  };

  /// Layer description (2 bits of byte 1)
  enum class LayerID : unsigned {
    INVALID = 0b00,  // reserved
    LAYER_3 = 0b01,
    LAYER_2 = 0b10,
    LAYER_1 = 0b11,
  };

  /// Channel mode (2 bits of byte 3)
  enum class ChannelModeID : unsigned {
    STEREO = 0b00,
    JOINT = 0b01,   // joint stereo
    DUAL = 0b10,    // dual channel (2 mono channels)
    SINGLE = 0b11,  // single channel (mono)
  };

  /// De-emphasis setting (2 bits of byte 3)
  enum class EmphasisID : unsigned {
    NONE = 0b00,
    MS_50_15 = 0b01,
    INVALID = 0b10,
    CCIT_J17 = 0b11,
  };

  enum SpecialBitrate { INVALID_BITRATE = -8000, ANY = 0 };
  enum SpecialSampleRate { RESERVED = 0 };

  // Parsed fields
  MPEGVersionID audioVersion = MPEGVersionID::INVALID;
  LayerID layer = LayerID::INVALID;
  bool protection = false;        // true when a CRC follows the header
  uint8_t bitrateIndex = 0;       // 0..15
  uint8_t sampleRateIndex = 0;    // 0..3
  bool padding = false;           // frame is padded with one extra byte
  bool isPrivate = false;
  ChannelModeID channelMode = ChannelModeID::STEREO;
  uint8_t extensionMode = 0;      // 0..3
  bool copyright = false;
  bool original = false;
  EmphasisID emphasis = EmphasisID::NONE;

  // Decode 4 bytes into the fields above. Returns false if sync invalid.
  static bool decode(const uint8_t* b, FrameHeader& out) {
    if (b == nullptr) return false;
    if (!(b[0] == 0xFF && (b[1] & 0xE0) == 0xE0))
      return false;  // 11-bit sync
    uint8_t b1 = b[1];
    uint8_t b2 = b[2];
    uint8_t b3 = b[3];
    out.audioVersion = static_cast<MPEGVersionID>((b1 >> 3) & 0x03);
    out.layer = static_cast<LayerID>((b1 >> 1) & 0x03);
    out.protection = !(b1 & 0x01);  // 0 means protected (CRC present)
    out.bitrateIndex = (b2 >> 4) & 0x0F;
    out.sampleRateIndex = (b2 >> 2) & 0x03;
    out.padding = (b2 >> 1) & 0x01;
    out.isPrivate = (b2 & 0x01) != 0;
    out.channelMode = static_cast<ChannelModeID>((b3 >> 6) & 0x03);
    out.extensionMode = (b3 >> 4) & 0x03;
    out.copyright = (b3 >> 3) & 0x01;
    out.original = (b3 >> 2) & 0x01;
    out.emphasis = static_cast<EmphasisID>(b3 & 0x03);
    return true;
  }

  /// Bit rate in bits per second; 0 when the index is invalid/reserved.
  /// The table stores the rate in units of 8 kbps (multiplied below).
  signed int getBitRate() const {
    // version, layer, bit index
    static const signed char rateTable[4][4][16] = {
        // version[00] = MPEG_2_5
        {
            // layer[00] = INVALID
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            // layer[01] = LAYER_3
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[10] = LAYER_2
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[11] = LAYER_1
            {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
        },
        // version[01] = INVALID
        {
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
        },
        // version[10] = MPEG_2
        {
            // layer[00] = INVALID
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            // layer[01] = LAYER_3
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[10] = LAYER_2
            {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
            // layer[11] = LAYER_1
            {0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
        },
        // version[11] = MPEG_1
        {
            // layer[00] = INVALID
            {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
            // layer[01] = LAYER_3
            {0, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, -1},
            // layer[10] = LAYER_2
            {0, 4, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, -1},
            // layer[11] = LAYER_1
            {0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, -1},
        },
    };
    signed char rate_byte =
        rateTable[(int)audioVersion][(int)layer][(int)bitrateIndex];
    if (rate_byte == -1) {
      LOGE("Unsupported bitrate");
      return 0;
    }
    return rate_byte * 8000;
  }

  /// Sample rate in Hz; 0 for invalid/reserved combinations.
  unsigned short getSampleRate() const {
    // version, sample rate index
    static const unsigned short rateTable[4][4] = {
        // version[00] = MPEG_2_5
        {11025, 12000, 8000, 0},
        // version[01] = INVALID
        {0, 0, 0, 0},
        // version[10] = MPEG_2
        {22050, 24000, 16000, 0},
        // version[11] = MPEG_1
        {44100, 48000, 32000, 0},
    };
    return rateTable[(int)audioVersion][(int)sampleRateIndex];
  }

  /// Frame length in bytes (0 when the sample rate is invalid).
  /// NOTE(review): the 144/72 scale factors match Layer III (and MPEG-1
  /// Layer II); Layer I frames use a different formula - verify before
  /// relying on this for Layer I/II streams.
  int getFrameLength() const {
    int sample_rate = getSampleRate();
    if (sample_rate == 0) return 0;
    int value =
        (audioVersion == FrameHeader::MPEGVersionID::MPEG_1) ? 144 : 72;
    return int((value * getBitRate() / sample_rate) + (padding ? 1 : 0));
  }
};
 public:
  /// Default constructor
  HeaderParserMP3() = default;

  /// Constructor for write support: complete frames are written to output
  /// @param output sink that receives the re-assembled MP3 frames
  /// @param bufferSize size of the internal working buffer in bytes
  HeaderParserMP3(Print& output, int bufferSize = 2048)
      : p_output(&output), buffer_size(bufferSize) {}

  /// Defines (or replaces) the sink for complete MP3 frames
  void setOutput(Print& output) { p_output = &output; }

  /// Resizes the internal working buffer (applied on the next write)
  void resize(int size) { buffer_size = size; }
/// split up the data into mp3 segements and write to output
size_t write(const uint8_t* data, size_t len) {
if (buffer.size() < buffer_size) buffer.resize(buffer_size);
for (int i = 0; i < len; i++) {
buffer.write(data[i]);
if (buffer.isFull()) {
while (processBuffer());
}
}
return len;
}
  /// Drains any frames still held in the internal buffer to the output.
  /// No-op when no output has been defined.
  void flush() {
    if (p_output == nullptr) return;
    while (processBuffer());
  }

  /// Returns true if a valid frame has been detected
  bool isValid() { return last_frame_size > 0; }
/// parses the header string and returns true if this is a valid mp3 file
bool isValid(const uint8_t* data, int len) {
if (data == nullptr || len < 10) {
LOGE("Invalid input data or too small");
return false;
}
header = FrameHeader{};
int valid_frames_found = 0;
int consecutive_frames = 0;
const int MIN_FRAMES_TO_VALIDATE =
3; // Require at least 3 consecutive valid frames
const int MAX_SEARCH_DISTANCE =
8192; // Limit search to prevent endless loops
// Check for ID3v2 tag at beginning
if (len >= 10 && memcmp(data, "ID3", 3) == 0) {
LOGI("ID3v2 tag found");
// Skip ID3v2 tag to find actual audio data
int id3_size = ((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) |
((data[8] & 0x7F) << 7) | (data[9] & 0x7F);
int audio_start = 10 + id3_size;
if (audio_start < len) {
return isValid(data + audio_start, len - audio_start);
}
return true; // Valid ID3 tag, assume MP3
}
// Look for first frame sync
int sync_pos = seekFrameSync(data, min(len, MAX_SEARCH_DISTANCE));
if (sync_pos == -1) {
LOGE("No frame sync found in first %d bytes", MAX_SEARCH_DISTANCE);
return false;
}
// Quick check for VBR headers (Xing/Info/VBRI)
if (contains(data + sync_pos, "Xing", len - sync_pos) ||
contains(data + sync_pos, "Info", len - sync_pos) ||
contains(data + sync_pos, "VBRI", len - sync_pos)) {
LOGI("VBR header found (Xing/Info/VBRI)");
return true;
}
// Validate multiple consecutive frames for higher confidence
int current_pos = sync_pos;
FrameHeader first_header;
bool first_header_set = false;
while (current_pos < len &&
(current_pos - sync_pos) < MAX_SEARCH_DISTANCE) {
int len_available = len - current_pos;
// Need at least header size
if (len_available < (int)FrameHeader::SERIALIZED_SIZE) {
LOGD("Not enough data for header at position %d", current_pos);
break;
}
// Read and validate frame header
FrameHeader temp_header;
if (!FrameHeader::decode(data + current_pos, temp_header) ||
validateFrameHeader(temp_header) != FrameReason::VALID) {
LOGD("Invalid frame header at position %d", current_pos);
consecutive_frames = 0;
// Look for next sync
int next_sync_off =
seekFrameSync(data + current_pos + 1, len - current_pos - 1);
if (next_sync_off == -1) break;
current_pos = current_pos + 1 + next_sync_off; // Adjust for offset
continue;
}
// Calculate frame length
int frame_len = temp_header.getFrameLength();
if (frame_len <= 0 || frame_len > 4096) {
LOGD("Invalid frame length %d at position %d", frame_len, current_pos);
consecutive_frames = 0;
current_pos++;
continue;
}
// For first valid frame, store header for consistency checking
if (!first_header_set) {
first_header = temp_header;
first_header_set = true;
header = temp_header; // Store for external access
// For small buffers, do additional single-frame validation
if (len < 1024) {
// Verify this looks like a reasonable MP3 frame
if (temp_header.getSampleRate() == 0 ||
temp_header.getBitRate() <= 0) {
LOGD("Invalid audio parameters in frame at position %d",
current_pos);
first_header_set = false;
consecutive_frames = 0;
current_pos++;
continue;
}
// Check if frame length is reasonable for the given bitrate
int expected_frame_size =
(temp_header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1)
? (144 * temp_header.getBitRate() /
temp_header.getSampleRate())
: (72 * temp_header.getBitRate() /
temp_header.getSampleRate());
if (abs(frame_len - expected_frame_size) >
expected_frame_size * 0.1) { // Allow 10% variance
LOGD("Frame length %d doesn't match expected %d for bitrate",
frame_len, expected_frame_size);
first_header_set = false;
consecutive_frames = 0;
current_pos++;
continue;
}
}
} else {
// Check consistency with first frame (sample rate, version, layer
// should match in CBR)
if (temp_header.audioVersion != first_header.audioVersion ||
temp_header.layer != first_header.layer ||
temp_header.getSampleRate() != first_header.getSampleRate()) {
LOGD("Frame parameters inconsistent at position %d", current_pos);
// This might be VBR, but continue validation
}
}
valid_frames_found++;
consecutive_frames++;
// Check if we have enough data for the complete frame
if (len_available < frame_len) {
LOGD("Incomplete frame at position %d (need %d, have %d)", current_pos,
frame_len, len_available);
break;
}
// Look for next frame sync at expected position
int next_pos = current_pos + frame_len;
if (next_pos + 1 < len) {
if (seekFrameSync(data + next_pos, min(4, len - next_pos)) == 0) {
// Found sync at expected position
current_pos = next_pos;
continue;
} else {
LOGD("No sync at expected position %d", next_pos);
consecutive_frames = 0;
}
} else {
// End of data reached
break;
}
// If we lost sync, search for next frame
int next_sync =
seekFrameSync(data + current_pos + 1, len - current_pos - 1);
if (next_sync == -1) break;
current_pos = current_pos + 1 + next_sync;
}
// Adaptive validation criteria based on available data
bool is_valid_mp3 = false;
if (len >= 2048) {
// For larger buffers, require strict consecutive frame validation
is_valid_mp3 = (consecutive_frames >= MIN_FRAMES_TO_VALIDATE);
} else if (len >= 1024) {
// For 1KB+ buffers, require at least 2 consecutive frames OR 3 total
// valid frames
is_valid_mp3 = (consecutive_frames >= 2) ||
(valid_frames_found >= MIN_FRAMES_TO_VALIDATE);
} else {
// For smaller buffers, be more lenient - 1 good frame with proper
// validation
is_valid_mp3 = (valid_frames_found >= 1) && first_header_set;
}
if (is_valid_mp3 && first_header_set) {
LOGI("-------------------");
LOGI("MP3 validation: VALID");
LOGI("Data size: %d bytes", len);
LOGI("Valid frames found: %d", valid_frames_found);
LOGI("Consecutive frames: %d", consecutive_frames);
if (len >= 2048) {
LOGI("Validation mode: STRICT (large buffer)");
} else if (len >= 1024) {
LOGI("Validation mode: MODERATE (1KB+ buffer)");
} else {
LOGI("Validation mode: LENIENT (small buffer)");
}
LOGI("Frame size: %d", getFrameLength());
LOGI("Sample rate: %u", getSampleRate());
LOGI("Bit rate: %d", getBitRate());
LOGI("Padding: %d", getFrameHeader().padding);
LOGI("Layer: %s (0x%x)", getLayerStr(), (int)getFrameHeader().layer);
LOGI("Version: %s (0x%x)", getVersionStr(),
(int)getFrameHeader().audioVersion);
LOGI("-------------------");
} else {
LOGI("MP3 validation: INVALID (frames: %d, consecutive: %d, size: %d)",
valid_frames_found, consecutive_frames, len);
}
return is_valid_mp3;
}
/// Sample rate from mp3 header
uint16_t getSampleRate() const {
return frame_header_valid ? header.getSampleRate() : 0;
}
/// Bit rate from mp3 header
int getBitRate() const {
return frame_header_valid ? header.getBitRate() : 0;
}
/// Number of channels from mp3 header
int getChannels() const {
if (!frame_header_valid) return 0;
// SINGLE = mono (1 channel), all others = stereo (2 channels)
return (header.channelMode == FrameHeader::ChannelModeID::SINGLE) ? 1 : 2;
}
/// Frame length from mp3 header
int getFrameLength() {
return frame_header_valid ? header.getFrameLength() : 0;
}
/// Provides the estimated playing time in seconds based on the bitrate of the
/// first segment
size_t getPlayingTime(size_t fileSizeBytes) {
int bitrate = getBitRate();
if (bitrate == 0) return 0;
return fileSizeBytes / bitrate;
}
/// Provides a string representation of the MPEG version
const char* getVersionStr() const {
return header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1 ? "1"
: header.audioVersion == FrameHeader::MPEGVersionID::MPEG_2 ? "2"
: header.audioVersion == FrameHeader::MPEGVersionID::MPEG_2_5
? "2.5"
: "INVALID";
}
/// Provides a string representation of the MPEG layer
const char* getLayerStr() const {
return header.layer == FrameHeader::LayerID::LAYER_1 ? "1"
: header.layer == FrameHeader::LayerID::LAYER_2 ? "2"
: header.layer == FrameHeader::LayerID::LAYER_3 ? "3"
: "INVALID";
}
/// number of samples per mp3 frame
int getSamplesPerFrame() {
if (header.layer != FrameHeader::LayerID::LAYER_3) return 0;
// samples for layer 3 are fixed
return header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1 ? 1152
: 576;
}
/// playing time per frame in ms
size_t getTimePerFrameMs() {
int sample_rate = getSampleRate();
if (sample_rate == 0) return 0;
return (1000 * getSamplesPerFrame()) / sample_rate;
}
/// frame rate in Hz (frames per second)
size_t getFrameRateHz() {
int time_per_frame = getTimePerFrameMs();
if (time_per_frame == 0) return 0;
return 1000 / time_per_frame;
}
// provides the parsed MP3 frame header
FrameHeader getFrameHeader() {
return frame_header_valid ? header : FrameHeader{};
}
  /// Returns true if we have parsed at least one valid frame
  bool hasValidFrame() const { return frame_header_valid; }
  /// Clears internal buffer and resets state so that parsing starts from
  /// scratch (any previously parsed header is discarded)
  void reset() {
    buffer.reset();
    frame_header_valid = false;
    header = FrameHeader{};
  }
/// Finds the mp3/aac sync word
int findSyncWord(const uint8_t* buf, size_t nBytes, uint8_t synch = 0xFF,
uint8_t syncl = 0xF0) {
for (int i = 0; i < nBytes - 1; i++) {
if ((buf[i + 0] & synch) == synch && (buf[i + 1] & syncl) == syncl)
return i;
}
return -1;
}
 protected:
  FrameHeader header;               // last successfully parsed frame header
  Print* p_output = nullptr;        // destination for extracted mp3 frames
  // frame accumulation buffer; constructed empty (0) and resized later —
  // a max MP3 frame is ~4KB + reserves
  SingleBuffer<uint8_t> buffer{0};
  bool frame_header_valid = false;  // true once a valid header was parsed
  size_t buffer_size = 0;           // configured capacity limit for frames
  size_t last_frame_size = 0;       // size of the most recently emitted frame
  /// Processes the internal buffer to extract complete mp3 frames.
  /// Loop: locate a frame sync, validate the header, require a sync at the
  /// expected next-frame position, then forward the complete frame to
  /// p_output and drop it from the buffer.
  /// @return true when any bytes were consumed (frame emitted or junk
  /// skipped), false when no progress was possible with the buffered data
  bool processBuffer() {
    bool progress = false;
    size_t available = buffer.available();
    while (available >=
           FrameHeader::SERIALIZED_SIZE) {  // Need 4 bytes for header
      // Get direct access to buffer data
      uint8_t* temp_data = buffer.data();
      // Find frame sync
      int sync_pos = seekFrameSync(temp_data, available);
      if (sync_pos == -1) {
        // No sync found, keep last few bytes in case sync spans buffer boundary
        size_t to_remove = (available > 3) ? available - 3 : 0;
        if (to_remove > 0) {
          buffer.clearArray(to_remove);
        }
        // Recompute available after mutation
        available = buffer.available();
        break;
      }
      // Remove any data before sync
      if (sync_pos > 0) {
        buffer.clearArray(sync_pos);
        progress = true;
        // Recompute available after mutation
        available = buffer.available();
        continue;  // Check again from new position
      }
      // We have sync at position 0, try to read header
      if (available < FrameHeader::SERIALIZED_SIZE) {
        break;  // Need more data for complete header
      }
      // Read and validate frame header
      FrameHeader temp_header;
      if (!FrameHeader::decode(temp_data, temp_header) ||
          validateFrameHeader(temp_header) != FrameReason::VALID) {
        // Invalid header, skip this sync and look for next
        buffer.clearArray(1);
        progress = true;
        available = buffer.available();
        continue;
      }
      // Calculate frame length
      int frame_len = temp_header.getFrameLength();
      if (frame_len <= 0 ||
          frame_len > buffer_size) {  // Sanity check on frame size
        // Invalid frame length, skip this sync
        buffer.clearArray(1);
        progress = true;
        available = buffer.available();
        continue;
      }
      // Check if we have complete frame
      if (available < frame_len) {
        break;  // Need more data for complete frame
      }
      // Verify next frame sync if we have enough data
      if (available >= frame_len + 2) {
        if (seekFrameSync(temp_data + frame_len, 2) != 0) {
          // No sync at expected position, this might not be a valid frame
          buffer.clearArray(1);
          progress = true;
          available = buffer.available();
          continue;
        }
      }
      // We have a complete valid frame, write it to output
      if (p_output != nullptr) {
        size_t written = p_output->write(temp_data, frame_len);
        if (written != frame_len) {
          // Output error, we still need to remove the frame from buffer
          // (the frame is dropped, not retried)
          LOGE("Failed to write complete frame");
        }
      }
      // Update header for external access
      last_frame_size = frame_len;
      header = temp_header;
      frame_header_valid = true;
      // Remove processed frame from buffer
      buffer.clearArray(frame_len);
      available = buffer.available();
      progress = true;
    }
    return progress;
  }
bool validate(const uint8_t* data, size_t len) {
(void)data;
(void)len;
return FrameReason::VALID == validateFrameHeader(header);
}
bool contains(const uint8_t* data, const char* toFind, size_t len) {
if (data == nullptr || len == 0) return false;
int find_str_len = strlen(toFind);
for (int j = 0; j < len - find_str_len; j++) {
if (memcmp(data + j, toFind, find_str_len) == 0) return true;
}
return false;
}
// Seeks to the byte at the end of the next continuous run of 11 set bits.
//(ie. after seeking the cursor will be on the byte of which its 3 most
// significant bits are part of the frame sync)
int seekFrameSync(const uint8_t* str, size_t len) {
for (int j = 0; j < static_cast<int>(len) - 1; j++) {
// Look for 11-bit sync: 0xFFE? (0xFF followed by next byte with 0xE0 set)
if (str[j] == 0xFF && (str[j + 1] & 0xE0) == 0xE0) {
return j;
}
}
return -1;
}
  /// Decodes the frame header at data into the header member and logs the
  /// resulting audio parameters; leaves header untouched on decode failure.
  void readFrameHeader(const uint8_t* data) {
    if (!FrameHeader::decode(data, header)) return;
    LOGI("- sample rate: %u", getSampleRate());
    LOGI("- bit rate: %d", getBitRate());
  }
enum class FrameReason {
VALID,
INVALID_BITRATE_FOR_VERSION,
INVALID_SAMPLERATE_FOR_VERSION,
INVALID_MPEG_VERSION,
INVALID_LAYER,
INVALID_LAYER_II_BITRATE_AND_MODE,
INVALID_EMPHASIS,
INVALID_CRC,
};
FrameReason validateFrameHeader(const FrameHeader& header) {
if (header.audioVersion == FrameHeader::MPEGVersionID::INVALID) {
LOGI("invalid mpeg version");
return FrameReason::INVALID_MPEG_VERSION;
}
if (header.layer == FrameHeader::LayerID::INVALID) {
LOGI("invalid layer");
return FrameReason::INVALID_LAYER;
}
if (header.getBitRate() <= 0) {
LOGI("invalid bitrate");
return FrameReason::INVALID_BITRATE_FOR_VERSION;
}
if (header.getSampleRate() ==
(unsigned short)FrameHeader::SpecialSampleRate::RESERVED) {
LOGI("invalid samplerate");
return FrameReason::INVALID_SAMPLERATE_FOR_VERSION;
}
// For Layer II there are some combinations of bitrate and mode which are
// not allowed
if (header.layer == FrameHeader::LayerID::LAYER_2) {
if (header.channelMode == FrameHeader::ChannelModeID::SINGLE) {
if (header.getBitRate() >= 224000) {
LOGI("invalid bitrate >224000");
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
}
} else {
if (header.getBitRate() >= 32000 && header.getBitRate() <= 56000) {
LOGI("invalid bitrate >32000");
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
}
if (header.getBitRate() == 80000) {
LOGI("invalid bitrate >80000");
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
}
}
}
if (header.emphasis == FrameHeader::EmphasisID::INVALID) {
LOGI("invalid Emphasis");
return FrameReason::INVALID_EMPHASIS;
}
return FrameReason::VALID;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,131 @@
#pragma once
#include "AudioTools/AudioCodecs/M4ACommonDemuxer.h"
namespace audio_tools {
/**
* @brief A simple M4A audio data demuxer which is providing
* AAC, MP3 and ALAC frames.
*/
class M4AAudioDemuxer : public M4ACommonDemuxer {
 public:
  /**
   * @brief Constructor. Sets up parser callbacks.
   */
  M4AAudioDemuxer() { setupParser(); }
  /**
   * @brief Defines the callback that returns the audio frames.
   * The current reference pointer is handed to the extractor together
   * with the callback.
   * @param cb Frame callback function.
   */
  void setCallback(FrameCallback cb) override {
    sampleExtractor.setReference(ref);
    sampleExtractor.setCallback(cb);
  }
  /**
   * @brief Initializes the demuxer and resets state.
   * @return always true.
   */
  bool begin() {
    audio_config.codec = Codec::Unknown;
    audio_config.alacMagicCookie.clear();
    resize(default_size);
    stsz_processed = false;
    stco_processed = false;
    // When codec/sampleSizes/callback/ref change, update the extractor:
    parser.begin();
    sampleExtractor.begin();
    return true;
  }
  /**
   * @brief Writes data to the demuxer for parsing.
   * @param data Pointer to input data.
   * @param len Length of input data.
   */
  void write(const uint8_t* data, size_t len) { parser.write(data, len); }
  /**
   * @brief Returns the available space for writing.
   * @return Number of bytes available for writing.
   */
  int availableForWrite() { return parser.availableForWrite(); }
  /**
   * @brief Returns the ALAC magic cookie (codec config).
   * @return Reference to the ALAC magic cookie vector.
   */
  Vector<uint8_t>& getALACMagicCookie() { return audio_config.alacMagicCookie; }
  /**
   * @brief Sets a reference pointer for callbacks.
   * Note: takes effect on the extractor only when setCallback() is called.
   * @param ref Reference pointer.
   */
  void setReference(void* ref) { this->ref = ref; }
  /**
   * @brief Copies the audio configuration (codec, AAC params, ALAC cookie)
   * from another demuxer.
   * @param source Demuxer to copy the configuration from.
   */
  void copyFrom(M4ACommonDemuxer& source) {
    audio_config = source.getM4AAudioConfig();
  }
 protected:
  void* ref = nullptr;  ///< Reference pointer for callbacks.
  /**
   * @brief Setup all parser callbacks
   */
  void setupParser() override {
    // global box data callback to get sizes
    parser.setReference(this);
    // parsing for content of stsd (Sample Description Box)
    parser.setCallback("stsd", [](MP4Parser::Box& box, void* ref) {
      static_cast<M4AAudioDemuxer*>(ref)->onStsd(box);
    });
    // parsing for content of esds (Elementary Stream Descriptor)
    parser.setCallback("esds", [](MP4Parser::Box& box, void* ref) {
      static_cast<M4AAudioDemuxer*>(ref)->onEsds(box);
    });
    // mp4a sample entry (AAC)
    parser.setCallback("mp4a", [](MP4Parser::Box& box, void* ref) {
      static_cast<M4AAudioDemuxer*>(ref)->onMp4a(box);
    });
    // alac sample entry (ALAC)
    parser.setCallback("alac", [](MP4Parser::Box& box, void* ref) {
      static_cast<M4AAudioDemuxer*>(ref)->onAlac(box);
    });
    // mdat: streamed to the sample extractor chunk by chunk
    parser.setCallback(
        "mdat",
        [](MP4Parser::Box& box, void* ref) {
          M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
          // mdat must not be buffered
          LOGI("#%d Box: %s, size: %u of %u bytes", (unsigned) box.seq, box.type,(unsigned) box.available, (unsigned)box.size);
          // first chunk of the box carries the total box size
          if (box.seq == 0) self.sampleExtractor.setMaxSize(box.size);
          size_t written = self.sampleExtractor.write(box.data, box.available, box.is_complete);
          // the extractor is expected to consume the chunk completely
          assert(written == box.available);
        },
        false);  // 'false' prevents the generic callback from being executed
    // stsz (sample sizes)
    parser.setCallback(
        "stsz",
        [](MP4Parser::Box& box, void* ref) {
          M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
          self.onStsz(box);
        },
        false);  // 'false' prevents the generic callback from being executed
    // parser.setCallback(
    //     "stco",
    //     [](MP4Parser::Box& box, void* ref) {
    //       M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
    //       self.onStco(box);
    //     },
    //     false);  // 'false' prevents the generic callback from being executed
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,340 @@
#pragma once
#include <Arduino.h>
#include <SD.h>
#include "M4AAudioDemuxer.h"
namespace audio_tools {
/**
* @brief Demuxer for M4A/MP4 files to extract audio data using an Arduino File.
* This class locates the mdat and stsz boxes using MP4Parser.
*
* It provides a copy() method to extract frames from the file by reading
* sample sizes directly from the stsz box in the file. This class is quite
* memory efficient because no table of sample sizes are kept in memory. It just
* reads the sample sizes from the stsz box and uses the mdat offset to read the
* sample data directly from the file.
*
* The result is written to the provided decoder or alternatively will be
* provided via the frame_callback.
*
* @author Phil Schatzmann
*/
class M4AAudioFileDemuxer : public M4ACommonDemuxer {
 public:
  using M4ACommonDemuxer::Frame;
  using M4ACommonDemuxer::FrameCallback;
  /**
   * @brief Default constructor. Sets up parser callbacks.
   */
  M4AAudioFileDemuxer() { setupParser(); }
  /**
   * @brief Constructor with decoder.
   * @param decoder Reference to MultiDecoder.
   */
  M4AAudioFileDemuxer(MultiDecoder& decoder) : M4AAudioFileDemuxer() {
    setDecoder(decoder);
  }
  /**
   * @brief Sets the decoder to use for audio frames. Please note that this
   * calls setCallback() to register the decoder callback.
   * @param decoder Reference to MultiDecoder.
   * @return true if set successfully, false when the decoder has no output.
   */
  bool setDecoder(MultiDecoder& decoder) {
    this->p_decoder = &decoder;
    if (decoder.getOutput() == nullptr) {
      LOGE("No output defined for MultiDecoder");
      return false;
    }
    setCallback([&decoder](const Frame& frame, void* /*ref*/) {
      LOGI("Decoding frame: %s with %d bytes", frame.mime, (int)frame.size);
      if (!decoder.selectDecoder(frame.mime)) {
        LOGE("Failed to select decoder for %s", frame.mime);
        return;
      }
      decoder.write(frame.data, frame.size);
    });
    return true;
  }
  /**
   * @brief Sets the callback for extracted audio frames.
   * @param cb Frame callback function.
   */
  void setCallback(FrameCallback cb) override { frame_callback = cb; }
  /**
   * @brief Sets the size of the samples buffer (in bytes).
   * @param size Buffer size in bytes (each entry holds a 4 byte size).
   */
  void setSamplesBufferSize(int size) {
    stsz_bufsize = size / 4;
    stsz_buf.resize(stsz_bufsize);
  }
  /**
   * @brief Open and parse the given file.
   * @param file Reference to an open Arduino File object.
   * @return true on success, false on failure.
   */
  bool begin(File& file) {
    M4ACommonDemuxer::begin();
    this->p_file = &file;
    if (!file) return false;
    parser.begin();
    end();  // reset all offsets/counters before (re)parsing
    if (p_decoder) p_decoder->begin();
    if (!parseFile()) return false;
    if (!readStszHeader()) return false;
    if (!checkMdat()) return false;
    mdat_sample_pos = mdat_offset + mdat_pos;
    return true;
  }
  /**
   * @brief End demuxing and reset state.
   */
  void end() {
    audio_config.codec = M4ACommonDemuxer::Codec::Unknown;
    audio_config.alacMagicCookie.clear();
    // resize(default_size);
    sample_index = 0;
    sample_count = 0;
    mdat_pos = 0;
    stsd_processed = false;
    mdat_offset = 0;
    mdat_size = 0;
    stsz_offset = 0;
    stsz_size = 0;
    fixed_sample_size = 0;
  }
  /**
   * @brief Copies the next audio frame from the file using the sample size
   * table and mdat offset. Calls the frame callback if set.
   * @return true if a frame was copied and callback called, false if end of
   * samples or error.
   */
  bool copy() {
    if (!p_file || sample_index >= sample_count) return false;
    size_t currentSize = getNextSampleSize();
    if (currentSize == 0) return false;
    if (!p_file->seek(mdat_sample_pos)) return false;
    if (buffer.size() < currentSize) buffer.resize(currentSize);
    size_t bytesRead = p_file->read(buffer.data(), currentSize);
    if (bytesRead != currentSize) return false;
    buffer.setWritePos(bytesRead);
    executeCallback(currentSize, buffer);
    mdat_sample_pos += currentSize;
    return true;
  }
  /// Returns true as long as there are samples to process.
  operator bool() { return sample_count > 0 && sample_index < sample_count; }
  /// Index of the next sample to be processed.
  uint32_t sampleIndex() const { return sample_index; }
  /// Total number of samples (from the stsz header).
  uint32_t size() const { return sample_count; }
  /// Offset of the mdat payload in the file.
  uint32_t getMdatOffset() const { return mdat_offset; }
  /**
   * @brief Provides the next sample size (= frame size) from the stsz box
   * queue.
   * @return stsz sample size in bytes (0 on end of samples or read error).
   */
  uint32_t getNextSampleSize() {
    assert(p_file != nullptr);
    if (sample_index >= sample_count) return 0;
    uint32_t currentSize = 0;
    if (fixed_sample_size) {
      currentSize = fixed_sample_size;
    } else {
      // if buffer is empty, fill it again
      if (stsz_buf.isEmpty()) {
        // 20 = 8 byte box header + 4 byte version/flags + 4 byte fixed size
        // + 4 byte sample count; each table entry is a 4 byte size
        uint64_t pos = stsz_offset + 20 + sample_index * 4;
        // fix: was 'return false' in a uint32_t function
        if (!p_file->seek(pos)) return 0;
        stsz_buf.clear();
        size_t read_bytes = p_file->read(
            reinterpret_cast<uint8_t*>(stsz_buf.data()), stsz_bufsize * 4);
        stsz_buf.setWritePos(read_bytes / 4);
        if (stsz_buf.isEmpty()) return 0;
      }
      // provide next size
      uint32_t val = 0;
      if (!stsz_buf.read(val)) return 0;
      currentSize = readU32(val);  // entries are stored big endian
    }
    sample_index++;
    return currentSize;
  }
  /**
   * @brief Initializes the demuxer for reading sample sizes from the stsz box.
   *
   * This method sets the file pointer, resets the sample index, sets the total
   * sample count, and records the offset of the stsz box in the file. It is
   * typically called before reading sample sizes directly from the file,
   * ensuring the demuxer is properly positioned.
   *
   * @param filePtr Pointer to the open file.
   * @param sampleCount Total number of samples in the file.
   * @param stszOffset Offset of the stsz box in the file.
   */
  void beginSampleSizeAccess(File* filePtr, uint32_t sampleCount,
                             uint32_t stszOffset) {
    p_file = filePtr;
    sample_index = 0;
    sample_count = sampleCount;
    stsz_offset = stszOffset;
  }
  /**
   * @brief Parses the file and feeds data to the parser until we have
   * all the necessary data: 1) stsd box processed, 2) mdat offset found,
   * 3) stsz offset found.
   * Usually this method is not needed, but it comes in handy if you need
   * to process a file which is not in streaming format!
   * @return true once all three pieces of information were found.
   */
  bool parseFile() {
    uint8_t buffer[1024];
    p_file->seek(0);
    while (p_file->available()) {
      // fix: cast so both min() arguments have the same (int) type
      int to_read = min((int)sizeof(buffer), parser.availableForWrite());
      size_t len = p_file->read(buffer, to_read);
      parser.write(buffer, len);
      // stop if we have all the data
      if (stsd_processed && mdat_offset && stsz_offset) return true;
    }
    return false;
  }
 protected:
  File* p_file = nullptr;    ///< Pointer to the open file
  uint64_t mdat_offset = 0;  ///< Offset of mdat box payload
  uint64_t mdat_size = 0;    ///< Size of mdat box payload
  uint64_t stsz_offset = 0;  ///< Offset of stsz box
  uint64_t stsz_size = 0;    ///< Size of stsz box
  uint32_t sample_index = 0; ///< Current sample index
  uint64_t mdat_pos = 0;     ///< Current position in mdat box
  SingleBuffer<uint8_t> buffer;  ///< Buffer for sample data
  int stsz_bufsize = 256;        ///< Number of sample sizes to buffer
  SingleBuffer<uint32_t> stsz_buf{
      stsz_bufsize};  ///< Buffer for stsz sample sizes
  uint32_t fixed_sample_size = 0;  ///< Fixed sample size (if nonzero)
  MultiDecoder* p_decoder = nullptr;  ///< Pointer to decoder
  uint64_t mdat_sample_pos = 0;  ///< Absolute file position of next sample
  /**
   * @brief Sets up the MP4 parser and registers box callbacks.
   */
  void setupParser() override {
    parser.setReference(this);
    // Callback for ESDS box (AAC config)
    parser.setCallback(
        "esds",
        [](MP4Parser::Box& box, void* ref) {
          static_cast<M4AAudioFileDemuxer*>(ref)->onEsds(box);
        },
        false);
    // Callback for MP4A box (AAC sample entry)
    parser.setCallback(
        "mp4a",
        [](MP4Parser::Box& box, void* ref) {
          static_cast<M4AAudioFileDemuxer*>(ref)->onMp4a(box);
        },
        false);
    // Callback for ALAC box (ALAC sample entry)
    parser.setCallback(
        "alac",
        [](MP4Parser::Box& box, void* ref) {
          static_cast<M4AAudioFileDemuxer*>(ref)->onAlac(box);
        },
        false);
    // Callback for STSZ box (sample sizes): only record offset/size
    parser.setCallback(
        "stsz",
        [](MP4Parser::Box& box, void* ref) {
          auto* self = static_cast<M4AAudioFileDemuxer*>(ref);
          if (box.seq == 0) {
            self->stsz_offset = box.file_offset;
            self->stsz_size = box.size;
          }
        },
        false);
    // Callback for MDAT box (media data): only record offset/size
    parser.setCallback(
        "mdat",
        [](MP4Parser::Box& box, void* ref) {
          auto* self = static_cast<M4AAudioFileDemuxer*>(ref);
          if (box.seq == 0) {
            self->mdat_offset = box.file_offset + 8;  // skip box header
            self->mdat_size = box.size;
          }
        },
        false);
    // Callback for STSD box (sample description)
    parser.setCallback(
        "stsd",
        [](MP4Parser::Box& box, void* ref) {
          auto* self = static_cast<M4AAudioFileDemuxer*>(ref);
          self->onStsd(box);  // for aac and alac
          self->stsd_processed = true;
        },
        false);
  }
  /**
   * @brief Executes the callback for a completed frame.
   * @param size Size of the frame.
   * @param buffer Buffer containing the frame data.
   */
  void executeCallback(size_t size, SingleBuffer<uint8_t>& buffer) {
    Frame frame = sampleExtractor.getFrame(size, buffer);
    if (frame_callback)
      frame_callback(frame, nullptr);
    else
      LOGW("No frame callback defined");
  }
  /**
   * @brief Reads the stsz header (sample count and fixed sample size) from
   * the file.
   * @return true if successful, false otherwise.
   */
  bool readStszHeader() {
    if (!p_file || stsz_offset == 0) return false;
    uint8_t buffer[20];
    if (!p_file->seek(stsz_offset)) return false;
    if (p_file->read(buffer, 20) != 20) return false;
    if (!checkType(buffer, "stsz", 4)) return false;
    // layout after the 8 byte box header: 4 byte version/flags,
    // 4 byte fixed sample size, 4 byte sample count
    uint8_t* cont = buffer + 8;
    fixed_sample_size = readU32(cont + 4);
    sample_count = readU32(cont + 8);
    stsz_processed = true;
    return true;
  }
  /// Verifies that the 8 byte box header preceding mdat_offset is "mdat".
  bool checkMdat() {
    // fix: check the seek result instead of ignoring it
    if (!p_file->seek(mdat_offset - 8)) return false;
    uint8_t buffer[8];
    if (p_file->read(buffer, 8) != 8) return false;
    return checkType(buffer, "mdat", 4);
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,695 @@
#pragma once
#include <cstdint>
#include <functional>
#include <string>
#include "AudioTools/AudioCodecs/MP4Parser.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include "MP4Parser.h"
namespace audio_tools {
/// The stsz sample size type should usually be uint32_t: However for audio
/// we expect that the sample size is usually aound 1 - 2k, so uint16_t
/// should be more then sufficient! Microcontolles only have a limited
/// amount of RAM, so this makes a big difference!
/// @brief Sample size type optimized for microcontrollers
/// @ingroup codecs
using stsz_sample_size_t = uint16_t;
/**
* @brief Abstract base class for M4A/MP4 demuxers.
* Provides shared functionality for both file-based and stream-based demuxers.
*/
class M4ACommonDemuxer {
public:
  enum class Codec { Unknown, AAC, ALAC, MP3 };  // supported audio codecs
  /// A single extracted audio frame handed to the FrameCallback
  struct Frame {
    Codec codec;                 // codec of the payload
    const char* mime = nullptr;  // mime string matching the codec
    const uint8_t* data;         // frame payload (not owned)
    size_t size;                 // payload size in bytes
  };
  /// Audio configuration collected while parsing the file structure
  struct M4AAudioConfig {
    Codec codec = Codec::Unknown;  ///< Current codec.
    // aac
    int aacProfile = 2, sampleRateIdx = 4, channelCfg = 2;  ///< AAC config.
    // cookie
    Vector<uint8_t> alacMagicCookie;  ///< ALAC codec config.
  };
/**
* @brief A parser for the ESDS segment to extract the relevant aac
* information.
*
*/
struct ESDSParser {
uint8_t audioObjectType;
uint8_t samplingRateIndex;
uint8_t channelConfiguration;
// Parses esds content to extract audioObjectType, frequencyIndex, and
// channelConfiguration
bool parse(const uint8_t* data, size_t size) {
const uint8_t* ptr = data;
const uint8_t* end = data + size;
if (ptr + 4 > end) return false;
ptr += 4; // skip version + flags
if (ptr >= end || *ptr++ != 0x03) return false;
size_t es_len = parse_descriptor_length(ptr, end);
if (ptr + es_len > end) return false;
ptr += 2; // skip ES_ID
ptr += 1; // skip flags
if (ptr >= end || *ptr++ != 0x04) return false;
size_t dec_len = parse_descriptor_length(ptr, end);
if (ptr + dec_len > end) return false;
ptr += 13; // skip objectTypeIndication, streamType, bufferSizeDB,
// maxBitrate, avgBitrate
if (ptr >= end || *ptr++ != 0x05) return false;
size_t dsi_len = parse_descriptor_length(ptr, end);
if (ptr + dsi_len > end || dsi_len < 2) return false;
uint8_t byte1 = ptr[0];
uint8_t byte2 = ptr[1];
audioObjectType = (byte1 >> 3) & 0x1F;
samplingRateIndex = ((byte1 & 0x07) << 1) | ((byte2 >> 7) & 0x01);
channelConfiguration = (byte2 >> 3) & 0x0F;
return true;
}
protected:
// Helper to decode variable-length descriptor lengths (e.g. 0x80 80 80 05)
inline size_t parse_descriptor_length(const uint8_t*& ptr,
const uint8_t* end) {
size_t len = 0;
for (int i = 0; i < 4 && ptr < end; ++i) {
uint8_t b = *ptr++;
len = (len << 7) | (b & 0x7F);
if ((b & 0x80) == 0) break;
}
return len;
}
};
/**
* @brief Extracts audio data based on the sample sizes defined in the stsz
* box. It collects the data from the mdat box and calls the callback with the
* extracted frames.
*/
class SampleExtractor {
public:
using Frame = M4ACommonDemuxer::Frame;
using Codec = M4ACommonDemuxer::Codec;
using M4AAudioConfig = M4ACommonDemuxer::M4AAudioConfig;
using FrameCallback = std::function<void(const Frame&, void*)>;
/**
* @brief Constructor. Initializes the extractor.
*/
SampleExtractor(M4AAudioConfig& cfg) : audio_config{cfg} { begin(); }
/**
* @brief Resets the extractor state.
*/
void begin() {
sampleIndex = 0;
buffer.clear();
p_chunk_offsets->clear();
p_sample_sizes->clear();
buffer.resize(1024);
current_size = 0;
box_pos = 0;
box_size = 0;
}
/**
* @brief Sets the callback to be called for each extracted frame.
* @param cb Callback function.
*/
void setCallback(FrameCallback cb) { callback = cb; }
/**
* @brief Sets a reference pointer passed to the callback.
* @param r Reference pointer.
*/
void setReference(void* r) { ref = r; }
/**
* @brief Sets the maximum box size (e.g., for mdat). This is called before
* the mdat data is posted. In order to be able to play a file multiple
* times we just reset the sampleIndex!
* @param size Maximum size in bytes.
*/
void setMaxSize(size_t size) {
box_size = size;
}
/**
* @brief Writes data to the extractor, extracting frames as sample sizes
* are met. Provides the data via the callback.
* @param data Pointer to input data.
* @param len Length of input data.
* @param is_final True if this is the last chunk of the box.
* @return Number of bytes processed.
*/
size_t write(const uint8_t* data, size_t len, bool is_final) {
// Resize buffer to the current sample size
size_t currentSize = currentSampleSize();
if (currentSize == 0) {
LOGE("No sample size defined: e.g. mdat before stsz!");
return 0;
}
resize(currentSize);
/// fill buffer up to the current sample size
for (int j = 0; j < len; j++) {
assert(buffer.write(data[j]));
if (buffer.available() >= currentSize) {
LOGI("Sample# %zu: size %zu bytes", sampleIndex, currentSize);
executeCallback(currentSize);
buffer.clear();
box_pos += currentSize;
++sampleIndex;
currentSize = currentSampleSize();
if (box_pos >= box_size) {
LOGI("Reached end of box: %s write",
is_final ? "final" : "not final");
return j;
}
if (currentSize == 0) {
LOGE("No sample size defined, cannot write data");
return j;
}
}
}
return len;
}
/**
* @brief Returns the buffer of sample sizes.
* @return Reference to the buffer of sample sizes.
*/
BaseBuffer<stsz_sample_size_t>& getSampleSizesBuffer() {
return *p_sample_sizes;
}
/**
* @brief Sets the buffer to use for sample sizes.
* @param buffer Reference to the buffer to use.
*/
void setSampleSizesBuffer(BaseBuffer<stsz_sample_size_t>& buffer) {
p_sample_sizes = &buffer;
}
/**
* @brief Returns the buffer of chunk offsets.
* @return Reference to the buffer of chunk offsets.
*/
BaseBuffer<uint32_t>& getChunkOffsetsBuffer() { return *p_chunk_offsets; }
/**
* @brief Sets the buffer to use for chunk offsets.
* @param buffer Reference to the buffer to use.
*/
void setChunkOffsetsBuffer(BaseBuffer<uint32_t>& buffer) {
p_chunk_offsets = &buffer;
}
/**
* @brief Sets a fixed sample size/count instead of using the sampleSizes
* table.
* @param sampleSize Size of each sample.
* @param sampleCount Number of samples.
*/
void setFixedSampleCount(uint32_t sampleSize, uint32_t sampleCount) {
fixed_sample_size = sampleSize;
fixed_sample_count = sampleCount;
}
/**
* @brief Constructs a Frame object for the current codec.
* @param size Size of the frame.
* @param buffer SingleBuffer with data.
* @return Frame object.
*/
Frame getFrame(size_t size, SingleBuffer<uint8_t>& buffer) {
Frame frame;
frame.codec = audio_config.codec;
frame.data = buffer.data();
frame.size = size;
switch (audio_config.codec) {
case Codec::AAC: {
// Prepare ADTS header + AAC frame
tmp.resize(size + 7);
writeAdtsHeader(tmp.data(), audio_config.aacProfile,
audio_config.sampleRateIdx, audio_config.channelCfg,
size);
memcpy(tmp.data() + 7, buffer.data(), size);
frame.data = tmp.data();
frame.size = size + 7;
frame.mime = "audio/aac";
break;
}
case Codec::ALAC:
frame.mime = "audio/alac";
break;
case Codec::MP3:
frame.mime = "audio/mpeg";
break;
default:
frame.mime = nullptr;
break;
}
return frame;
}
protected:
M4AAudioConfig& audio_config;
SingleBuffer<stsz_sample_size_t>
defaultSampleSizes; ///< Table of sample sizes.
SingleBuffer<uint32_t> defaultChunkOffsets; ///< Table of chunk offsets.
BaseBuffer<stsz_sample_size_t>* p_sample_sizes = &defaultSampleSizes;
BaseBuffer<uint32_t>* p_chunk_offsets = &defaultChunkOffsets;
Vector<uint8_t> tmp;
FrameCallback callback = nullptr; ///< Frame callback.
void* ref = nullptr; ///< Reference pointer for callback.
size_t sampleIndex = 0; ///< Current sample index.
SingleBuffer<uint8_t> buffer; ///< Buffer for accumulating sample data.
uint32_t fixed_sample_size = 0; ///< Fixed sample size (if used).
uint32_t fixed_sample_count = 0; ///< Fixed sample count (if used).
size_t current_size = 0; ///< Current sample size.
size_t box_size = 0; ///< Maximum size of the current sample.
size_t box_pos = 0; ///< Current position in the box.
/**
* @brief Executes the callback for a completed frame.
* @param size Size of the frame.
*/
void executeCallback(size_t size) {
Frame frame = getFrame(size, buffer);
if (callback)
callback(frame, ref);
else
LOGE("No callback defined for audio frame extraction");
}
/**
* @brief Resizes the internal buffer if needed.
* @param newSize New buffer size.
*/
void resize(size_t newSize) {
if (buffer.size() < newSize) {
buffer.resize(newSize);
}
}
  /**
   * @brief Returns the current sample size.
   * Resolution order: cached value for the current index, then the fixed
   * size (when stsz declared a constant size), then the sample-size table.
   * @return Size of the current sample, or 0 if none is available.
   */
  size_t currentSampleSize() {
    // NOTE(review): function-local statics are shared across ALL
    // SampleExtractor instances and survive begin(); with more than one
    // extractor (or after a restart) this cache can return a stale size.
    // Consider promoting these to member variables.
    static size_t last_index = -1;
    static size_t last_size = -1;
    // Return cached size
    if (sampleIndex == last_index) {
      return last_size;
    }
    // using fixed sizes w/o table
    if (fixed_sample_size > 0 && fixed_sample_count > 0 &&
        sampleIndex < fixed_sample_count) {
      return fixed_sample_size;
    }
    // Consume the next entry from the (possibly file-backed) size table and
    // cache it for repeated queries at the same index.
    stsz_sample_size_t nextSize = 0;
    if (p_sample_sizes->read(nextSize)) {
      last_index = sampleIndex;
      last_size = nextSize;
      return nextSize;
    }
    return 0;
  }
/**
* @brief Writes an ADTS header for an AAC frame.
* @param adts Output buffer for the header.
* @param aacProfile AAC profile.
* @param sampleRateIdx Sample rate index.
* @param channelCfg Channel configuration.
* @param frameLen Frame length.
*/
static void writeAdtsHeader(uint8_t* adts, int aacProfile,
int sampleRateIdx, int channelCfg,
int frameLen) {
adts[0] = 0xFF;
adts[1] = 0xF1;
adts[2] = ((aacProfile - 1) << 6) | (sampleRateIdx << 2) |
((channelCfg >> 2) & 0x1);
adts[3] = ((channelCfg & 0x3) << 6) | ((frameLen + 7) >> 11);
adts[4] = ((frameLen + 7) >> 3) & 0xFF;
adts[5] = (((frameLen + 7) & 0x7) << 5) | 0x1F;
adts[6] = 0xFC;
}
};
  // Callback signature used to deliver each extracted audio frame.
  using FrameCallback = std::function<void(const Frame&, void* ref)>;
  M4ACommonDemuxer() = default;
  virtual ~M4ACommonDemuxer() = default;
  /**
   * @brief Sets the callback for extracted audio frames.
   * @param cb Frame callback function.
   */
  virtual void setCallback(FrameCallback cb) { frame_callback = cb; }
  /**
   * @brief Sets the buffer to use for sample sizes.
   * Replaces the extractor's internal default table, e.g. with a
   * file-backed buffer that avoids holding the whole stsz table in RAM.
   * @param buffer Reference to the buffer to use (must outlive the demuxer).
   */
  void setSampleSizesBuffer(BaseBuffer<stsz_sample_size_t>& buffer) {
    sampleExtractor.setSampleSizesBuffer(buffer);
  }
  /**
   * @brief Sets the buffer to use for chunk offsets.
   * (Doc fixed: this forwards the chunk-offset table, not sample sizes.)
   * @param buffer Reference to the buffer to use (must outlive the demuxer).
   */
  void setChunkOffsetsBuffer(BaseBuffer<uint32_t>& buffer) {
    sampleExtractor.setChunkOffsetsBuffer(buffer);
  }
void begin() {
stsz_processed = false;
stco_processed = false;
audio_config.alacMagicCookie.clear();
audio_config.codec = Codec::Unknown;
parser.begin();
sampleExtractor.begin();
chunk_offsets_count = 0;
sample_count = 0;
}
/**
* @brief Sets the AAC configuration for ADTS header generation.
* @param profile AAC profile.
* @param srIdx Sample rate index.
* @param chCfg Channel configuration.
*/
void setAACConfig(int profile, int srIdx, int chCfg) {
audio_config.aacProfile = profile;
audio_config.sampleRateIdx = srIdx;
audio_config.channelCfg = chCfg;
}
  /// Replaces the complete audio configuration (taken by value).
  void setM4AAudioConfig(M4AAudioConfig cfg) { audio_config = cfg; }
  /// Returns a copy of the current audio configuration.
  M4AAudioConfig getM4AAudioConfig() { return audio_config; }
void resize(int size) {
default_size = size;
if (buffer.size() < size) {
buffer.resize(size);
}
}
  /// File offset of stsz box (captured in onStsz; 0 before it was seen)
  uint32_t getStszFileOffset() const {
    return stsz_offset;
  }
  /// samples in stsz (0 before the stsz header was parsed)
  uint32_t getSampleCount() const {
    return sample_count;
  }
  /// Hook for subclasses to register their box callbacks with the parser.
  virtual void setupParser() = 0;
 protected:
  FrameCallback frame_callback = nullptr;  ///< Invoked per extracted frame.
  SampleExtractor sampleExtractor{
      audio_config};  ///< Extractor for audio samples.
  MP4Parser parser;  ///< Underlying MP4 parser.
  bool stsz_processed = false;  ///< Marks the stsz table as processed
  bool stco_processed = false;  ///< Marks the stco table as processed
  bool stsd_processed = false;  ///< Marks the stsd box as processed
  M4AAudioConfig audio_config;  ///< Codec + codec-specific parameters.
  SingleBuffer<uint8_t> buffer;  ///< Buffer for incremental data.
  uint32_t sample_count = 0;  ///< Number of samples in stsz
  uint32_t stsz_offset = 0;  ///< File offset of the stsz box.
  uint32_t chunk_offsets_count = 0;  ///< Entry count from the stco box.
  size_t default_size = 2 * 1024;  ///< Default buffer size.
/**
* @brief Reads a 32-bit big-endian unsigned integer from a buffer.
* @param p Pointer to buffer.
* @return 32-bit unsigned integer.
*/
static uint32_t readU32(const uint8_t* p) {
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}
static uint32_t readU32(const uint32_t num) {
uint8_t* p = (uint8_t*)&num;
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}
uint32_t readU32Buffer() {
uint32_t nextSize = 0;
buffer.readArray((uint8_t*)&nextSize, 4);
return readU32(nextSize);
}
/**
* @brief Checks if the buffer at the given offset matches the specified type.
* @param buffer Pointer to the buffer.
* @param type 4-character type string (e.g. "mp4a").
* @param offset Offset in the buffer to check.
* @return true if the type matches, false otherwise.
*/
bool checkType(uint8_t* buffer, const char* type, int offset) {
if (buffer == nullptr || type == nullptr) return false;
bool result = buffer[offset] == type[0] && buffer[offset + 1] == type[1] &&
buffer[offset + 2] == type[2] &&
buffer[offset + 3] == type[3];
return result;
}
  /**
   * @brief Handles the stsd (Sample Description) box: accumulates its
   * payload and re-parses the contained sample entries (mp4a, .mp3, alac).
   * @param box MP4 box (may arrive in several incremental chunks).
   */
  void onStsd(const MP4Parser::Box& box) {
    LOGI("Box: %s, size: %u bytes", box.type, (unsigned)box.available);
    // first chunk: make room for the whole box and start fresh
    if (box.seq == 0) {
      resize(box.size);
      buffer.clear();
    }
    buffer.writeArray(box.data, box.data_size);
    if (box.is_complete && buffer.available() >= 8) {
      // printHexDump(box);
      // payload layout: version+flags (4 bytes), then entry count (4 bytes)
      uint32_t entryCount = readU32(buffer.data() + 4);  // NOTE(review): unused
      // One or more sample entry boxes (e.g. mp4a, .mp3, alac)
      parser.parseString(buffer.data() + 8, box.data_size - 8,
                         box.file_offset + 8 + 8, box.level + 1);
      buffer.clear();
    }
  }
/**
* @brief Handles the mp4a box.
* @param box MP4 box.
*/
void onMp4a(const MP4Parser::Box& box) {
LOGI("onMp4a: %s, size: %zu bytes", box.type, box.data_size);
if (box.is_complete) {
// printHexDump(box);
// use default configuration
int aacProfile = 2; // Default: AAC LC
int sampleRateIdx = 4; // Default: 44100 Hz
int channelCfg = 2; // Default: Stereo
setAACConfig(aacProfile, sampleRateIdx, channelCfg);
audio_config.codec = Codec::AAC;
/// for mp4a we expect to contain a esds: child boxes start at 36
int pos = 36 - 8;
parser.parseString(box.data + pos, box.data_size - pos, box.level + 1);
}
}
  /**
   * @brief Handles the esds (Elementary Stream Descriptor) box.
   * Extracts the actual AAC parameters which replace the defaults that
   * onMp4a() installed.
   * @param box MP4 box.
   */
  void onEsds(const MP4Parser::Box& box) {
    LOGI("onEsds: %s, size: %zu bytes", box.type, box.data_size);
    // printHexDump(box);
    ESDSParser esdsParser;
    if (!esdsParser.parse(box.data, box.data_size)) {
      LOGE("Failed to parse esds box");
      return;
    }
    LOGI(
        "-> esds: AAC objectType: %u, samplingRateIdx: %u, "
        "channelCfg: %u",
        esdsParser.audioObjectType, esdsParser.samplingRateIndex,
        esdsParser.channelConfiguration);
    setAACConfig(esdsParser.audioObjectType, esdsParser.samplingRateIndex,
                 esdsParser.channelConfiguration);
  }
// void fixALACMagicCookie(uint8_t* cookie, size_t len) {
// if (len < 28) {
// return;
// }
// // Helper to read/write big-endian
// auto read32 = [](uint8_t* p) -> uint32_t {
// return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
// };
// auto write32 = [](uint8_t* p, uint32_t val) {
// p[0] = (val >> 24) & 0xFF;
// p[1] = (val >> 16) & 0xFF;
// p[2] = (val >> 8) & 0xFF;
// p[3] = val & 0xFF;
// };
// auto read16 = [](uint8_t* p) -> uint16_t { return (p[0] << 8) | p[1]; };
// auto write16 = [](uint8_t* p, uint16_t val) {
// p[0] = (val >> 8) & 0xFF;
// p[1] = val & 0xFF;
// };
// // Fix values if zero or invalid
// if (read32(cookie + 0) == 0) write32(cookie + 0, 4096); // frameLength
// if (cookie[6] == 0) cookie[6] = 16; // bitDepth
// if (cookie[7] == 0 || cookie[7] > 32) cookie[7] = 10; // pb
// if (cookie[8] == 0 || cookie[8] > 32) cookie[8] = 14; // mb
// if (cookie[9] == 0 || cookie[9] > 32) cookie[9] = 10; // kb
// if (cookie[10] == 0 || cookie[10] > 8) cookie[10] = 2; // numChannels
// if (read16(cookie + 11) == 0) write16(cookie + 11, 255); // maxRun
// if (read32(cookie + 13) == 0) write32(cookie + 13, 8192); // maxFrameBytes
// if (read32(cookie + 17) == 0) write32(cookie + 17, 512000); // avgBitRate
// if (read32(cookie + 21) == 0) write32(cookie + 21, 44100); // sampleRate
// }
/**
* @brief Handles the alac box.
* @param box MP4 box.
*/
void onAlac(const MP4Parser::Box& box) {
LOGI("onAlac: %s, size: %zu bytes", box.type, box.data_size);
audio_config.codec = Codec::ALAC;
// only alac box in alac contains magic cookie
MP4Parser::Box alac;
if (parser.findBox("alac", box.data, box.data_size, alac)) {
// fixALACMagicCookie((uint8_t*)alac.data, alac.data_size);
audio_config.alacMagicCookie.resize(alac.data_size - 4);
std::memcpy(audio_config.alacMagicCookie.data(), alac.data + 4,
alac.data_size - 4);
}
}
/**
* @brief Handles the stsz (Sample Size) box.
* @param box MP4 box.
*/
void onStsz(MP4Parser::Box& box) {
MP4Parser::defaultCallback(box,0);
LOGI("onStsz #%u: %s, size: %u of %u bytes", (unsigned) box.seq, box.type, (unsigned) box.available, (unsigned) box.data_size);
if (stsz_processed) return;
BaseBuffer<stsz_sample_size_t>& sampleSizes =
sampleExtractor.getSampleSizesBuffer();
buffer.resize(box.available);
size_t written = buffer.writeArray(box.data, box.available);
assert(written = box.available);
// get sample count and size from the box
if (sample_count == 0 && buffer.available() > 12) {
readU32Buffer(); // skip version + flags
uint32_t sampleSize = readU32Buffer();
uint32_t sampleCount = readU32Buffer();
sample_count = sampleCount;
stsz_offset = box.file_offset;
sampleSizes.resize(sample_count);
if (sampleSize != 0) {
sampleExtractor.setFixedSampleCount(sampleSize, sampleCount);
}
}
// incrementally process sampleSize
int count = 0;
while (buffer.available() >= 4) {
stsz_sample_size_t sampleSize = readU32Buffer();
assert(sampleSizes.write(sampleSize));
count += 4;
}
// Remove processed data
buffer.trim();
if (box.is_complete) {
stsz_processed = true;
}
}
// /**
// * @brief Handles the stco (Chunk Offset) box.
// * @param box MP4 box.
// */
// void onStco(MP4Parser::Box& box) {
// LOGI("onStco: %s, size: %zu bytes", box.type, box.data_size);
// if (stco_processed) return;
// BaseBuffer<uint32_t>& chunkOffsets =
// sampleExtractor.getChunkOffsetsBuffer();
// buffer.resize(box.available);
// buffer.writeArray(box.data, box.available);
// // get chunk_offsets_count from the box
// if (chunk_offsets_count == 0 && buffer.available() > 12) {
// chunk_offsets_count = readU32(buffer.data());
// buffer.clearArray(4); // clear version + flags
// }
// // incrementally process sampleSize
// int j = 0;
// for (j = 0; j < buffer.available(); j += 4) {
// uint32_t sampleSize = readU32(buffer.data() + j);
// chunkOffsets.write(sampleSize);
// }
// buffer.clearArray(j);
// if (box.is_complete) {
// stco_processed = true;
// }
// }
void printHexDump(const MP4Parser::Box& box) {
const uint8_t* data = box.data;
size_t len = box.data_size;
LOGI("===========================");
for (size_t i = 0; i < len; i += 16) {
char hex[49] = {0};
char ascii[17] = {0};
for (size_t j = 0; j < 16 && i + j < len; ++j) {
sprintf(hex + j * 3, "%02X ", data[i + j]);
ascii[j] = (data[i + j] >= 32 && data[i + j] < 127) ? data[i + j] : '.';
}
ascii[16] = 0;
LOGI("%04zx: %-48s |%s|", i, hex, ascii);
}
LOGI("===========================");
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,147 @@
#pragma once
#include "AudioTools/CoreAudio/AudioPlayer.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include "M4AAudioFileDemuxer.h"
namespace audio_tools {
/**
* @brief A buffer that reads sample sizes from an M4A file using the
* M4AAudioFileDemuxer. No RAM is used to store the sample sizes as they are
* read directly from the file.
*
* This buffer is designed to be used with an AudioPlayer instance for audio
* sources which are file based only. It provides a read interface that fetches
* the next sample size directly from the file via the demuxer, avoiding the
* need to store the entire sample size table in RAM.
*
 * @note This buffer cannot be used for streaming sources; it is intended
 * for use with file-based playback.
* @note This class registers a setOnStreamChangeCallback() with the player
*/
class M4AFileSampleSizeBuffer : public BaseBuffer<stsz_sample_size_t> {
 public:
  /**
   * @brief Constructor.
   * @param player Reference to the AudioPlayer instance.
   * @param container ContainerM4A whose demuxer provides the stsz file
   * offset and the total sample count.
   * @param fileExt File extension to recognize as M4A (default ".m4a").
   */
  M4AFileSampleSizeBuffer(AudioPlayer& player, ContainerM4A& container,
                          const char* fileExt = ".m4a") {
    this->p_player = &player;
    this->p_container = &container;
    player.setReference(this);
    player.setOnStreamChangeCallback(onFileChange);
    addFileExtension(fileExt);
  }
  /**
   * @brief Get the next sample size from the demuxer.
   * The file position is saved and restored so concurrent playback reads
   * of the same file are not disturbed.
   * @param data Reference to store the sample size.
   * @return true if successful, false otherwise.
   */
  bool read(stsz_sample_size_t& data) override {
    // bugfix: bail out when no file is active yet — the previous version
    // dereferenced p_file unconditionally below
    if (p_file == nullptr) return false;
    if (demuxer.getMdatOffset() == 0) {
      // lazily position the demuxer on the stsz table of the current file
      uint32_t offset = p_container->getDemuxer().getStszFileOffset();
      uint32_t s_count = p_container->getDemuxer().getSampleCount();
      demuxer.beginSampleSizeAccess(p_file, s_count, offset);
    }
    size_t pos = p_file->position();
    data = demuxer.getNextSampleSize();
    p_file->seek(pos);  // reset position after reading
    return demuxer;
  }
  /**
   * @brief Defines how many samples are buffered with each file read.
   * @param size Number of bytes to buffer (will be divided by 4 for sample
   * count).
   */
  void setReadBufferSize(size_t size) { demuxer.setSamplesBufferSize(size); }
  /**
   * @brief Add a file extension to recognize as relevant for this buffer.
   * @param fileExt File extension string (e.g., ".m4a"); must outlive this
   * object.
   */
  void addFileExtension(const char* fileExt) {
    fileExtensions.push_back(fileExt);
  }
  /// No internal state to reset; reading is driven by the demuxer.
  void reset() {}
  /**
   * @brief Write is ignored; sample sizes are read directly from the file.
   * @param data Sample size value (ignored).
   * @return Always true. This buffer is read-only.
   */
  bool write(stsz_sample_size_t data) { return true; }
  /**
   * @brief Peek is not supported for this buffer.
   * @param result Reference to store the peeked value (unused).
   * @return Always false. Peeking is not supported.
   */
  bool peek(stsz_sample_size_t& result) { return false; }
  /**
   * @brief Returns the number of samples already read (i.e., the current
   * sample index).
   * @return Number of samples read so far.
   */
  int available() { return demuxer.sampleIndex(); };
  /**
   * @brief Returns the available space for writing.
   * @return Always 0, as this buffer does not support writing.
   */
  int availableForWrite() override { return 0; }  ///< No write buffer available
  /**
   * @brief Returns the total number of samples in the file.
   * @return Total sample count.
   */
  size_t size() override { return demuxer.size(); }
  /**
   * @brief Returns a pointer to the buffer's physical address.
   * @return Always nullptr, as this buffer does not have a physical address.
   */
  stsz_sample_size_t* address() override { return nullptr; }

 protected:
  AudioPlayer* p_player = nullptr;  ///< Pointer to the AudioPlayer instance
  File* p_file = nullptr;           ///< Pointer to the currently open file
  M4AAudioFileDemuxer demuxer;      ///< Demuxer used to extract sample sizes
  Vector<const char*> fileExtensions;  ///< List of recognized file extensions
  ContainerM4A* p_container = nullptr;  ///< Source of stsz offset/sample count
  /**
   * @brief Checks if the given file name matches any of the registered
   * extensions.
   * @param name File name to check.
   * @return true if the file is relevant, false otherwise.
   */
  bool isRelevantFile(const char* name) {
    // bugfix: this used to call endsWith(name) — comparing the name with
    // itself — which matched every file regardless of extension
    for (const auto& ext : fileExtensions) {
      if (StrView(name).endsWith(ext)) return true;
    }
    return false;
  }
  /**
   * @brief Static callback for file change events.
   * Updates the file pointer and re-parses the file if relevant.
   * @param streamPtr Pointer to the new file stream.
   * @param reference Pointer to the M4AFileSampleSizeBuffer instance.
   */
  static void onFileChange(Stream* streamPtr, void* reference) {
    M4AFileSampleSizeBuffer& self =
        *static_cast<M4AFileSampleSizeBuffer*>(reference);
    self.p_file = (File*)streamPtr;
    LOGI("===> M4AFileSampleSizeBuffer onFileChange: %s",
         self.p_file ? self.p_file->name() : "nullptr");
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,64 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/HeaderParserMP3.h"
namespace audio_tools {
/**
* @brief Parses MP3 frames, extracts audio info, and outputs complete frames.
* The frame duration is determined e.g. for RTSP streaming.
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MP3ParserEncoder : public AudioEncoder {
 public:
  /// @param bufferSize size of the internal MP3 frame buffer in bytes
  MP3ParserEncoder(int bufferSize = 1024 * 2) { buffer_size = bufferSize; }
  bool begin() override {
    TRACEI();
    mp3.resize(buffer_size);  // allocate frame buffer (default: 2KB)
    mp3.reset();
    return true;
  }
  void end() override {
    TRACEI();
    mp3.flush();    // push out any pending data
    mp3.reset();
    mp3.resize(0);  // release the frame buffer
  }
  /// Feeds MP3 data to the frame parser; returns the bytes accepted.
  size_t write(const uint8_t* data, size_t len) override {
    LOGI("write: %d", (int)len);
    return mp3.write(data, len);
  }
  void setOutput(Print& out_stream) override {
    TRACEI();
    AudioEncoder::setOutput(out_stream);
    mp3.setOutput(out_stream);
  }
  /// Audio parameters derived from the parsed MP3 header;
  /// bits_per_sample is reported as a fixed 16.
  AudioInfo audioInfo() override {
    AudioInfo info;
    info.sample_rate = mp3.getSampleRate();
    info.channels = mp3.getChannels();
    info.bits_per_sample = 16;
    return info;
  }
  /// Frame duration in microseconds (e.g. for RTSP scheduling)
  uint32_t frameDurationUs() override { return mp3.getTimePerFrameMs() * 1000; }
  uint16_t samplesPerFrame() override { return mp3.getSamplesPerFrame(); }
  operator bool() override { return true; }
  virtual const char* mime() override { return "audio/mpeg"; }
 protected:
  HeaderParserMP3 mp3;  ///< MP3 header parser / frame buffer
  int buffer_size = 0;  ///< requested buffer size in bytes
};
} // namespace audio_tools

View File

@@ -0,0 +1,603 @@
#pragma once
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>
#include "AudioTools/CoreAudio/Buffers.h"
namespace audio_tools {
/**
* @brief MP4Parser is a class that parses MP4 container files and extracts
* boxes (atoms). It provides a callback mechanism to process each box as it is
* parsed. You can define specific callbacks for individual box types or use a
* generic callback for the undefined boxes: By default it just prints the box
* information to Serial. If a container box contains data, it will be processed
* recursively and if it contains data itself, it might be reported in a second
* callback call.
 * @note This parser expects the mdat box to be the last box in the file. This
 * can be achieved with the following ffmpeg commands:
* - ffmpeg -i ../sine.wav -c:a alac -movflags +faststart alac.m4a
* - ffmpeg -i ../sine.wav -c:a aac -movflags +faststart aac.m4a
*
* @ingroup codecs
* @author Phil Schatzmann
*/
class MP4Parser {
public:
/**
* @brief Represents an individual box in the MP4 file.
*/
  struct Box {
    friend class MP4Parser;     ///< Allow MP4Parser to access private members
    friend class MP4ParserExt;  ///< Allow MP4ParserExt to access private
                                ///< members
    size_t id = 0;   ///< Unique box ID (incremented for every reported box)
    size_t seq = 0;  ///< Sequence number for the box per id (chunk index
                     ///< for incrementally delivered boxes)
    char type[5];    ///< 4-character box type (null-terminated)
    const uint8_t* data =
        nullptr;  ///< Pointer to box payload (not including header)
    size_t data_size = 0;  ///< Size of payload (not including header)
    size_t size =
        0;  ///< Size of payload including subboxes (not including header)
    int level = 0;  ///< Nesting depth
    uint64_t file_offset = 0;  ///< File offset where box starts
    int available = 0;  ///< Number of bytes available as data in this call
    bool is_complete = false;  ///< True if the box data is complete
    bool is_incremental = false;  ///< True if the box is being parsed incrementally
    bool is_container = false;  ///< True if the box is a container
  };
  /// Callback signature: invoked for every parsed box (possibly several
  /// times per box when the payload arrives incrementally).
  using BoxCallback = std::function<void(Box&, void* ref)>;
  /**
   * @brief Structure for type-specific callbacks.
   */
  struct CallbackEntry {
    char type[5];    ///< 4-character box type
    BoxCallback cb;  ///< Callback function
    bool callGeneric =
        true;  ///< If true, also call the generic callback after this one
  };
  /**
   * @brief Defines an optional reference. By default it is the parser itself.
   * The pointer is handed unchanged to every callback invocation.
   * @param ref Pointer to reference object.
   */
  void setReference(void* ref) { this->ref = ref; }
  /**
   * @brief Defines the generic callback for all boxes.
   * Replaces the default callback which prints the box information.
   * @param cb Callback function for all boxes.
   */
  void setCallback(BoxCallback cb) { callback = cb; }
/**
* @brief Defines a specific callback for a box type.
* @param type 4-character box type (e.g. "moov", "mdat").
* @param cb Callback function for this box type.
* @param callGeneric If true, the generic callback will also be called after
* the type-specific callback.
*/
void setCallback(const char* type, BoxCallback cb, bool callGeneric = true) {
CallbackEntry entry;
strncpy(entry.type, type, 4);
entry.type[4] = '\0'; // Ensure null-termination
entry.cb = cb;
entry.callGeneric = callGeneric;
callbacks.push_back(entry);
};
  /**
   * @brief Defines a specific buffer size.
   * Call before begin(): begin() only allocates its 2KB default when the
   * buffer is still empty.
   * @param size Buffer size in bytes.
   * @return true if the buffer was resized successfully.
   */
  bool resize(size_t size) {
    buffer.resize(size);
    return buffer.size() == size;
  }
/**
* @brief Initializes the parser.
* @return true on success.
*/
bool begin() {
buffer.clear();
if (buffer.size() == 0) buffer.resize(2 * 1024);
parseOffset = 0;
fileOffset = 0;
levelStack.clear();
box.is_complete = true; // Start with no open box
box.data = nullptr;
box.size = 0;
box.level = 0;
box.file_offset = 0;
box.id = 0;
box.is_incremental = false;
box.is_complete = true;
return true;
}
  /**
   * @brief Provide the data to the parser (in chunks if needed).
   * @param data Pointer to input data.
   * @param len Length of input data.
   * @return Number of bytes written to the buffer.
   */
  size_t write(const uint8_t* data, size_t len) {
    // after an error the data is reported as consumed but ignored
    if (is_error) return len;  // If an error occurred, skip writing
    size_t result = buffer.writeArray(data, len);
    parse();
    return result;
  }
  /**
   * @brief Provide the data to the parser (in chunks if needed).
   * @param data Pointer to input data (char*).
   * @param len Length of input data.
   * @return Number of bytes written to the buffer.
   */
  size_t write(const char* data, size_t len) {
    return write(reinterpret_cast<const uint8_t*>(data), len);
  }
  /**
   * @brief Returns the available space for writing.
   * @return Number of bytes available for writing.
   */
  int availableForWrite() { return buffer.availableForWrite(); }
/**
* @brief Adds a box name that will be interpreted as a container.
* @param name Name of the container box.
* @param start Offset of child boxes (default 0).
*/
void addContainer(const char* name, int start = 0) {
ContainerInfo info;
info.name = name;
info.start = start; // offset of child boxes
}
/**
* @brief Trigger separate parsing (and callbacks) on the indicated string.
* @param str Pointer to the string data.
* @param len Length of the string data.
* @return Number of bytes parsed.
*/
int parseString(const uint8_t* str, int len, int fileOffset = 0,
int level = 0) {
char type[5];
int idx = 0;
Box box;
while (true) {
if (!isValidType((const char*)str + idx + 4)) {
return idx;
}
size_t box_size = readU32(str + idx) - 8;
box.data = str + 8 + idx;
box.size = box_size;
box.level = level;
box.data_size = box.size;
box.file_offset = fileOffset + idx;
box.is_complete = true;
box.is_incremental = false;
strncpy(box.type, (char*)(str + idx + 4), 4);
box.type[4] = '\0';
idx += box.size;
processCallback(box);
if (idx >= len) break; // No more data to parse
}
return idx;
}
/// find box in box
bool findBox(const char* name, const uint8_t* data, size_t len, Box& result) {
for (int j = 0; j < len - 4; j++) {
if (!isValidType((const char*)data + j + 4)) {
continue; // Skip invalid types
}
size_t box_size = readU32(data + j) - 8;
if (box_size < 8) continue; // Invalid box size
Box box;
box.data = data + j + 8;
box.size = box_size;
box.data_size = box.size;
strncpy(box.type, (char*)(data + j + 4), 4);
box.type[4] = '\0';
if (StrView(box.type) == name) {
result = box;
return true; // Found the box
}
}
return false;
}
/**
* @brief Default callback that prints box information to Serial.
* @param box The box being processed.
* @param ref Optional reference pointer.
*/
static void defaultCallback(const Box& box, void* ref) {
char space[box.level * 2 + 1];
char str_buffer[200];
memset(space, ' ', box.level * 2);
space[box.level * 2] = '\0'; // Null-terminate the string
snprintf(str_buffer, sizeof(str_buffer),
"%s- #%u %u) %s, Offset: %u, Size: %u, Data Size: %u, Available: %u", space,
(unsigned)box.id, (unsigned) box.seq, box.type, (unsigned)box.file_offset,
(unsigned)box.size, (unsigned) box.data_size, (unsigned) box.available);
#ifdef ARDUINO
Serial.println(str_buffer);
#else
printf("%s\n", str_buffer);
#endif
}
 protected:
  BoxCallback callback = defaultCallback;  ///< Generic callback for all boxes
  Vector<CallbackEntry> callbacks;  ///< List of type-specific callbacks
  SingleBuffer<uint8_t> buffer;     ///< Buffer for incoming data
  Vector<size_t> levelStack;  ///< Absolute end offsets of open container boxes
  size_t parseOffset = 0;     ///< Current parse offset in buffer
  uint64_t fileOffset = 0;    ///< Current file offset
  void* ref = this;           ///< Reference pointer for callbacks
  Box box;                    ///< Current box being processed
  bool is_error = false;      ///< True if an error occurred
  /**
   * @brief Structure for container box information.
   */
  struct ContainerInfo {
    const char* name = nullptr;  ///< Name of the container box
    int start = 0;               ///< Offset of child boxes
  };
  Vector<ContainerInfo> containers;  ///< List of container box info
 protected:
  bool box_in_progress =
      false;  ///< True if currently parsing a box incrementally
  size_t box_bytes_received = 0;  ///< Bytes received so far for the current box
  size_t box_bytes_expected = 0;  ///< Total expected bytes for the current box
  char box_type[5] = {0};  ///< Current box type
  int box_level = 0;       ///< Current box level (nesting)
  int box_seq = 0;  ///< Chunk counter for the current incremental box
  size_t incremental_offset = 0;  ///< Offset of the next incremental chunk
  /**
   * @brief Main parsing loop. Handles incremental and complete boxes.
   * Consumes as many boxes from the buffer as possible, then compacts the
   * buffer via finalizeParse().
   */
  void parse() {
    while (true) {
      size_t bufferSize = buffer.available();
      if (!box_in_progress) {
        // false: less than a full header/box is buffered — wait for data
        if (!tryStartNewBox(bufferSize)) break;
      } else {
        // false: buffered payload for the open box was consumed
        if (!continueIncrementalBox()) break;
      }
      popLevels();
    }
    finalizeParse();
  }
  /**
   * @brief Try to start parsing a new box. Returns false if not enough data.
   * @param bufferSize Number of bytes available in the buffer.
   * @return True if a box was started, false otherwise.
   */
  bool tryStartNewBox(size_t bufferSize) {
    // need at least a complete 8-byte header (size + type)
    if (parseOffset + 8 > bufferSize) return false;
    char type[5];
    box_seq = 0;
    // get basic box information; checkParseOffset() may skip ahead to the
    // next plausible box type.
    // NOTE(review): parseOffset can move forward here without re-checking
    // the 8-byte bound above — confirm checkParseOffset() cannot leave
    // fewer than 8 readable bytes.
    parseOffset = checkParseOffset();
    const uint8_t* p = buffer.data() + parseOffset;
    uint32_t size32 = readU32(p);
    strncpy(type, (char*)(p + 4), 4);
    type[4] = '\0';
    uint64_t boxSize = size32;
    size_t headerSize = 8;
    if (boxSize < headerSize) return false;
    int level = static_cast<int>(levelStack.size());
    bool is_container = isContainerBox(type);
    if (is_container) {
      // containers are reported immediately; children follow as own boxes
      handleContainerBox(type, boxSize, level);
      return true;
    }
    size_t payload_size = static_cast<size_t>(boxSize - headerSize);
    if (parseOffset + boxSize <= bufferSize) {
      // start with full buffer!
      handleCompleteBox(type, p, headerSize, payload_size, level);
      parseOffset += boxSize;
    } else {
      startIncrementalBox(type, p, headerSize, payload_size, level, bufferSize);
      return false;  // Wait for more data
    }
    return true;
  }
  /**
   * @brief Handles a container box (box with children).
   * Reports the container itself (without payload), then records its end
   * offset on the level stack so children are reported at level + 1.
   * @param type Box type string.
   * @param boxSize Size of the box.
   * @param level Nesting level of the box.
   */
  void handleContainerBox(const char* type, uint64_t boxSize, int level) {
    strcpy(box.type, type);
    box.id = ++this->box.id;  // next unique id (self-increment of the member)
    box.data = nullptr;
    box.size = static_cast<size_t>(boxSize - 8);
    box.data_size = 0;
    box.available = 0;
    box.level = level;
    box.file_offset = fileOffset + parseOffset;
    box.is_incremental = false;
    box.is_complete = true;
    box.is_container = true;
    box.seq = 0;
    processCallback(box);
    // children end where the container ends (absolute file offset)
    uint64_t absBoxOffset = fileOffset + parseOffset;
    levelStack.push_back(absBoxOffset + boxSize);
    // only skip the header: children are parsed as regular boxes
    parseOffset += 8;
  }
  /**
   * @brief Handles a complete (non-incremental) box.
   * The whole payload is available in the buffer, so the box is reported
   * exactly once with is_complete = true.
   * @param type Box type string.
   * @param p Pointer to the start of the box in the buffer.
   * @param headerSize Size of the box header.
   * @param payload_size Size of the box payload.
   * @param level Nesting level of the box.
   */
  void handleCompleteBox(const char* type, const uint8_t* p, size_t headerSize,
                         size_t payload_size, int level) {
    strcpy(box.type, type);
    box.id = ++this->box.id;  // next unique id
    box.data = p + headerSize;
    box.size = payload_size;
    box.data_size = payload_size;
    box.level = level;
    box.file_offset = fileOffset + parseOffset;
    box.is_complete = true;
    box.is_container = false;
    box.available = payload_size;
    box.is_incremental = false;
    box.seq = 0;
    processCallback(box);
  }
  /**
   * @brief Starts parsing a box incrementally.
   * Reports whatever payload is already buffered (seq 0), then clears the
   * buffer; the remainder is delivered by continueIncrementalBox().
   * @param type Box type string.
   * @param p Pointer to the start of the box in the buffer.
   * @param headerSize Size of the box header.
   * @param payload_size Size of the box payload.
   * @param level Nesting level of the box.
   * @param bufferSize Number of bytes available in the buffer.
   */
  void startIncrementalBox(const char* type, const uint8_t* p,
                           size_t headerSize, size_t payload_size, int level,
                           size_t bufferSize) {
    box_in_progress = true;
    box_bytes_received = 0;
    box_bytes_expected = payload_size;
    strncpy(box_type, type, 5);
    box_level = level;
    box_seq = 0;
    size_t available_payload = bufferSize - parseOffset - headerSize;
    incremental_offset = fileOffset + parseOffset;
    if (available_payload > 0) {
      // first chunk: deliver the payload bytes that are already buffered
      box_bytes_received += available_payload;
      strcpy(box.type, box_type);
      box.id = ++this->box.id;
      box.data = p + headerSize;
      box.size = box_bytes_expected;
      box.data_size = box_bytes_expected;
      box.available = available_payload;
      box.level = box_level;
      box.file_offset = incremental_offset;
      box.seq = 0;
      box.is_incremental = true;
      box.is_complete = false;
      box.is_container = false;
      processCallback(box);
    }
    // fileOffset += (bufferSize - buffer.available());
    // jump the logical file position to the end of this box
    // NOTE(review): assumes parseOffset + payload_size + 8 spans exactly
    // to the box end — verify for 64-bit/extended-size boxes.
    fileOffset += (parseOffset + payload_size + 8);
    incremental_offset += available_payload;
    buffer.clear();
    parseOffset = 0;
  }
  /**
   * @brief Continue filling an incremental box. Returns false if not enough
   * data.
   * @return False if more data was processed, true otherwise.
   */
  bool continueIncrementalBox() {
    // deliver as much of the outstanding payload as is currently buffered
    size_t to_read = std::min((size_t)box_bytes_expected - box_bytes_received,
                              (size_t)buffer.available());
    if (to_read == 0) return true;
    strcpy(box.type, box_type);
    box.id = ++this->box.id;
    box.data = buffer.data();
    box.size = box_bytes_expected;
    box.data_size = box_bytes_expected;
    box.available = to_read;
    box.level = box_level;
    box.file_offset = incremental_offset;
    // complete when this chunk delivers the last outstanding byte
    box.is_complete = (box_bytes_received + to_read == box_bytes_expected);
    box.is_container = false;
    box.is_incremental = true;
    box.seq = ++box_seq;
    processCallback(box);
    box_bytes_received += to_read;
    // fileOffset += to_read;
    buffer.clearArray(to_read);
    incremental_offset += to_read;
    if (box_bytes_received >= box_bytes_expected) {
      box_in_progress = false;
    }
    return false;
  }
/**
* @brief Finalizes parsing, updating file offset and clearing buffer.
*/
void finalizeParse() {
if (parseOffset > 0) {
fileOffset += parseOffset;
buffer.clearArray(parseOffset);
parseOffset = 0;
}
}
  /**
   * @brief Returns the current file offset (absolute position in file),
   * i.e. the bytes already consumed plus the in-buffer parse position.
   * @return Current file offset.
   */
  uint64_t currentFileOffset() { return fileOffset + parseOffset; }
/**
* @brief Reads a 32-bit big-endian unsigned integer from a buffer.
* @param p Pointer to buffer.
* @return 32-bit unsigned integer.
*/
static uint32_t readU32(const uint8_t* p) {
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
}
/**
* @brief Reads a 64-bit big-endian unsigned integer from a buffer.
* @param p Pointer to buffer.
* @return 64-bit unsigned integer.
*/
static uint64_t readU64(const uint8_t* p) {
return ((uint64_t)readU32(p) << 32) | readU32(p + 4);
}
/**
* @brief Pops levels from the stack if we've passed their bounds.
*/
void popLevels() {
// Pop levels if we've passed their bounds (absolute file offset)
while (!levelStack.empty() &&
(fileOffset + parseOffset) >= levelStack.back()) {
levelStack.pop_back();
}
}
/**
* @brief Processes the callback for a box.
* Calls the type-specific callback if present, and the generic callback if
* allowed.
* @param box The box being processed.
*/
void processCallback(Box& box) {
bool is_called = false;
bool call_generic = true;
for (const auto& entry : callbacks) {
if (strncmp(entry.type, box.type, 4) == 0) {
entry.cb(box, ref);
is_called = true;
if (!entry.callGeneric) call_generic = false;
}
}
/// call generic callback if allowed
if ((!is_called || call_generic) && callback) callback(box, ref);
}
/**
* @brief Checks if a box type is a container box.
* @param type Box type string.
* @return true if container box, false otherwise.
*/
bool isContainerBox(const char* type) {
// fill with default values if nothing has been defined
if (containers.empty()) {
// pure containers
static const char* containers_str[] = {
"moov", "trak", "mdia", "minf", "stbl", "edts", "dinf", "udta",
"ilst", "moof", "traf", "mfra", "tref", "iprp", "sinf", "schi"};
for (const char* c : containers_str) {
ContainerInfo info;
info.name = c;
info.start = 0;
containers.push_back(info);
}
// container with data
ContainerInfo info;
info.name = "meta";
info.start = 4; // 4 bytes: version (1 byte) + flags (3 bytes)
containers.push_back(info);
}
// find the container by name
for (auto& cont : containers) {
if (StrView(type) == cont.name) return true;
}
return false;
}
/**
* @brief Gets the start offset for a subcontainer.
* @param type Box type string.
* @return Offset of the subcontainer.
*/
int getContainerDataLength(const char* type) {
for (auto& cont : containers) {
if (StrView(type) == cont.name) return cont.start;
}
return 0;
}
/**
* @brief Checks if a type string is a valid 4-character box type.
* @param type Pointer to type string.
* @param offset Offset in the string.
* @return true if valid, false otherwise.
*/
bool isValidType(const char* type, int offset = 0) const {
// Check if the type is a valid 4-character string
return (type != nullptr && isalnum(type[offset]) &&
isalnum(type[offset + 1]) && isalnum(type[offset + 2]) &&
isalnum(type[offset + 3]));
}
/**
* @brief Checks and adjusts the parse offset for valid box types.
* @return Adjusted parse offset.
*/
size_t checkParseOffset() {
size_t current = parseOffset;
const char* type = (char*)(buffer.data() + parseOffset + 4);
for (int j = 0; j < buffer.available() - parseOffset - 4; j += 4) {
if (isValidType(type, j)) {
if (j != 0) {
// report the data under the last valid box
box.size = 0;
box.data_size = j;
box.level = static_cast<int>(levelStack.size()) + 1;
box.data = buffer.data() + parseOffset;
processCallback(box);
}
return j + parseOffset;
}
}
return parseOffset;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,378 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
#include "AudioTools/Communication/HTTP/AbstractURLStream.h"
#include "AudioTools/CoreAudio/AudioMetaData/MimeDetector.h"
#include "AudioTools/AudioCodecs/StreamingDecoder.h"
namespace audio_tools {
/**
 * @brief Manage multiple AudioDecoders with automatic format detection
 *
 * This class automatically detects the audio format from incoming data and
 * selects the appropriate decoder from a collection of registered decoders.
 * The format detection is performed using the MimeDetector on the first chunk
 * of data written to the decoder.
 *
 * Key features:
 * - Automatic format detection using MimeDetector
 * - Support for multiple decoder registration
 * - Custom MIME type detection logic support
 * - External MIME source integration (e.g., HTTP headers)
 * - Lazy decoder initialization for memory efficiency
 * - Seamless integration with existing AudioDecoder architecture
 *
 * The actual decoder is only opened when it has been selected, which allows
 * for memory-efficient operation when dealing with multiple possible formats.
 * The relevant decoder is determined dynamically at the first write() call
 * based on the determined MIME type.
 *
 * @note This class uses a write-based interface, unlike StreamingDecoder
 * which uses a pull-based approach. For streaming scenarios with direct
 * access to input/output streams, consider using MultiStreamingDecoder.
 *
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class MultiDecoder : public AudioDecoder {
 public:
  /**
   * @brief Default constructor
   */
  MultiDecoder() = default;

  /**
   * @brief Constructor with external MIME source
   *
   * Creates a MultiDecoder that uses an external source for MIME type
   * determination, such as HTTP Content-Type headers. This can be more
   * efficient than automatic detection as it avoids analyzing data content.
   *
   * @param mimeSource Reference to a MimeSource that provides MIME type
   * information
   */
  MultiDecoder(MimeSource& mimeSource) { setMimeSource(mimeSource); }

#ifdef USE_EXPERIMENTAL
  /**
   * @brief Destructor
   *
   * Cleans up any internally created DecoderAdapter instances.
   */
  ~MultiDecoder() {
    // Clean up any adapters we created
    for (auto* adapter : adapters) {
      delete adapter;
    }
    adapters.clear();
  }
#endif

  /**
   * @brief Starts the processing and enables automatic MIME type
   * determination
   *
   * Initializes the MIME detector and prepares the MultiDecoder for format
   * detection. This method must be called before any write() operations.
   *
   * @return true if initialization was successful, false if no output is
   * defined
   */
  bool begin() override {
    mime_detector.begin();
    is_first = true;
    if (p_print == nullptr) {
      LOGE("No output defined");
      return false;
    }
    return true;
  }

  /**
   * @brief Releases resources and closes the active decoder
   *
   * Stops the currently active decoder and resets the MultiDecoder state
   * for potential reuse. After calling end(), begin() must be called again
   * before the decoder can process new data.
   */
  void end() override {
    if (actual_decoder.decoder != nullptr && actual_decoder.is_open) {
      actual_decoder.decoder->end();
    }
    actual_decoder.is_open = false;
    actual_decoder.decoder = nullptr;
    actual_decoder.mime = nullptr;
    is_first = true;
  }

  /**
   * @brief Adds a decoder that will be selected by its MIME type
   *
   * Registers an AudioDecoder that will be automatically selected when
   * the corresponding MIME type is detected in the input data.
   *
   * @param decoder The AudioDecoder to register
   * @param mime The MIME type string to associate with this decoder
   */
  void addDecoder(AudioDecoder& decoder, const char* mime) {
    DecoderInfo info{mime, &decoder};
    decoder.addNotifyAudioChange(*this);
    decoders.push_back(info);
  }

  /**
   * @brief Adds a decoder with custom MIME detection logic
   *
   * Registers an AudioDecoder with a specific MIME type and provides custom
   * logic for detecting that MIME type from raw data. This allows for
   * specialized format detection beyond the standard MimeDetector
   * capabilities.
   *
   * @param decoder The AudioDecoder to register
   * @param mime The MIME type string to associate with this decoder
   * @param check Custom function that analyzes data to detect this MIME type.
   * Should return true if the data matches this format.
   */
  void addDecoder(AudioDecoder& decoder, const char* mime,
                  bool (*check)(uint8_t* data, size_t len)) {
    addDecoder(decoder, mime);
    mime_detector.setCheck(mime, check);
  }

  /**
   * @brief Sets the output stream for decoded audio data
   *
   * Defines where the decoded PCM audio data will be written to.
   * This output will be automatically configured for the selected decoder.
   *
   * @param out_stream The Print stream to write decoded audio data to
   */
  void setOutput(Print& out_stream) override {
    p_print = &out_stream;
  }

  /**
   * @brief Sets an external MIME source for format detection
   *
   * Provides an alternative to automatic MIME detection by allowing an
   * external source to provide the MIME type information. This is
   * particularly useful when the MIME type is available from HTTP headers or
   * other metadata sources.
   *
   * When a MIME source is set, it takes precedence over automatic detection,
   * making the decoder selection process more efficient.
   *
   * @param mimeSource Reference to a MimeSource that provides MIME type
   * information
   *
   * @note The MimeSource object must remain valid for the lifetime of this
   * MultiDecoder instance, as only a reference is stored.
   */
  void setMimeSource(MimeSource& mimeSource) { p_mime_source = &mimeSource; }

  /**
   * @brief Selects the actual decoder by MIME type
   *
   * Searches through registered decoders to find one that matches the
   * specified MIME type, then initializes it for use. This method is
   * usually called automatically from the determined MIME type during
   * the first write() operation.
   *
   * @param mime The MIME type string to match against registered decoders
   * @return true if a matching decoder was found and initialized, false
   * otherwise
   */
  bool selectDecoder(const char* mime) {
    bool result = false;
    if (mime == nullptr) return false;
    // do nothing if no change
    if (StrView(mime).equals(actual_decoder.mime)) {
      is_first = false;
      return true;
    }
    // close actual decoder
    if (actual_decoder.decoder != this) end();

    // find the corresponding decoder
    selected_mime = nullptr;
    for (int j = 0; j < decoders.size(); j++) {
      DecoderInfo info = decoders[j];
      if (StrView(info.mime).equals(mime)) {
        LOGI("Using decoder for %s (%s)", info.mime, mime);
        actual_decoder = info;
        // define output if it has not been defined
        if (p_print != nullptr && actual_decoder.decoder != this
            && actual_decoder.decoder->getOutput() == nullptr) {
          actual_decoder.decoder->setOutput(*p_print);
        }
        if (!*actual_decoder.decoder) {
          actual_decoder.decoder->begin();
          LOGI("Decoder %s started", actual_decoder.mime);
        }
        // Bug fix: mark the selected decoder as open; otherwise
        // operator bool() reports false after selection and end() never
        // closes the active decoder (matches MultiStreamingDecoder).
        actual_decoder.is_open = true;
        result = true;
        selected_mime = mime;
        break;
      }
    }
    is_first = false;
    return result;
  }

  /**
   * @brief Returns the MIME type that was detected and selected
   *
   * @return The MIME type string that was detected and used to select
   * the current decoder, or nullptr if no decoder has been selected
   */
  const char* selectedMime() { return selected_mime; }

  /**
   * @brief Writes encoded audio data to be decoded
   *
   * On the first call, this method performs MIME type detection to select
   * the appropriate decoder. Subsequent calls delegate to the selected
   * decoder's write() method to process the audio data.
   *
   * The MIME detection process uses either an external MIME source (if set)
   * or analyzes the provided data to determine the audio format.
   *
   * @param data Buffer containing encoded audio data
   * @param len Number of bytes to write
   * @return Number of bytes actually written to the selected decoder
   */
  size_t write(const uint8_t* data, size_t len) override {
    if (is_first) {
      const char* mime = nullptr;
      if (p_mime_source != nullptr) {
        // get content type from http header
        mime = p_mime_source->mime();
        if (mime) LOGI("mime from http request: %s", mime);
      }
      if (mime == nullptr) {
        // use the mime detector
        mime_detector.write((uint8_t*)data, len);
        mime = mime_detector.mime();
        if (mime) LOGI("mime from mime_detector: %s", mime);
      }
      if (mime != nullptr) {
        // select the decoder based on the determined mime type
        if (!selectDecoder(mime)) {
          LOGE("The decoder could not be found for %s", mime);
          actual_decoder.decoder = &nop;
          actual_decoder.is_open = true;
        }
      }
      is_first = false;
    }
    // check if we have a decoder
    if (actual_decoder.decoder == nullptr) return 0;
    // decode the data
    return actual_decoder.decoder->write(data, len);
  }

  /**
   * @brief Checks if the decoder is active and ready
   *
   * @return true if a decoder is selected and active, or if format detection
   * hasn't been performed yet; false if no suitable decoder was found
   */
  virtual operator bool() override {
    if (actual_decoder.decoder == &nop) return false;
    return is_first || actual_decoder.is_open;
  };

  /**
   * @brief Sets codec-specific configuration data
   *
   * Forwards codec configuration data to the currently selected decoder.
   * This method can only be called after a decoder has been selected.
   *
   * @param data Buffer containing codec configuration data
   * @param len Length of the configuration data
   * @return true if the configuration was successfully applied, false
   * otherwise
   */
  bool setCodecConfig(const uint8_t* data, size_t len) override {
    if (actual_decoder.decoder == nullptr) {
      LOGE("No decoder defined, cannot set codec config");
      return false;
    }
    return actual_decoder.decoder->setCodecConfig(data, len);
  }

  /**
   * @brief Provides access to the internal MIME detector
   *
   * Returns a reference to the MimeDetector instance used for automatic
   * format detection. This allows direct access to configure custom MIME
   * detection logic or to query detection results.
   *
   * @return Reference to the internal MimeDetector instance
   */
  MimeDetector& mimeDetector() { return mime_detector; }

#ifdef USE_EXPERIMENTAL
  /**
   * @brief Adds a StreamingDecoder that will be selected by its MIME type
   *
   * Registers a StreamingDecoder that will be automatically selected when
   * the corresponding MIME type is detected in the input data. The
   * StreamingDecoder is wrapped in a DecoderAdapter to provide compatibility
   * with the write-based AudioDecoder interface used by MultiDecoder.
   *
   * @param decoder The StreamingDecoder to register
   * @param mime The MIME type string to associate with this decoder
   * @param bufferSize Buffer size for the adapter (default: 1024 bytes)
   */
  void addDecoder(StreamingDecoder& decoder, const char* mime,
                  int bufferSize = 1024) {
    if (mime != nullptr) {
      // Create a DecoderAdapter to wrap the StreamingDecoder
      decoder.addNotifyAudioChange(*this);
      auto adapter = new DecoderAdapter(decoder, bufferSize);
      adapters.push_back(adapter);  // Store for cleanup
      // Add the adapter as a regular AudioDecoder
      addDecoder(*adapter, mime);
    } else {
      LOGE("MIME type is nullptr - cannot add StreamingDecoder");
    }
  }
#endif

 protected:
  /**
   * @brief Information about a registered decoder
   */
  struct DecoderInfo {
    const char* mime = nullptr;       ///< MIME type for this decoder
    AudioDecoder* decoder = nullptr;  ///< Pointer to the decoder instance
    bool is_open = false;  ///< Whether the decoder is currently active
    /**
     * @brief Default constructor
     */
    DecoderInfo() = default;
    /**
     * @brief Constructor with parameters
     *
     * @param mime MIME type string
     * @param decoder Pointer to AudioDecoder instance
     */
    DecoderInfo(const char* mime, AudioDecoder* decoder) {
      this->mime = mime;
      this->decoder = decoder;
    }
  } actual_decoder;  ///< Currently active decoder information

  Vector<DecoderInfo> decoders{0};  ///< Collection of registered decoders
#ifdef USE_EXPERIMENTAL
  Vector<DecoderAdapter*> adapters{
      0};  ///< Collection of internally created adapters
#endif
  MimeDetector mime_detector;  ///< MIME type detection engine
  CodecNOP nop;                ///< No-operation codec for unsupported formats
  MimeSource* p_mime_source = nullptr;  ///< Optional external MIME source
  bool is_first = true;                 ///< Flag for first write() call
  const char* selected_mime = nullptr;  ///< MIME type that was selected
};
} // namespace audio_tools

View File

@@ -0,0 +1,39 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/VorbisDecoder.h"
#include "AudioTools/AudioCodecs/ContainerOgg.h"
namespace audio_tools {
/**
 * @brief Ogg Vorbis Decoder
 *
 * Wraps a VorbisDecoder inside an OggContainerDecoder, so Ogg pages are
 * unpacked automatically and the contained Vorbis packets are decoded.
 *
 * Usage:
 * 1. Instantiate OggVorbisDecoder.
 * 2. Feed Ogg Vorbis data to the decoder.
 * 3. PCM output is provided via the underlying VorbisDecoder.
 *
 * @author Phil Schatzmann
 * @ingroup codecs
 * @ingroup decoder
 * @copyright GPLv3
 */
class OggVorbisDecoder : public OggContainerDecoder {
 public:
  /// Constructor: registers the Vorbis decoder with the Ogg container.
  OggVorbisDecoder() { setDecoder(&vorbis); }

 protected:
  /// The wrapped Vorbis packet decoder
  VorbisDecoder vorbis;
};
} // namespace audio_tools

View File

@@ -0,0 +1,3 @@
This directory contains different alternative API implementations for encoders and decoders.
Usually you need to install some additional libraries.

View File

@@ -0,0 +1,992 @@
#pragma once
#include <new>
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
#include "AudioTools/CoreAudio/AudioMetaData/MimeDetector.h"
#include "AudioTools/CoreAudio/AudioOutput.h"
#include "AudioTools/CoreAudio/BaseStream.h"
namespace audio_tools {
/**
* @brief A Streaming Decoder where we provide both the input and output
* as streams.
*
* This is the base class for all streaming decoders that process audio data
* by reading from an input stream and writing decoded PCM data to an output
* stream. Unlike AudioDecoder which uses a write-based interface,
* StreamingDecoder uses a pull-based approach where you call copy() to process
* data.
*
* @note This is more efficient than the write-based AudioDecoder interface
* for streaming scenarios where you have direct access to input and output
* streams.
*
* @ingroup codecs
* @author Phil Schatzmann
* @copyright GPLv3
*/
class StreamingDecoder : public AudioInfoSource, public AudioInfoSupport {
public:
virtual ~StreamingDecoder() = default;
/**
* @brief Starts the processing
*
* Initializes the decoder and prepares it for processing audio data.
* Must be called before any copy() operations.
*
* @return true if initialization was successful, false otherwise
*/
virtual bool begin() = 0;
/**
* @brief Releases the reserved memory
*
* Cleans up any resources allocated by the decoder and stops processing.
*/
virtual void end() = 0;
/**
* @brief Defines the output Stream
*
* Sets where the decoded PCM audio data will be written to.
*
* @param out_stream The Print stream to write decoded audio data to
*/
virtual void setOutput(Print& out_stream) { p_print = &out_stream; }
/**
* @brief Defines the output streams and register to be notified
*
* Sets the output stream and registers for audio info change notifications.
*
* @param out_stream The AudioStream to write decoded audio data to
*/
virtual void setOutput(AudioStream& out_stream) {
Print* p_print = &out_stream;
setOutput(*p_print);
addNotifyAudioChange(out_stream);
}
/**
* @brief Defines the output streams and register to be notified
*
* Sets the output stream and registers for audio info change notifications.
*
* @param out_stream The AudioOutput to write decoded audio data to
*/
virtual void setOutput(AudioOutput& out_stream) {
Print* p_print = &out_stream;
setOutput(*p_print);
addNotifyAudioChange(out_stream);
}
/**
* @brief Stream Interface: Decode directly by taking data from the stream
*
* This is more efficient than feeding the decoder with write: just call
* copy() in the loop to process data from the input stream.
*
* @param inStream The input stream containing encoded audio data
*/
void setInput(Stream& inStream) { this->p_input = &inStream; }
/**
* @brief Provides the audio information for the current stream
*
* Returns audio format information such as sample rate, channels, and
* bits per sample that was determined from the decoded audio stream.
*
* @return AudioInfo structure containing format information
*/
virtual AudioInfo audioInfo() = 0;
/**
* @brief Checks if the class is active
*
* @return true if the decoder is ready and active, false otherwise
*/
virtual operator bool() = 0;
/**
* @brief Process a single read operation - to be called in the loop
*
* Reads a chunk of data from the input stream, decodes it, and writes
* the decoded PCM data to the output stream.
*
* @return true if data was processed successfully, false if no more data
* is available or an error occurred
*/
virtual bool copy() = 0;
/**
* @brief Process all available data
*
* Convenience method that calls copy() repeatedly until all available
* data has been processed.
*
* @return true if any data was processed, false if no data was available
*/
bool copyAll() {
bool result = false;
while (copy()) {
result = true;
}
return result;
}
/**
* @brief Provides the MIME type of the audio format handled by this decoder
*
* @return C-string containing the MIME type (e.g., "audio/mpeg",
* "audio/flac")
*/
virtual const char* mime() = 0;
protected:
/**
* @brief Reads bytes from the input stream
*
* Derived classes must implement this to read data from their input source.
*
* @param data Buffer to store the read data
* @param len Maximum number of bytes to read
* @return Number of bytes actually read
*/
virtual size_t readBytes(uint8_t* data, size_t len) = 0;
void setAudioInfo(AudioInfo newInfo) override {
TRACED();
if (this->info != newInfo) {
this->info = newInfo;
notifyAudioChange(info);
}
}
Print* p_print = nullptr; ///< Output stream for decoded PCM data
Stream* p_input = nullptr; ///< Input stream for encoded audio data
AudioInfo info;
};
/**
 * @brief Converts any AudioDecoder to a StreamingDecoder
 *
 * Adapter that exposes a write-based AudioDecoder through the pull-based
 * StreamingDecoder interface: each copy() reads a chunk from the input
 * stream into an internal buffer and feeds it to the wrapped decoder's
 * write() method.
 *
 * @ingroup codecs
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class StreamingDecoderAdapter : public StreamingDecoder {
 public:
  /**
   * @brief Constructor
   *
   * @param decoder The AudioDecoder to wrap
   * @param mimeStr The MIME type string for this decoder
   * @param copySize Buffer size for data transfer (default:
   * DEFAULT_BUFFER_SIZE)
   */
  StreamingDecoderAdapter(AudioDecoder& decoder, const char* mimeStr,
                          int copySize = DEFAULT_BUFFER_SIZE) {
    p_decoder = &decoder;
    p_decoder->addNotifyAudioChange(*this);
    mime_str = mimeStr;
    if (copySize > 0) resize(copySize);
  }

  /**
   * @brief Starts the processing: initializes the wrapped decoder.
   * @return true if initialization was successful, false otherwise
   */
  bool begin() override {
    TRACED();
    if (p_decoder == nullptr || p_input == nullptr) return false;
    return p_decoder->begin();
  }

  /// Releases the reserved memory of the wrapped decoder.
  void end() override { p_decoder->end(); }

  /**
   * @brief Defines the output Stream for the wrapped decoder.
   * @param out_stream The output stream for decoded audio data
   */
  void setOutput(Print& out_stream) override {
    p_decoder->setOutput(out_stream);
  }

  /// Provides the audio information of the wrapped decoder.
  AudioInfo audioInfo() override { return p_decoder->audioInfo(); }

  /// @return true if the wrapped decoder is active, false otherwise
  virtual operator bool() override { return *p_decoder; }

  /**
   * @brief Process a single read operation - to be called in the loop
   *
   * Fills the internal buffer from the input stream and forwards the data
   * to the wrapped AudioDecoder.
   *
   * @return true if data was processed successfully, false otherwise
   */
  virtual bool copy() override {
    size_t loaded = readBytes(buffer.data(), buffer.size());
    size_t forwarded = 0;
    if (loaded > 0) forwarded = p_decoder->write(buffer.data(), loaded);
    bool rc = forwarded > 0;
    LOGI("copy: %s", rc ? "success" : "failure");
    return rc;
  }

  /**
   * @brief Adjust the buffer size: the existing content of the buffer is
   * lost!
   * @param bufferSize New buffer size in bytes
   */
  void resize(int bufferSize) { buffer.resize(bufferSize); }

  /// Provides the MIME type that was defined in the constructor.
  const char* mime() override { return mime_str; }

 protected:
  AudioDecoder* p_decoder = nullptr;  ///< Wrapped AudioDecoder instance
  Vector<uint8_t> buffer{0};          ///< Internal buffer for data transfer
  const char* mime_str = nullptr;     ///< MIME type string

  /**
   * @brief Reads bytes from the input stream
   *
   * @param data Buffer to store the read data
   * @param len Maximum number of bytes to read
   * @return Number of bytes actually read
   */
  size_t readBytes(uint8_t* data, size_t len) override {
    if (p_input == nullptr) return 0;
    return p_input->readBytes(data, len);
  }
};
/**
* @brief Manage multiple StreamingDecoders with automatic format detection
*
* This class automatically detects the audio format from incoming streaming
* data and selects the appropriate decoder from a collection of registered
* decoders. The format detection is performed using the MimeDetector on the
* first chunk of data, and the detected data is preserved for the selected
* decoder using a buffered stream approach.
*
* Key features:
* - Automatic format detection using MimeDetector
* - Support for multiple decoder registration
* - Data preservation during format detection
* - Custom mime type detection logic support
* - Seamless integration with existing streaming architecture
*
* @note The first call to copy() will consume some data for format detection,
* but this data is preserved and made available to the selected decoder through
* a BufferedPrefixStream mechanism.
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MultiStreamingDecoder : public StreamingDecoder {
public:
/**
* @brief Default constructor
*/
MultiStreamingDecoder() = default;
/**
* @brief Destructor
*
* Cleans up any internally created StreamingDecoderAdapter instances.
*/
~MultiStreamingDecoder() {
// Clean up any adapters we created
for (auto* adapter : adapters) {
delete adapter;
}
adapters.clear();
}
/**
* @brief Starts the processing
*
* Initializes the MIME detector and prepares for format detection.
*
* @return true if initialization was successful, false if no output is
* defined
*/
bool begin() override {
mime_detector.begin();
is_first = true;
if (p_print == nullptr) {
LOGE("No output defined");
return false;
}
return true;
}
/**
* @brief Releases the reserved memory
*
* Stops the currently active decoder and resets the state for next use.
*/
void end() override {
if (actual_decoder.decoder != nullptr && actual_decoder.is_open) {
actual_decoder.decoder->end();
}
actual_decoder.is_open = false;
actual_decoder.decoder = nullptr;
actual_decoder.mime = nullptr;
is_first = true;
}
/**
* @brief Defines the output Stream
*
* @param out_stream The output stream for decoded audio data
*/
void setOutput(Print& out_stream) override {
StreamingDecoder::setOutput(out_stream);
}
/**
* @brief Defines the output streams and register to be notified
*
* @param out_stream The AudioStream for decoded audio data
*/
void setOutput(AudioStream& out_stream) override {
StreamingDecoder::setOutput(out_stream);
}
/**
* @brief Defines the output streams and register to be notified
*
* @param out_stream The AudioOutput for decoded audio data
*/
void setOutput(AudioOutput& out_stream) override {
StreamingDecoder::setOutput(out_stream);
}
/**
* @brief Stream Interface: Decode directly by taking data from the stream
*
* @param inStream The input stream containing encoded audio data
*/
void setInput(Stream& inStream) {
StreamingDecoder::setInput(inStream);
}
/**
* @brief Adds a decoder that will be selected by its MIME type
*
* Registers a StreamingDecoder that will be automatically selected when
* the corresponding MIME type is detected in the input stream.
*
* @param decoder The StreamingDecoder to register
*/
void addDecoder(StreamingDecoder& decoder) {
decoder.addNotifyAudioChange(*this);
const char* mime = decoder.mime();
if (mime != nullptr) {
DecoderInfo info{mime, &decoder};
decoders.push_back(info);
} else {
LOGE("Decoder mime() returned nullptr - cannot add decoder");
}
}
/**
* @brief Adds a decoder with explicit MIME type
*
* Registers a StreamingDecoder with a specific MIME type, which may be
* different from what the decoder's mime() method returns.
*
* @param decoder The StreamingDecoder to register
* @param mime The MIME type string to associate with this decoder
*/
void addDecoder(StreamingDecoder& decoder, const char* mime) {
if (mime != nullptr) {
decoder.addNotifyAudioChange(*this);
DecoderInfo info{mime, &decoder};
decoders.push_back(info);
} else {
LOGE("Decoder mime() returned nullptr - cannot add decoder");
}
}
/**
* @brief Adds an AudioDecoder with explicit MIME type
*
* Wraps an AudioDecoder in a StreamingDecoderAdapter and registers it with
* the specified MIME type. This allows using traditional AudioDecoder
* instances with the MultiStreamingDecoder's automatic format detection.
*
* @param decoder The AudioDecoder to wrap and register
* @param mime The MIME type string to associate with this decoder
* @param bufferSize Buffer size for the adapter (default:
* DEFAULT_BUFFER_SIZE)
*
* @note The created StreamingDecoderAdapter is stored internally and will be
* automatically managed by the MultiStreamingDecoder.
*/
void addDecoder(AudioDecoder& decoder, const char* mime,
int bufferSize = DEFAULT_BUFFER_SIZE) {
if (mime != nullptr) {
// Create a StreamingDecoderAdapter to wrap the AudioDecoder
decoder.addNotifyAudioChange(*this);
auto adapter = new StreamingDecoderAdapter(decoder, mime, bufferSize);
adapters.push_back(adapter); // Store for cleanup
DecoderInfo info{mime, adapter};
decoders.push_back(info);
} else {
LOGE("MIME type is nullptr - cannot add AudioDecoder");
}
}
/**
* @brief Checks if the class is active
*
* @return true if a decoder is selected and active, or if format detection
* hasn't been performed yet
*/
virtual operator bool() override {
if (actual_decoder.decoder == nullptr) return false;
return is_first || actual_decoder.is_open;
}
/**
* @brief Process a single read operation - to be called in the loop
*
* On the first call, this method reads data for format detection, selects
* the appropriate decoder, and sets up a buffered stream. Subsequent calls
* delegate to the selected decoder's copy() method.
*
* @return true if data was processed successfully, false if no data is
* available or format detection/decoding failed
*/
virtual bool copy() override {
if (p_input == nullptr) return false;
// Automatically select decoder if not already selected
if (is_first) {
// determine the mime and select the decoder
if (!selectDecoder()) {
return false;
}
is_first = false;
}
// Check if we have a decoder
if (actual_decoder.decoder == nullptr) return false;
// Use the selected decoder to process data
return actual_decoder.decoder->copy();
}
/**
* @brief Selects the actual decoder by MIME type
*
* Searches through registered decoders to find one that matches the
* detected MIME type, then initializes it for use.
*
* @param mime The MIME type string to match
* @return true if a matching decoder was found and initialized, false
* otherwise
*/
bool selectDecoder(const char* mime) {
  TRACEI();
  bool result = false;
  // Guard against null MIME type - cannot proceed without valid MIME
  if (mime == nullptr) {
    LOGE("mime is null");
    return false;
  }
  // Optimization: Check if the requested MIME type is already active
  // This avoids unnecessary decoder switching when the same format is detected
  if (StrView(mime).equals(actual_decoder.mime)) {
    is_first = false; // Mark initialization as complete
    return true;      // Already using the correct decoder
  }
  // Clean shutdown of currently active decoder before switching
  // This ensures proper resource cleanup and state reset
  if (actual_decoder.decoder != nullptr) {
    actual_decoder.decoder->end();
    actual_decoder.is_open = false; // Mark as inactive
  }
  // Search through all registered decoders to find one that handles this MIME type
  selected_mime = nullptr; // Clear previous selection
  for (int j = 0; j < decoders.size(); j++) {
    DecoderInfo info = decoders[j];
    // Check if this decoder supports the detected MIME type
    if (StrView(info.mime).equals(mime)) {
      LOGI("Using Decoder %s for %s", toStr(info.mime), toStr(mime));
      // Switch to the matching decoder
      actual_decoder = info;
      // Configure the decoder's output stream to match our output
      // This ensures decoded audio data flows to the correct destination
      if (p_print != nullptr) {
        actual_decoder.decoder->setOutput(*p_print);
      }
      // Initialize the selected decoder and mark it as active.
      // bug fix: assert BEFORE dereferencing p_data_source - the log
      // statement used to dereference the pointer one line before the check
      assert(p_data_source != nullptr);
      LOGI("available: %d", p_data_source->available());
      actual_decoder.decoder->setInput(*p_data_source);
      actual_decoder.decoder->clearNotifyAudioChange();
      actual_decoder.decoder->addNotifyAudioChange(*this);
      if (actual_decoder.decoder->begin()) {
        actual_decoder.is_open = true;
        LOGI("StreamingDecoder %s started", toStr(actual_decoder.mime));
      } else {
        // Decoder failed to start - this is a critical error
        LOGE("Failed to start StreamingDecoder %s", toStr(actual_decoder.mime));
        return false;
      }
      // Successfully found and initialized a decoder
      result = true;
      selected_mime = mime; // Store the MIME type that was selected
      break;                // Stop searching once we find a match
    }
  }
  // Mark initialization phase as complete regardless of success/failure
  is_first = false;
  return result; // true if decoder was found and started, false otherwise
}
/**
* @brief Provides the MIME type of the selected decoder
* @return MIME type string of the currently active decoder, or nullptr
* if no decoder is selected
*/
const char* mime() override {
// fallback to actual decoder
if (actual_decoder.decoder != nullptr) {
return actual_decoder.decoder->mime();
}
return nullptr;
}
/**
* @brief Returns the MIME type that was detected and selected
*
* @return The MIME type string that was detected by the MimeDetector
*/
const char* selectedMime() { return selected_mime; }
/**
* @brief Provides the audio information from the selected decoder
*
* @return AudioInfo from the currently active decoder, or empty AudioInfo
* if no decoder is selected
*/
AudioInfo audioInfo() override {
  // No active decoder -> default-constructed (empty) AudioInfo
  if (actual_decoder.decoder == nullptr) {
    AudioInfo empty;
    return empty;
  }
  return actual_decoder.decoder->audioInfo();
}
/**
* @brief Provides access to the internal MIME detector
*
* Returns a reference to the MimeDetector instance used for automatic
* format detection. This allows access to advanced features such as:
* - Adding custom MIME type detection logic
* - Setting custom detection callbacks
* - Configuring default MIME types
* - Accessing detection statistics
*
* @note This method should typically only be used for advanced configuration
* before calling begin(). Modifying the detector after format detection
* has occurred may lead to unexpected behavior.
*
* @return Reference to the internal MimeDetector instance
*
* @see MimeDetector::setCheck() for adding custom detection logic
* @see MimeDetector::setMimeCallback() for detection notifications
*/
MimeDetector& mimeDetector() { return mime_detector; }
/**
* @brief Sets an external MIME source for format detection
*
* Provides an alternative to automatic MIME detection by allowing an external
* source to provide the MIME type information. This is particularly useful
* when the MIME type is already known from other sources such as:
* - HTTP Content-Type headers
* - File extensions
* - Metadata from containers or playlists
* - User-specified format preferences
*
* When a MIME source is set, the automatic detection process (which requires
* reading and analyzing stream data) is bypassed, making the decoder
* initialization more efficient and faster.
*
* @param mimeSource Reference to a MimeSource object that provides the
* MIME type through its mime() method
*
* @note The MimeSource object must remain valid for the lifetime of this
* MultiStreamingDecoder instance, as only a reference is stored.
*
* @note Setting a MIME source takes precedence over automatic detection.
* To revert to automatic detection, the MIME source would need to
* return nullptr from its mime() method.
*
* @see MimeSource interface for implementing custom MIME providers
* @see selectDecoder() for how MIME type detection and selection works
*
* @since This feature allows integration with external metadata sources
*/
void setMimeSource(MimeSource& mimeSource) { p_mime_source = &mimeSource; }
protected:
/**
* @brief Information about a registered decoder
*/
struct DecoderInfo {
const char* mime = nullptr; ///< MIME type for this decoder
StreamingDecoder* decoder = nullptr; ///< Pointer to the decoder instance
bool is_open = false; ///< Whether the decoder is currently active
/**
* @brief Default constructor
*/
DecoderInfo() = default;
/**
* @brief Constructor with parameters
*
* @param mime MIME type string
* @param decoder Pointer to StreamingDecoder instance
*/
DecoderInfo(const char* mime, StreamingDecoder* decoder) {
this->mime = mime;
this->decoder = decoder;
}
} actual_decoder; ///< Currently active decoder information
// --- internal state ---------------------------------------------------
Vector<DecoderInfo> decoders{0};  ///< Collection of registered decoders
Vector<StreamingDecoderAdapter*> adapters{
    0};  ///< Adapters created internally for registered AudioDecoders
MimeDetector mime_detector;  ///< MIME type detection engine
Vector<uint8_t> detection_buffer{0};  ///< Sample bytes used for format detection
bool is_first = true;  ///< True until the first copy()/selection has run
const char* selected_mime = nullptr;  ///< MIME type chosen by selectDecoder()
MimeSource* p_mime_source =
    nullptr;  ///< Optional external MIME provider (e.g. HTTP Content-Type)
Stream *p_data_source = nullptr;  ///< Effective data source for the decoder
BufferedStream buffered_stream{0};  ///< Preserves the peeked detection bytes
/// Maps nullptr to an empty string so the value is safe for %s logging
const char* toStr(const char* str) {
  if (str == nullptr) return "";
  return str;
}
/**
* @brief Automatically detects MIME type and selects appropriate decoder
*
* This method performs automatic format detection and decoder selection when
* no decoder is currently active. It supports two modes of operation:
* 1. External MIME source - Uses a provided MimeSource for format information
* 2. Auto-detection - Analyzes stream content to determine the audio format
*
 * The method peeks a small sample of data (160 bytes) from the input stream
* for format detection, then preserves this data in a buffered stream so it
* remains available to the selected decoder. This ensures no audio data is
* lost during the detection process.
*
* @note This method is automatically called by copy() on the first invocation.
* Subsequent calls will return immediately if a decoder is already selected.
*
* @note The detection data is preserved using BufferedPrefixStream, allowing
* the selected decoder to process the complete stream including the bytes
* used for format identification.
*
* @return true if a decoder was successfully selected and initialized, or if
* a decoder was already active; false if MIME detection failed or no
* matching decoder was found
*
* @see selectDecoder(const char* mime) for explicit decoder selection
* @see setMimeSource() for providing external MIME type information
* @see MimeDetector for details on automatic format detection
*/
bool selectDecoder() {
  // Only perform MIME detection and decoder selection if no decoder is active yet
  // This prevents re-detection on subsequent calls during the same stream
  if (actual_decoder.decoder == nullptr) {
    const char* mime = nullptr;
    p_data_source = nullptr;
    // Two methods for MIME type determination: external source or auto-detection
    if (p_mime_source != nullptr) {
      // Option 1: Use externally provided MIME source (e.g., from HTTP headers)
      // This is more efficient as it avoids reading and analyzing stream data
      mime = p_mime_source->mime();
      LOGI("mime from source: %s", toStr(mime));
      assert(p_input != nullptr);
      p_data_source = p_input;
    } else {
      // Option 2: Auto-detect MIME type by analyzing stream content
      // Redirect the decoder to use the buffered stream
      // we use the buffered stream as input
      assert(p_input != nullptr);
      buffered_stream.setStream(*p_input);
      buffered_stream.resize(DEFAULT_BUFFER_SIZE);
      p_data_source = &buffered_stream;
      // Peek a 160 byte sample to identify the format; peekBytes leaves the
      // bytes in the buffered stream, so no audio data is lost for the decoder
      detection_buffer.resize(160);
      size_t bytesRead = buffered_stream.peekBytes(detection_buffer.data(), detection_buffer.size());
      // If no data is available, we cannot proceed with detection
      if (bytesRead == 0) return false;
      // Feed the sample data to the MIME detector for format analysis
      // The detector examines file headers, magic numbers, etc.
      mime_detector.write(detection_buffer.data(), bytesRead);
      mime = mime_detector.mime();
      LOGI("mime from detector: %s", toStr(mime));
    }
    // Process the detected/provided MIME type
    if (mime != nullptr) {
      // Delegate to the overloaded selectDecoder(mime) method to find
      // and initialize the appropriate decoder for this MIME type
      if (!selectDecoder(mime)) {
        LOGE("The decoder could not be selected for %s", toStr(mime));
        return false; // No registered decoder can handle this format
      }
    } else {
      // MIME detection failed - format is unknown or unsupported
      LOGE("Could not determine mime type");
      return false;
    }
  } else {
    // A decoder is already active: just (re)attach the current input stream
    LOGI("Decoder already selected: %s", toStr(actual_decoder.mime));
    assert(p_input != nullptr);
    actual_decoder.decoder->setInput(*p_input);
  }
  // Success: either decoder was already selected or selection completed successfully
  return true;
}
/**
* @brief Reads bytes from the input stream
*
* @param data Buffer to store read data
* @param len Maximum number of bytes to read
* @return Number of bytes actually read
*/
/// Reads up to len bytes of encoded data from the configured input stream
size_t readBytes(uint8_t* data, size_t len) override {
  return (p_input == nullptr) ? 0 : p_input->readBytes(data, len);
}
};
/**
* @brief Adapter class which allows the AudioDecoder API on a StreamingDecoder
*
* This adapter provides the reverse functionality of StreamingDecoderAdapter:
* it allows you to use a StreamingDecoder with the write-based AudioDecoder
* API. It uses a ring buffer and queue to convert write() calls into a stream
* that the StreamingDecoder can read from.
*
* @note This is useful when you have a StreamingDecoder but need to integrate
* it into code that expects the AudioDecoder write-based interface.
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class DecoderAdapter : public AudioDecoder {
 public:
  /**
   * @brief Constructor
   * @param dec The StreamingDecoder to wrap
   * @param bufferSize Size of the internal ring buffer used to hand the
   * written data over to the StreamingDecoder
   */
  DecoderAdapter(StreamingDecoder& dec, int bufferSize) {
    TRACED();
    p_dec = &dec;
    // the wrapped decoder pulls its data from our queue
    p_dec->setInput(queue);
    resize(bufferSize);
  }

  /// Defines where the wrapped decoder writes the decoded PCM data
  void setOutput(Print& out) override { p_dec->setOutput(out); }

  /// Sets the input stream for the wrapped decoder (bypasses the queue)
  void setInput(Stream& in) { p_dec->setInput(in); }

  /**
   * @brief Starts the processing: initializes the wrapped StreamingDecoder
   * and marks this adapter as active
   * @return true if the StreamingDecoder was started successfully
   */
  bool begin() override {
    TRACED();
    active = true;
    bool rc = p_dec->begin();
    return rc;
  }

  /// Stops the processing. The wrapped StreamingDecoder is deliberately not
  /// stopped so that it can be reused.
  void end() override {
    TRACED();
    active = false;
  }

  /**
   * @brief Changes the ring buffer size. The actual allocation is deferred
   * until the buffer is first needed (lazy setup).
   * @param size New buffer size in bytes
   */
  void resize(int size) {
    buffer_size = size;
    // setup the buffer only if it was already allocated
    if (is_setup) rbuffer.resize(size);
  }

  /**
   * @brief Writes encoded audio data to be decoded. The data is queued and
   * then drained by repeatedly calling copy() on the wrapped decoder.
   * @param data Buffer containing encoded audio data
   * @param len Number of bytes to write
   * @return Number of bytes actually queued
   */
  size_t write(const uint8_t* data, size_t len) override {
    TRACED();
    setupLazy();
    size_t result = queue.write((uint8_t*)data, len);
    // Trigger processing - process all available data
    while (p_dec->copy());
    return result;
  }

  /// Direct access to the wrapped StreamingDecoder for advanced use cases
  StreamingDecoder* getStreamingDecoder() { return p_dec; }

  /// Returns true while the adapter is active
  operator bool() override { return active; }

 protected:
  bool active = false;    ///< Whether the adapter is active
  bool is_setup = false;  ///< Whether lazy setup has been performed
  // bug fix: brace-initialize so the member is never read uninitialized
  int buffer_size = 0;    ///< Size of the ring buffer
  StreamingDecoder* p_dec = nullptr;    ///< Wrapped StreamingDecoder instance
  RingBuffer<uint8_t> rbuffer{0};       ///< Ring buffer for data storage
  QueueStream<uint8_t> queue{rbuffer};  ///< Stream interface to the ring buffer

  /// Allocates the ring buffer on first use to save memory
  void setupLazy() {
    if (!is_setup) {
      rbuffer.resize(buffer_size);
      queue.begin();
      is_setup = true;
    }
  }
};
/**
* @brief Type alias for DecoderAdapter
*
* Provides an alternative name for backward compatibility.
*/
using DecoderFromStreaming = DecoderAdapter;
} // namespace audio_tools

View File

@@ -0,0 +1,231 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include <vorbis.h>
namespace audio_tools {
/**
* @brief Vorbis Audio Decoder using low-level libvorbis API
*
* This decoder expects Ogg Vorbis packets to be provided via the write()
* method. It parses the Vorbis headers, initializes the decoder, and outputs
* PCM audio.
*
* Usage:
* 1. Call begin() to reset the decoder.
* 2. Feed the first three Vorbis header packets via write().
* 3. Feed subsequent audio packets via write().
* 4. Use setOutput() to set the PCM output destination.
* 5. Call audioInfo() to retrieve stream parameters after header parsing.
*
* @author Phil Schatzmann
* @ingroup codecs
* @ingroup decoder
* @copyright GPLv3
*/
class VorbisDecoder : public AudioDecoder {
public:
/**
* @brief Constructor for VorbisDecoder
* @param buffer_size Size of the PCM output buffer (default: 256)
* @param header_packets Number of Vorbis header packets (default: 3)
*
* Initializes the decoder and allocates the PCM output buffer.
*/
VorbisDecoder(size_t buffer_size = 256, int header_packets = 3)
: pcm_buffer_size(buffer_size), num_header_packets(header_packets) {}
/**
* @brief Destructor for VorbisDecoder
*
* Cleans up all decoder resources.
*/
~VorbisDecoder() { end(); }
/**
* @brief Resets decoder state and prepares for new Vorbis stream
*
* This method clears all decoder state, resizes the PCM output buffer,
* and initializes Vorbis structures. Call this before feeding header packets.
* @return true if successful
*/
bool begin() override {
end();
pcmout_buffer.resize(pcm_buffer_size);
vorbis_info_init(&vi);
vorbis_comment_init(&vc);
active = true;
return true;
}
/**
* @brief Cleans up all Vorbis decoder structures
*/
void end() override {
vorbis_block_clear(&vb);
vorbis_dsp_clear(&vd);
vorbis_comment_clear(&vc);
vorbis_info_clear(&vi);
header_packets = 0;
decoder_initialized = false;
active = false;
}
/**
* @brief Feeds a Vorbis packet (header or audio) to the decoder
*
* The first three packets must be Vorbis headers. Subsequent packets are
* audio. PCM output is written to the Print stream set via setOutput().
*
* @param data Pointer to packet data
* @param len Length of packet data
* @return Number of PCM bytes written to output
*/
size_t write(const uint8_t *data, size_t len) override {
ogg_packet packet;
packet.packet = (unsigned char *)data;
packet.bytes = len;
packet.b_o_s = (header_packets == 0) ? 1 : 0;
packet.e_o_s = 0;
packet.granulepos = 0;
packet.packetno = header_packets;
if (num_header_packets == 0 && !decoder_initialized) {
if (!initDecoder()) return 0;
decoder_initialized = true;
}
if (header_packets < num_header_packets) {
if (!parseHeaderPacket(packet, header_packets)) return 0;
header_packets++;
if (header_packets == num_header_packets) {
if (!initDecoder()) return 0;
decoder_initialized = true;
}
return 0;
}
if (header_packets == num_header_packets) {
notifyAudioChange(audioInfo());
}
if (!decoder_initialized) return 0;
return decodeAudioPacket(packet);
}
/**
* @brief Returns audio stream info (sample rate, channels, bits per sample)
* @return AudioInfo struct with stream parameters
*/
AudioInfo audioInfo() override {
AudioInfo info;
if (vi.channels > 0 && vi.rate > 0) {
info.sample_rate = vi.rate;
info.channels = vi.channels;
info.bits_per_sample = 16;
}
return info;
}
/**
* @brief Returns true if decoder is active
*/
operator bool() override { return active; }
protected:
/** @brief Vorbis stream info (channels, sample rate, etc.) */
vorbis_info vi{};
/** @brief Vorbis comment metadata */
vorbis_comment vc{};
/** @brief Decoder state for synthesis */
vorbis_dsp_state vd{};
/** @brief Block structure for synthesis */
vorbis_block vb{};
/** @brief Output stream for PCM audio */
Print *p_print = nullptr;
/** @brief Decoder active state */
bool active = false;
/** @brief PCM output buffer size */
size_t pcm_buffer_size = 256;
/** @brief Number of Vorbis header packets */
int num_header_packets = 3;
/** @brief Buffer for interleaved PCM output */
Vector<int16_t> pcmout_buffer;
int header_packets = 0;
bool decoder_initialized = false;
/**
* @brief Parses a Vorbis header packet
* @param packet Ogg Vorbis header packet
* @param header_packets Index of header packet (0, 1, 2)
* @return true if successful
*/
bool parseHeaderPacket(ogg_packet &packet, int header_packets) {
if (vorbis_synthesis_headerin(&vi, &vc, &packet) != 0) {
LOGE("Header packet %d invalid", header_packets);
return false;
}
return true;
}
/**
* @brief Initializes the Vorbis decoder after header parsing
* @return true if successful
*/
bool initDecoder() {
if (vorbis_synthesis_init(&vd, &vi) != 0) {
LOGE("vorbis_synthesis_init failed");
return false;
}
vorbis_block_init(&vd, &vb);
return true;
}
/**
* @brief Decodes an audio packet and writes PCM to output
* @param packet Ogg Vorbis audio packet
* @return Number of PCM bytes written
*/
size_t decodeAudioPacket(ogg_packet &packet) {
size_t total_written = 0;
if (vorbis_synthesis(&vb, &packet) == 0) {
vorbis_synthesis_blockin(&vd, &vb);
float **pcm = nullptr;
int samples = vorbis_synthesis_pcmout(&vd, &pcm);
while (samples > 0 && pcm) {
int chunk = (samples > pcm_buffer_size) ? pcm_buffer_size : samples;
convertFloatToInt16PCM(pcm, chunk, vi.channels);
if (!pcmout_buffer.empty() && p_print) {
p_print->write((uint8_t *)pcmout_buffer.data(),
pcmout_buffer.size() * sizeof(int16_t));
total_written += pcmout_buffer.size() * sizeof(int16_t);
pcmout_buffer.clear();
}
vorbis_synthesis_read(&vd, chunk);
samples = vorbis_synthesis_pcmout(&vd, &pcm);
}
}
return total_written;
}
/**
* @brief Converts float PCM to interleaved int16 PCM and stores in
* pcmout_buffer
* @param pcm Pointer to float PCM array [channels][samples]
* @param samples Number of samples
* @param channels Number of channels
*/
void convertFloatToInt16PCM(float **pcm, int samples, int channels) {
for (int i = 0; i < samples; ++i) {
for (int ch = 0; ch < channels; ++ch) {
float val = pcm[ch][i];
int16_t sample = (int16_t)(val * 32767.0f);
if (sample > 32767) sample = 32767;
if (sample < -32768) sample = -32768;
pcmout_buffer.push_back(sample);
}
}
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,3 @@
#pragma once
#warning("obsolete: use AudioTools/Communication/A2DPStream.h")
#include "AudioTools/Communication/A2DPStream.h"

View File

@@ -0,0 +1,421 @@
#pragma once
#include "AudioToolsConfig.h"
#include "AudioTools/AudioLibs/I2SCodecStream.h"
#include "AudioTools/CoreAudio/AudioActions.h"
namespace audio_tools {
/**
* @brief New functionality which replaces the AudioKitStream that is based on
* the legacy AudioKit library. This functionality uses the new
* arduino-audio-driver library! It is the same as I2SCodecStream extended by
* some AudioActions and some method calls to determine defined pin values.
* See https://github.com/pschatzmann/arduino-audio-driver
* @ingroup io
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AudioBoardStream : public I2SCodecStream {
  /// Adapter which exposes a driver key (e.g. a touch button) as an
  /// AudioActions::Action so it can be polled by processActions()
  struct AudioBoardAction : public AudioActions::Action {
    AudioBoardAction(AudioBoard &board, AudioDriverKey key) {
      this->key = key;
      this->p_board = &board;
    }
    AudioDriverKey key;
    AudioBoard *p_board;
    /// offset by 0x400 so key ids do not collide with GPIO numbers
    int id() override { return key | 0x400; }
    bool readValue() override { return p_board->isKeyPressed(key); }
  };

 public:
  /**
   * @brief Default constructor: for available AudioBoard values check
   * the audioboard variables in
   * https://pschatzmann.github.io/arduino-audio-driver/html/group__audio__driver.html
   * Further information can be found in
   * https://github.com/pschatzmann/arduino-audio-driver/wiki
   */
  AudioBoardStream(audio_driver::AudioBoard &board) : I2SCodecStream(board) {
    // pin mode already set up by driver library
    actions.setPinMode(false);
  }

  bool begin() override { return I2SCodecStream::begin(); }
  bool begin(I2SCodecConfig cfg) override { return I2SCodecStream::begin(cfg); }

  /// Processes input keys and pins: call this regularly (e.g. from loop())
  void processActions() {
    // TRACED();
    actions.processActions();
    delay(1);
  }

  /**
   * @brief Defines a new action that is executed when the button is pressed.
   * NOTE: the created action object is intentionally never freed - it must
   * live as long as the actions list (usually the program lifetime).
   */
  void addAction(AudioDriverKey key, void (*action)(bool, int, void *),
                 void *ref = nullptr) {
    AudioBoardAction *abo = new AudioBoardAction(board(), key);
    abo->actionOn = action;
    abo->ref = (ref == nullptr) ? this : ref;
    actions.add(*abo);
  }

  /**
   * @brief Defines a new action that is executed when the button is pressed
   * (actionOn) and released (actionOff)
   */
  void addAction(AudioDriverKey key, void (*actionOn)(bool, int, void *),
                 void (*actionOff)(bool, int, void *), void *ref = nullptr) {
    AudioBoardAction *abo = new AudioBoardAction(board(), key);
    abo->actionOn = actionOn;
    // bug fix: the release callback was assigned to actionOn a second time,
    // which discarded the press callback and left actionOff unset
    abo->actionOff = actionOff;
    abo->ref = (ref == nullptr) ? this : ref;
    actions.add(*abo);
  }

  /**
   * @brief Defines a new action that is executed when the indicated pin is
   * active; the active logic is determined from the board configuration
   * @param pin gpio number
   * @param action callback
   * @param ref context pointer passed to the callback (defaults to this)
   */
  void addAction(int pin, void (*action)(bool, int, void *),
                 void *ref = nullptr) {
    TRACEI();
    // determine logic from config
    AudioActions::ActiveLogic activeLogic = getActionLogic(pin);
    actions.add(pin, action, activeLogic, ref == nullptr ? this : ref);
  }

  /**
   * @brief Defines a new action that is executed when the indicated pin is
   * active, using the explicitly provided active logic
   */
  void addAction(int pin, void (*action)(bool, int, void *),
                 AudioActions::ActiveLogic activeLogic, void *ref = nullptr) {
    TRACEI();
    actions.add(pin, action, activeLogic, ref == nullptr ? this : ref);
  }

  /// Provides access to the AudioActions
  AudioActions &audioActions() { return actions; }
  AudioActions &getActions() { return actions; }

  /// Relative volume control: adds inc to the current volume
  void incrementVolume(float inc) {
    float current_volume = getVolume();
    float new_volume = current_volume + inc;
    LOGI("incrementVolume: %f -> %f", current_volume, new_volume);
    setVolume(new_volume);
  }

  /// Action callback: increases the volume by the configured increment
  static void actionVolumeUp(bool, int, void *ref) {
    TRACEI();
    AudioBoardStream *self = (AudioBoardStream *)ref;
    self->incrementVolume(+self->actionVolumeIncrementValue());
  }

  /// Action callback: decreases the volume by the configured increment
  static void actionVolumeDown(bool, int, void *ref) {
    TRACEI();
    AudioBoardStream *self = (AudioBoardStream *)ref;
    self->incrementVolume(-self->actionVolumeIncrementValue());
  }

  /// Action callback: toggles between start and stop
  static void actionStartStop(bool, int, void *ref) {
    TRACEI();
    AudioBoardStream *self = (AudioBoardStream *)ref;
    self->active = !self->active;
    self->setActive(self->active);
  }

  /// Action callback: starts the output
  static void actionStart(bool, int, void *ref) {
    TRACEI();
    AudioBoardStream *self = (AudioBoardStream *)ref;
    self->active = true;
    self->setActive(self->active);
  }

  /// Action callback: stops the output
  static void actionStop(bool, int, void *ref) {
    TRACEI();
    AudioBoardStream *self = (AudioBoardStream *)ref;
    self->active = false;
    self->setActive(self->active);
  }

  /**
   * @brief Switches off the PA when the headphone is plugged in and switches
   * it on again when the headphone is unplugged.
   */
  static void actionHeadphoneDetection(bool, int, void *ref) {
    AudioBoardStream *self = (AudioBoardStream *)ref;
    if (self->pinHeadphoneDetect() >= 0) {
      // detect changes
      bool isConnected = self->headphoneStatus();
      if (self->headphoneIsConnected != isConnected) {
        self->headphoneIsConnected = isConnected;
        // update if things have stabilized
        bool powerActive = !isConnected;
        LOGW("Headphone jack has been %s",
             isConnected ? "inserted" : "removed");
        self->setSpeakerActive(powerActive);
      }
    }
    delay(1);
  }

  /// Gpio number for auxin detection (-1 if not defined)
  GpioPin pinAuxin() { return getPinID(PinFunction::AUXIN_DETECT); }
  /// Gpio number for headphone detection (-1 if not defined)
  GpioPin pinHeadphoneDetect() {
    return getPinID(PinFunction::HEADPHONE_DETECT);
  }
  /// Gpio number for PA enable (-1 if not defined)
  GpioPin pinPaEnable() { return getPinID(PinFunction::PA); }
  /// Record button pin (-1 if not defined)
  GpioPin pinInputRec() { return getPinID(PinFunction::KEY, 1); }
  /// Mode button pin (-1 if not defined)
  GpioPin pinInputMode() { return getPinID(PinFunction::KEY, 2); }
  /// Set button pin (-1 if not defined)
  GpioPin pinInputSet() { return getPinID(PinFunction::KEY, 4); }
  /// Play button pin (-1 if not defined)
  GpioPin pinInputPlay() { return getPinID(PinFunction::KEY, 3); }
  /// Volume up button pin (-1 if not defined)
  GpioPin pinVolumeUp() { return getPinID(PinFunction::KEY, 6); }
  /// Volume down button pin (-1 if not defined)
  GpioPin pinVolumeDown() { return getPinID(PinFunction::KEY, 5); }
  /// LED pin for the indicated index (-1 if not defined)
  GpioPin pinLed(int idx) { return getPinID(PinFunction::LED, idx); }

  /// the same as setPAPower()
  void setSpeakerActive(bool active) { setPAPower(active); }

  /**
   * @brief Returns true if the headphone is plugged in
   */
  bool headphoneStatus() {
    int headphoneGpioPin = pinHeadphoneDetect();
    // bug fix: use >= 0 (consistent with actionHeadphoneDetection) so that
    // GPIO 0 is treated as a valid pin
    return headphoneGpioPin >= 0 ? !digitalRead(headphoneGpioPin) : false;
  }

  /// The opposite of setMute(): setActive(true) calls setMute(false)
  void setActive(bool active) { setMute(!active); }

  /// add start/stop on inputMode
  void addStartStopAction() {
    // pin conflicts for pinInputMode() with the SD CS pin for AIThinker and
    // buttons
    int sd_cs = getSdCsPin();
    int input_mode = pinInputMode();
    if (input_mode != -1 && (input_mode != sd_cs || !cfg.sd_active)) {
      LOGD("actionInputMode")
      addAction(input_mode, actionStartStop);
    }
  }

  /// add volume up and volume down action
  void addVolumeActions() {
    // pin conflicts with SD Lyrat SD CS GpioPin and buttons / Conflict on
    // Audiokit V. 2957
    int sd_cs = getSdCsPin();
    int vol_up = pinVolumeUp();
    int vol_down = pinVolumeDown();
    if ((vol_up != -1 && vol_down != -1) &&
        (!cfg.sd_active || (vol_down != sd_cs && vol_up != sd_cs))) {
      LOGD("actionVolumeDown")
      addAction(vol_down, actionVolumeDown);
      LOGD("actionVolumeUp")
      addAction(vol_up, actionVolumeUp);
    } else {
      LOGW("Volume Buttons ignored because of conflict: %d ", pinVolumeDown());
    }
  }

  /// Adds headphone detection handling
  void addHeadphoneDetectionAction() {
    // pin conflicts with AIThinker A101: key6 and headphone detection
    int head_phone = pinHeadphoneDetect();
    if (head_phone != -1 && (getPinID(PinFunction::KEY, 6) != head_phone)) {
      actions.add(head_phone, actionHeadphoneDetection,
                  AudioActions::ActiveChange, this);
    }
  }

  /**
   * @brief Sets up the supported default actions (volume, start/stop,
   * headphone detection)
   */
  void addDefaultActions() {
    TRACEI();
    addHeadphoneDetectionAction();
    addStartStopAction();
    addVolumeActions();
  }

  /// Defines the increment value used by actionVolumeDown/actionVolumeUp
  void setActionVolumeIncrementValue(float value) {
    action_increment_value = value;
  }
  float actionVolumeIncrementValue() { return action_increment_value; }

  /// Returns true when the indicated key is currently pressed
  bool isKeyPressed(int key) {
    if (!board()) return false;
    return board().isKeyPressed(key);
  }

 protected:
  AudioActions actions;                 ///< action/button dispatcher
  bool headphoneIsConnected = false;    ///< last observed headphone state
  bool active = true;                   ///< start/stop state
  float action_increment_value = 0.02;  ///< volume step per key press

  /// Determines the SD CS pin (cached); sets cfg.sd_active=false when no SPI
  /// pins are defined
  int getSdCsPin() {
    static GpioPin sd_cs = -2;
    // execute only once
    if (sd_cs != -2) return sd_cs;
    auto sd_opt = getPins().getSPIPins(PinFunction::SD);
    if (sd_opt) {
      sd_cs = sd_opt.value().cs;
    } else {
      // no spi -> no sd
      LOGI("No sd defined -> sd_active=false")
      cfg.sd_active = false;
      sd_cs = -1;
    }
    return sd_cs;
  }

  /// Determines the action logic (ActiveLow, ActiveHigh or ActiveTouch) for
  /// the pin from the board configuration
  AudioActions::ActiveLogic getActionLogic(int pin) {
    auto opt = board().getPins().getPin(pin);
    PinLogic logic = PinLogic::Input;
    if (opt) logic = opt.value().pin_logic;
    switch (logic) {
      case PinLogic::Input:
      case PinLogic::InputActiveLow:
        return AudioActions::ActiveLow;
      case PinLogic::InputActiveHigh:
        return AudioActions::ActiveHigh;
      case PinLogic::InputActiveTouch:
        return AudioActions::ActiveTouch;
      default:
        return AudioActions::ActiveLow;
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,3 @@
#pragma once
// bug fix: '#WARNING' is not a valid preprocessor directive (directives are
// case-sensitive) and causes a compile error; use lowercase '#warning'
#warning("Obsolete: Use AudioTools/Communication/AudioClientRTSP555.h")
#include "AudioTools/Communication/AudioClientRTSP555.h"

View File

@@ -0,0 +1,138 @@
#pragma once
#include "AudioFFT.h"
#ifdef STM32
# include "CMSIS_DSP.h"
#endif
#if defined(ARDUINO_ARCH_RENESAS) || defined(ARDUINO_ARCH_RP2040)
# include "arm_vec_fft.h"
#endif
/**
* @defgroup fft-cmsis CMSIS
* @ingroup fft
* @brief FFT using CMSIS
**/
namespace audio_tools {
/**
* @brief Driver for Cmsis-FFT see https://arm-software.github.io/CMSIS_5/DSP
* @ingroup fft-cmsis
* @author Phil Schatzmann
* @copyright GPLv3
*/
class FFTDriverCmsisFFT : public FFTDriver {
 public:
  /// Allocates the working buffers and initializes the CMSIS rfft instance
  bool begin(int len) override {
    TRACEI();
    // release buffers from any previous begin() to avoid leaking on re-init
    end();
    this->len = len;
    input = new float[len];
    output = new float[len*2];      // complex output: interleaved re/im pairs
    output_magn = new float[len];
    status = arm_rfft_fast_init_f32(&fft_instance, len);
    if (status!=ARM_MATH_SUCCESS){
      LOGE("arm_rfft_fast_init_f32: %d", status);
    }
    assert(input!=nullptr);
    assert(output!=nullptr);
    assert(output_magn != nullptr);
    return input!=nullptr && output != nullptr && output_magn != nullptr;
  }

  /// Releases the working buffers
  void end() override {
    TRACEI();
    // bug fix: the buffers were allocated with new[] and must be released
    // with delete[] (plain delete on an array is undefined behavior);
    // delete[] on nullptr is a safe no-op, so no guards are needed
    delete[] input;
    delete[] output;
    delete[] output_magn;
    input = nullptr;
    output = nullptr;
    output_magn = nullptr;
  }

  /// Stores a time-domain sample at the given index
  void setValue(int idx, float value) override { input[idx] = value; }

  /// Forward FFT + magnitude spectrum + peak search
  void fft() override {
    TRACED();
    arm_rfft_fast_f32(&fft_instance, input, output, false);
    arm_cmplx_mag_f32(output, output_magn, len / 2);
    /* Calculates maxValue and returns corresponding BIN value */
    arm_max_f32(output_magn, len / 2, &result_max_value, &result_index);
    TRACED();
  }

  /// Inverse FFT: reconstructs the time-domain signal in 'input' from 'output'
  void rfft() override { arm_rfft_fast_f32(&fft_instance, output, input, true); }

  /// Magnitude of bin idx (already final: computed by arm_cmplx_mag_f32)
  float magnitude(int idx) override { return output_magn[idx]; }
  /// same as magnitude - CMSIS already provides the final magnitude
  float magnitudeFast(int idx) override { return output_magn[idx]; }
  /// Returns the time-domain sample at idx
  float getValue(int idx) override { return input[idx]; }

  /// Writes a complex bin (real/imaginary pair) into the FFT output array
  bool setBin(int pos, float real, float img) override {
    if (pos >= len) return false;
    output[pos*2] = real;
    output[pos*2+1] = img;
    return true;
  }
  /// Reads a complex bin (real/imaginary pair) from the FFT output array
  bool getBin(int pos, FFTBin &bin) override {
    if (pos >= len) return false;
    bin.real = output[pos*2];
    bin.img = output[pos*2+1];
    return true;
  }

  bool isReverseFFT() override { return true; }  // inverse FFT is supported
  bool isValid() override { return status == ARM_MATH_SUCCESS; }

  arm_rfft_fast_instance_f32 fft_instance;
  // bug fix: initialize members so isValid()/len are defined before begin()
  arm_status status = ARM_MATH_ARGUMENT_ERROR;
  int len = 0;
  float *input = nullptr;
  float *output_magn = nullptr;
  float *output = nullptr;
  float result_max_value = 0;
  uint32_t result_index = 0;
};
/**
* @brief AudioFFT for ARM processors that provided Cmsis DSP
* @ingroup fft-cmsis
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AudioCmsisFFT : public AudioFFTBase {
 public:
  AudioCmsisFFT() : AudioFFTBase(new FFTDriverCmsisFFT()) {}

  /// Raw complex result array produced by the CMSIS FFT
  float* array() { return driverEx()->output; }

  /// Magnitude spectrum computed by the driver
  float* magnitudes() { return driverEx()->output_magn; }

  /// Peak magnitude and its bin from the last FFT run
  AudioFFTResult result() {
    FFTDriverCmsisFFT* drv = driverEx();
    AudioFFTResult out;
    out.magnitude = drv->result_max_value;
    out.bin = drv->result_index;
    return out;
  }

  /// Typed access to the underlying CMSIS driver
  FFTDriverCmsisFFT* driverEx() {
    return static_cast<FFTDriverCmsisFFT*>(driver());
  }
};
}

View File

@@ -0,0 +1,98 @@
#pragma once
#include "AudioFFT.h"
#include "fft.h"
/**
* @defgroup fft-esp32 esp32-fft
* @ingroup fft
* @brief FFT using esp32-fft
**/
namespace audio_tools {
/**
* @brief Driver for ESP32-FFT https://github.com/pschatzmann/esp32-fft
* @ingroup fft-esp32
* @author Phil Schatzmann
* @copyright GPLv3
*/
class FFTDriverESP32FFT : public FFTDriver {
public:
bool begin(int len) override {
this->len = len;
if (p_fft_object==nullptr) p_fft_object = fft_init(len, FFT_REAL, FFT_FORWARD, NULL, NULL);
assert(p_fft_object!=nullptr);
return p_fft_object!=nullptr;
}
void end()override{
if (p_fft_object!=nullptr) fft_destroy(p_fft_object);
}
void setValue(int idx, float value) override{
p_fft_object->input[idx] = value;
}
void fft() override{
fft_execute(p_fft_object);
};
void rfft() override {
irfft(p_fft_object->input, p_fft_object->output, p_fft_object->twiddle_factors, p_fft_object->size);
}
float magnitude(int idx) override {
return sqrt(magnitudeFast(idx));
}
/// magnitude w/o sqrt
float magnitudeFast(int idx) override {
return (pow(p_fft_object->output[2*idx],2) + pow(p_fft_object->output[2*idx+1],2));
}
float getValue(int idx) { return p_fft_object->input[idx];}
bool setBin(int pos, float real, float img) override {
if (pos>=len) return false;
p_fft_object->output[2*pos] = real;
p_fft_object->output[2*pos+1] = img;
return true;
}
bool getBin(int pos, FFTBin &bin) override {
if (pos>=len) return false;
bin.real = p_fft_object->output[2*pos];
bin.img = p_fft_object->output[2*pos+1];
return true;
}
bool isReverseFFT() override {return true;}
bool isValid() override{ return p_fft_object!=nullptr; }
fft_config_t *p_fft_object=nullptr;
int len;
};
/**
 * @brief AudioFFT using https://github.com/pschatzmann/esp32-fft
 * @ingroup fft-esp32
 * @author Phil Schatzmann
 * Warning: This does not work as expected yet: I did not get the expected results...
 * @copyright GPLv3
 */
class AudioESP32FFT : public AudioFFTBase {
 public:
  AudioESP32FFT() : AudioFFTBase(new FFTDriverESP32FFT()) {}

  /// Typed access to the underlying driver
  FFTDriverESP32FFT* driverEx() {
    return static_cast<FFTDriverESP32FFT*>(driver());
  }

  /// Provides the result array returned by the FFT: The real part of a magnitude at a frequency is followed by the corresponding imaginary part in the output*/
  float* array() { return driverEx()->p_fft_object->output; }
};
}

View File

@@ -0,0 +1,363 @@
/**
* @file AudioEsp32ULP.h
* @author Phil Schatzmann
* @brief Outputs to ESP32 DAC through the ULP, freeing I2S for other uses
* @version 0.1
* @date 2023-03-26
* @copyright (C) 2020 Martin Laclaustra, based on bitluni's code
*
*/
#pragma once
#ifndef ESP32
#error Only the ESP32 supports ULP audio output
#endif
#include "AudioLogger.h"
#include "AudioTools/CoreAudio/AudioTypes.h"
#include "AudioTools/CoreAudio/AudioOutput.h"
#include <driver/dac.h>
#include <driver/rtc_io.h>
#include <esp32/ulp.h>
#include <math.h>
#include <soc/rtc.h>
#include "soc/rtc_io_reg.h"
namespace audio_tools {
enum UlpDac { ULP_DAC1 = 1, ULP_DAC2 = 2 };
/**
 * @brief Outputs to ESP32 DAC through the ULP (Ultra> Low Power coprocessor),
 * freeing I2S for other uses. Connect left channel on pin 25 Connect right
 * channel on pin 26
 * @ingroup io
 * @version 0.1
 * @date 2023-03-26
 * @copyright (C) 2020 Martin Laclaustra, based on bitluni's code
 */
class AudioESP32ULP : public AudioOutput {
 public:
  /// Provides the default configuration: 44.1 kHz, stereo, 16 bits
  AudioInfo defaultConfig() {
    AudioInfo cfg(44100, 2, 16);
    return cfg;
  }

  /// Selects the DAC when we have a mono signal
  void setMonoDAC(UlpDac dac) { selected_mono_dac = dac; }

  /// Selects the limit for the availableForWrite to report the data
  void setMinWriteBytes(int bytes) { min_write_bytes = bytes; }

  /// Starts the processing. If the output is mono, we can determine the output
  /// pin by selecting DAC1 (gpio25) or DAC2 (gpio26)
  bool begin(AudioInfo info) {
    TRACEI();
    cfg = info;
    stereoOutput = info.channels == 2;
    activeDACs = stereoOutput ? 3 : selected_mono_dac;
    hertz = cfg.sample_rate;
    // only 16 bit samples are supported
    if (info.bits_per_sample != 16) {
      LOGE("Unsupported bits_per_sample: %d", info.bits_per_sample);
      return false;
    }
    return setup();
  }

  /// Writes 16 bit PCM frames; blocks until all frames were buffered
  size_t write(const uint8_t *data, size_t len) {
    TRACED();
    int16_t *data_16 = (int16_t *)data;
    size_t result = 0;
    int16_t stereo[2];
    int frameSize = cfg.channels * sizeof(int16_t);
    int frames = len / frameSize;
    for (int j = 0; j < frames; j++) {
      int pos = j * cfg.channels;
      stereo[0] = data_16[pos];
      // mono input is duplicated to both channels
      stereo[1] = stereoOutput ? data_16[pos + 1] : data_16[pos];
      // blocking write
      while (!writeFrame(stereo)) {
        delay(20);
      }
      result += frameSize;
    }
    return result;
  }

  /// Reports the free buffer space (0 when below min_write_bytes).
  /// NOTE(review): the difference is in sample words, while min_write_bytes is
  /// documented as bytes — confirm the intended unit
  int availableForWrite() {
    int result = totalSampleWords - lastFilledWord;
    return result < min_write_bytes ? 0 : result;
  }

  /// Stops the ULP program and resets the active DACs to mid level (silence)
  void end() {
    TRACEI();
    const ulp_insn_t stopulp[] = {// stop the timer
                                  I_END(),
                                  // end the program
                                  I_HALT()};
    size_t load_addr = 0;
    size_t size = sizeof(stopulp) / sizeof(ulp_insn_t);
    ulp_process_macros_and_load(load_addr, stopulp, &size);
    // start
    ulp_run(0);
    if (activeDACs & 1) {
      dac_output_voltage(DAC_CHANNEL_1, 128);
    }
    if (activeDACs & 2) {
      dac_output_voltage(DAC_CHANNEL_2, 128);
    }
  }

 protected:
  int lastFilledWord = 0;               // next buffer word to be filled
  int hertz;                            // sample rate
  int min_write_bytes = 128;            // threshold for availableForWrite()
  UlpDac selected_mono_dac = ULP_DAC1;  // output DAC for mono signals
  uint8_t bufferedOddSample = 128;      // pending low byte while pairing mono samples
  bool waitingOddSample = true;  // must be set to false for mono output
  int activeDACs = 3;            // 1:DAC1; 2:DAC2; 3:both;
  bool stereoOutput = true;
  const int opcodeCount = 20;
  const uint32_t dacTableStart1 = 2048 - 512;
  const uint32_t dacTableStart2 = dacTableStart1 - 512;
  uint32_t totalSampleWords =
      2048 - 512 - 512 - (opcodeCount + 1);  // add 512 for mono
  const int totalSamples = totalSampleWords * 2;
  const uint32_t indexAddress = opcodeCount;
  const uint32_t bufferStart = indexAddress + 1;

  /// Loads and starts the ULP program and prepares the DAC jump tables
  bool setup() {
    TRACED();
    if (!stereoOutput) {
      waitingOddSample = false;
      // totalSampleWords += 512;
      // dacTableStart2 = dacTableStart1;
    }
    // calculate the actual ULP clock
    unsigned long rtc_8md256_period = rtc_clk_cal(RTC_CAL_8MD256, 1000);
    unsigned long rtc_fast_freq_hz =
        1000000ULL * (1 << RTC_CLK_CAL_FRACT) * 256 / rtc_8md256_period;
    // initialize DACs
    if (activeDACs & 1) {
      dac_output_enable(DAC_CHANNEL_1);
      dac_output_voltage(DAC_CHANNEL_1, 128);
    }
    if (activeDACs & 2) {
      dac_output_enable(DAC_CHANNEL_2);
      dac_output_voltage(DAC_CHANNEL_2, 128);
    }
    int retAddress1 = 9;
    int retAddress2 = 14;
    int loopCycles = 134;
    int loopHalfCycles1 = 90;
    int loopHalfCycles2 = 44;
    // fixed: rtc_fast_freq_hz is unsigned long — "%d" was undefined behavior
    LOGI("Real RTC clock: %lu", rtc_fast_freq_hz);
    uint32_t dt = (rtc_fast_freq_hz / hertz) - loopCycles;
    uint32_t dt2 = 0;
    if (!stereoOutput) {
      dt = (rtc_fast_freq_hz / hertz) - loopHalfCycles1;
      dt2 = (rtc_fast_freq_hz / hertz) - loopHalfCycles2;
    }
    // fixed: uint32_t logged with an unsigned format specifier
    LOGI("dt: %u", (unsigned)dt);
    LOGI("dt2: %u", (unsigned)dt2);
    const ulp_insn_t stereo[] = {
        // reset offset register
        I_MOVI(R3, 0),
        // delay to get the right sampling rate
        I_DELAY(dt),  // 6 + dt
        // reset sample index
        I_MOVI(R0, 0),  // 6
        // write the index back to memory for the main cpu
        I_ST(R0, R3, indexAddress),  // 8
        // load the samples
        I_LD(R1, R0, bufferStart),  // 8
        // mask the lower 8 bits
        I_ANDI(R2, R1, 0x00ff),  // 6
        // multiply by 2
        I_LSHI(R2, R2, 1),  // 6
        // add start position
        I_ADDI(R2, R2, dacTableStart1),  // 6
        // jump to the dac opcode
        I_BXR(R2),  // 4
        // back from first dac
        // delay between the two samples in mono rendering
        I_DELAY(dt2),  // 6 + dt2
        // mask the upper 8 bits
        I_ANDI(R2, R1, 0xff00),  // 6
        // shift the upper bits to right and multiply by 2
        I_RSHI(R2, R2, 8 - 1),  // 6
        // add start position of second dac table
        I_ADDI(R2, R2, dacTableStart2),  // 6
        // jump to the dac opcode
        I_BXR(R2),  // 4
        // here we get back from writing the second sample
        // load 0x8080 as sample
        I_MOVI(R1, 0x8080),  // 6
        // write 0x8080 in the sample buffer
        I_ST(R1, R0, indexAddress),  // 8
        // increment the sample index
        I_ADDI(R0, R0, 1),  // 6
        // if reached end of the buffer, jump relative to index reset
        I_BGE(-16, totalSampleWords),  // 4
        // wait to get the right sample rate (2 cycles more to compensate the
        // index reset)
        I_DELAY((unsigned int)dt + 2),  // 8 + dt
        // if not, jump absolute to where index is written to memory
        I_BXI(3)  // 4
    };
    // write io and jump back another 12 + 4 + 12 + 4
    size_t load_addr = 0;
    size_t size = sizeof(stereo) / sizeof(ulp_insn_t);
    ulp_process_macros_and_load(load_addr, stereo, &size);
    // this is how to get the opcodes
    // for(int i = 0; i < size; i++)
    //   Serial.println(RTC_SLOW_MEM[i], HEX);
    // create DAC opcode tables
    switch (activeDACs) {
      case 1:
        // mono on DAC1: both tables drive DAC1
        for (int i = 0; i < 256; i++) {
          RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
              RTC_IO_PAD_DAC1_REG, 19, 26, i);  // dac1: 0x1D4C0121 | (i << 10)
          RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
              create_I_BXI(retAddress1);  // 0x80000000 + retAddress1 * 4
          RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
              RTC_IO_PAD_DAC1_REG, 19, 26, i);
          RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
              create_I_BXI(retAddress2);  // 0x80000000 + retAddress2 * 4
        }
        break;
      case 2:
        // mono on DAC2: both tables drive DAC2
        for (int i = 0; i < 256; i++) {
          RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
              RTC_IO_PAD_DAC2_REG, 19, 26, i);
          RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
              create_I_BXI(retAddress1);  // 0x80000000 + retAddress1 * 4
          RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
              RTC_IO_PAD_DAC2_REG, 19, 26, i);  // dac2: 0x1D4C0122 | (i << 10)
          RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
              create_I_BXI(retAddress2);  // 0x80000000 + retAddress2 * 4
        }
        break;
      case 3:
        // stereo: table 1 drives DAC1, table 2 drives DAC2
        for (int i = 0; i < 256; i++) {
          RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
              RTC_IO_PAD_DAC1_REG, 19, 26, i);  // dac1: 0x1D4C0121 | (i << 10)
          RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
              create_I_BXI(retAddress1);  // 0x80000000 + retAddress1 * 4
          // fixed: the second table must write to DAC2 (it previously wrote
          // DAC1, which made stereo output identical to the mono DAC1 case)
          RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
              RTC_IO_PAD_DAC2_REG, 19, 26, i);  // dac2: 0x1D4C0122 | (i << 10)
          RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
              create_I_BXI(retAddress2);  // 0x80000000 + retAddress2 * 4
        }
        break;
    }
    // set all samples to 128 (silence)
    for (int i = 0; i < totalSampleWords; i++)
      RTC_SLOW_MEM[bufferStart + i] = 0x8080;
    // start
    RTC_SLOW_MEM[indexAddress] = 0;
    ulp_run(0);
    // wait until ULP starts using samples and the index of output sample
    // advances
    while (RTC_SLOW_MEM[indexAddress] == 0)
      delay(1);
    return true;
  }

  /// Converts one 16 bit frame to 8 bit and stores it in the ULP sample
  /// buffer; returns false when the buffer is currently full
  bool writeFrame(int16_t sample[2]) {
    TRACED();
    int16_t ms[2];
    ms[0] = sample[0];
    ms[1] = sample[1];
    // TODO: needs improvement (counting is different here with respect to ULP
    // code)
    int currentSample = RTC_SLOW_MEM[indexAddress] & 0xffff;
    int currentWord = currentSample >> 1;
    // scale 16 bit signed -> 8 bit unsigned around 128
    for (int i = 0; i < 2; i++) {
      ms[i] = ((ms[i] >> 8) + 128) & 0xff;
    }
    if (!stereoOutput)  // mix both channels
      ms[0] =
          (uint16_t)(((uint32_t)((int32_t)(ms[0]) + (int32_t)(ms[1])) >> 1) &
                     0xff);
    if (waitingOddSample) {  // always true for stereo because samples are
                             // consumed in pairs
      if (lastFilledWord !=
          currentWord)  // accept sample if writing index lastFilledWord has not
                        // reached index of output sample
      {
        unsigned int w;
        if (stereoOutput) {
          w = ms[0];
          w |= ms[1] << 8;
        } else {
          w = bufferedOddSample;
          w |= ms[0] << 8;
          bufferedOddSample = 128;
          waitingOddSample = false;
        }
        RTC_SLOW_MEM[bufferStart + lastFilledWord] = w;
        lastFilledWord++;
        if (lastFilledWord == totalSampleWords)
          lastFilledWord = 0;
        return true;
      } else {
        return false;
      }
    } else {
      bufferedOddSample = ms[0];
      waitingOddSample = true;
      return true;
    }
  }

  /// Assembles the binary opcode of an I_WR_REG ULP instruction
  uint32_t create_I_WR_REG(uint32_t reg, uint32_t low_bit, uint32_t high_bit,
                           uint32_t val) {
    typedef union {
      ulp_insn_t ulp_ins;
      uint32_t ulp_bin;
    } ulp_union;
    const ulp_insn_t singleinstruction[] = {
        I_WR_REG(reg, low_bit, high_bit, val)};
    ulp_union recover_ins;
    recover_ins.ulp_ins = singleinstruction[0];
    return (uint32_t)(recover_ins.ulp_bin);
  }

  /// Assembles the binary opcode of an I_BXI ULP instruction
  uint32_t create_I_BXI(uint32_t imm_pc) {
    typedef union {
      ulp_insn_t ulp_ins;
      uint32_t ulp_bin;
    } ulp_union;
    const ulp_insn_t singleinstruction[] = {I_BXI(imm_pc)};
    ulp_union recover_ins;
    recover_ins.ulp_ins = singleinstruction[0];
    return (uint32_t)(recover_ins.ulp_bin);
  }
};
}

View File

@@ -0,0 +1,100 @@
#pragma once
#include "AudioToolsConfig.h"
#include "AudioTools/CoreAudio/AudioOutput.h"
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "AudioTools/CoreAudio/Buffers.h"
#include "AudioOutput.h"
#include "SoundData.h"
namespace audio_tools {
/**
 * @brief ESP8266Audio AudioOutput class which stores the data in a temporary
 * buffer. The buffer can be consumed e.g. by a callback function by calling
 * read();
 * Dependencies: ESP8266Audio Library
 * Dependencies: ESP32-A2DP Library
 *
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AudioOutputWithCallback : public ::AudioOutput, public BufferedStream {
 public:
  // Default constructor
  AudioOutputWithCallback(int bufferSize, int bufferCount)
      : BufferedStream(bufferSize) {
    callback_buffer_ptr = new NBuffer<Frame>(bufferSize, bufferCount);
  }

  virtual ~AudioOutputWithCallback() { delete callback_buffer_ptr; }

  /// Activates the output
  virtual bool begin() {
    active = true;
    return true;
  }

  /// puts the sample into a buffer
  virtual bool ConsumeSample(int16_t sample[2]) {
    Frame c;
    c.channel1 = sample[0];
    c.channel2 = sample[1];
    return callback_buffer_ptr->write(c);
  };

  /// stops the processing
  virtual bool stop() {
    active = false;
    return true;
  };

  /// Provides the data from the internal buffer to the callback
  size_t read(Frame *src, size_t len) {
    return active ? this->callback_buffer_ptr->readArray(src, len) : 0;
  }

 protected:
  NBuffer<Frame> *callback_buffer_ptr;
  // fixed: initialized to false — read() consults this flag and could
  // previously access an indeterminate value before begin() was called
  bool active = false;

  virtual size_t writeExt(const uint8_t *data, size_t len) {
    return callback_buffer_ptr->writeArray((Frame *)data, len / sizeof(Frame));
  }

  virtual size_t readExt(uint8_t *data, size_t len) {
    // (stray empty statement removed)
    return callback_buffer_ptr->readArray((Frame *)data, len / sizeof(Frame));
  }
};
/**
 * @brief Stream Adapter for ESP8288-Audio AudioOutput
 *
 */
class ESP3288AudioOutput : public AudioStream {
 public:
  /// @param out ESP8266Audio output that consumes the frames (not owned)
  /// @param channels channel count of the pcm data (only 2 is supported)
  ESP3288AudioOutput(::AudioOutput &out, int channels) {
    p_out = &out;
    this->channels = channels;
  }
  /// Forwards the 16 bit pcm data to ConsumeSamples.
  /// NOTE(review): len / 2 converts bytes to int16 samples, but
  /// ConsumeSamples typically expects a frame count (len / 4 for stereo) and
  /// returns frames, while write() should report bytes — verify against the
  /// ESP8266Audio API
  virtual size_t write(const uint8_t *data, size_t len) {
    size_t result = 0;
    int16_t *v = (int16_t *)data;
    if (channels == 2) {
      result = p_out->ConsumeSamples(v, len / 2);
    } else {
      LOGE("Only 2 Channels are supported");
      result = 0;
    }
    return result;
  }
 protected:
  ::AudioOutput *p_out = nullptr;  // target output, not owned
  int channels;                    // configured channel count
};
} // namespace audio_tools

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,151 @@
#pragma once
#include "AudioFFT.h"
#include "esp_dsp.h"
/**
* @defgroup fft-dsp esp32-dsp
* @ingroup fft
* @brief FFT using esp32 esp-dsp library
**/
namespace audio_tools {
/**
 * @brief fft Driver for espressif dsp library: https://espressif-docs.readthedocs-hosted.com/projects/esp-dsp/en/latest/esp-dsp-apis.html
 * @ingroup fft-dsp
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FFTDriverEspressifFFT : public FFTDriver {
 public:
  /// Allocates the working buffers and initializes the fft tables
  bool begin(int len) override {
    this->len = len;
    // interleaved complex data: 2 floats per sample
    int alloc_size = len * 2;
    fft_data.resize(alloc_size);
    table_buffer.resize(CONFIG_DSP_MAX_FFT_SIZE);
    assert(table_buffer.data() != nullptr);
    assert(fft_data.data() != nullptr);
    ret = dsps_fft2r_init_fc32(table_buffer.data(), CONFIG_DSP_MAX_FFT_SIZE);
    if (ret != ESP_OK) {
      LOGE("dsps_fft2r_init_fc32 %d", ret);
    }
    return fft_data.data() != nullptr && ret == ESP_OK;
  }

  /// Releases the esp-dsp tables and the working buffers
  void end() override {
    dsps_fft2r_deinit_fc32();
    fft_data.resize(0);
    table_buffer.resize(0);
  }

  /// Sets the real part of a sample; the imaginary part is cleared
  void setValue(int idx, float value) override {
    if (idx < len) {
      fft_data[idx * 2] = value;
      fft_data[idx * 2 + 1] = 0.0f;
    }
  }

  /// Returns the real part of a sample (inverse fft result)
  float getValue(int idx) override { return fft_data[idx * 2]; }

  /// Forward FFT: transform, bit reverse, then split the complex vector
  void fft() override {
    ret = dsps_fft2r_fc32(fft_data.data(), len);
    if (ret != ESP_OK) {
      LOGE("dsps_fft2r_fc32 %d", ret);
    }
    // Bit reverse
    ret = dsps_bit_rev_fc32(fft_data.data(), len);
    if (ret != ESP_OK) {
      LOGE("dsps_bit_rev_fc32 %d", ret);
    }
    // Convert one complex vector to two complex vectors
    ret = dsps_cplx2reC_fc32(fft_data.data(), len);
    if (ret != ESP_OK) {
      LOGE("dsps_cplx2reC_fc32 %d", ret);
    }
  };

  /// Inverse FFT implemented as forward FFT of the conjugated spectrum
  void rfft() override {
    conjugate();
    ret = dsps_fft2r_fc32(fft_data.data(), len);
    if (ret != ESP_OK) {
      LOGE("dsps_fft2r_fc32 %d", ret);
    }
    conjugate();
    // Bit reverse
    ret = dsps_bit_rev_fc32(fft_data.data(), len);
    if (ret != ESP_OK) {
      LOGE("dsps_bit_rev_fc32 %d", ret);
    }
    // Convert one complex vector to two complex vectors
    ret = dsps_cplx2reC_fc32(fft_data.data(), len);
    if (ret != ESP_OK) {
      LOGE("dsps_cplx2reC_fc32 %d", ret);
    }
  }

  /// Conjugates all bins in place
  void conjugate() {
    FFTBin bin;
    for (int j = 0; j < len; j++) {
      getBin(j, bin);
      bin.conjugate();
      setBin(j, bin);
    }
  }

  float magnitude(int idx) override { return sqrt(magnitudeFast(idx)); }

  /// magnitude w/o sqrt
  float magnitudeFast(int idx) override {
    return (fft_data[idx * 2] * fft_data[idx * 2] +
            fft_data[idx * 2 + 1] * fft_data[idx * 2 + 1]);
  }

  /// Writes one complex bin into the interleaved buffer
  bool setBin(int pos, float real, float img) override {
    if (pos >= len) return false;
    fft_data[pos * 2] = real;
    fft_data[pos * 2 + 1] = img;
    return true;
  }
  bool setBin(int pos, FFTBin &bin) { return FFTDriver::setBin(pos, bin); }

  /// Reads one complex bin from the interleaved buffer
  bool getBin(int pos, FFTBin &bin) override {
    if (pos >= len) return false;
    bin.real = fft_data[pos * 2];
    bin.img = fft_data[pos * 2 + 1];
    return true;
  }

  bool isReverseFFT() override { return true; }
  bool isValid() override { return fft_data.data() != nullptr && ret == ESP_OK; }

  // fixed: initialized so that isValid() does not read an indeterminate
  // value before begin() was called
  esp_err_t ret = ESP_OK;
  Vector<float> fft_data{0};      // interleaved complex working buffer
  Vector<float> table_buffer{0};  // fft twiddle table
  int len = 0;                    // FFT length in samples
};
/**
 * @brief AudioFFT using FFTReal. The only specific functionality is the access to the dataArray
 * @ingroup fft-dsp
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AudioEspressifFFT : public AudioFFTBase {
 public:
  AudioEspressifFFT() : AudioFFTBase(new FFTDriverEspressifFFT()) {}

  /// Typed access to the underlying driver
  FFTDriverEspressifFFT* driverEx() {
    return static_cast<FFTDriverEspressifFFT*>(driver());
  }

  /// Provides the complex array returned by the FFT
  float *dataArray() { return driverEx()->fft_data.data(); }
};
}

View File

@@ -0,0 +1,793 @@
#pragma once
#include "AudioTools/AudioLibs/FFT/FFTWindows.h"
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "AudioTools/CoreAudio/MusicalNotes.h"
/**
* @defgroup fft FFT
* @ingroup dsp
* @brief Fast Fourier Transform
**/
namespace audio_tools {
// forward declaration
class AudioFFTBase;
// shared helper used to translate bin frequencies to musical notes.
// NOTE(review): 'static' in a header gives every translation unit its own
// copy — confirm that is intended
static MusicalNotes AudioFFTNotes;
/**
 * @brief Result of the FFT
 * @ingroup fft
 */
struct AudioFFTResult {
  int bin = 0;             // bin index of the reported magnitude
  float magnitude = 0.0f;  // magnitude at that bin
  float frequency = 0.0f;  // corresponding frequency in Hz
  /// frequency rounded to the closest integer
  int frequencyAsInt() { return round(frequency); }
  /// closest musical note for the frequency
  const char *frequencyAsNote() { return AudioFFTNotes.note(frequency); }
  /// closest musical note; diff receives the offset from the exact pitch
  const char *frequencyAsNote(float &diff) {
    return AudioFFTNotes.note(frequency, diff);
  }
};
/**
 * @brief Configuration for AudioFFT. If there are more then 1 channel the
 * channel_used is defining which channel is used to perform the fft on.
 * @ingroup fft
 */
struct AudioFFTConfig : public AudioInfo {
  AudioFFTConfig() {
    // defaults: CD quality stereo
    channels = 2;
    bits_per_sample = 16;
    sample_rate = 44100;
  }
  /// Callback method which is called after we got a new result
  void (*callback)(AudioFFTBase &fft) = nullptr;
  /// Channel which is used as input
  uint8_t channel_used = 0;
  /// Number of samples per FFT (must be a power of 2)
  int length = 8192;
  /// Step between consecutive FFTs; 0 means use 'length' (no overlap)
  int stride = 0;
  /// Optional window function for both fft and ifft
  WindowFunction *window_function = nullptr;
  /// Optional window function for fft only
  WindowFunction *window_function_fft = nullptr;
  /// Optional window function for ifft only
  WindowFunction *window_function_ifft = nullptr;
  /// TX_MODE = FFT, RX_MODE = IFFT
  RxTxMode rxtx_mode = TX_MODE;
  /// caller
  void *ref = nullptr;
};
/// A single complex FFT bin (real and imaginary component)
struct FFTBin {
  float real;
  float img;

  FFTBin() = default;

  FFTBin(float r, float i) : real(r), img(i) {}

  /// Scales both components by the given factor
  void multiply(float f) {
    real = real * f;
    img = img * f;
  }

  /// Negates the imaginary part (complex conjugate)
  void conjugate() { img = -img; }

  /// Resets both components to zero
  void clear() {
    real = 0.0f;
    img = 0.0f;
  }
};
/// Inverse FFT Overlapp Add: accumulates overlapping inverse-FFT windows and
/// emits 'stride' scaled samples per step
class FFTInverseOverlapAdder {
 public:
  FFTInverseOverlapAdder(int size = 0) {
    if (size > 0) resize(size);
  }
  /// Initialize data by defining new size
  void resize(int size) {
    // reset max for new scaling
    rfft_max = 0.0;
    // define new size
    len = size;
    data.resize(size);
    for (int j = 0; j < data.size(); j++) {
      data[j] = 0.0;
    }
  }
  // adds the values to the array (by applying the window function)
  void add(float value, int pos, WindowFunction *window_function) {
    float add_value = value;
    if (window_function != nullptr) {
      add_value = value * window_function->factor(pos);
    }
    assert(pos < len);
    data[pos] += add_value;
  }
  // gets the scaled audio data as result
  // NOTE(review): rfft_max tracks the largest signed value seen so far —
  // negative peaks are not considered for the scaling; confirm intended
  void getStepData(float *result, int stride, float maxResult) {
    for (int j = 0; j < stride; j++) {
      // determine max value to scale
      if (data[j] > rfft_max) rfft_max = data[j];
    }
    // scale the first 'stride' samples and clip them to +/- maxResult
    for (int j = 0; j < stride; j++) {
      result[j] = data[j] / rfft_max * maxResult;
      // clip
      if (result[j] > maxResult) {
        result[j] = maxResult;
      }
      if (result[j] < -maxResult) {
        result[j] = -maxResult;
      }
    }
    // copy data to head
    for (int j = 0; j < len - stride; j++) {
      data[j] = data[j + stride];
    }
    // clear tail
    for (int j = len - stride; j < len; j++) {
      data[j] = 0.0;
    }
  }
  /// provides the actual size
  int size() { return data.size(); }

 protected:
  Vector<float> data{0};  // accumulation buffer of length len
  int len = 0;            // configured window length
  float rfft_max = 0;     // running max used for output scaling
};
/**
 * @brief Abstract Class which defines the basic FFT functionality
 * @ingroup fft
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FFTDriver {
 public:
  /// Allocates the resources for an FFT of the indicated length
  virtual bool begin(int len) = 0;
  /// Releases the allocated resources
  virtual void end() = 0;
  /// Sets the real value
  virtual void setValue(int pos, float value) = 0;
  /// Perform FFT
  virtual void fft() = 0;
  /// Calculate the magnitude (fft result) at index (sqrt(i² + r²))
  virtual float magnitude(int idx) = 0;
  /// Calculate the magnitude w/o square root
  virtual float magnitudeFast(int idx) = 0;
  /// Returns true when begin() succeeded
  virtual bool isValid() = 0;
  /// Returns true if reverse FFT is supported
  virtual bool isReverseFFT() { return false; }
  /// Calculate reverse FFT
  virtual void rfft() { LOGE("Not implemented"); }
  /// Get result value from Reverse FFT
  virtual float getValue(int pos) = 0;
  /// sets the value of a bin
  virtual bool setBin(int idx, float real, float img) { return false; }
  /// sets the value of a bin
  bool setBin(int pos, FFTBin &bin) { return setBin(pos, bin.real, bin.img); }
  /// gets the value of a bin
  virtual bool getBin(int pos, FFTBin &bin) { return false; }
};
/**
* @brief Executes FFT using audio data privded by write() and/or an inverse FFT
* where the samples are made available via readBytes(). The Driver which is
* passed in the constructor selects a specifc FFT implementation.
* @ingroup fft
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AudioFFTBase : public AudioStream {
public:
/// Default Constructor. The len needs to be of the power of 2 (e.g. 512,
/// 1024, 2048, 4096, 8192)
/// NOTE(review): the driver (allocated with 'new' by the subclasses) is never
/// deleted here — confirm the intended lifetime
AudioFFTBase(FFTDriver *driver) { p_driver = driver; }
// releases the driver-owned buffers via end()
~AudioFFTBase() { end(); }
/// Provides the default configuration
/// @param mode TX_MODE = FFT, RX_MODE = IFFT
AudioFFTConfig defaultConfig(RxTxMode mode = TX_MODE) {
  AudioFFTConfig info;
  info.rxtx_mode = mode;
  return info;
}
/// starts the processing
/// @param info configuration (copied; stride/window defaults are filled in
/// by begin())
bool begin(AudioFFTConfig info) {
  cfg = info;
  return begin();
}
/// starts the processing: validates the configuration, initializes the
/// driver and the window functions and allocates the mode-specific buffers
bool begin() override {
  // only the first half of the bins carries independent information
  bins = cfg.length / 2;
  // define window functions
  if (cfg.window_function_fft == nullptr)
    cfg.window_function_fft = cfg.window_function;
  if (cfg.window_function_ifft == nullptr)
    cfg.window_function_ifft = cfg.window_function;
  // define default stride value if not defined
  if (cfg.stride == 0) cfg.stride = cfg.length;
  if (!isPowerOfTwo(cfg.length)) {
    LOGE("Len must be of the power of 2: %d", cfg.length);
    return false;
  }
  if (!p_driver->begin(cfg.length)) {
    LOGE("Not enough memory");
    // fixed: abort here instead of continuing the setup with an
    // invalid driver
    return false;
  }
  if (cfg.window_function_fft != nullptr) {
    cfg.window_function_fft->begin(cfg.length);
  }
  if (cfg.window_function_ifft != nullptr &&
      cfg.window_function_ifft != cfg.window_function_fft) {
    cfg.window_function_ifft->begin(cfg.length);
  }
  bool is_valid_rxtx = false;
  if (cfg.rxtx_mode == TX_MODE || cfg.rxtx_mode == RXTX_MODE) {
    // holds last N bytes that need to be reprocessed
    stride_buffer.resize((cfg.length) * bytesPerSample());
    is_valid_rxtx = true;
  }
  if (cfg.rxtx_mode == RX_MODE || cfg.rxtx_mode == RXTX_MODE) {
    rfft_data.resize(cfg.channels * bytesPerSample() * cfg.stride);
    rfft_add.resize(cfg.length);
    step_data.resize(cfg.stride);
    is_valid_rxtx = true;
  }
  if (!is_valid_rxtx) {
    LOGE("Invalid rxtx_mode");
    return false;
  }
  current_pos = 0;
  return p_driver->isValid();
}
/// Just resets the current_pos e.g. to start a new cycle
void reset() {
  current_pos = 0;
  // restart the window functions so their state matches position 0 again
  if (cfg.window_function_fft != nullptr) {
    cfg.window_function_fft->begin(cfg.length);
  }
  if (cfg.window_function_ifft != nullptr) {
    cfg.window_function_ifft->begin(cfg.length);
  }
}
/// Returns true when a driver is set and it was initialized successfully
operator bool() override {
  return p_driver != nullptr && p_driver->isValid();
}
/// Notify change of audio information
void setAudioInfo(AudioInfo info) override {
  cfg.bits_per_sample = info.bits_per_sample;
  cfg.sample_rate = info.sample_rate;
  cfg.channels = info.channels;
  // restart the processing with the updated configuration
  begin(cfg);
}
/// Release the allocated memory
void end() override {
  p_driver->end();
  l_magnitudes.resize(0);
  rfft_data.resize(0);
  rfft_add.resize(0);
  step_data.resize(0);
  // NOTE(review): stride_buffer is not released here — confirm intended
}
/// Provide the audio data as FFT input
size_t write(const uint8_t *data, size_t len) override {
  // nothing is consumed while the driver is not ready
  if (!p_driver->isValid()) return 0;
  // dispatch on the configured sample width; len is in bytes
  switch (cfg.bits_per_sample) {
    case 8:
      processSamples<int8_t>(data, len);
      break;
    case 16:
      processSamples<int16_t>(data, len / 2);
      break;
    case 24:
      processSamples<int24_t>(data, len / 3);
      break;
    case 32:
      processSamples<int32_t>(data, len / 4);
      break;
    default:
      LOGE("Unsupported bits_per_sample: %d", cfg.bits_per_sample);
      break;
  }
  // all provided bytes are considered consumed
  return len;
}
/// Provides the result of a reverse FFT
size_t readBytes(uint8_t *data, size_t len) override {
  TRACED();
  // rfft_data is only sized in RX/RXTX mode (see begin())
  if (rfft_data.size() == 0) return 0;
  // get data via callback if there is no more data
  if (cfg.rxtx_mode == RX_MODE && cfg.callback != nullptr &&
      rfft_data.available() == 0) {
    cfg.callback(*this);
  }
  // execute rfft when we consumed all data
  if (has_rfft_data && rfft_data.available() == 0) {
    rfft();
  }
  return rfft_data.readArray(data, len);
}
/// We try to fill the buffer at once (reported in bytes)
int availableForWrite() override {
  return cfg.length * cfg.channels * bytesPerSample();
}
/// Data available for reverse fft (reported in bytes)
int available() override {
  // begin() replaces a 0 stride with cfg.length
  assert(cfg.stride != 0);
  return cfg.stride * cfg.channels * bytesPerSample();
}
/// The number of bins used by the FFT which are relevant for the result
/// (cfg.length / 2)
int size() { return bins; }
/// The number of samples
int length() { return cfg.length; }
/// time after the fft: time when the last result was provided - you can poll
/// this to check if we have a new result
unsigned long resultTime() { return timestamp; }
/// time before the fft
unsigned long resultTimeBegin() { return timestamp_begin; }
/// Determines the result values in the max magnitude bin
AudioFFTResult result() {
  AudioFFTResult best;
  best.magnitude = 0.0f;
  best.bin = 0;
  // linear scan for the strongest bin
  int bin_count = size();
  for (int idx = 0; idx < bin_count; idx++) {
    float current = magnitude(idx);
    if (current > best.magnitude) {
      best.magnitude = current;
      best.bin = idx;
    }
  }
  best.frequency = frequency(best.bin);
  return best;
}
/// Determines the N biggest result values
/// @tparam N size of the caller-provided result array
/// (ordering is maintained by insertSorted, defined elsewhere in the file)
template <int N>
void resultArray(AudioFFTResult (&result)[N]) {
  // initialize to negative value
  for (int j = 0; j < N; j++) {
    result[j].magnitude = -1000000;
  }
  // find top n values
  AudioFFTResult act;
  for (int j = 0; j < size(); j++) {
    act.magnitude = magnitude(j);
    act.bin = j;
    act.frequency = frequency(j);
    insertSorted<N>(result, act);
  }
}
/// Convert the FFT result to MEL spectrum
/// @param n_bins number of MEL bins (<= 0 selects size())
/// @param min_freq lower frequency bound (<= 0 selects frequency(0))
/// @param max_freq upper frequency bound (<= 0 selects the top bin frequency)
/// @return pointer into the member buffer mel_bins (valid until next call)
float *toMEL(int n_bins, float min_freq = 0.0f, float max_freq = 0.0f) {
  // calculate mel bins
  if (n_bins <= 0) n_bins = size();
  if (min_freq <= 0.0f) min_freq = frequency(0);
  if (max_freq <= 0.0f) max_freq = frequency(size() - 1);
  mel_bins.resize(n_bins);
  // Convert min and max frequencies to MEL scale
  float min_mel = 2595.0f * log10(1.0f + (min_freq / 700.0f));
  float max_mel = 2595.0f * log10(1.0f + (max_freq / 700.0f));
  // Create equally spaced points in the MEL scale
  Vector<float> mel_points;
  mel_points.resize(n_bins + 2);  // +2 for the endpoints
  float mel_step = (max_mel - min_mel) / (n_bins + 1);
  for (int i = 0; i < n_bins + 2; i++) {
    mel_points[i] = min_mel + i * mel_step;
  }
  // Convert MEL points back to frequency
  Vector<float> freq_points;
  freq_points.resize(n_bins + 2);
  for (int i = 0; i < n_bins + 2; i++) {
    freq_points[i] = 700.0f * (pow(10.0f, mel_points[i] / 2595.0f) - 1.0f);
  }
  // Convert frequency points to FFT bin indices
  Vector<int> bin_indices;
  bin_indices.resize(n_bins + 2);
  for (int i = 0; i < n_bins + 2; i++) {
    bin_indices[i] = round(freq_points[i] * cfg.length / cfg.sample_rate);
    // Ensure bin index is within valid range
    if (bin_indices[i] >= bins) bin_indices[i] = bins - 1;
    if (bin_indices[i] < 0) bin_indices[i] = 0;
  }
  // Create and apply triangular filters
  for (int i = 0; i < n_bins; i++) {
    float mel_sum = 0.0f;
    int start_bin = bin_indices[i];
    int mid_bin = bin_indices[i + 1];
    int end_bin = bin_indices[i + 2];
    // Apply first half of triangle filter (ascending)
    for (int j = start_bin; j < mid_bin; j++) {
      if (j >= bins) break;
      float weight = (j - start_bin) / float(mid_bin - start_bin);
      mel_sum += magnitude(j) * weight;
    }
    // Apply second half of triangle filter (descending)
    for (int j = mid_bin; j < end_bin; j++) {
      if (j >= bins) break;
      float weight = (end_bin - j) / float(end_bin - mid_bin);
      mel_sum += magnitude(j) * weight;
    }
    mel_bins[i] = mel_sum;
  }
  return mel_bins.data();
}
/**
 * @brief Convert MEL spectrum back to linear frequency spectrum
 *
 * @param values Pointer to MEL spectrum values
 * @param n_bins Number of MEL bins
 * @param min_freq lower frequency bound (<= 0 selects frequency(0))
 * @param max_freq upper frequency bound (<= 0 selects the top bin frequency)
 * @return bool Success status
 */
bool fromMEL(float *values, int n_bins, float min_freq = 0.0f,
             float max_freq = 0.0f) {
  if (n_bins <= 0 || values == nullptr) return false;
  // Use default frequency range if not specified
  if (min_freq <= 0.0f) min_freq = frequency(0);
  if (max_freq <= 0.0f) max_freq = frequency(size() - 1);
  // Clear the current magnitude array
  for (int i = 0; i < bins; i++) {
    FFTBin bin;
    bin.clear();
    setBin(i, bin);
  }
  // Convert min and max frequencies to MEL scale
  float min_mel = 2595.0f * log10(1.0f + (min_freq / 700.0f));
  float max_mel = 2595.0f * log10(1.0f + (max_freq / 700.0f));
  // Create equally spaced points in the MEL scale
  Vector<float> mel_points;
  mel_points.resize(n_bins + 2);  // +2 for the endpoints
  float mel_step = (max_mel - min_mel) / (n_bins + 1);
  for (int i = 0; i < n_bins + 2; i++) {
    mel_points[i] = min_mel + i * mel_step;
  }
  // Convert MEL points back to frequency
  Vector<float> freq_points;
  freq_points.resize(n_bins + 2);
  for (int i = 0; i < n_bins + 2; i++) {
    freq_points[i] = 700.0f * (pow(10.0f, mel_points[i] / 2595.0f) - 1.0f);
  }
  // Convert frequency points to FFT bin indices
  Vector<int> bin_indices;
  bin_indices.resize(n_bins + 2);
  for (int i = 0; i < n_bins + 2; i++) {
    bin_indices[i] = round(freq_points[i] * cfg.length / cfg.sample_rate);
    // Ensure bin index is within valid range
    if (bin_indices[i] >= bins) bin_indices[i] = bins - 1;
    if (bin_indices[i] < 0) bin_indices[i] = 0;
  }
  // Distribute MEL energy back to linear frequency bins
  // NOTE(review): the += below assumes Vector::resize zero-initializes the
  // elements — verify against the Vector implementation
  Vector<float> linear_magnitudes;
  linear_magnitudes.resize(bins);
  for (int i = 0; i < n_bins; i++) {
    int start_bin = bin_indices[i];
    int mid_bin = bin_indices[i + 1];
    int end_bin = bin_indices[i + 2];
    // Apply first half of triangle (ascending)
    for (int j = start_bin; j < mid_bin; j++) {
      if (j >= bins) break;
      float weight = (j - start_bin) / float(mid_bin - start_bin);
      linear_magnitudes[j] += values[i] * weight;
    }
    // Apply second half of triangle (descending)
    for (int j = mid_bin; j < end_bin; j++) {
      if (j >= bins) break;
      float weight = (end_bin - j) / float(end_bin - mid_bin);
      linear_magnitudes[j] += values[i] * weight;
    }
  }
  // Set magnitude values and create simple phase (all zeros)
  for (int i = 0; i < bins; i++) {
    if (linear_magnitudes[i] > 0) {
      FFTBin bin;
      bin.real = linear_magnitudes[i];
      bin.img = 0.0f;
      setBin(i, bin);
    }
  }
  return true;
}
/// Provides access to the FFTDriver instance that implements the basic FFT
/// functionality for this object.
FFTDriver *driver() {
  return p_driver;
}
/// Determines the center frequency (in Hz) of the indicated bin.
/// Returns 0 and logs an error when the bin index is out of range
/// (negative indexes were previously not rejected).
float frequency(int bin) {
  if (bin < 0 || bin >= bins) {
    LOGE("Invalid bin %d", bin);
    return 0;
  }
  // bin i corresponds to i * sample_rate / fft_length
  return static_cast<float>(bin) * cfg.sample_rate / cfg.length;
}
/// Determine the bin number from the frequency
/// Uses the Arduino map() integer interpolation: 0..Nyquist is mapped
/// linearly onto 0..size(). Note that map() does integer arithmetic, so
/// the result is truncated, not rounded.
int frequencyToBin(int freq) {
// Nyquist frequency is the highest representable frequency
int max_freq = cfg.sample_rate / 2;
return map(freq, 0, max_freq, 0, size());
}
/// Calculates the magnitude of the fft result to determine the max value (bin
/// is 0 to size()). Returns 0 and logs an error for an out-of-range bin
/// (negative indexes were previously not rejected).
float magnitude(int bin) {
  if (bin < 0 || bin >= bins) {
    LOGE("Invalid bin %d", bin);
    return 0;
  }
  return p_driver->magnitude(bin);
}
/// Calculates the squared magnitude (no sqrt) of the indicated bin.
/// Returns 0 and logs an error for an out-of-range bin
/// (negative indexes were previously not rejected).
float magnitudeFast(int bin) {
  if (bin < 0 || bin >= bins) {
    LOGE("Invalid bin %d", bin);
    return 0;
  }
  return p_driver->magnitudeFast(bin);
}
/// Calculates the phase (atan2 of imaginary/real part) of the indicated bin.
/// Returns 0.0 when the bin cannot be read; previously an uninitialized
/// FFTBin was passed to atan2 in that case.
float phase(int bin) {
  FFTBin fft_bin{0.0f, 0.0f};
  if (!getBin(bin, fft_bin)) return 0.0f;
  return atan2(fft_bin.img, fft_bin.real);
}
/// Provides the magnitudes as an array of size size(). Please note that this
/// method allocates additional memory on the first call (the buffer is
/// reused afterwards)!
float *magnitudes() {
  if (l_magnitudes.size() == 0) {
    l_magnitudes.resize(size());
  }
  const int count = size();
  for (int idx = 0; idx < count; ++idx) {
    l_magnitudes[idx] = magnitude(idx);
  }
  return l_magnitudes.data();
}
/// Provides the squared magnitudes (w/o calling the square root function) as
/// an array of size size(). Please note that this method allocates additional
/// memory on the first call (the buffer is reused afterwards)!
float *magnitudesFast() {
  if (l_magnitudes.size() == 0) {
    l_magnitudes.resize(size());
  }
  const int count = size();
  for (int idx = 0; idx < count; ++idx) {
    l_magnitudes[idx] = magnitudeFast(idx);
  }
  return l_magnitudes.data();
}
/// sets the value of a bin
/// The value is also mirrored into bin (length - idx) so that the spectrum
/// stays conjugate-symmetric for the inverse FFT.
/// NOTE(review): for idx==0 the mirror index equals cfg.length, which the
/// driver rejects, so the call returns false even though the first half was
/// updated — confirm this is intended.
bool setBin(int idx, float real, float img) {
// flag that spectral data for a subsequent inverse FFT is available
// (set even before the range check — intentional? verify)
has_rfft_data = true;
if (idx < 0 || idx >= size()) return false;
bool rc_first_half = p_driver->setBin(idx, real, img);
bool rc_2nd_half = p_driver->setBin(cfg.length - idx, real, img);
return rc_first_half && rc_2nd_half;
}
/// Sets the value of a bin from an FFTBin struct (forwards to the
/// real/imaginary overload).
bool setBin(int pos, FFTBin &bin) {
  return setBin(pos, bin.real, bin.img);
}
/// Reads the value of a bin from the underlying driver.
bool getBin(int pos, FFTBin &bin) {
  return p_driver->getBin(pos, bin);
}
/// clears the fft data
void clearBins() {
FFTBin empty{0, 0};
for (int j = 0; j < size(); j++) {
setBin(j, empty);
}
}
/// Provides (mutable) access to the actual configuration.
AudioFFTConfig &config() {
  return cfg;
}
protected:
// underlying FFT implementation (owned elsewhere / set via constructor)
FFTDriver *p_driver = nullptr;
// current write position into the sample buffer
int current_pos = 0;
// number of result bins (typically length/2)
int bins = 0;
// millis() timestamp when the last fft() started
unsigned long timestamp_begin = 0l;
// millis() timestamp when the last fft() finished
unsigned long timestamp = 0l;
// active configuration (length, stride, channels, callbacks, ...)
AudioFFTConfig cfg;
// overlap-add accumulator used by the inverse FFT
FFTInverseOverlapAdder rfft_add{0};
// lazily allocated magnitude result buffer (see magnitudes())
Vector<float> l_magnitudes{0};
// scratch buffer for one stride of inverse-FFT output samples
Vector<float> step_data{0};
// scratch buffer for MEL band values
Vector<float> mel_bins{0};
// collects incoming samples until one FFT window (length) is complete
SingleBuffer<uint8_t> stride_buffer{0};
// holds inverse-FFT PCM output until it is consumed by readBytes
RingBuffer<uint8_t> rfft_data{0};
// true when spectral data is available for an inverse FFT
bool has_rfft_data = false;
// Add samples to the input data - and process them when the window is full.
// Picks the configured channel out of the interleaved input, collects the
// samples in the stride buffer and runs an FFT for every complete window.
// Fixed: the inner loop previously re-declared `j`, shadowing the outer loop
// variable; the outer index is now size_t to match `count`.
template <typename T>
void processSamples(const void *data, size_t count) {
  T *dataT = (T *)data;
  T sample;
  for (size_t j = 0; j < count; j += cfg.channels) {
    // pick the selected channel from the interleaved frame
    sample = dataT[j + cfg.channel_used];
    if (writeStrideBuffer((uint8_t *)&sample, sizeof(T))) {
      // process data if buffer is full
      T *samples = (T *)stride_buffer.data();
      int sample_count = stride_buffer.size() / sizeof(T);
      assert(sample_count == cfg.length);
      for (int k = 0; k < sample_count; k++) {
        T out_sample = samples[k];
        // apply the configured window function and scale to -1.0 .. 1.0
        T windowed_sample = windowedSample(out_sample, k);
        float scaled_sample =
            1.0f / NumberConverter::maxValueT<T>() * windowed_sample;
        p_driver->setValue(k, scaled_sample);
      }
      fft<T>();
      // remove stride samples
      stride_buffer.clearArray(cfg.stride * sizeof(T));
      // validate available data in stride buffer
      if (cfg.stride == cfg.length) assert(stride_buffer.available() == 0);
    }
  }
}
/// Applies the configured FFT window function to one sample; returns the
/// sample unchanged when no window function is configured.
template <typename T>
T windowedSample(T sample, int pos) {
  if (cfg.window_function_fft == nullptr) {
    return sample;
  }
  return cfg.window_function_fft->factor(pos) * sample;
}
// Runs the forward FFT on the samples previously stored in the driver,
// records begin/end timestamps and notifies the configured callback.
template <typename T>
void fft() {
timestamp_begin = millis();
p_driver->fft();
// the spectral result can now be fed into an inverse FFT
has_rfft_data = true;
timestamp = millis();
if (cfg.callback != nullptr) {
cfg.callback(*this);
}
}
/// reverse fft
/// Runs the inverse FFT in the driver, feeds the resulting time-domain
/// samples (with the optional inverse window function) into the
/// overlap-add accumulator and publishes one stride of PCM output.
void rfft() {
TRACED();
// execute reverse fft
p_driver->rfft();
has_rfft_data = false;
// add data to sum buffer
for (int j = 0; j < cfg.length; j++) {
float value = p_driver->getValue(j);
rfft_add.add(value, j, cfg.window_function_ifft);
}
// get result data from sum buffer
rfftWriteData(rfft_data);
}
/// write reverse fft result to buffer to make it available for readBytes
/// Pulls one stride of float samples from the overlap-add accumulator
/// (already scaled to the integer range of bits_per_sample) and converts
/// them to the configured PCM sample size.
void rfftWriteData(BaseBuffer<uint8_t> &data) {
// get data to result buffer
// for (int j = 0; j < cfg.stride; j++) {
// step_data[j] = 0.0;
// }
rfft_add.getStepData(step_data.data(), cfg.stride,
NumberConverter::maxValue(cfg.bits_per_sample));
// convert the float samples to the configured integer PCM width
switch (cfg.bits_per_sample) {
case 8:
writeIFFT<int8_t>(step_data.data(), cfg.stride);
break;
case 16:
writeIFFT<int16_t>(step_data.data(), cfg.stride);
break;
case 24:
writeIFFT<int24_t>(step_data.data(), cfg.stride);
break;
case 32:
writeIFFT<int32_t>(step_data.data(), cfg.stride);
break;
default:
LOGE("Unsupported bits: %d", cfg.bits_per_sample);
}
}
// Writes one mono sample per frame, duplicated on every output channel,
// into the rfft result ring buffer.
// Fixed: the original used a variable-length array (T out_data[cfg.channels])
// which is not standard C++; the sample is now written once per channel,
// producing the identical byte stream.
template <typename T>
void writeIFFT(float *data, int len) {
  for (int j = 0; j < len; j++) {
    T sample = data[j];
    for (int ch = 0; ch < cfg.channels; ch++) {
      int result = rfft_data.writeArray((uint8_t *)&sample, sizeof(T));
      assert(result == sizeof(T));
    }
  }
}
/// Number of bytes per single (mono) sample, derived from bits_per_sample.
inline int bytesPerSample() {
  return cfg.bits_per_sample / 8;
}
/// Inserts tmp into the fixed-size result array which is kept sorted by
/// descending magnitude; the smallest entry falls off the end.
template <int N>
void insertSorted(AudioFFTResult (&result)[N], AudioFFTResult tmp) {
  // locate the first slot whose magnitude is smaller than the new record
  for (int idx = 0; idx < N; idx++) {
    if (tmp.magnitude > result[idx].magnitude) {
      // shift the tail one slot to the right to make room
      for (int k = N - 2; k >= idx; k--) {
        result[k + 1] = result[k];
      }
      // place the new record and stop
      result[idx] = tmp;
      break;
    }
  }
}
// adds samples to stride buffer, returns true if the buffer is full
// (i.e. one complete FFT window has been collected).
// The caller is expected to drain cfg.stride samples after processing.
bool writeStrideBuffer(uint8_t *buffer, size_t len) {
// the caller must never overflow the window buffer
assert(stride_buffer.availableForWrite() >= len);
stride_buffer.writeArray(buffer, len);
return stride_buffer.isFull();
}
/// Returns true if x is a power of two. Fixed: the original accepted 0
/// (0 & -1 == 0), but 0 is not a power of two.
bool isPowerOfTwo(uint16_t x) { return x != 0 && (x & (x - 1)) == 0; }
};
} // namespace audio_tools

View File

@@ -0,0 +1,391 @@
#pragma once
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "AudioTools/AudioLibs/AudioFaustDSP.h"
namespace audio_tools {
/**
* @brief Integration into Faust DSP see https://faust.grame.fr/
* To generate code from faust, select src and cpp
* @ingroup dsp
* @author Phil Schatzmann
* @copyright GPLv3
*/
template<class DSP>
class FaustStream : public AudioStream {
public:
/// Constructor for Faust as Audio Source
FaustStream(bool useSeparateOutputBuffer=true) {
with_output_buffer = useSeparateOutputBuffer;
}
/// Constructor for Faust as signal Processor - changing an input signal and sending it to out
FaustStream(Print &out, bool useSeparateOutputBuffer=true){
p_out = &out;
with_output_buffer = useSeparateOutputBuffer;
}
~FaustStream(){
end();
deleteFloatBuffer();
delete p_dsp;
#ifdef USE_MEMORY_MANAGER
DSP::classDestroy();
#endif
}
/// Provides a pointer to the actual dsp object
dsp *getDSP(){
return p_dsp;
}
AudioInfo defaultConfig() {
AudioInfo def;
def.channels = 2;
def.bits_per_sample = 16;
def.sample_rate = 44100;
return def;
}
/// Checks the parameters and starts the processing
bool begin(AudioInfo cfg){
TRACED();
bool result = true;
this->cfg = cfg;
this->bytes_per_sample = cfg.bits_per_sample / 8;
this->bytes_per_frame = bytes_per_sample * cfg.channels;
this->float_to_int_factor = NumberConverter::maxValue(cfg.bits_per_sample);
if (p_dsp==nullptr){
#ifdef USE_MEMORY_MANAGER
DSP::fManager = new dsp_memory_manager();
DSP::memoryInfo();
p_dsp = DSP::create();
#else
p_dsp = new DSP();
#endif
}
if (p_dsp==nullptr){
LOGE("dsp is null");
return false;
}
DSP::classInit(cfg.sample_rate);
p_dsp->buildUserInterface(&ui);
p_dsp->init(cfg.sample_rate);
p_dsp->instanceInit(cfg.sample_rate);
// we do expect an output
result = checkChannels();
// allocate array of channel data
if (p_buffer==nullptr){
p_buffer = new FAUSTFLOAT*[cfg.channels]();
}
if (with_output_buffer && p_buffer_out==nullptr){
p_buffer_out = new FAUSTFLOAT*[cfg.channels]();
}
LOGI("is_read: %s", is_read?"true":"false");
LOGI("is_write: %s", is_write?"true":"false");
gate_exists = ui.exists("gate");
LOGI("gate_exists: %s", gate_exists?"true":"false");
return result;
}
/// Ends the processing
void end() {
TRACED();
is_read = false;
is_write = false;
p_dsp->instanceClear();
#ifdef USE_MEMORY_MANAGER
DSP::destroy(p_dsp);
p_dsp = nullptr;
#endif
}
/// Used if FaustStream is used as audio source
size_t readBytes(uint8_t *data, size_t len) override {
size_t result = 0;
if (is_read){
TRACED();
result = len;
int samples = len / bytes_per_sample;
allocateFloatBuffer(samples, false);
p_dsp->compute(samples, nullptr, p_buffer);
// convert from float to int
switch(cfg.bits_per_sample){
case 8:
convertFloatBufferToInt<int8_t>(samples, p_buffer, data);
break;
case 16:
convertFloatBufferToInt<int16_t>(samples, p_buffer, data);
break;
case 24:
convertFloatBufferToInt<int24_t>(samples, p_buffer, data);
break;
case 32:
convertFloatBufferToInt<int32_t>(samples, p_buffer, data);
break;
default:
TRACEE();
}
}
return result;
}
/// Used if FaustStream is used as audio sink or filter
size_t write(const uint8_t *data, size_t len) override {
LOGD("FaustStream::write: %d", len);
switch(cfg.bits_per_sample){
case 8:
return writeT<int8_t>(data, len);
case 16:
return writeT<int16_t>(data, len);
case 24:
return writeT<int24_t>(data, len);
case 32:
return writeT<int32_t>(data, len);
default:
TRACEE();
}
return 0;
}
int available() override {
return DEFAULT_BUFFER_SIZE;
}
int availableForWrite() override {
return DEFAULT_BUFFER_SIZE / bytes_per_frame; // we limit the write size
}
/// Determines the value of a parameter
virtual FAUSTFLOAT labelValue(const char*label) {
return ui.getValue(label);
}
/// Defines the value of a parameter
virtual bool setLabelValue(const char*label, FAUSTFLOAT value){
if (!is_read && !is_write) LOGE("setLabelValue must be called after begin");
bool result = ui.setValue(label, value);
LOGI("setLabelValue('%s',%f) -> %s", label, value, result?"true":"false");
return result;
}
virtual bool setMidiNote(int note){
FAUSTFLOAT frq = noteToFrequency(note);
return setFrequency(frq);
}
virtual bool setFrequency(FAUSTFLOAT freq){
return setLabelValue("freq", freq);
}
virtual FAUSTFLOAT frequency() {
return labelValue("freq");
}
virtual bool setBend(FAUSTFLOAT bend){
return setLabelValue("bend", bend);
}
virtual FAUSTFLOAT bend() {
return labelValue("bend");
}
virtual bool setGain(FAUSTFLOAT gain){
return setLabelValue("gain", gain);
}
virtual FAUSTFLOAT gain() {
return labelValue("gain");
}
virtual bool midiOn(int note, FAUSTFLOAT gain){
if (gate_exists) setLabelValue("gate",1.0);
return setMidiNote(note) && setGain(gain);
}
virtual bool midiOff(int note){
if (gate_exists) setLabelValue("gate",0.0);
return setMidiNote(note) && setGain(0.0);
}
protected:
bool is_init = false;
bool is_read = false;
bool is_write = false;
bool gate_exists = false;
bool with_output_buffer;
int bytes_per_sample;
int bytes_per_frame;
int buffer_allocated;
float float_to_int_factor = 32767;
DSP *p_dsp = nullptr;
AudioInfo cfg;
Print *p_out=nullptr;
FAUSTFLOAT** p_buffer=nullptr;
FAUSTFLOAT** p_buffer_out=nullptr;
UI ui;
/// Checks the input and output channels and updates the is_write or is_read scenario flags
bool checkChannels() {
bool result = true;
// update channels
int num_outputs = p_dsp->getNumOutputs();
if (cfg.channels!=num_outputs){
cfg.channels = num_outputs;
LOGW("Updating channels to %d", num_outputs);
}
if (num_outputs>0){
if (num_outputs==cfg.channels){
is_read = true;
} else {
LOGE("NumOutputs %d is not matching with number of channels %d", num_outputs, cfg.channels);
result = false;
}
if (p_dsp->getNumInputs()!=0 && p_dsp->getNumInputs()!=cfg.channels){
LOGE("NumInputs is not matching with number of channels");
result = false;
}
if (p_dsp->getNumInputs()>0){
if (p_out!=nullptr){
is_write = true;
} else {
LOGE("Faust expects input - you need to provide and AudioStream in the constructor");
result = false;
}
}
}
return result;
}
/// Converts the float buffer to int values
template <class T>
void convertFloatBufferToInt(int samples, FAUSTFLOAT**p_float_in, void *data_out){
T *dataT = (T*) data_out;
int frameCount = samples/cfg.channels;
for (int j=0; j<frameCount; j++){
for (int i=0;i<cfg.channels;i++){
float sample = p_float_in[i][j];
// clip input
if(sample > 1.0f){
sample = 1.0f;
}
if(sample < -1.0f){
sample = -1.0f;
}
dataT[(j*cfg.channels)+i] = sample * float_to_int_factor;
}
}
}
/// Converts the int buffer to float values
template <class T>
void convertIntBufferToFloat(int samples, void *data_in, FAUSTFLOAT**p_float_out ){
T *dataT = (T*) data_in;
int frameCount = samples/cfg.channels;
for(int j=0;j<frameCount;j++){
for(int i=0;i<cfg.channels;i++){
p_float_out[i][j] = static_cast<FAUSTFLOAT>(dataT[(j*cfg.channels)+i]) / float_to_int_factor;
}
}
}
/// Used if FaustStream is used as audio sink or filter
template <class T>
size_t writeT(const uint8_t *write_data, size_t len) {
size_t result = 0;
if (is_write){
TRACED();
int samples = len / bytes_per_sample;
int frames = samples / cfg.channels;
// prepare float input for faust
allocateFloatBuffer(samples, with_output_buffer);
convertIntBufferToFloat<T>(samples, (void*) write_data, p_buffer);
// determine result
FAUSTFLOAT** p_float_buffer = with_output_buffer ? p_buffer_out : p_buffer;
p_dsp->compute(frames, p_buffer, p_float_buffer);
// update buffer with data from faust
convertFloatBufferToInt<T>(samples, p_float_buffer, (void*) write_data);
// write data to final output
result = p_out->write(write_data, len);
}
return result;
}
/// Allocate the buffer that is needed by faust
void allocateFloatBuffer(int samples, bool allocate_out){
if (samples>buffer_allocated){
if (p_buffer[0]!=nullptr){
for (int j=0;j<cfg.channels;j++){
delete[]p_buffer[j];
p_buffer[j] = nullptr;
}
}
if (p_buffer_out!=nullptr && p_buffer_out[0]!=nullptr){
for (int j=0;j<cfg.channels;j++){
delete[]p_buffer_out[j];
p_buffer_out[j] = nullptr;
}
}
}
if (p_buffer[0]==nullptr){
const int ch = cfg.channels;
for (int j=0;j<ch;j++){
p_buffer[j] = new FAUSTFLOAT[samples];
}
buffer_allocated = samples;
}
if (allocate_out){
if (p_buffer_out[0]==nullptr){
const int ch = cfg.channels;
for (int j=0;j<ch;j++){
p_buffer_out[j] = new FAUSTFLOAT[samples];
}
}
}
}
void deleteFloatBuffer() {
if (p_buffer!=nullptr) {
for (int j=0;j<cfg.channels;j++){
if (p_buffer[j]!=nullptr) delete p_buffer[j];
}
delete[] p_buffer;
p_buffer = nullptr;
}
if (p_buffer_out!=nullptr) {
for (int j=0;j<cfg.channels;j++){
if (p_buffer_out[j]!=nullptr) delete p_buffer_out[j];
}
delete[] p_buffer_out;
p_buffer_out = nullptr;
}
}
FAUSTFLOAT noteToFrequency(uint8_t x) {
FAUSTFLOAT note = x;
return 440.0 * pow(2.0f, (note-69)/12);
}
};
} // namespace

View File

@@ -0,0 +1,255 @@
#pragma once
#include "AudioToolsConfig.h"
#include "AudioTools/CoreAudio/AudioBasic/Collections.h"
#include "AudioTools/CoreAudio/AudioBasic/Float16.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
#ifndef FAUSTFLOAT
#define FAUSTFLOAT float
#endif
#ifndef PSRAM_LIMIT
#define PSRAM_LIMIT 1024
#endif
// forward declarations
class UI;
/**
 * @brief minimal dsp base class needed by Faust
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class dsp {
  public:
    /// virtual destructor: dsp pointers are handed out (e.g. via getDSP()),
    /// so deleting through the base class must invoke the derived destructor
    virtual ~dsp() = default;
    /// initializes the dsp for the indicated sample rate
    virtual void init(int sample_rate) = 0;
    /// processes count frames from inputs to outputs
    virtual void compute(int count, FAUSTFLOAT** inputs, FAUSTFLOAT** outputs) = 0;
    /// resets the internal dsp state
    virtual void instanceClear() = 0;
    virtual int getNumInputs() = 0;
    virtual int getNumOutputs() = 0;
    /// registers the dsp parameters with the provided UI
    virtual void buildUserInterface(UI* ui_interface) = 0;
};
/**
 * @brief minimal implementation of Meta which just ignores the metadata
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class Meta {
    public:
        // key/value metadata declarations from the generated dsp are discarded
        void declare(const char*, const char*){}
};
// Faust architecture files expect a Soundfile type; soundfiles are not
// supported here, so an opaque void alias is sufficient.
typedef void Soundfile;
/**
 * @brief Minimum implementation of UI parameters. We only support the setting and getting of values
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class UI {
    /// one registered parameter: label, value location and optional limits
    struct Entry {
      const char* label=nullptr;
      FAUSTFLOAT* zone=nullptr;
      bool withLimits=false;
      FAUSTFLOAT min=0;
      FAUSTFLOAT max=0;
    };

  public:
    /// Determines the current value of the parameter with the given label;
    /// returns 0 (and logs an error) when the label is unknown
    virtual FAUSTFLOAT getValue(const char*label) {
      Entry *e = findEntry(label);
      if (e==nullptr){
        LOGE("Label '%s' not found", label);
      }
      return e!=nullptr ? *(e->zone) :(FAUSTFLOAT) 0.0;
    }

    /// Sets the value of the parameter with the given label; values outside
    /// the registered limits are rejected
    virtual bool setValue(const char* label, FAUSTFLOAT value){
      bool result = false;
      Entry* e = findEntry(label);
      if (e!=nullptr){
        if (e->withLimits){
          if (value>=e->min && value<=e->max){
            *(e->zone) = value;
            result = true;
          } else {
            LOGE("Value '%s' outside limits %f (%f-%f)", e->label, value, e->min, e->max);
          }
        } else {
          *(e->zone) = value;
          result = true;
        }
      } else {
        LOGE("Label '%s' not found", label);
      }
      return result;
    }

    // -- widget's layouts (ignored - we only track values)

    virtual void openTabBox(const char* label) {}
    virtual void openHorizontalBox(const char* label) {}
    virtual void openVerticalBox(const char* label) {}
    virtual void closeBox() {}

    // -- active widgets

    virtual void addButton(const char* label, FAUSTFLOAT* zone) {
      addEntry(label, zone);
    }
    virtual void addCheckButton(const char* label, FAUSTFLOAT* zone) {
      addEntry(label, zone);
    }
    virtual void addVerticalSlider(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT init, FAUSTFLOAT min, FAUSTFLOAT max, FAUSTFLOAT step) {
      addEntry(label, zone, true, min, max);
      *zone = init;
    }
    virtual void addHorizontalSlider(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT init, FAUSTFLOAT min, FAUSTFLOAT max, FAUSTFLOAT step) {
      addEntry(label, zone, true, min, max);
      *zone = init;
    }
    virtual void addNumEntry(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT init, FAUSTFLOAT min, FAUSTFLOAT max, FAUSTFLOAT step) {
      addEntry(label, zone, true, min, max);
      *zone = init;
    }

    // -- passive widgets (ignored)

    virtual void addHorizontalBargraph(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT min, FAUSTFLOAT max) {}
    virtual void addVerticalBargraph(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT min, FAUSTFLOAT max) {}

    // -- soundfiles (not supported)

    virtual void addSoundfile(const char* label, const char* filename, Soundfile** sf_zone) {}

    // -- metadata declarations (ignored)

    virtual void declare(FAUSTFLOAT* zone, const char* key, const char* val) {}

    /// checks if a label exists
    virtual bool exists(const char*label){
      return findEntry(label)!=nullptr;
    }

    /// Returns the number of label entries
    virtual size_t size() {
      return entries.size();
    }

    /// Returns the label at the indicated position. nullptr is returned if
    /// the index is out of range (negative indexes are now rejected too)
    const char* label(int idx){
      if (idx >= 0 && static_cast<size_t>(idx) < size()){
        return entries[idx].label;
      }
      return nullptr;
    }

  protected:
    audio_tools::Vector<Entry> entries;

    /// Linear search for the entry with the given label; nullptr if not found
    Entry *findEntry(const char* name){
      StrView nameStr(name);
      for (size_t j=0; j<entries.size();j++){
        if (nameStr.equals(entries[j].label)){
          return &entries[j];
        }
      }
      return nullptr;
    }

    /// Registers a new parameter entry (optionally with min/max limits)
    void addEntry(const char*label,FAUSTFLOAT* zone, bool withLimits=false, FAUSTFLOAT min=0, FAUSTFLOAT max=0){
      Entry e;
      e.label = label;
      e.zone = zone;
      e.withLimits = withLimits;
      if (withLimits){
        e.min = min;
        e.max = max;
        LOGI("Label: %s value: %f range: %f - %f", label, *zone, min, max);
      } else {
        LOGI("Label: %s value: %f", label, *zone);
      }
      entries.push_back(e);
    }
};
/**
 * @brief Memory manager which uses psram when it is available
 *
 */
class dsp_memory_manager {
 public:
  virtual ~dsp_memory_manager() {}

  /**
   * Inform the Memory Manager with the number of expected memory zones.
   * @param count - the number of memory zones
   */
  virtual bool begin(size_t count){
    this->count = count;
    total = 0;
    return true;
  }

  /**
   * Give the Memory Manager information on a given memory zone.
   * @param size - the size in bytes of the memory zone
   * @param reads - the number of Read access to the zone used to compute one frame
   * @param writes - the number of Write access to the zone used to compute one frame
   */
  virtual void info(size_t size, size_t reads, size_t writes) {
    LOGD("info %d", size);
    total+=size;
  }

  /**
   * Inform the Memory Manager that all memory zones have been described,
   * to possibly start a 'compute the best allocation strategy' step.
   */
  virtual void end(){
#ifdef ESP32
    // use psram only for relevant amounts and when psram is available
    is_psram = total>2000 && ESP.getFreePsram()>0;
#endif
    LOGI("use PSRAM: %s", is_psram?"true":"false");
  }

  /**
   * Allocate a memory zone; the zone is zero-initialized.
   * @param size - the memory zone size in bytes
   */
  virtual void* allocate(size_t size) {
    LOGD("allocate %d", size);
#ifdef ESP32
    void* result = is_psram && size > PSRAM_LIMIT ? ps_malloc(size) : malloc(size);
#else
    void* result = malloc(size);
#endif
    if (result!=nullptr){
      // fixed: arguments were swapped (memset(ptr, size, 0) zeroed 0 bytes)
      memset(result, 0, size);
    } else {
      LOGE("allocate %u bytes - failed", (unsigned) size);
    }
    return result;
  };

  /**
   * Destroy a memory zone.
   * @param ptr - the memory zone pointer to be deallocated
   */
  virtual void destroy(void* ptr) {
    LOGD("destroy");
    free(ptr);
  };

 private:
  size_t count;
  size_t total;
  bool is_psram = false;
};

View File

@@ -0,0 +1,109 @@
#pragma once
#include "kiss_fix.h"
#include "AudioFFT.h"
/**
* @defgroup fft-kiss KISS
* @ingroup fft
* @brief FFT using KISS
**/
namespace audio_tools {
/**
 * @brief Driver for KISS FFT
 * @ingroup fft-kiss
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FFTDriverKissFFT : public FFTDriver {
  public:
    /// Allocates the kiss fft configuration for the indicated length.
    /// Fixed: when begin() is called again with a different length the old
    /// configuration is released first instead of being silently reused.
    bool begin(int len) override {
      if (p_fft_object != nullptr && this->len != len) end();
      this->len = len;
      k_data.resize(len);
      if (p_fft_object==nullptr) p_fft_object = cpp_kiss_fft_alloc(len,false,nullptr,nullptr);
      assert(p_fft_object!=nullptr);
      return p_fft_object!=nullptr;
    }

    /// Releases the forward and inverse fft configurations.
    /// Fixed: p_fft_object_inv is now reset to nullptr as well, which
    /// prevents a double free when end() is called twice.
    void end() override {
      if (p_fft_object!=nullptr) {
        kiss_fft_free(p_fft_object);
        p_fft_object = nullptr;
      }
      if (p_fft_object_inv!=nullptr) {
        kiss_fft_free(p_fft_object_inv);
        p_fft_object_inv = nullptr;
      }
      k_data.resize(0);
    }

    /// Sets one real input sample.
    /// Fixed: the imaginary part is cleared too - fft() writes its result
    /// in place, so without this the next frame would inherit stale
    /// imaginary values from the previous transform.
    void setValue(int idx, float value) override {
      k_data[idx].r = value;
      k_data[idx].i = 0.0f;
    }

    /// forward fft (in place on k_data)
    void fft() override {
      cpp_kiss_fft (p_fft_object, k_data.data(), k_data.data());
    };

    /// inverse fft (in place on k_data); allocates the inverse cfg lazily
    void rfft() override {
      if(p_fft_object_inv==nullptr) {
        p_fft_object_inv = cpp_kiss_fft_alloc(len,true,nullptr,nullptr);
      }
      cpp_kiss_fft (p_fft_object_inv, k_data.data(), k_data.data());
    };

    float magnitude(int idx) override {
      return sqrt(magnitudeFast(idx));
    }

    /// magnitude w/o sqrt
    float magnitudeFast(int idx) override {
      return (k_data[idx].r * k_data[idx].r + k_data[idx].i * k_data[idx].i);
    }

    bool isValid() override{ return p_fft_object!=nullptr; }

    /// this driver supports the inverse fft
    bool isReverseFFT() override {return true;}

    float getValue(int idx) override { return k_data[idx].r; }

    bool setBin(int pos, FFTBin &bin) { return FFTDriver::setBin(pos, bin);}

    /// sets one complex bin; rejects out-of-range indexes (incl. negative)
    bool setBin(int pos, float real, float img) override {
      if (pos<0 || pos>=len) return false;
      k_data[pos].r = real;
      k_data[pos].i = img;
      return true;
    }

    /// reads one complex bin; rejects out-of-range indexes (incl. negative)
    bool getBin(int pos, FFTBin &bin) override {
      if (pos<0 || pos>=len) return false;
      bin.real = k_data[pos].r;
      bin.img = k_data[pos].i;
      return true;
    }

    kiss_fft_cfg p_fft_object=nullptr;
    kiss_fft_cfg p_fft_object_inv=nullptr;
    Vector<kiss_fft_cpx> k_data{0}; // complex in-place work buffer
    int len = 0;
};
/**
 * @brief AudioFFT using KISS FFT. The only specific functionality is the
 * access to the complex dataArray of the underlying driver
 * @ingroup fft-kiss
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AudioKissFFT : public AudioFFTBase {
  public:
    // installs the KISS driver into the generic AudioFFTBase
    AudioKissFFT():AudioFFTBase(new FFTDriverKissFFT()) {}

    /// Provides the complex array returned by the FFT
    kiss_fft_cpx *dataArray() {
      return driverEx()->k_data.data();
    }

    /// Provides the driver downcast to its concrete KISS type
    FFTDriverKissFFT* driverEx() {
      return (FFTDriverKissFFT*)driver();
    }
};
}

View File

@@ -0,0 +1,635 @@
#pragma once
#include "AudioTools.h"
#include "AudioKitHAL.h"
#include "AudioTools/CoreAudio/AudioI2S/I2SConfig.h"
#include "AudioTools/CoreAudio/AudioActions.h"
#ifndef AUDIOKIT_V1
#error Upgrade the AudioKit library
#endif
namespace audio_tools {
class AudioKitStream;
static AudioKitStream *pt_AudioKitStream = nullptr;
/**
 * @brief Configuration for AudioKitStream: we use as subclass of I2SConfig
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AudioKitStreamConfig : public I2SConfig {
  friend class AudioKitStream;
 public:
  AudioKitStreamConfig(RxTxMode mode=RXTX_MODE) { setupI2SPins(mode); };
  // set adc channel with audio_hal_adc_input_t
  audio_hal_adc_input_t input_device = AUDIOKIT_DEFAULT_INPUT;
  // set dac channel
  audio_hal_dac_output_t output_device = AUDIOKIT_DEFAULT_OUTPUT;
  bool sd_active = true;
  bool default_actions_active = true;
  audio_kit_pins pins;
  audio_hal_func_t driver = AUDIO_DRIVER;

  /// convert to config object needed by HAL
  AudioKitConfig toAudioKitConfig() {
    TRACED();
    audiokit_config.driver = driver;
    audiokit_config.pins = pins;
    audiokit_config.i2s_num = (i2s_port_t)port_no;
    audiokit_config.adc_input = input_device;
    audiokit_config.dac_output = output_device;
    audiokit_config.codec_mode = toCodecMode();
    audiokit_config.master_slave_mode = toMode();
    audiokit_config.fmt = toFormat();
    audiokit_config.sample_rate = toSampleRate();
    audiokit_config.bits_per_sample = toBits();
#if defined(ESP32)
    audiokit_config.buffer_size = buffer_size;
    audiokit_config.buffer_count = buffer_count;
#endif
    // we use the AudioKit library only to set up the codec
    audiokit_config.i2s_active = false;
#if AUDIOKIT_SETUP_SD
    audiokit_config.sd_active = sd_active;
#else
    // SD has been deactivated in the AudioKitConfig.h file
    audiokit_config.sd_active = false;
#endif
    LOGW("sd_active = %s", sd_active ? "true" : "false" );
    return audiokit_config;
  }

 protected:
  AudioKitConfig audiokit_config;
  board_driver board;

  /// Defines the pins based on the information provided by the AudioKit project
  void setupI2SPins(RxTxMode rxtx_mode) {
    TRACED();
    this->rx_tx_mode = rxtx_mode;
    i2s_pin_config_t i2s_pins = {};
    board.setup(pins);
    board.get_i2s_pins((i2s_port_t)port_no, &i2s_pins);
    pin_mck = i2s_pins.mck_io_num;
    pin_bck = i2s_pins.bck_io_num;
    pin_ws = i2s_pins.ws_io_num;
    if (rx_tx_mode == RX_MODE){
      pin_data = i2s_pins.data_in_num;
      pin_data_rx = I2S_PIN_NO_CHANGE;
    } else {
      pin_data = i2s_pins.data_out_num;
      pin_data_rx = i2s_pins.data_in_num;
    }
  };

  /// convert bits_per_sample to audio_hal_iface_bits_t
  audio_hal_iface_bits_t toBits() {
    TRACED();
    static const int ia[] = {16, 24, 32};
    static const audio_hal_iface_bits_t oa[] = {AUDIO_HAL_BIT_LENGTH_16BITS,
                                                AUDIO_HAL_BIT_LENGTH_24BITS,
                                                AUDIO_HAL_BIT_LENGTH_32BITS};
    for (int j = 0; j < 3; j++) {
      if (ia[j] == bits_per_sample) {
        LOGD("-> %d",ia[j])
        return oa[j];
      }
    }
    LOGE("Bits per sample not supported: %d", bits_per_sample);
    return AUDIO_HAL_BIT_LENGTH_16BITS;
  }

  /// Convert to audio_hal_iface_samples_t; picks the closest supported rate
  /// when there is no exact match.
  audio_hal_iface_samples_t toSampleRate() {
    TRACED();
    static const int ia[] = {8000, 11025, 16000, 22050,
                             24000, 32000, 44100, 48000};
    static const audio_hal_iface_samples_t oa[] = {
        AUDIO_HAL_08K_SAMPLES, AUDIO_HAL_11K_SAMPLES, AUDIO_HAL_16K_SAMPLES,
        AUDIO_HAL_22K_SAMPLES, AUDIO_HAL_24K_SAMPLES, AUDIO_HAL_32K_SAMPLES,
        AUDIO_HAL_44K_SAMPLES, AUDIO_HAL_48K_SAMPLES};
    int diff = 99999;
    int result = 0;
    for (int j = 0; j < 8; j++) {
      if (ia[j] == sample_rate) {
        LOGD("-> %d",ia[j])
        return oa[j];
      } else {
        // fixed: the distance must be computed against the actual rate ia[j];
        // the original compared the enum constant oa[j] with the sample rate,
        // which always selected the last entry as "closest"
        int new_diff = abs(ia[j] - (int)sample_rate);
        if (new_diff < diff) {
          result = j;
          diff = new_diff;
        }
      }
    }
    LOGE("Sample Rate not supported: %d - using %d", sample_rate, ia[result]);
    return oa[result];
  }

  /// Convert to audio_hal_iface_format_t
  audio_hal_iface_format_t toFormat() {
    TRACED();
    static const int ia[] = {I2S_STD_FORMAT,
                             I2S_LSB_FORMAT,
                             I2S_MSB_FORMAT,
                             I2S_PHILIPS_FORMAT,
                             I2S_RIGHT_JUSTIFIED_FORMAT,
                             I2S_LEFT_JUSTIFIED_FORMAT,
                             I2S_PCM};
    static const audio_hal_iface_format_t oa[] = {
        AUDIO_HAL_I2S_NORMAL, AUDIO_HAL_I2S_LEFT, AUDIO_HAL_I2S_RIGHT,
        AUDIO_HAL_I2S_NORMAL, AUDIO_HAL_I2S_RIGHT, AUDIO_HAL_I2S_LEFT,
        AUDIO_HAL_I2S_DSP};
    // fixed: the arrays have 7 entries - the loop previously ran to 8 and
    // read out of bounds
    for (int j = 0; j < 7; j++) {
      if (ia[j] == i2s_format) {
        LOGD("-> %d",j)
        return oa[j];
      }
    }
    LOGE("Format not supported: %d", i2s_format);
    return AUDIO_HAL_I2S_NORMAL;
  }

  /// Determine if ESP32 is master or slave - this is just the oposite of the
  /// HAL device
  audio_hal_iface_mode_t toMode() {
    return (is_master) ? AUDIO_HAL_MODE_SLAVE : AUDIO_HAL_MODE_MASTER;
  }

  /// Convert to audio_hal_codec_mode_t
  audio_hal_codec_mode_t toCodecMode() {
    switch (rx_tx_mode) {
      case TX_MODE:
        LOGD("-> %s","AUDIO_HAL_CODEC_MODE_DECODE");
        return AUDIO_HAL_CODEC_MODE_DECODE;
      case RX_MODE:
        LOGD("-> %s","AUDIO_HAL_CODEC_MODE_ENCODE");
        return AUDIO_HAL_CODEC_MODE_ENCODE;
      default:
        LOGD("-> %s","AUDIO_HAL_CODEC_MODE_BOTH");
        return AUDIO_HAL_CODEC_MODE_BOTH;
    }
  }
};
/**
* @brief AudioKit Stream which uses the
* https://github.com/pschatzmann/arduino-audiokit library
* @ingroup io
* @author Phil Schatzmann
* @copyright GPLv3
*/
class AudioKitStream : public AudioStream {
public:
AudioKitStream() { pt_AudioKitStream = this; }
/// Provides the default configuration
AudioKitStreamConfig defaultConfig(RxTxMode mode = RXTX_MODE) {
TRACED();
AudioKitStreamConfig result{mode};
result.rx_tx_mode = mode;
return result;
}
/// Starts the processing
bool begin(AudioKitStreamConfig config) {
TRACED();
cfg = config;
AudioStream::setAudioInfo(config);
cfg.logInfo("AudioKitStream");
// start codec
auto kit_cfg = cfg.toAudioKitConfig();
if (!kit.begin(kit_cfg)){
LOGE("begin faild: please verify your AUDIOKIT_BOARD setting: %d", AUDIOKIT_BOARD);
stop();
}
// start i2s
i2s_stream.begin(cfg);
// Volume control and headphone detection
if (cfg.default_actions_active){
setupActions();
}
// set initial volume
setVolume(volume_value);
is_started = true;
return true;
}
// restart after end with initial parameters
bool begin() override {
return begin(cfg);
}
/// Stops the processing
void end() override {
TRACED();
kit.end();
i2s_stream.end();
is_started = false;
}
/// We get the data via I2S - we expect to fill one buffer size
int available() {
return cfg.rx_tx_mode == TX_MODE ? 0 : DEFAULT_BUFFER_SIZE;
}
size_t write(const uint8_t *data, size_t len) override {
return i2s_stream.write(data, len);
}
/// Reads the audio data
size_t readBytes(uint8_t *data, size_t len) override {
return i2s_stream.readBytes(data, len);
}
/// Update the audio info with new values: e.g. new sample_rate,
/// bits_per_samples or channels.
void setAudioInfo(AudioInfo info) override {
TRACEI();
if (cfg.sample_rate != info.sample_rate
&& cfg.bits_per_sample == info.bits_per_sample
&& cfg.channels == info.channels
&& is_started) {
// update sample rate only
LOGW("Update sample rate: %d", info.sample_rate);
cfg.sample_rate = info.sample_rate;
i2s_stream.setAudioInfo(cfg);
kit.setSampleRate(cfg.toSampleRate());
} else if (cfg.sample_rate != info.sample_rate
|| cfg.bits_per_sample != info.bits_per_sample
|| cfg.channels != info.channels
|| !is_started) {
// more has changed and we need to start the processing
cfg.sample_rate = info.sample_rate;
cfg.bits_per_sample = info.bits_per_sample;
cfg.channels = info.channels;
cfg.logInfo("AudioKit");
// Stop first
if(is_started){
end();
}
// start kit with new config
i2s_stream.begin(cfg);
kit.begin(cfg.toAudioKitConfig());
is_started = true;
}
}
AudioKitStreamConfig &config() { return cfg; }
/// Sets the codec active / inactive
bool setActive(bool active) { return kit.setActive(active); }
/// Mutes the output
bool setMute(bool mute) { return kit.setMute(mute); }
/// Defines the Volume: Range 0 to 100
bool setVolume(int vol) {
if (vol>100) LOGW("Volume is > 100: %d",vol);
// update variable, so if called before begin we set the default value
volume_value = vol;
return kit.setVolume(vol);
}
/// Defines the Volume: Range 0 to 1.0
bool setVolume(float vol) {
if (vol>1.0) LOGW("Volume is > 1.0: %f",vol);
// update variable, so if called before begin we set the default value
volume_value = 100.0 * vol;
return kit.setVolume(volume_value);
}
/// Defines the Volume: Range 0 to 1.0
bool setVolume(double vol) {
return setVolume((float)vol);
}
/// Determines the volume
int volume() { return kit.volume(); }
/// Activates/Deactives the speaker
/// @param active
void setSpeakerActive (bool active){
kit.setSpeakerActive(active);
}
/// @brief Returns true if the headphone was detected
/// @return
bool headphoneStatus() {
return kit.headphoneStatus();
}
/**
 * @brief Process input keys and pins: call this regularly (e.g. from loop())
 * so that the registered button/pin actions are executed
 */
void processActions() {
// TRACED();
actions.processActions();
yield();
}
/**
 * @brief Defines a new action that is executed when the indicated pin is
 * active; the active logic (low/high/touch) is derived from the board config
 *
 * @param pin gpio number to monitor
 * @param action callback: (active, pin, ref)
 * @param ref optional user data passed back to the callback
 */
void addAction(int pin, void (*action)(bool,int,void*), void* ref=nullptr ) {
TRACEI();
// determine logic from config
AudioActions::ActiveLogic activeLogic = getActionLogic(pin);
actions.add(pin, action, activeLogic, ref);
}
/**
 * @brief Defines a new action that is executed when the indicated pin is
 * active, using an explicitly provided active logic
 *
 * @param pin gpio number to monitor
 * @param action callback: (active, pin, ref)
 * @param activeLogic ActiveLow / ActiveHigh / ActiveTouch / ActiveChange
 * @param ref optional user data passed back to the callback
 */
void addAction(int pin, void (*action)(bool,int,void*), AudioActions::ActiveLogic activeLogic, void* ref=nullptr ) {
TRACEI();
actions.add(pin, action, activeLogic, ref);
}
/// Provides access to the AudioActions
AudioActions &audioActions() {
return actions;
}
/**
 * @brief Relative volume control: adjusts the volume by the indicated
 * offset (in percent) and keeps the result in the documented 0..100 range
 * (see setVolume). Without clamping, repeated volume-down presses could
 * drive the value negative.
 *
 * @param vol relative change, e.g. +2 or -2
 */
void incrementVolume(int vol) {
  volume_value += vol;
  // clamp to the valid 0..100 percent range
  if (volume_value > 100) volume_value = 100;
  if (volume_value < 0) volume_value = 0;
  LOGI("incrementVolume: %d -> %d", vol, volume_value);
  kit.setVolume(volume_value);
}
/**
 * @brief Increase the volume by 2 percent (static callback for button actions)
 *
 */
static void actionVolumeUp(bool, int, void*) {
TRACEI();
pt_AudioKitStream->incrementVolume(+2);
}
/**
 * @brief Decrease the volume by 2 percent (static callback for button actions)
 *
 */
static void actionVolumeDown(bool, int, void*) {
TRACEI();
pt_AudioKitStream->incrementVolume(-2);
}
/**
 * @brief Toggle start stop (flips the active flag and applies it to the codec)
 *
 */
static void actionStartStop(bool, int, void*) {
TRACEI();
pt_AudioKitStream->active = !pt_AudioKitStream->active;
pt_AudioKitStream->setActive(pt_AudioKitStream->active);
}
/**
 * @brief Start (activates the codec)
 *
 */
static void actionStart(bool, int, void*) {
TRACEI();
pt_AudioKitStream->active = true;
pt_AudioKitStream->setActive(pt_AudioKitStream->active);
}
/**
 * @brief Stop (deactivates the codec)
 *
 */
static void actionStop(bool, int, void*) {
TRACEI();
pt_AudioKitStream->active = false;
pt_AudioKitStream->setActive(pt_AudioKitStream->active);
}
/**
 * @brief Switch off the PA if the headphone is plugged in
 * and switch it on again if the headphone is unplugged.
 * Delegates to the corresponding AudioKit driver logic.
 */
static void actionHeadphoneDetection(bool, int, void*) {
AudioKit::actionHeadphoneDetection();
}
}
/**
* @brief Get the gpio number for auxin detection
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinAuxin() { return kit.pinAuxin(); }
/**
* @brief Get the gpio number for headphone detection
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinHeadphoneDetect() { return kit.pinHeadphoneDetect(); }
/**
* @brief Get the gpio number for PA enable
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinPaEnable() { return kit.pinPaEnable(); }
/**
* @brief Get the gpio number for adc detection
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinAdcDetect() { return kit.pinAdcDetect(); }
/**
* @brief Get the mclk gpio number of es7243
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinEs7243Mclk() { return kit.pinEs7243Mclk(); }
/**
* @brief Get the record-button id for adc-button
*
* @return -1 non-existent
* Others button id
*/
int8_t pinInputRec() { return kit.pinInputRec(); }
/**
* @brief Get the number for mode-button
*
* @return -1 non-existent
* Others number
*/
int8_t pinInputMode() { return kit.pinInputMode(); }
/**
* @brief Get number for set function
*
* @return -1 non-existent
* Others number
*/
int8_t pinInputSet() { return kit.pinInputSet(); };
/**
* @brief Get number for play function
*
* @return -1 non-existent
* Others number
*/
int8_t pinInputPlay() { return kit.pinInputPlay(); }
/**
* @brief number for volume up function
*
* @return -1 non-existent
* Others number
*/
int8_t pinVolumeUp() { return kit.pinVolumeUp(); }
/**
* @brief Get number for volume down function
*
* @return -1 non-existent
* Others number
*/
int8_t pinVolumeDown() { return kit.pinVolumeDown(); }
/**
* @brief Get green led gpio number
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinResetCodec() { return kit.pinResetCodec(); }
/**
* @brief Get DSP reset gpio number
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinResetBoard() { return kit.pinResetBoard(); }
/**
* @brief Get DSP reset gpio number
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinGreenLed() { return kit.pinGreenLed(); }
/**
* @brief Get green led gpio number
*
* @return -1 non-existent
* Others gpio number
*/
int8_t pinBlueLed() { return kit.pinBlueLed(); }
protected:
AudioKit kit;  // underlying audio-kit board driver
I2SStream i2s_stream;  // i2s data transport
AudioKitStreamConfig cfg = defaultConfig(RXTX_MODE);  // active configuration
AudioActions actions;  // button/pin action dispatcher
int volume_value = 40;  // last requested volume in percent (0..100)
bool active = true;  // codec active flag, toggled by actionStartStop
bool is_started = false;  // true after i2s_stream/kit have been started
/// Determines the action logic (ActiveLow or ActiveTouch) for the pin.
/// With USE_EXT_BUTTON_LOGIC the board's input-key table is consulted to
/// map the pin to its peripheral type; otherwise (and for unknown pins)
/// ActiveLow is assumed.
AudioActions::ActiveLogic getActionLogic(int pin){
#if defined(USE_EXT_BUTTON_LOGIC)
input_key_service_info_t input_key_info[] = INPUT_KEY_DEFAULT_INFO();
int size = sizeof(input_key_info) / sizeof(input_key_info[0]);
for (int j=0; j<size; j++){
// NOTE(review): the pin is compared against act_id - confirm that act_id
// holds the gpio number for all peripheral types
if (pin == input_key_info[j].act_id){
switch(input_key_info[j].type){
case PERIPH_ID_ADC_BTN:
LOGD("getActionLogic for pin %d -> %d", pin, AudioActions::ActiveHigh);
return AudioActions::ActiveHigh;
case PERIPH_ID_BUTTON:
LOGD("getActionLogic for pin %d -> %d", pin, AudioActions::ActiveLow);
return AudioActions::ActiveLow;
case PERIPH_ID_TOUCH:
LOGD("getActionLogic for pin %d -> %d", pin, AudioActions::ActiveTouch);
return AudioActions::ActiveTouch;
}
}
}
LOGW("Undefined ActionLogic for pin: %d ",pin);
#endif
return AudioActions::ActiveLow;
}
/// Setup the supported default actions (start/stop, headphone detection,
/// volume up/down). Pins that collide with the SD interface on specific
/// boards are skipped with a warning.
void setupActions() {
TRACEI();
// pin conflicts with the SD CS pin for AIThinker and buttons
if (! (cfg.sd_active && (AUDIOKIT_BOARD==5 || AUDIOKIT_BOARD==6))){
LOGD("actionStartStop")
addAction(kit.pinInputMode(), actionStartStop);
} else {
LOGW("Mode Button ignored because of conflict: %d ",kit.pinInputMode());
}
// pin conflicts with AIThinker A101 and headphone detection
if (! (cfg.sd_active && AUDIOKIT_BOARD==6)) {
LOGD("actionHeadphoneDetection pin:%d",kit.pinHeadphoneDetect())
// headphone detection must trigger on both plug and unplug -> ActiveChange
actions.add(kit.pinHeadphoneDetect(), actionHeadphoneDetection, AudioActions::ActiveChange);
} else {
LOGW("Headphone detection ignored because of conflict: %d ",kit.pinHeadphoneDetect());
}
// pin conflicts with SD Lyrat SD CS GpioPinand buttons / Conflict on Audiokit V. 2957
if (! (cfg.sd_active && (AUDIOKIT_BOARD==1 || AUDIOKIT_BOARD==7))){
LOGD("actionVolumeDown")
addAction(kit.pinVolumeDown(), actionVolumeDown);
LOGD("actionVolumeUp")
addAction(kit.pinVolumeUp(), actionVolumeUp);
} else {
LOGW("Volume Buttons ignored because of conflict: %d ",kit.pinVolumeDown());
}
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,113 @@
#pragma once
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "PDM.h"
namespace audio_tools {
/**
 * @brief Config for MP34DT05 Microphone. Supported sample rates 16000, 41667,
 * Supported bits_per_sample only 16
 *
 */
struct AudioMP34DT05Config : public AudioInfo {
AudioMP34DT05Config() {
channels = 1;
sample_rate = 16000;
bits_per_sample = 16;
}
// PDM gain passed to PDM.setGain()
int gain = 20; // value of DEFAULT_PDM_GAIN
// size of a single buffer segment in bytes
int buffer_size = 512;
// number of buffer segments in the NBuffer
int buffer_count = 2;
// define pins
// int pin_data = PIN_PDM_DIN;
// int pin_clk = PIN_PDM_CLK;
// int pin_pwr = PIN_PDM_PWR;
void logInfo() {
AudioInfo::logInfo();
LOGI("gain: %d", gain);
LOGI("buffer_size: %d", buffer_size);
}
};
class AudioMP34DT05 *selfAudioMP34DT05 = nullptr;
/**
 * @brief MP34DT05 Microphone of Nano BLE Sense. We provide a proper Stream
 * implementation. See https://github.com/arduino/ArduinoCore-nRF528x-mbedos
 * @ingroup io
 */
class AudioMP34DT05 : public AudioStream {
 public:
  AudioMP34DT05() { selfAudioMP34DT05 = this; };

  virtual ~AudioMP34DT05() {
    delete p_buffer;  // deleting nullptr is a safe no-op
  };

  /// Provides the default configuration (only RX_MODE is supported)
  AudioMP34DT05Config defaultConfig(int mode = RX_MODE) {
    AudioMP34DT05Config cfg;
    if (mode != RX_MODE) {
      LOGE("TX_MODE is not supported");
    }
    return cfg;
  }

  /// Starts the microphone with the current configuration
  bool begin() { return begin(config); }

  /// Starts the microphone with the indicated configuration
  /// @return false if the PDM driver could not be started
  bool begin(AudioMP34DT05Config cfg) {
    TRACEI();
    config = cfg;
    cfg.logInfo();
    // NOTE(review): an existing buffer is not resized when begin() is called
    // again with a different buffer_size/buffer_count - confirm acceptable
    if (p_buffer == nullptr) {
      p_buffer = new NBuffer<uint8_t>(cfg.buffer_size, cfg.buffer_count);
    }
    p_mic->setBufferSize(cfg.buffer_size);
    p_mic->onReceive(onReceiveStatic);
    LOGD("begin(%d,%d)", cfg.channels, cfg.sample_rate);
    bool result = p_mic->begin(cfg.channels, cfg.sample_rate);
    if (!result) {
      LOGE("begin(%d,%d)", cfg.channels, cfg.sample_rate);
    }
    LOGD("setGain: %d", cfg.gain);
    p_mic->setGain(cfg.gain);
    return result;
  }

  /// Stops the PDM driver and releases the buffer
  void end() {
    TRACEI();
    if (p_mic != nullptr) {
      p_mic->end();
    }
    delete p_buffer;
    p_buffer = nullptr;
  }

  /// Copies buffered audio data into data; returns the number of bytes copied
  size_t readBytes(uint8_t *data, size_t len) override {
    if (p_buffer == nullptr) return 0;
    return p_buffer->readArray(data, len);
  }

  /// Number of buffered bytes available for reading
  int available() override {
    if (p_buffer == nullptr) return 0;
    return p_buffer->available();
  }

 protected:
  PDMClass *p_mic = &PDM;
  NBuffer<uint8_t> *p_buffer = nullptr;
  AudioMP34DT05Config config;

  /// for some strange reasons available provides only the right result after
  /// onReceive, so unfortunately we need to use an additional buffer
  void onReceive() {
    // guard against a callback firing before/after the buffer exists
    if (p_buffer == nullptr) return;
    int bytesAvailable = p_mic->available();
    if (bytesAvailable <= 0) return;
    // Read into the sample buffer (VLA: supported by the Arduino gcc toolchain)
    uint8_t sampleBuffer[bytesAvailable] = {0};
    // use the p_mic member consistently (was: global PDM.read), so that a
    // substituted PDMClass instance is honored
    int read = p_mic->read(sampleBuffer, bytesAvailable);
    p_buffer->writeArray(sampleBuffer, read);
  }

  static void onReceiveStatic() { selfAudioMP34DT05->onReceive(); }
};
} // namespace

View File

@@ -0,0 +1,114 @@
#pragma once
#include "AudioFFT.h"
#include "FFT/FFTReal.h"
/**
* @defgroup fft-real Real
* @ingroup fft
* @brief FFT using Real FFT
**/
namespace audio_tools {
/**
 * @brief Driver for RealFFT
 * @ingroup fft-real
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FFTDriverRealFFT : public FFTDriver {
 public:
  /// Allocates (or re-allocates) the fft object and the work buffers
  bool begin(int len) override {
    // re-create the fft object if the length has changed: previously the old
    // object (sized for the previous length) was silently reused
    if (p_fft_object != nullptr && this->len != len) {
      delete p_fft_object;
      p_fft_object = nullptr;
    }
    this->len = len;
    v_x.resize(len);
    v_f.resize(len);
    if (p_fft_object == nullptr) p_fft_object = new ffft::FFTReal<float>(len);
    assert(p_fft_object != nullptr);
    return p_fft_object != nullptr;
  }
  /// Releases the fft object and the buffers
  void end() override {
    if (p_fft_object != nullptr) {
      delete p_fft_object;
      p_fft_object = nullptr;
    }
    v_x.resize(0);
    v_f.resize(0);
  }
  /// Defines the (real) input value at the indicated position
  void setValue(int idx, float value) override {
    v_x[idx] = value;
  }
  /// Forward fft: real input from v_x, result written to v_f
  void fft() override {
    memset(v_f.data(), 0, len * sizeof(float));
    p_fft_object->do_fft(v_f.data(), v_x.data());
  };
  /// Inverse fft - convert fft result back to time domain (samples)
  void rfft() override {
    // ifft
    p_fft_object->do_ifft(v_f.data(), v_x.data());
  }
  bool isReverseFFT() override { return true; }
  /// Magnitude at the indicated position
  float magnitude(int idx) override {
    return sqrt(magnitudeFast(idx));
  }
  /// magnitude w/o sqrt
  float magnitudeFast(int idx) override {
    return ((v_x[idx] * v_x[idx]) + (v_f[idx] * v_f[idx]));
  }
  bool isValid() override { return p_fft_object != nullptr; }
  /// get Real value
  float getValue(int idx) override { return v_x[idx]; }
  /// Defines a single bin; returns false if pos is out of range
  bool setBin(int pos, float real, float img) override {
    if (pos < 0 || pos >= len) return false;
    v_x[pos] = real;
    v_f[pos] = img;
    return true;
  }
  /// Provides a single bin; returns false if pos is out of range
  bool getBin(int pos, FFTBin &bin) override {
    // also reject negative positions (consistent with setBin)
    if (pos < 0 || pos >= len) return false;
    bin.real = v_x[pos];
    bin.img = v_f[pos];
    return true;
  }
  ffft::FFTReal<float> *p_fft_object = nullptr;
  Vector<float> v_x{0};  // real part
  Vector<float> v_f{0};  // imaginary part
  int len = 0;  // fft length; initialized so getBin/setBin are safe before begin()
};
/**
 * @brief AudioFFT using RealFFT
 * @ingroup fft-real
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AudioRealFFT : public AudioFFTBase {
public:
AudioRealFFT():AudioFFTBase(new FFTDriverRealFFT()) {}
/// Provides the real array returned by the FFT
float* realArray() {
return driverEx()->v_x.data();
}
/// Provides the complex array returned by the FFT
float *imgArray() {
return driverEx()->v_f.data();
}
/// Provides access to the concrete driver
FFTDriverRealFFT* driverEx() {
return (FFTDriverRealFFT*)driver();
}
};
}

View File

@@ -0,0 +1,318 @@
#pragma once
#include "AudioTools/CoreAudio/AudioEffects/AudioEffect.h"
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "AudioToolsConfig.h"
#ifdef ESP32
#include "freertos/FreeRTOS.h"
#endif
#include "StkAll.h"
namespace audio_tools {
/**
 * @brief The Synthesis ToolKit in C++ (STK) is a set of open source audio
 * signal processing and algorithmic synthesis classes written in the C++
 * programming language. You need to install
 * https://github.com/pschatzmann/Arduino-STK
 *
 * You can find further information in the original Readme of the STK Project
 *
 * Like many other sound libraries it originates from an University (Princeton)
 * and can look back at a very long history: it was created in 1995. In the 90s
 * the computers had limited processor power and memory available. In todays
 * world we can get some cheap Microcontrollers, which provide almost the same
 * capabilities.
 *
 * @ingroup generator
 * @tparam StkCls STK instrument/voicer class providing tick()
 * @tparam T output sample data type
 */
template <class StkCls, class T>
class STKGenerator : public SoundGenerator<T> {
 public:
  STKGenerator() = default;

  // Creates an STKGenerator for an instrument
  STKGenerator(StkCls& instrument) : SoundGenerator<T>() {
    this->p_instrument = &instrument;
  }

  /// Defines the instrument that produces the samples
  void setInput(StkCls& instrument) { this->p_instrument = &instrument; }

  /// provides the default configuration
  AudioInfo defaultConfig() {
    AudioInfo info;
    info.channels = 2;
    info.bits_per_sample = sizeof(T) * 8;
    info.sample_rate = stk::Stk::sampleRate();
    return info;
  }

  /// Starts the processing and aligns the global STK sample rate
  bool begin(AudioInfo cfg) {
    TRACEI();
    cfg.logInfo();
    SoundGenerator<T>::begin(cfg);
    max_value = NumberConverter::maxValue(sizeof(T) * 8);
    stk::Stk::setSampleRate(SoundGenerator<T>::info.sample_rate);
    return true;
  }

  /// Provides a single sample (silence before begin() was called)
  T readSample() {
    T result = 0;
    if (p_instrument != nullptr) {
      result = p_instrument->tick() * max_value;
    }
    return result;
  }

 protected:
  StkCls* p_instrument = nullptr;
  // initialized to 0 so that readSample() yields silence instead of scaling
  // by an indeterminate value when called before begin()
  T max_value = 0;
};
/**
 * @brief STK Stream for Instrument
 * @ingroup dsp
 */
template <class StkCls, class T>
class STKInstrument : public STKGenerator<StkCls, T> {
public:
STKInstrument() = default;
STKInstrument(StkCls& instrument) : STKGenerator<StkCls, T>(instrument) {}
/// sets the frequency by (re)triggering a note at the current amplitude
void setFrequency(float frequency) override {
this->p_instrument->noteOn(frequency, amplitude);
}
/// Starts a note with the indicated frequency and volume
void noteOn(float freq, float vol) { this->p_instrument->noteOn(freq, vol); }
/// Stops the current note
void noteOff() { this->p_instrument->noteOff(); }
/// Defines the amplitude (0.0 ... 1.0); out-of-range values are clamped
void setAmplitude(float amplitude) {
this->amplitude = amplitude;
if (this->amplitude > 1.0) this->amplitude = 1.0;
if (this->amplitude < 0.0) this->amplitude = 0.0;
}
protected:
// amplitude used by setFrequency() for noteOn (0.0..1.0)
float amplitude = 1.0;
};
/**
 * @brief STK Stream for Instrument or Voicer: wraps an STKGenerator into a
 * GeneratedSoundStream with int16_t samples
 * @ingroup dsp
 */
template <class StkCls>
class STKStream : public GeneratedSoundStream<int16_t> {
public:
STKStream() { GeneratedSoundStream<int16_t>::setInput(generator); };
STKStream(StkCls& instrument) {
generator.setInput(instrument);
GeneratedSoundStream<int16_t>::setInput(generator);
}
/// Defines the instrument (by reference)
void setInput(StkCls& instrument) {
generator.setInput(instrument);
GeneratedSoundStream<int16_t>::setInput(generator);
}
/// Defines the instrument (by pointer, must not be null)
void setInput(StkCls* instrument) {
generator.setInput(*instrument);
GeneratedSoundStream<int16_t>::setInput(generator);
}
/// Provides the default configuration (mono, 16 bit, STK sample rate)
AudioInfo defaultConfig() {
AudioInfo info;
info.channels = 1;
info.bits_per_sample = 16;
info.sample_rate = stk::Stk::sampleRate();
return info;
}
protected:
STKGenerator<StkCls, int16_t> generator;
};
/**
 * @brief Use any effect from the STK framework: e.g. Chorus, Echo, FreeVerb,
 * JCRev, PitShift... https://github.com/pschatzmann/Arduino-STK
 *
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKEffect : public AudioEffect {
public:
STKEffect(stk::Effect& stkEffect) { p_effect = &stkEffect; }
virtual effect_t process(effect_t in) {
// just convert between int16 and float
float value = static_cast<float>(in) / 32767.0;
// NOTE(review): implicitly truncated back to effect_t; |tick()| > 1.0
// overflows rather than clips - confirm this is acceptable
return p_effect->tick(value) * 32767.0;
}
protected:
stk::Effect* p_effect = nullptr;
};
/**
 * @brief Chorus Effect
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKChorus : public AudioEffect, public stk::Chorus {
public:
STKChorus(float baseDelay = 6000) : stk::Chorus(baseDelay) {}
STKChorus(const STKChorus& copy) = default;
AudioEffect* clone() override { return new STKChorus(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::Chorus::tick(value) * 32767.0;
}
};
/**
 * @brief Echo Effect
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKEcho : public AudioEffect, public stk::Echo {
public:
// default maximum delay: one second at the current STK sample rate
STKEcho(unsigned long maximumDelay = (unsigned long)Stk::sampleRate())
: stk::Echo(maximumDelay) {}
STKEcho(const STKEcho& copy) = default;
AudioEffect* clone() override { return new STKEcho(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::Echo::tick(value) * 32767.0;
}
};
/**
 * @brief Jezar at Dreampoint's FreeVerb, implemented in STK.
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKFreeVerb : public AudioEffect, public stk::FreeVerb {
public:
STKFreeVerb() = default;
STKFreeVerb(const STKFreeVerb& copy) = default;
AudioEffect* clone() override { return new STKFreeVerb(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::FreeVerb::tick(value) * 32767.0;
}
};
/**
 * @brief John Chowning's reverberator class.
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKChowningReverb : public AudioEffect, public stk::JCRev {
public:
STKChowningReverb() = default;
STKChowningReverb(const STKChowningReverb& copy) = default;
AudioEffect* clone() override { return new STKChowningReverb(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::JCRev::tick(value) * 32767.0;
}
};
/**
 * @brief CCRMA's NRev reverberator class.
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKNReverb : public AudioEffect, public stk::NRev {
public:
// t60: reverberation decay time in seconds
STKNReverb(float t60 = 1.0) : NRev(t60) {}
STKNReverb(const STKNReverb& copy) = default;
AudioEffect* clone() override { return new STKNReverb(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::NRev::tick(value) * 32767.0;
}
};
/**
 * @brief Perry's simple reverberator class
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKPerryReverb : public AudioEffect, public stk::PRCRev {
public:
// t60: reverberation decay time in seconds
STKPerryReverb(float t60 = 1.0) : PRCRev(t60) {}
STKPerryReverb(const STKPerryReverb& copy) = default;
AudioEffect* clone() override { return new STKPerryReverb(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::PRCRev::tick(value) * 32767.0;
}
};
/**
 * @brief Pitch shifter effect class based on the Lent algorithm
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKLentPitShift : public AudioEffect, public stk::LentPitShift {
public:
// periodRatio: pitch shift factor; tMax: maximum period in samples
STKLentPitShift(float periodRatio = 1.0, int tMax = 512)
: stk::LentPitShift(periodRatio, tMax) {}
STKLentPitShift(const STKLentPitShift& copy) = default;
AudioEffect* clone() override { return new STKLentPitShift(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::LentPitShift::tick(value) * 32767.0;
}
};
/**
 * @brief Simple Pitch shifter effect class: This class implements a simple
 * pitch shifter using a delay line.
 * @ingroup effects
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class STKPitShift : public AudioEffect, public stk::PitShift {
public:
STKPitShift() = default;
STKPitShift(const STKPitShift& copy) = default;
AudioEffect* clone() override { return new STKPitShift(*this); }
virtual effect_t process(effect_t in) {
// just convert between int16 and float (result truncated back to effect_t)
float value = static_cast<float>(in) / 32767.0;
return stk::PitShift::tick(value) * 32767.0;
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,3 @@
#pragma once
#warning("obsolete: use AudioTools/Communication/AudioServerEx.h")
#include "AudioTools/Communication/AudioServerEx.h"

View File

@@ -0,0 +1,7 @@
#pragma once
/**
* @defgroup concurrency Concurrency
* @ingroup main
* @brief Multicore support
*/
#include "AudioTools/Concurrency/RTOS.h"

View File

@@ -0,0 +1,20 @@
#pragma once
#define VFS_SD SD
#include "AudioTools/Disk/VFSFile.h"
#include "AudioTools/Disk/VFS.h"
// We allow the access to the files via the global SD object
namespace audio_tools {
/// @brief Desktop file system compatibility alias
/// @ingroup io
using File = VFSFile;
/// @brief Desktop file system compatibility alias
/// @ingroup io
using FS = VFS;
// NOTE(review): `static` gives every translation unit its own SD instance;
// if a single shared object is intended, C++17 `inline FS SD;` would be
// required - confirm intent
static FS SD; // global object for compatibility with Arduino code
}

View File

@@ -0,0 +1,220 @@
#pragma once
#include "AudioTools/AudioLibs/Desktop/NoArduino.h"
#include "AudioTools/CoreAudio/AudioStreams.h"
#include "AudioTools/CoreAudio/AudioOutput.h"
#include "AudioTools/AudioCodecs/CodecWAV.h"
#include <string.h>
#include <iostream>
#include <fstream>
#include <filesystem>
#include <stdio.h>
#include "nlohmann/json.hpp"
#include "xtl/xbase64.hpp"
namespace audio_tools {
/**
 * @brief Simple layer for Print object to write to a c++ file
 * (the referenced fstream must outlive this object)
 */
class FileOutput : public Print {
public:
FileOutput(std::fstream &stream){
p_audio_stream = &stream;
}
/// Writes len bytes to the wrapped fstream; always reports len as written
size_t write(const uint8_t *data, size_t len) override {
p_audio_stream->write((const char*)data,len);
return len;
}
/// Fixed value: the file sink can always accept more data
int availableForWrite() override {
return 1024;
}
protected:
std::fstream *p_audio_stream=nullptr;
};
/**
 * @brief Displays audio in a Jupyter as chart
 * Just wraps a wav file to provide the chart data as an svg polyline
 * @tparam T raw sample data type stored in the wav file (e.g. int16_t)
 */
template <typename T>
class ChartT {
public:
/// Defines the source wav file, the total channel count and the channel to plot
void setup(std::string fName, int channelCount, int channelNo) {
this->fname = fName;
this->channels = channelCount;
if (this->channels==0){
LOGE("Setting channels to 0");
}
this->channel = channelNo;
}
int getChannels() {
return this->channels;
}
int getChannel() {
return this->channel;
}
/// Provides data as svg polyline
const std::string chartData() {
str.clear();
str.str("");
// reset buffer;
if (channel<channels){
ifstream is;
is.open(fname, is.binary);
// skip the 44 byte riff/wav header
is.seekg(wav_header_size, is.beg);
std::list<int16_t> audioList;
T buffer[channels];
size_t rec_size = channels*sizeof(T);
// read one frame (all channels) at a time and keep the requested channel
while(is.read((char *)buffer, rec_size)){
audioList.push_back(transform(buffer[channel]));
}
// NOTE(review): svg width is hard-coded instead of derived from the sample
// count (see the commented-out std::to_string) - confirm intent
string str_size = "102400"; //std::to_string(audioList.size());
str << "<style>div.x-svg {width: "<< str_size <<"px; }</style>";
str << "<div class='x-svg'><svg viewBox='0 0 "<< str_size << " 100'> <polyline fill='none' stroke='blue' stroke-width='1' points ='";
// copy data from input stream
size_t idx = 0;
for(int16_t sample: audioList){
str << idx++ << "," << sample << " ";
}
str << "'/></svg></div>";
} else {
str << "<p>Channel " << channel << " of " << channels << " does not exist!</p>";
}
return str.str();
}
protected:
std::stringstream str; // reusable output buffer for chartData()
std::string fname; // wav file name
const int wav_header_size = 44;
int channels=0;
int channel=0;
/// Scales a raw sample so that it fits into the 0..100 svg viewBox height
int transform(int x){
int result = x / 1000; // scale -32 to 32
result += 60; // shift down
return result;
}
};
/// @brief Default chart type for Jupyter integration
/// @ingroup io
using Chart = ChartT<int16_t>;
/**
 * @brief Output to Jupyter. We write the data just to a file from where we can
 * load the data again for different representations.
 * @tparam T sample data type (e.g. int16_t)
 */
template <typename T>
class JupyterAudioT : public AudioStream {
public:
/// Captures bufferCount copies of bufferSize bytes from the stream into fileName
JupyterAudioT(const char* fileName, AudioStream &stream, int bufferCount=20, int bufferSize=1024) {
buffer_count = bufferCount;
p_audio_stream = &stream;
cfg = stream.audioInfo();
copier.resize(bufferSize);
fname = fileName;
// start from scratch: remove stale data from a previous run
if (fileExists()){
remove(fileName);
}
}
/// Provides a chart for the indicated channel (creates the wav file on demand)
ChartT<T> &chart(int channel=0) {
createWAVFile();
assert(cfg.channels>0);
chrt.setup(fname, cfg.channels, channel);
return chrt;
}
// provide the file name
const std::string &name() const {
return fname;
}
// provides the absolute file path as string
const std::string path() const {
std::filesystem::path p = fname;
std::string result = std::filesystem::absolute(p);
return result;
}
// fills a wav file with data once, the first time it was requested
void createWAVFile(){
try{
if (!fileExists()){
std::fstream fstream(fname, fstream.binary | fstream.trunc | fstream.out);
FileOutput fp(fstream);
wave_encoder.setAudioInfo(audioInfo());
out.setOutput(&fp);
out.setEncoder(&wave_encoder);
out.begin(); // output to decoder
// copy buffer_count buffers from the source stream into the encoder
copier.begin(out, *p_audio_stream);
copier.copyN(buffer_count);
fstream.close();
}
} catch(const std::exception& ex){
// best effort: report but do not propagate (chart/audio will show no data)
std::cerr << ex.what();
}
}
/// Checks whether the capture file already exists
bool fileExists() {
ifstream f(fname.c_str());
return f.good();
}
int bufferCount(){
return buffer_count;
}
// provides the wav data as base64 encoded string
std::string audio() {
std::ifstream fin(fname, std::ios::binary);
std::stringstream m_buffer;
m_buffer << fin.rdbuf();
return xtl::base64encode(m_buffer.str());
}
// Provides the audio information
AudioInfo audioInfo() {
return cfg;
}
protected:
AudioStream *p_audio_stream=nullptr; // source of the audio data
ChartT<T> chrt; // chart representation helper
WAVEncoder wave_encoder; // encodes pcm into the wav container
EncodedAudioOutput out; // encoder pipeline
StreamCopyT<T> copier; // copies source -> encoder
AudioInfo cfg; // audio format of the source
string fname; // capture file name
size_t buffer_count=0; // number of buffers captured by createWAVFile()
};
/// @brief Default Jupyter audio output with 16-bit samples
/// @ingroup io
using JupyterAudio = JupyterAudioT<int16_t>;
} // namespace audio_tools
/// Display Chart in Jupyterlab xeus. Marked inline: this function lives in a
/// header, so without inline every including translation unit would emit its
/// own definition and the link would fail with multiple-definition errors.
inline nl::json mime_bundle_repr(Chart &in) {
  auto bundle = nl::json::object();
  bundle["text/html"] = in.chartData();
  return bundle;
}
/// Display Audio player in Jupyterlab xeus. Marked inline for the same
/// reason as mime_bundle_repr(Chart&): header-defined free functions must be
/// inline to avoid multiple-definition link errors.
inline nl::json mime_bundle_repr(JupyterAudio &in) {
  auto bundle = nl::json::object();
  in.createWAVFile();
  bundle["text/html"] = "<audio controls "
                        "src='data:audio/wav;base64," +
                        in.audio() + "'/>";
  return bundle;
}

View File

@@ -0,0 +1,17 @@
/**
* Generic main for desktop arduino emulation
*/
#ifndef NO_MAIN
#pragma once
// forward declarations: the sketch provides these
void loop();
void setup();
/// Arduino-style entry point: setup() once, then loop() forever
int main (void) {
setup();
while(true){
loop();
}
}
#endif

View File

@@ -0,0 +1,240 @@
#pragma once
/**
* @file NoArduino.h
* @author Phil Schatzmann
* @brief If you want to use the framework w/o Arduino you need to provide the
* implementation of a couple of classes and methods!
* @version 0.1
* @date 2022-09-19
*
* @copyright Copyright (c) 2022
*
*/
#include "AudioToolsConfig.h"
#ifdef IS_DESKTOP
# error We should not get here!
#endif
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm> // std::max
#include <chrono>
#define IS_NOARDUINO
#ifndef PSTR
#define PSTR(fmt) fmt
#endif
#ifndef PI
#define PI 3.14159265359f
#endif
#ifndef INPUT
#define INPUT 0x0
#endif
#ifndef OUTPUT
#define OUTPUT 0x1
#endif
#ifndef INPUT_PULLUP
#define INPUT_PULLUP 0x2
#endif
#ifndef HIGH
#define HIGH 0x1
#endif
#ifndef LOW
#define LOW 0x0
#endif
using namespace std;
enum PrintCharFmt { DEC=10, HEX=16 };
namespace audio_tools {
/// Minimal stand-in for the Arduino Print class when building w/o Arduino.
/// Subclasses override write(uint8_t) to provide the actual output.
class Print {
 public:
#ifndef DOXYGEN
  /// Writes a single byte; returns the number of bytes written.
  virtual size_t write(uint8_t ch) {
    // not implemented: to be overridden
    return 0;
  }
  virtual size_t write(const char *str) {
    return write((const uint8_t *)str, strlen(str));
  }
  virtual size_t write(const char *buffer, size_t size) {
    return write((const uint8_t *)buffer, size);
  }
  /// Prints a C string; returns the number of characters written
  virtual int print(const char *msg) {
    int result = strlen(msg);
    return write(msg, result);
  }
  virtual int println(const char *msg = "") {
    int result = print(msg);
    write('\n');
    return result + 1;
  }
  virtual int println(float number) {
    char buffer[120];
    snprintf(buffer, sizeof(buffer), "%f", number);
    return println(buffer);
  }
  virtual int print(float number) {
    char buffer[120];
    snprintf(buffer, sizeof(buffer), "%f", number);
    return print(buffer);
  }
  virtual int print(int number) {
    char buffer[80];
    snprintf(buffer, sizeof(buffer), "%d", number);
    return print(buffer);
  }
  /// Prints a single char: DEC emits the character itself, HEX its byte value
  virtual int print(char c, PrintCharFmt spec) {
    char result[5];
    switch (spec) {
      case DEC:
        snprintf(result, sizeof(result), "%c", c);
        return print(result);
      case HEX:
        // mask to one byte so negative chars print as e.g. "f0" instead of
        // a truncated sign-extended value
        snprintf(result, sizeof(result), "%x", c & 0xFF);
        return print(result);
    }
    return -1;
  }
  /// Prints an int in the requested radix followed by a newline.
  /// The value is formatted as a full int: previously it was silently
  /// narrowed to char, corrupting values outside 0..127.
  int println(int value, PrintCharFmt fmt) {
    char buffer[16];
    snprintf(buffer, sizeof(buffer), fmt == HEX ? "%x" : "%d", value);
    return print(buffer) + println();
  }
#endif
  /// Writes len bytes one by one via write(uint8_t)
  virtual size_t write(const uint8_t *data, size_t len) {
    if (data == nullptr) return 0;
    for (size_t j = 0; j < len; j++) {
      write(data[j]);
    }
    return len;
  }
  virtual int availableForWrite() { return 1024; }
  virtual void flush() { /* Empty implementation for backward compatibility */ }

 protected:
  int _timeout = 10;
};
/// Minimal stand-in for the Arduino Stream class (readable Print).
class Stream : public Print {
 public:
  virtual ~Stream() = default;
  virtual int available() { return 0; }
  virtual size_t readBytes(uint8_t *data, size_t len) { return 0; }
#ifndef DOXYGEN
  virtual int read() { return -1; }
  virtual int peek() { return -1; }
  virtual void setTimeout(size_t timeoutMs) {}
  /// Reads until the terminator, end of data or length bytes; the terminator
  /// itself is not stored. Returns the number of bytes placed in buffer.
  size_t readBytesUntil(char terminator, char *buffer, size_t length) {
    for (size_t j = 0; j < length; j++) {
      int val = read();
      // end of data: return the bytes read so far. The previous `j - 1`
      // underflowed to SIZE_MAX when the very first read() returned -1.
      if (val == -1) return j;
      if (val == terminator) return j;
      buffer[j] = val;
    }
    return length;
  };
  size_t readBytesUntil(char terminator, uint8_t *buffer, size_t length) {
    return readBytesUntil(terminator, (char *)buffer, length);
  }
#endif
  operator bool() { return true; }
};
/// Stub network client: never connects and never provides data; exists so
/// that code referencing Client still compiles on the desktop build
class Client : public Stream {
public:
void stop() {};
virtual int read(uint8_t *buffer, size_t len) { return 0; };
virtual int read() { return 0; };
bool connected() { return false; };
bool connect(const char *ip, int port) { return false; }
virtual operator bool() { return false; }
};
/// Serial stand-in that writes to stdout.
class HardwareSerial : public Stream {
 public:
  /// Writes one byte to stdout; returns the byte count (1 on success, 0 on
  /// failure). Previously the raw putchar() result - the character code,
  /// not a count - was returned.
  size_t write(uint8_t ch) override { return putchar(ch) == EOF ? 0 : 1; }
  virtual operator bool() { return true; }
  /// Baud rate is ignored on the desktop; always succeeds
  bool begin(long baudrate, int config = 0) { return true; }
};
static HardwareSerial Serial;
/// Re-scales x from the input range [in_min, in_max] to the output range
/// [out_min, out_max] using integer arithmetic (Arduino-compatible `map`).
inline long map(long x, long in_min, long in_max, long out_min, long out_max) {
  const long in_span = in_max - in_min;
  const long out_span = out_max - out_min;
  return ((x - in_min) * out_span) / in_span + out_min;
}
} // namespace audio_tools
#if defined(ESP32)
#include "driver/gpio.h"
#include "freertos/FreeRTOS.h" // needed for ESP Arduino < 2.0
#include "freertos/FreeRTOSConfig.h"
/// e.g. for AudioActions
inline int digitalRead(int pin) {
// NOTE(review): printf on every read is debug noise in a hot polling loop -
// confirm whether this tracing is still wanted
printf("digitalRead:%d\n", pin);
return gpio_get_level((gpio_num_t)pin);
}
/// Sets the gpio output level
inline void digitalWrite(int pin, int value) {
gpio_set_level((gpio_num_t)pin, value);
}
/// Configures a gpio as INPUT, OUTPUT or INPUT_PULLUP (anything else:
/// bidirectional); the pin is reset to its default state first
inline void pinMode(int pin, int mode) {
gpio_num_t gpio_pin = (gpio_num_t)pin;
printf("pinMode(%d,%d)\n", pin, mode);
gpio_reset_pin(gpio_pin);
switch (mode) {
case INPUT:
gpio_set_direction(gpio_pin, GPIO_MODE_INPUT);
break;
case OUTPUT:
gpio_set_direction(gpio_pin, GPIO_MODE_OUTPUT);
break;
case INPUT_PULLUP:
gpio_set_direction(gpio_pin, GPIO_MODE_INPUT);
gpio_set_pull_mode(gpio_pin, GPIO_PULLUP_ONLY);
break;
default:
gpio_set_direction(gpio_pin, GPIO_MODE_INPUT_OUTPUT);
break;
}
}
// FreeRTOS-based timing shims; resolution is limited by the tick period
inline void delay(uint32_t ms){ vTaskDelay(ms / portTICK_PERIOD_MS);}
inline uint32_t millis() {return (xTaskGetTickCount() * portTICK_PERIOD_MS);}
// NOTE(review): the parameter is microseconds despite the name `ms`
inline void delayMicroseconds(uint32_t ms) {esp_rom_delay_us(ms);}
// micros derived from the tick count: only tick-period resolution
inline uint64_t micros() { return xTaskGetTickCount() * portTICK_PERIOD_MS * 1000;}
// delay and millis has been defined
#define DESKTOP_MILLIS_DEFINED
#endif

View File

@@ -0,0 +1,45 @@
#pragma once
#include "AudioTools/AudioLibs/Desktop/NoArduino.h"
#include <iostream>
#include <thread>
#ifndef DESKTOP_MILLIS_DEFINED
#define DESKTOP_MILLIS_DEFINED
namespace audio_tools {
/// Milliseconds since the Unix epoch, truncated to 32 bit (desktop stand-in
/// for the Arduino millis() function)
inline uint32_t millis(){
  using namespace std::chrono;
  const auto now_ms = time_point_cast<milliseconds>(system_clock::now());
  return static_cast<uint32_t>(now_ms.time_since_epoch().count());
}
/// Blocks the calling thread for ms milliseconds (desktop stand-in for the
/// Arduino delay() function). Marked inline: this header can be included
/// from several translation units; without inline each inclusion emitted a
/// separate definition and linking failed (ODR violation).
inline void delay(unsigned long ms){
  std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}
/// Blocks the calling thread for us microseconds (desktop stand-in for the
/// Arduino delayMicroseconds() function). Marked inline for the same reason
/// as delay(): header-defined functions must be inline to satisfy the ODR.
inline void delayMicroseconds(unsigned int us){
  std::this_thread::sleep_for(std::chrono::microseconds(us));
}
/// Microseconds since the Unix epoch (desktop stand-in for the Arduino
/// micros() function)
inline unsigned long micros(void){
  using namespace std::chrono;
  const auto now_us = time_point_cast<microseconds>(system_clock::now());
  return static_cast<unsigned long>(now_us.time_since_epoch().count());
}
}
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,265 @@
/**
* @file FFTWindows.h
* @author Phil Schatzmann
* @brief Different Window functions that can be used by FFT
* @version 0.1
* @date 2022-04-29
*
* @copyright Copyright (c) 2022
*
*/
#pragma once
#include <math.h>
#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
namespace audio_tools {
/**
* @brief FFT Window Function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class WindowFunction {
 public:
  WindowFunction() = default;

  /// Setup the window function providing the fft length
  virtual void begin(int samples) {
    // pre-compute the values used by ratio() and factor()
    this->samples_minus_1 = -1.0f + samples;
    this->i_samples = samples;
    this->i_half_samples = samples / 2;
  }

  /// Provides the multiplication factor at the indicated position. The result
  /// is symmetrically mirrored around the center.
  inline float factor(int idx) {
    assert(i_half_samples == i_samples / 2);
    // only the first half is computed; the second half is its mirror image
    float result = idx <= i_half_samples ? factor_internal(idx)
                                         : factor_internal(i_samples - idx - 1);
    // clamp: some window formulas can overshoot 1.0
    return result > 1.0f ? 1.0f : result;
  }

  /// Provides the number of samples (fft length)
  inline int samples() { return i_samples; }

  /// Name of the window function (for logging / display)
  virtual const char* name() = 0;

 protected:
  float samples_minus_1 = 0.0f;  // N - 1 as float, denominator of ratio()
  int i_samples = 0;             // fft length N
  int i_half_samples = 0;        // N / 2
  const float twoPi = 6.28318531f;
  const float fourPi = 12.56637061f;
  const float sixPi = 18.84955593f;

  // virtual function: provide implementation in subclass
  virtual float factor_internal(int idx) = 0;

  // the normalized position in the window: idx / (samples - 1)
  inline float ratio(int idx) {
    return (static_cast<float>(idx)) / samples_minus_1;
  }
};
/**
* @brief Buffered window function, so that we do not need to re-calculate the
* values
* @author Phil Schatzmann
* @copyright GPLv3
*/
class BufferedWindow : public WindowFunction {
 public:
  /// Wraps another window function and caches the factors of its first half
  BufferedWindow(WindowFunction* wf) { p_wf = wf; }

  const char* name() override {
    static char buffer[80] = "Buffered ";
    strncpy(buffer + 9, p_wf->name(), 69);
    // strncpy does not 0-terminate when the source reaches the limit
    buffer[78] = 0;
    return buffer;
  }

  virtual void begin(int samples) override {
    WindowFunction::begin(samples);
    // start the wrapped window only if its length changed
    if (p_wf->samples() != samples) {
      p_wf->begin(samples);
    }
    // (re)build the cache whenever its size does not match. The original only
    // rebuilt inside the branch above, which left the cache empty (and
    // factor_internal reading out of bounds) when the wrapped window had
    // already been started with the same length elsewhere.
    int to_be_size = i_half_samples + 1;
    if ((int)buffer.size() != to_be_size) {
      buffer.resize(to_be_size);
      for (int j = 0; j <= i_half_samples; j++) {
        buffer[j] = p_wf->factor(j);
      }
    }
  }

 protected:
  WindowFunction* p_wf = nullptr;
  Vector<float> buffer{0};

  /// Cached factor for the first half (the base class mirrors the second half)
  float factor_internal(int idx) override {
    if (idx < 0 || idx > i_half_samples) return 0.0;
    return buffer[idx];
  }
};
/**
* @brief Rectange FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Rectange : public WindowFunction {
public:
Rectange() = default;
float factor_internal(int idx) {
if (idx < 0 || idx >= i_samples) return 0;
return 1.0f;
}
const char* name() { return "Rectange"; }
};
/**
* @brief Hamming FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Hamming : public WindowFunction {
public:
Hamming() = default;
float factor_internal(int idx) {
return 0.54f - (0.46f * cos(twoPi * ratio(idx)));
}
const char* name() { return "Hamming"; }
};
/**
* @brief Hann FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Hann : public WindowFunction {
public:
Hann() = default;
const char* name() { return "Hann"; }
float factor_internal(int idx) {
return 0.54f * (1.0f - cos(twoPi * ratio(idx)));
}
};
/**
* @brief Triangle FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Triangle : public WindowFunction {
 public:
  Triangle() = default;
  const char* name() { return "Triangle"; }
  // Triangular (Bartlett-style) window: 1 - |n - (N-1)/2| / ((N-1)/2)
  // NOTE(review): uses (idx - 1) rather than idx, which shifts the apex by one
  // sample relative to the textbook definition - confirm whether intentional
  float factor_internal(int idx) {
    return 1.0f - ((2.0f * fabs((idx - 1) -
                                (static_cast<float>(i_samples - 1) / 2.0f))) /
                   samples_minus_1);
  }
};
/**
* @brief Nuttall FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Nuttall : public WindowFunction {
 public:
  Nuttall() = default;
  const char* name() override { return "Nuttall"; }

  /// 4-term Nuttall cosine-sum window
  float factor_internal(int idx) override {
    float pos = ratio(idx);
    float c2 = cos(twoPi * pos);
    float c4 = cos(fourPi * pos);
    float c6 = cos(sixPi * pos);
    return 0.355768f - 0.487396f * c2 + 0.144232f * c4 - 0.012604f * c6;
  }
};
/**
* @brief Blackman FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Blackman : public WindowFunction {
 public:
  Blackman() = default;
  const char* name() override { return "Blackman"; }

  /// Classic 3-term Blackman window
  float factor_internal(int idx) override {
    float pos = ratio(idx);
    float c2 = cos(twoPi * pos);
    float c4 = cos(fourPi * pos);
    return 0.42323f - 0.49755f * c2 + 0.07922f * c4;
  }
};
/**
* @brief BlackmanNuttall FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class BlackmanNuttall : public WindowFunction {
 public:
  BlackmanNuttall() = default;
  const char* name() override { return "BlackmanNuttall"; }

  /// 4-term Blackman-Nuttall window
  float factor_internal(int idx) override {
    float pos = ratio(idx);
    float c2 = cos(twoPi * pos);
    float c4 = cos(fourPi * pos);
    float c6 = cos(sixPi * pos);
    return 0.3635819f - 0.4891775f * c2 + 0.1365995f * c4 - 0.0106411f * c6;
  }
};
/**
* @brief BlackmanHarris FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class BlackmanHarris : public WindowFunction {
 public:
  BlackmanHarris() = default;
  const char* name() override { return "BlackmanHarris"; }

  /// 4-term Blackman-Harris window
  float factor_internal(int idx) override {
    float pos = ratio(idx);
    float c2 = cos(twoPi * pos);
    float c4 = cos(fourPi * pos);
    float c6 = cos(sixPi * pos);
    return 0.35875f - 0.48829f * c2 + 0.14128f * c4 - 0.01168f * c6;
  }
};
/**
* @brief FlatTop FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class FlatTop : public WindowFunction {
 public:
  FlatTop() = default;
  const char* name() override { return "FlatTop"; }
  // NOTE(review): common flat-top definitions use 5 cosine terms; this 3-term
  // variant matches other Arduino FFT libraries - confirm it is intended
  float factor_internal(int idx) override {
    float r = ratio(idx);
    return 0.2810639f - (0.5208972f * cos(twoPi * r)) +
           (0.1980399f * cos(fourPi * r));
  }
};
/**
* @brief Welch FFT Window function
* @author Phil Schatzmann
* @copyright GPLv3
*/
class Welch : public WindowFunction {
 public:
  Welch() = default;
  const char* name() override { return "Welch"; }
  // Parabolic (Welch) window: 1 - ((n - (N-1)/2) / ((N-1)/2))^2
  // NOTE(review): uses (idx - 1) instead of idx, shifting the parabola by one
  // sample relative to the textbook definition - confirm whether intentional
  float factor_internal(int idx) override {
    float tmp =
        (((idx - 1) - samples_minus_1 / 2.0f) / (samples_minus_1 / 2.0f));
    return 1.0f - (tmp * tmp);
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,90 @@
#pragma once
#include "AudioTools/AudioLibs/AudioFFT.h"
#include "AudioTools/Concurrency/LockGuard.h"
namespace audio_tools {
class FFTDisplay;
// Global pointer to the active display: required because the fft callback is a
// plain function pointer. NOTE(review): this supports only one FFTDisplay
// instance at a time, and 'static' in a header gives each translation unit its
// own copy - confirm the header is only included from one compilation unit.
static FFTDisplay *selfFFTDisplay = nullptr;
#if defined(USE_CONCURRENCY)
// fft mutex: serializes access to the magnitudes between fft task and display
static Mutex fft_mux;
#endif
/**
* Display FFT result: we can define a start bin and group susequent bins for a
* combined result.
*/
class FFTDisplay {
 public:
  FFTDisplay(AudioFFTBase &fft) {
    p_fft = &fft;
    // register ourselves for the static fftCallback
    selfFFTDisplay = this;
  }

  /// start bin which is displayed
  int fft_start_bin = 0;
  /// group result by adding subsequent bins
  int fft_group_bin = 1;
  /// Influences the sensitivity
  float fft_max_magnitude = 700.0f;

  /// Registers the fft callback and allocates/clears the magnitude buffer
  void begin() {
    // assign fft callback
    AudioFFTConfig &fft_cfg = p_fft->config();
    fft_cfg.callback = fftCallback;
    // number of bins
    magnitudes.resize(p_fft->size());
    for (int j = 0; j < p_fft->size(); j++) {
      magnitudes[j] = 0;
    }
  }

  /// Returns the magnitude for the indicated led x position. We might
  /// need to combine values from the magnitudes array if this is much bigger.
  float getMagnitude(int x) {
    // guard: before begin() the buffer is empty
    if (magnitudes.size() == 0 || fft_group_bin <= 0) return 0.0f;
    float total = 0;
    for (int j = 0; j < fft_group_bin; j++) {
      int idx = fft_start_bin + (x * fft_group_bin) + j;
      // clamp to the last bin (cast avoids the signed/unsigned comparison)
      if (idx >= (int)magnitudes.size()) {
        idx = magnitudes.size() - 1;
      }
      total += magnitudes[idx];
    }
    return total / fft_group_bin;
  }

  /// Magnitude scaled into the range 0..max
  int getMagnitudeScaled(int x, int max) {
    int result = mapT<float>(getMagnitude(x), 0, fft_max_magnitude, 0.0f,
                             static_cast<float>(max));
    if (result > max) {
      // missing ';' added after the log macro
      LOGD("fft_max_magnitude too small: current value is %f", getMagnitude(x));
    }
    // limit value to max
    return min(result, max);
  }

  /// callback method which provides updated data from fft
  static void fftCallback(AudioFFTBase &fft) {
    // guard: the callback may fire when no FFTDisplay instance exists
    if (selfFFTDisplay != nullptr) selfFFTDisplay->loadMangnitudes();
  };

 protected:
  AudioFFTBase *p_fft = nullptr;
  Vector<float> magnitudes{0};

  /// Copies the current fft magnitudes into the display buffer
  void loadMangnitudes() {
    // just save magnitudes to be displayed
#if defined(USE_CONCURRENCY)
    LockGuard guard(fft_mux);
#endif
    for (int j = 0; j < p_fft->size(); j++) {
      float value = p_fft->magnitude(j);
      magnitudes[j] = value;
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,274 @@
#pragma once
#include "AudioTools/AudioLibs/AudioRealFFT.h" // using RealFFT
#include "AudioTools/CoreAudio/AudioOutput.h"
#include "AudioTools/CoreAudio/StreamCopy.h"
namespace audio_tools {
// Shared default window used by FFTEffectConfig.
// NOTE(review): 'static' in a header gives every translation unit its own
// copy - consider 'inline' (C++17) if this header is included more than once.
static Hann fft_effects_hann;
static BufferedWindow fft_effects_buffered_window{&fft_effects_hann};
/**
* @brief Common configuration for FFT effects
* @ingroup transform
* @author phil schatzmann
*/
struct FFTEffectConfig : public AudioInfo {
  // FFT length in samples
  int length = 1024;
  // hop size between FFTs; values <= 0 fall back to length (see FFTEffect::begin)
  int stride = 512;
  // window applied before the FFT; defaults to a shared buffered Hann window
  WindowFunction *window_function = &fft_effects_buffered_window;
};
/**
* @brief Abstract class for common Logic for FFT based effects. The effect is
* applied after the fft to the frequency domain before executing the ifft.
* Please note that this is quite processing time intensitive: so you might keep
* the sample rate quite low if the processor is not fast enough!
* @ingroup transform
* @author phil schatzmann
*/
class FFTEffect : public AudioOutput {
 public:
  /// @param out target for the re-synthesized (ifft) audio
  FFTEffect(Print &out) {
    p_out = &out;
    // store ourselves in the fft config so the static callback can find us
    fft_cfg.ref = this;
  }

  FFTEffectConfig defaultConfig() {
    FFTEffectConfig c;
    return c;
  }

  /// Starts the processing with the provided fft parameters
  bool begin(FFTEffectConfig info) {
    copier.setLogName("ifft");
    setAudioInfo(info);
    fft_cfg.length = info.length;
    // a stride <= 0 falls back to non-overlapping windows of 'length'
    fft_cfg.stride = info.stride > 0 ? info.stride : info.length;
    fft_cfg.window_function = info.window_function;
    return begin();
  }

  bool begin() override {
    TRACED();
    // copy result to output
    copier.begin(*p_out, fft);
    // setup fft
    fft_cfg.copyFrom(audioInfo());
    fft_cfg.callback = effect_callback;
    LOGI("length: %d", fft_cfg.length);
    LOGI("stride: %d", fft_cfg.stride);
    LOGI("window_function: %s", (fft_cfg.window_function != nullptr)
                                    ? fft_cfg.window_function->name()
                                    : "-");
    return fft.begin(fft_cfg);
  }

  /// Feeds the audio into the fft; the effect itself runs in the callback
  size_t write(const uint8_t *data, size_t len) override {
    TRACED();
    return fft.write(data, len);
  }

 protected:
  Print *p_out = nullptr;
  AudioRealFFT fft;
  AudioFFTConfig fft_cfg{fft.defaultConfig(RXTX_MODE)};
  // NOTE(review): hann/buffered are never referenced in this class (the window
  // comes from FFTEffectConfig) - possibly leftovers; confirm before removing
  Hann hann;
  BufferedWindow buffered{&hann};
  StreamCopy copier;

  /// Subclasses implement the actual frequency domain manipulation here
  virtual void effect(AudioFFTBase &fft) = 0;

  /// Invoked by the fft whenever a new frequency domain frame is available
  static void effect_callback(AudioFFTBase &fft) {
    TRACED();
    FFTEffect *ref = (FFTEffect *)fft.config().ref;
    // execute effect
    ref->effect(fft);
    // write ifft to output
    ref->processOutput();
  }

  /// Drains the inverse fft result to the configured output
  void processOutput() {
    TRACED();
    while (copier.copy());
  }
};
/**
* @brief Apply Robotize FFT Effect on frequency domain data. See
* https://learn.bela.io/tutorials/c-plus-plus-for-real-time-audio-programming/phase-vocoder-part-3/
* @ingroup transform
* @author phil schatzmann
*/
class FFTRobotize : public FFTEffect {
friend FFTEffect;
public:
FFTRobotize(AudioStream &out) : FFTEffect(out) { addNotifyAudioChange(out); };
FFTRobotize(AudioOutput &out) : FFTEffect(out) { addNotifyAudioChange(out); };
FFTRobotize(Print &out) : FFTEffect(out) {};
protected:
/// Robotise the output
void effect(AudioFFTBase &fft) {
TRACED();
AudioFFTResult best = fft.result();
FFTBin bin;
for (int n = 0; n < fft.size(); n++) {
float amplitude = fft.magnitude(n);
// update new bin value
bin.real = amplitude / best.magnitude;
bin.img = 0.0;
Serial.println(bin.real);
fft.setBin(n, bin);
}
}
};
/**
 * @brief Apply Whisper FFT Effect on frequency domain data: keeps each bin's
 * magnitude but randomizes its phase. See
 * https://learn.bela.io/tutorials/c-plus-plus-for-real-time-audio-programming/phase-vocoder-part-3/
 * @ingroup transform
 * @author phil schatzmann
 */
class FFTWhisper : public FFTEffect {
  friend FFTEffect;

 public:
  FFTWhisper(AudioStream &out) : FFTEffect(out) { addNotifyAudioChange(out); };
  FFTWhisper(AudioOutput &out) : FFTEffect(out) { addNotifyAudioChange(out); };
  FFTWhisper(Print &out) : FFTEffect(out) {};

 protected:
  /// Whisper the output: keep the magnitude, pick a random phase in [0, 2*pi)
  void effect(AudioFFTBase &fft) {
    TRACED();
    FFTBin bin;
    for (int n = 0; n < fft.size(); n++) {
      float amplitude = fft.magnitude(n);
      float phase = rand() / (float)RAND_MAX * 2.f * PI;
      // update new bin value
      bin.real = cosf(phase) * amplitude;
      bin.img = sinf(phase) * amplitude;
      fft.setBin(n, bin);
    }
  }
};
/**
* @brief Apply FFT and IFFT w/o any changes to the frequency domain
* @ingroup transform
* @author phil schatzmann
*/
class FFTNop : public FFTEffect {
  friend FFTEffect;

 public:
  FFTNop(AudioStream &out) : FFTEffect(out) { addNotifyAudioChange(out); };
  FFTNop(AudioOutput &out) : FFTEffect(out) { addNotifyAudioChange(out); };
  FFTNop(Print &out) : FFTEffect(out) {};

 protected:
  /// Do nothing: the audio just passes through fft -> ifft unchanged
  void effect(AudioFFTBase &fft) {}
};
/**
* @brief Pitch Shift FFT Effect Configuration
* @ingroup transform
* @author phil schatzmann
*/
struct FFTPitchShiftConfig : public FFTEffectConfig {
  // number of bins to shift: > 0 shifts up, < 0 shifts down (see FFTPitchShift)
  int shift = 1;
};
/**
* @brief Apply Pitch Shift FFT Effect on frequency domain data: we just move
* the bins up or down
* @ingroup transform
* @author phil schatzmann
*/
class FFTPitchShift : public FFTEffect {
  friend FFTEffect;

 public:
  FFTPitchShift(AudioStream &out) : FFTEffect(out) {
    addNotifyAudioChange(out);
  };
  FFTPitchShift(AudioOutput &out) : FFTEffect(out) {
    addNotifyAudioChange(out);
  };
  FFTPitchShift(Print &out) : FFTEffect(out) {};

  FFTPitchShiftConfig defaultConfig() {
    FFTPitchShiftConfig result;
    result.shift = shift;
    return result;
  }

  /// Starts the processing with the indicated shift
  bool begin(FFTPitchShiftConfig psConfig) {
    setShift(psConfig.shift);
    // FFTEffect::begin(cfg) already dispatches to the virtual begin() below;
    // the original called begin() a second time afterwards, starting the fft
    // twice for every begin(cfg)
    return FFTEffect::begin(psConfig);
  }

  bool begin() override {
    bool rc = FFTEffect::begin();
    // you can not shift more then you have bins
    assert(abs(shift) < fft.size());
    return rc;
  }

  /// defines how many bins should be shifted up (>0) or down (<0);
  void setShift(int bins) { shift = bins; }

 protected:
  int shift = 1;

  /// Pitch Shift: move all bins up or down by |shift| positions and clear the
  /// bins that shifted in from outside
  void effect(AudioFFTBase &fft) override {
    TRACED();
    FFTBin bin;
    int max = fft.size();
    if (shift < 0) {
      // copy bins: left shift
      for (int n = -shift; n < max; n++) {
        int to_bin = n + shift;
        assert(to_bin >= 0);
        assert(to_bin < max);
        fft.getBin(n, bin);
        fft.setBin(to_bin, bin);
      }
      // clear tail
      bin.clear();
      for (int n = max + shift; n < max; n++) {
        fft.setBin(n, bin);
      }
    } else if (shift > 0) {
      // copy bins: right shift (iterate backwards to avoid overwriting)
      for (int n = max - shift - 1; n >= 0; n--) {
        int to_bin = n + shift;
        assert(to_bin >= 0);
        assert(to_bin < max);
        fft.getBin(n, bin);
        fft.setBin(to_bin, bin);
      }
      // clear head
      bin.clear();
      for (int n = 0; n < shift; n++) {
        fft.setBin(n, bin);
      }
    }
  }
};
} // namespace audio_tools

View File

@@ -0,0 +1,3 @@
#pragma once
// Obsolete forwarding header.
// The original used '#WARNING(...)', which is not a valid preprocessing
// directive (GCC/Clang/C++23 use lowercase '#warning' with no parentheses)
// and broke compilation of this header.
#warning "Obsolete - use AudioTools/Communication/HLSStream.h"
#include "AudioTools/Communication/HLSStream.h"

View File

@@ -0,0 +1,387 @@
#pragma once
#include "AudioBoard.h" // install audio-driver library
#include "AudioToolsConfig.h"
#include "AudioTools/CoreAudio/AudioI2S/I2SStream.h"
//#pragma GCC diagnostic ignored "-Wclass-memaccess"
// Added to be compatible with the AudioKitStream.h
#ifndef PIN_AUDIO_KIT_SD_CARD_CS
#define PIN_AUDIO_KIT_SD_CARD_CS 13
#define PIN_AUDIO_KIT_SD_CARD_MISO 2
#define PIN_AUDIO_KIT_SD_CARD_MOSI 15
#define PIN_AUDIO_KIT_SD_CARD_CLK 14
#endif
namespace audio_tools {
/**
* @brief Configuration for I2SCodecStream
* @ingroup io
* @author Phil Schatzmann
* @copyright GPLv3
*/
struct I2SCodecConfig : public I2SConfig {
input_device_t input_device = ADC_INPUT_LINE1;
output_device_t output_device = DAC_OUTPUT_ALL;
// to be compatible with the AudioKitStream -> do not activate SD spi if false
bool sd_active = true;
// define pin source in driver configuration
PinFunction i2s_function = PinFunction::UNDEFINED; //CODEC;
bool operator==(I2SCodecConfig alt) {
return input_device == alt.input_device &&
output_device == alt.output_device && *((AudioInfo *)this) == alt;
}
bool operator!=(I2SCodecConfig alt) { return !(*this == alt); }
};
/**
* @brief I2S Stream which also sets up a codec chip and i2s
* @ingroup io
* @author Phil Schatzmann
* @copyright GPLv3
*/
class I2SCodecStream : public AudioStream, public VolumeSupport {
public:
/// Default Constructor (w/o codec)
I2SCodecStream() = default;
/**
* @brief Default constructor: for available AudioBoard values check
* audioboard variables in
* https://pschatzmann.github.io/arduino-audio-driver/html/group__audio__driver.html
* Further information can be found in
* https://github.com/pschatzmann/arduino-audio-driver/wiki
*/
I2SCodecStream(AudioBoard &board) { setBoard(board); }
/// Provide board via pointer
I2SCodecStream(AudioBoard *board) { setBoard(board); }
/// Provides the default configuration
I2SCodecConfig defaultConfig(RxTxMode mode = TX_MODE) {
auto cfg1 = i2s.defaultConfig(mode);
I2SCodecConfig cfg;
memcpy(&cfg, &cfg1, sizeof(cfg1));
cfg.input_device = ADC_INPUT_LINE1;
cfg.output_device = DAC_OUTPUT_ALL;
cfg.sd_active = true;
cfg.rx_tx_mode = mode;
return cfg;
}
bool begin() {
TRACED();
return begin(cfg);
}
/// Starts the I2S interface
virtual bool begin(I2SCodecConfig cfg) {
TRACED();
this->cfg = cfg;
this->info = cfg;
return begin1();
}
/// Stops the I2S interface
void end() {
TRACED();
if (p_board) p_board->end();
i2s.end();
is_active = false;
}
/// updates the sample rate dynamically
virtual void setAudioInfo(AudioInfo info) {
TRACEI();
AudioStream::setAudioInfo(info);
i2s.setAudioInfo(info);
cfg.sample_rate = info.sample_rate;
cfg.bits_per_sample = info.bits_per_sample;
cfg.channels = info.channels;
// update codec_cfg
codec_cfg.i2s.bits = toCodecBits(cfg.bits_per_sample);
codec_cfg.i2s.rate = toRate(cfg.sample_rate);
// return if we we are not ready
if (!is_active || p_board == nullptr) {
return;
}
// return if there is nothing to do
if (cfg.sample_rate == info.sample_rate &&
cfg.bits_per_sample == info.bits_per_sample &&
cfg.channels == info.channels) {
return;
}
// update cfg
p_board->setConfig(codec_cfg);
}
/// Writes the audio data to I2S
virtual size_t write(const uint8_t *data, size_t len) {
LOGD("I2SStream::write: %d", len);
return i2s.write(data, len);
}
/// Reads the audio data
virtual size_t readBytes(uint8_t *data, size_t len) override {
return i2s.readBytes(data, len);
}
/// Provides the available audio data
virtual int available() override { return i2s.available(); }
/// Provides the available audio data
virtual int availableForWrite() override { return i2s.availableForWrite(); }
/// sets the volume (range 0.0f - 1.0f)
bool setVolume(float vol) override {
VolumeSupport::setVolume(vol);
if (!is_active || p_board == nullptr) return false;
return p_board->setVolume(vol * 100.0);
}
/// Provides the actual volume (0.0f - 1.0f)
float volume() override {
if (p_board == nullptr) return 0.0f;
return static_cast<float>(p_board->getVolume()) / 100.0f;
}
/// legacy: same as volume()
float getVolume() { return volume(); }
/// Mute / unmote
bool setMute(bool mute) {
if (p_board == nullptr) return false;
return p_board->setMute(mute);
}
/// Mute / unmute of an individual line (codec)
bool setMute(bool mute, int line) {
if (p_board == nullptr) return false;
return p_board->setMute(mute, line);
}
/// Sets the output of the PA Power Pin
bool setPAPower(bool active) {
if (p_board == nullptr) return false;
return p_board->setPAPower(active);
}
/// Sets the volume of the microphone (if available)
bool setInputVolume(float vol){
if (!is_active || p_board == nullptr) return false;
return p_board->setInputVolume(100.0 * vol);
}
/// Provides the board
AudioBoard &board() { return *p_board; }
/// (re)defines the board
void setBoard(AudioBoard &board) { p_board = &board; }
/// (re)defines the board
void setBoard(AudioBoard *board) { p_board = board; }
/// checks if a board has been defined
bool hasBoard() { return p_board != nullptr; }
/// Provides the gpio for the indicated function
GpioPin getPinID(PinFunction function) {
if (p_board == nullptr) return -1;
return p_board->getPins().getPinID(function);
}
/// Provides the gpio for the indicated function
GpioPin getPinID(PinFunction function, int pos) {
if (p_board == nullptr) return -1;
return p_board->getPins().getPinID(function, pos);
}
/// Provides the gpio for the indicated key pos
GpioPin getKey(int pos) { return getPinID(PinFunction::KEY, pos); }
/// Provides access to the pin information
DriverPins &getPins() { return p_board->getPins(); }
/// Provides the i2s driver
I2SDriver *driver() { return i2s.driver(); }
protected:
I2SStream i2s;
I2SCodecConfig cfg;
CodecConfig codec_cfg;
AudioBoard *p_board = nullptr;
bool is_active = false;
bool begin1() {
TRACED();
setupI2SFunction();
setupI2SPins();
if (!beginCodec(cfg)) {
TRACEE();
is_active = false;
return false;
}
is_active = i2s.begin(cfg);
// if setvolume was called before begin
float tobeVol = VolumeSupport::volume();
if (is_active && tobeVol >= 0.0f) {
setVolume(tobeVol);
}
return is_active;
}
/// if the cfg.i2s_function was not defined we determine the "correct" default value
void setupI2SFunction() {
if (cfg.i2s_function == PinFunction::UNDEFINED){
if (cfg.rx_tx_mode == RX_MODE){
auto i2s = p_board->getPins().getI2SPins(PinFunction::CODEC_ADC);
if (i2s){
cfg.i2s_function = PinFunction::CODEC_ADC;
LOGI("using i2s_function: CODEC_ADC");
} else {
cfg.i2s_function = PinFunction::CODEC;
}
} else {
cfg.i2s_function = PinFunction::CODEC;
}
}
}
/// We use the board pins if they are available
void setupI2SPins() {
TRACED();
// determine relevant I2S pins from driver configuration
auto i2s = getI2SPins();
if (i2s) {
// determine i2s pins from board definition
PinsI2S i2s_pins = i2s.value();
cfg.pin_bck = i2s_pins.bck;
cfg.pin_mck = i2s_pins.mclk;
cfg.pin_ws = i2s_pins.ws;
switch (cfg.rx_tx_mode) {
case RX_MODE:
cfg.pin_data = i2s_pins.data_in;
break;
case TX_MODE:
cfg.pin_data = i2s_pins.data_out;
break;
default:
cfg.pin_data = i2s_pins.data_out;
cfg.pin_data_rx = i2s_pins.data_in;
break;
}
}
}
audio_driver_local::Optional<PinsI2S> getI2SPins(){
TRACED();
audio_driver_local::Optional<PinsI2S> i2s;
// Deterine I2S pins
return p_board->getPins().getI2SPins(cfg.i2s_function);
}
bool beginCodec(I2SCodecConfig info) {
TRACED();
switch (cfg.rx_tx_mode) {
case RX_MODE:
codec_cfg.input_device = info.input_device;
codec_cfg.output_device = DAC_OUTPUT_NONE;
break;
case TX_MODE:
codec_cfg.output_device = info.output_device;
codec_cfg.input_device = ADC_INPUT_NONE;
break;
default:
codec_cfg.input_device = info.input_device;
codec_cfg.output_device = info.output_device;
break;
}
codec_cfg.sd_active = info.sd_active;
LOGD("input: %d", info.input_device);
LOGD("output: %d", info.output_device);
codec_cfg.i2s.bits = toCodecBits(info.bits_per_sample);
codec_cfg.i2s.rate = toRate(info.sample_rate);
codec_cfg.i2s.fmt = toFormat(info.i2s_format);
codec_cfg.i2s.signal_type = (signal_t) info.signal_type;
// use reverse logic for codec setting
codec_cfg.i2s.mode = info.is_master ? MODE_SLAVE : MODE_MASTER;
if (p_board == nullptr) return false;
// setup driver only on changes
return p_board->begin(codec_cfg);
}
sample_bits_t toCodecBits(int bits) {
switch (bits) {
case 16:
LOGD("BIT_LENGTH_16BITS");
return BIT_LENGTH_16BITS;
case 24:
LOGD("BIT_LENGTH_24BITS");
return BIT_LENGTH_24BITS;
case 32:
LOGD("BIT_LENGTH_32BITS");
return BIT_LENGTH_32BITS;
}
LOGE("Unsupported bits: %d", bits);
return BIT_LENGTH_16BITS;
}
samplerate_t toRate(int rate) {
if (rate <= 8000) {
LOGD("RATE_8K");
return RATE_8K;
}
if (rate <= 11000) {
LOGD("RATE_11K");
return RATE_11K;
}
if (rate <= 16000) {
LOGD("RATE_16K");
return RATE_16K;
}
if (rate <= 22050) {
LOGD("RATE_22K");
return RATE_22K;
}
if (rate <= 32000) {
LOGD("RATE_32K");
return RATE_32K;
}
if (rate <= 44100) {
LOGD("RATE_44K");
return RATE_44K;
}
if (rate <= 48000 || rate > 48000) {
LOGD("RATE_48K");
return RATE_44K;
}
LOGE("Invalid rate: %d using 44K", rate);
return RATE_44K;
}
i2s_format_t toFormat(I2SFormat fmt) {
switch (fmt) {
case I2S_PHILIPS_FORMAT:
case I2S_STD_FORMAT:
LOGD("I2S_NORMAL");
return I2S_NORMAL;
case I2S_LEFT_JUSTIFIED_FORMAT:
case I2S_MSB_FORMAT:
LOGD("I2S_LEFT");
return I2S_LEFT;
case I2S_RIGHT_JUSTIFIED_FORMAT:
case I2S_LSB_FORMAT:
LOGD("I2S_RIGHT");
return I2S_RIGHT;
case I2S_PCM:
LOGD("I2S_DSP");
return I2S_DSP;
default:
LOGE("unsupported mode");
return I2S_NORMAL;
}
}
};
} // namespace audio_tools

View File

@@ -0,0 +1,4 @@
#pragma once
#include "Desktop/Time.h"
#include "Desktop/JupyterAudio.h"
#include "Desktop/File.h"

View File

@@ -0,0 +1,282 @@
#pragma once
#include <FastLED.h>
#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
#include "AudioTools/AudioLibs/AudioFFT.h"
#include "FFTDisplay.h"
namespace audio_tools {
class LEDOutput;
struct LEDOutputConfig;
// default callback function which implements led update
void fftLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix);
// led update for volume
void volumeLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix);
// default color
CHSV getDefaultColor(int x, int y, int magnitude);
/**
* LED Matrix Configuration. Provide the number of leds in x and y direction and
* the data pin.
* @author Phil Schatzmann
*/
struct LEDOutputConfig {
  /// Number of leds in x direction
  int x = 0;
  /// Number of leds in y direction
  int y = 1;
  /// optional custom logic to provide the CHSV color: providing a 'rainbow'
  /// color with hue 0-255, saturation 0-255, and brightness (value) 0-255
  CHSV (*color_callback)(int x, int y, int magnitude) = getDefaultColor;
  /// Custom callback logic to update the LEDs - by default we use
  /// fftLEDOutput()
  void (*update_callback)(LEDOutputConfig *cfg, LEDOutput *matrix) = nullptr;
  /// Update the leds only every nth call
  int update_frequency = 1;  // update every call
  // wiring layout: serpentine = every other row/column runs backwards (see xy())
  bool is_serpentine_layout = true;
  bool is_matrix_vertical = true;
  /// Influences the sensitivity
  int max_magnitude = 700;
};
/**
* @brief LED output using the FastLED library.
* @author Phil Schatzmann
*/
class LEDOutput {
 public:
  /// @brief Default Constructor
  LEDOutput() = default;

  /// @brief Constructor for FFT scenario
  /// @param fft
  LEDOutput(FFTDisplay &fft) {
    p_fft = &fft;
    cfg.update_callback = fftLEDOutput;
  }

  /// @brief Constructor for VolumeMeter scenario
  /// @param vol
  LEDOutput(VolumeMeter &vol) {
    p_vol = &vol;
    cfg.update_callback = volumeLEDOutput;
  }

  /// Provides the default config object
  LEDOutputConfig defaultConfig() { return cfg; }

  /// Setup Led matrix
  bool begin(LEDOutputConfig config) {
    cfg = config;
    if (ledCount() == 0) {
      LOGE("x or y == 0");
      return false;
    }
    // allocate leds
    leds.resize(ledCount());
    for (int j = 0; j < ledCount(); j++) {
      led(j) = CRGB::Black;
    }
    // clear LED
    FastLED.clear();  // clear all pixel data
    if (p_fft != nullptr) {
      p_fft->begin();
    }
    max_column = -1;
    return true;
  }

  /// Provides the number of LEDs: call begin() first!
  int ledCount() {
    int num_leds = cfg.x * cfg.y;
    return num_leds;
  }

  /// Provides the address of the CRGB array: call begin() first!
  CRGB *ledData() {
    if (ledCount() == 0) {
      LOGE("x or y == 0");
      return nullptr;
    }
    return leds.data();
  }

  /// Updates the display: call this method in your loop
  virtual void update() {
    if (cfg.update_callback != nullptr && count++ % cfg.update_frequency == 0) {
      // use custom update logic defined in config
      cfg.update_callback(&cfg, this);
    } else {
      display();
    }
  }

  /// Determine the led with the help of the x and y pos
  CRGB &ledXY(uint8_t x, uint8_t y) {
    // clamp to the valid range 0..cfg.x-1 / 0..cfg.y-1. The parameters are
    // unsigned, so the original '< 0' checks were dead code, and its '> cfg.x'
    // test let x == cfg.x index one past the end.
    if (x >= cfg.x) x = cfg.x - 1;
    if (y >= cfg.y) y = cfg.y - 1;
    int index = xy(x, y);
    return leds[index];
  }

  /// Determine the led with the help of the index pos
  CRGB &led(int index) {
    // parameter widened from uint8_t (which silently wrapped for matrixes
    // with more than 255 leds); valid indexes are 0..count-1 - the original
    // also accepted index == count
    if (index < 0 || index >= ledCount()) return not_valid;
    return leds[index];
  }

  /// Update the indicated column with the indicated bar
  void setColumnBar(int x, int currY) {
    // update vertical bar
    for (uint8_t y = 0; y < currY; y++) {
      // determine color
      CHSV color = cfg.color_callback(x, y, currY);
      // update LED
      ledXY(x, y) = color;
    }
    for (uint8_t y = currY; y < cfg.y; y++) {
      ledXY(x, y) = CRGB::Black;
    }
    if (x > max_column) max_column = x;
  }

  /// Update the last column with the indicated bar
  void setColumnBar(int currY) { setColumnBar(cfg.x - 1, currY); }

  /// Append a new bar on the right, scrolling the content to the left if full
  void addColumnBar(int currY) {
    max_column++;
    if (max_column >= cfg.x) {
      addEmptyColumn();
    }
    if (max_column > cfg.x - 1) {
      max_column = cfg.x - 1;
    }
    setColumnBar(max_column, currY);
  }

  /// Provides access to the actual config object. E.g. to change the update
  /// logic
  LEDOutputConfig &config() { return cfg; }

  /// Provides the max magnitude for both the VolumeMeter and the FFT scenario
  virtual float getMaxMagnitude() {
    // volume meter takes precedence if defined
    if (p_vol != nullptr) {
      return p_vol->volume();
    }
    float max = 0;
    if (p_fft != nullptr) {
      for (int j = 0; j < cfg.x; j++) {
        float value = p_fft->getMagnitude(j);
        if (value > max) {
          max = value;
        }
      }
    }
    return max;
  }

  /// Update the led_matrix (calling FastLED.show())
  void display() { FastLED.show(); }

  /// Provides access to the FFTDisplay object
  FFTDisplay &fftDisplay() { return *p_fft; }

 protected:
  friend class AudioFFTBase;
  CRGB not_valid;  // returned by led() for out-of-range indexes
  Vector<CRGB> leds{0};
  LEDOutputConfig cfg;
  VolumeMeter *p_vol = nullptr;
  FFTDisplay *p_fft = nullptr;
  uint64_t count = 0;
  int max_column = -1;

  /// Adds an empty column to the end shifting the content to the left
  void addEmptyColumn() {
    for (int x = 1; x < cfg.x; x++) {
      for (int y = 0; y < cfg.y; y++) {
        ledXY(x - 1, y) = ledXY(x, y);
      }
    }
    for (int y = 0; y < cfg.y; y++) {
      ledXY(cfg.x - 1, y) = CRGB::Black;
    }
  }

  /// Maps the x,y position to the led index honoring the wiring layout
  uint16_t xy(uint8_t x, uint8_t y) {
    uint16_t i = 0;  // initialized (the original left it indeterminate)
    if (cfg.is_serpentine_layout == false) {
      if (cfg.is_matrix_vertical == false) {
        i = (y * cfg.x) + x;
      } else {
        i = cfg.y * (cfg.x - (x + 1)) + y;
      }
    }
    if (cfg.is_serpentine_layout == true) {
      if (cfg.is_matrix_vertical == false) {
        if (y & 0x01) {
          // Odd rows run backwards
          uint8_t reverseX = (cfg.x - 1) - x;
          i = (y * cfg.x) + reverseX;
        } else {
          // Even rows run forwards
          i = (y * cfg.x) + x;
        }
      } else {  // vertical positioning
        if (x & 0x01) {
          i = cfg.y * (cfg.x - (x + 1)) + y;
        } else {
          i = cfg.y * (cfg.x - x) - (y + 1);
        }
      }
    }
    return i;
  }
};
/// Default update implementation which provides the fft result as "barchart"
void fftLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix) {
  // fft_mux and LockGuard only exist when USE_CONCURRENCY is defined (see
  // FFTDisplay.h); the original referenced them unconditionally, which broke
  // the build without concurrency support
#if defined(USE_CONCURRENCY)
  LockGuard guard(fft_mux);
#endif
  // process horizontal
  for (int x = 0; x < cfg->x; x++) {
    // max y determined by magnitude
    int currY = matrix->fftDisplay().getMagnitudeScaled(x, cfg->y);
    LOGD("x: %d, y: %d", x, currY);
    matrix->setColumnBar(x, currY);
  }
  FastLED.show();
}
/// Default update implementation which renders the current volume as a
/// scrolling bar chart
void volumeLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix) {
  float magnitude = matrix->getMaxMagnitude();
  // scale the magnitude into the available number of rows
  int bar_height = mapT<float>(magnitude, 0, cfg->max_magnitude, 0.0f,
                               static_cast<float>(cfg->y));
  matrix->addColumnBar(bar_height);
  FastLED.show();
}
/// Default logic to update the color for the indicated x,y position
CHSV getDefaultColor(int x, int y, int magnitude) {
  // magnitudes 0..7 are mapped onto the hue range 255..0
  int hue = map(magnitude, 0, 7, 255, 0);
  return CHSV(hue, 255, 100);
}
} // namespace audio_tools

View File

@@ -0,0 +1,193 @@
#pragma once
#include "Arduino_LED_Matrix.h"
#include "AudioTools/AudioLibs/AudioFFT.h"
#include "FFTDisplay.h"
namespace audio_tools {
class LEDOutputUnoR4;
struct LEDOutputUnoR4Config;
// default callback function which implements led update based on fft
void fftLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix);
// led update for volume
void volumeLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix);
/**
* LED Matrix Configuration. Provide the number of leds in x and y direction and
* the data pin.
* @author Phil Schatzmann
*/
struct LEDOutputUnoR4Config {
  /// Custom callback logic to update the LEDs when update() is called
  void (*update_callback)(LEDOutputUnoR4Config *cfg,
                          LEDOutputUnoR4 *matrix) = nullptr;
  /// Update the leds only every nth call
  int update_frequency = 1;  // update every call
  /// Number of LEDs in a row
  int x = 12;
  /// Number of LEDs in a column
  int y = 8;
  /// when true 0,0 is in the lower left corner
  bool y_mirror = true;
  /// Influences the sensitivity
  int max_magnitude = 700;
};
/**
 * @brief LED output using the R4 LED matrix library. Renders bar charts
 * (e.g. from FFT magnitudes or a volume meter) into a boolean frame buffer
 * which is then loaded into the ArduinoLEDMatrix.
 * @ingroup io
 * @author Phil Schatzmann
 */
class LEDOutputUnoR4 {
 public:
  /// @brief Default Constructor
  LEDOutputUnoR4() = default;

  /// @brief Constructor for FFT scenario
  /// @param fft FFTDisplay providing per-column magnitudes
  LEDOutputUnoR4(FFTDisplay &fft) {
    p_fft = &fft;
    cfg.update_callback = fftLEDOutputUnoR4;
  }

  /// @brief Constructor for VolumeMeter scenario
  /// @param vol VolumeMeter providing the current volume
  LEDOutputUnoR4(VolumeMeter &vol) {
    p_vol = &vol;
    cfg.update_callback = volumeLEDOutputUnoR4;
  }

  /// Provides the default config object
  LEDOutputUnoR4Config defaultConfig() { return cfg; }

  /// Starts the processing with the default configuration
  bool begin() { return begin(defaultConfig()); }

  /// Setup Led matrix: allocates the frame buffer and starts the matrix
  bool begin(LEDOutputUnoR4Config config) {
    cfg = config;
    frame.resize(cfg.x * cfg.y);
    led_matrix.begin();
    max_column = -1;
    return true;
  }

  /// Updates the display by calling the update callback method: call this
  /// method in your loop
  virtual void update() {
    // guard against update_frequency <= 0 which would cause a division by zero
    int every_nth = cfg.update_frequency < 1 ? 1 : cfg.update_frequency;
    if (cfg.update_callback != nullptr && count++ % every_nth == 0) {
      // use custom update logic defined in config
      cfg.update_callback(&cfg, this);
    } else {
      display();
    }
  }

  /// Determine the led with the help of the x and y pos
  bool &ledXY(uint8_t x, uint8_t y) {
    // mirror vertically so that 0,0 is in the lower left corner
    if (cfg.y_mirror) y = cfg.y - y - 1;
    return frame[x + (y * cfg.x)];
  }

  /// Provides the max magnitude for the VolumeMeter and FFT scenario
  virtual float getMaxMagnitude() {
    // volume meter has priority when defined
    if (p_vol != nullptr) {
      return p_vol->volume();
    }
    // otherwise use the maximum fft magnitude over all columns
    float max = 0;
    if (p_fft != nullptr) {
      for (int j = 0; j < cfg.x; j++) {
        float value = p_fft->getMagnitude(j);
        if (value > max) {
          max = value;
        }
      }
    }
    return max;
  }

  /// Update the indicated column with the indicated bar
  void setColumnBar(int x, int currY) {
    // ignore invalid columns to avoid out of bounds frame access
    if (x < 0 || x >= cfg.x) return;
    // clamp the bar height into the valid range [0, cfg.y]
    if (currY < 0) currY = 0;
    if (currY > cfg.y) currY = cfg.y;
    // leds below the bar height are on ...
    for (uint8_t y = 0; y < currY; y++) {
      ledXY(x, y) = true;
    }
    // ... the rest of the column is off
    for (uint8_t y = currY; y < cfg.y; y++) {
      ledXY(x, y) = false;
    }
    if (x > max_column) max_column = x;
  }

  /// Update the last column with the indicated bar
  void addColumnBar(int currY) {
    max_column++;
    // when the display is full we scroll the content to the left
    if (max_column >= cfg.x) {
      addEmptyColumn();
    }
    if (max_column > cfg.x - 1) {
      max_column = cfg.x - 1;
    }
    setColumnBar(max_column, currY);
  }

  /// Provides access to the actual config object. E.g. to change the update
  /// logic
  LEDOutputUnoR4Config &config() { return cfg; }

  /// Update the led_matrix from the frame buffer
  void display() {
    led_matrix.loadPixels((uint8_t *)frame.data(), cfg.x * cfg.y);
  }

  /// Provides access to the FFTDisplay object
  FFTDisplay &fftDisplay() { return *p_fft; }

 protected:
  friend class AudioFFTBase;
  LEDOutputUnoR4Config cfg;
  FFTDisplay *p_fft = nullptr;
  VolumeMeter *p_vol = nullptr;
  uint64_t count = 0;             // number of update() calls
  ArduinoLEDMatrix led_matrix;
  Vector<bool> frame{0};          // x * y pixel states
  int max_column = -1;            // right-most column used by addColumnBar

  /// Adds an empty column to the end shifting the content to the left
  void addEmptyColumn() {
    for (int x = 1; x < cfg.x; x++) {
      for (int y = 0; y < cfg.y; y++) {
        ledXY(x - 1, y) = ledXY(x, y);
      }
    }
    for (int y = 0; y < cfg.y; y++) {
      ledXY(cfg.x - 1, y) = false;
    }
  }
};
/// Default update implementation which provides the fft result as "barchart"
void fftLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix) {
  // render one vertical bar per column; the height is the scaled fft magnitude
  for (int col = 0; col < cfg->x; col++) {
    int bar = matrix->fftDisplay().getMagnitudeScaled(col, cfg->y);
    LOGD("x: %d, y: %d", col, bar);
    matrix->setColumnBar(col, bar);
  }
  matrix->display();
}
/// Default update implementation which displays the current volume as a moving
/// "barchart" (the prior comment wrongly referred to the fft result)
void volumeLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix) {
  float vol = matrix->getMaxMagnitude();
  // scale the volume from [0, max_magnitude] into a bar height in [0, y]
  int currY = mapT<float>(vol, 0.0,
             cfg->max_magnitude, 0.0f,
             static_cast<float>(cfg->y));
  // clamp: volumes above max_magnitude would otherwise produce a bar that
  // exceeds the matrix height
  if (currY > cfg->y) currY = cfg->y;
  if (currY < 0) currY = 0;
  matrix->addColumnBar(currY);
  matrix->display();
}
} // namespace audio_tools

View File

@@ -0,0 +1,78 @@
#pragma once
#include "AudioToolsConfig.h"
#include "AudioTools/CoreAudio/AudioTypes.h"
#include "maximilian.h"
#include "libs/maxiClock.h"
// Maximilian play function - return an array of 2 channels
void play(maxi_float_t *channels);//run dac!
void play1(maxi_float_t *channels);//run dac!
void play2(maxi_float_t *channels);//run dac!
namespace audio_tools {
/**
 * @brief AudioTools integration with Maximilian: pulls samples from the
 * Maximilian play callback, converts them to int16 and writes them to the
 * defined audio sink.
 * @ingroup dsp
 */
class Maximilian : public VolumeSupport {
 public:
  /// @brief Constructor
  /// @param out audio sink which receives the generated int16 samples
  /// @param bufferSize size of the transfer buffer in bytes
  /// @param callback Maximilian play function which fills one frame (one
  /// sample per channel)
  Maximilian(Print &out, int bufferSize = DEFAULT_BUFFER_SIZE,
             void (*callback)(maxi_float_t *channels) = play) {
    buffer_size = bufferSize;
    p_sink = &out;
    this->callback = callback;
  }

  ~Maximilian() {}

  /// Setup Maximilian with audio parameters
  void begin(AudioInfo cfg) {
    this->cfg = cfg;
    buffer.resize(buffer_size);
    maxiSettings::setup(cfg.sample_rate, cfg.channels, DEFAULT_BUFFER_SIZE);
  }

  /// Defines the volume. The values are between 0.0 and 1.0; out of range
  /// values are clipped and reported by returning false
  bool setVolume(float f) override {
    if (f > 1.0f) {
      VolumeSupport::setVolume(1.0f);
      return false;
    }
    if (f < 0.0f) {
      VolumeSupport::setVolume(0.0f);
      return false;
    }
    VolumeSupport::setVolume(f);
    return true;
  }

  /// Copies the audio data from maximilian to the audio sink. Call this method
  /// from the Arduino loop.
  void copy() {
    // one frame of samples as delivered by the Maximilian play callback
    maxi_float_t out[cfg.channels];
    uint16_t samples = buffer_size / sizeof(int16_t);
    int16_t *p_samples = (int16_t *)buffer.data();
    // fill the buffer frame by frame; stop before a partial frame would write
    // past the end of the buffer (the old condition j < samples could write
    // up to channels-1 samples beyond it when samples % channels != 0)
    for (uint16_t j = 0; j + cfg.channels <= samples; j += cfg.channels) {
      callback(out);
      // convert all channels to int16 applying the volume
      for (int ch = 0; ch < cfg.channels; ch++) {
        p_samples[j + ch] = volume() * out[ch] * 32767.0f;
      }
    }
    // write buffer to audio sink
    unsigned int result = p_sink->write(buffer.data(), buffer_size);
    LOGI("bytes written %u", result);
  }

 protected:
  Vector<uint8_t> buffer;                    // transfer buffer (bytes)
  int buffer_size = 256;                     // transfer buffer size in bytes
  Print *p_sink = nullptr;                   // audio sink
  AudioInfo cfg;                             // sample rate / channels / bits
  void (*callback)(maxi_float_t *channels);  // Maximilian play function
};
} // namespace

View File

@@ -0,0 +1,38 @@
#pragma once
#include "AudioLogger.h"
#ifdef ESP32
#include "esp_heap_caps.h"
#endif
namespace audio_tools {
/**
 * @brief MemoryManager which activates the use of external SPIRAM memory.
 * When external memory is in use, the allocation strategy is to initially try
 * to satisfy smaller allocation requests with internal memory and larger
 * requests with external memory. This sets the limit between the two, as well
 * as generally enabling allocation in external memory.
 * @ingroup memorymgmt
 */
class MemoryManager {
 public:
  /// Default Constructor - call begin() to activate PSRAM
  MemoryManager() = default;

  /// Constructor which activates PSRAM. This constructor automatically calls
  /// begin()
  MemoryManager(int limit) { begin(limit); }

  /// Activate the PSRAM for allocated memory > limit; returns false when the
  /// platform provides no PSRAM support
  bool begin(int limit = 10000) {
#ifdef ESP32
    LOGI("Activate PSRAM from %d bytes", limit);
    heap_caps_malloc_extmem_enable(limit);
    return true;
#else
    // not an ESP32: nothing to activate
    return false;
#endif
  }
};
}

View File

@@ -0,0 +1,439 @@
#pragma once
/**
* @brief MiniAudio
* @author Phil Schatzmann
* @copyright GPLv3
*/
#include "AudioTools.h"
#include <mutex>
#include <atomic>
#define MINIAUDIO_IMPLEMENTATION
#include "miniaudio.h"
#define MA_BUFFER_COUNT 10
#define MA_BUFFER_SIZE 1200
#define MA_START_COUNT 2
#define MA_DELAY 10
namespace audio_tools {
/**
 * @brief Configuration for MiniAudio: audio format plus input/output mode and
 * ring buffer sizing.
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class MiniAudioConfig : public AudioInfo {
 public:
  /// Default constructor: 44.1 kHz, stereo, 16 bits
  MiniAudioConfig() {
    sample_rate = 44100;
    channels = 2;
    bits_per_sample = 16;
  };
  MiniAudioConfig(const MiniAudioConfig &) = default;
  /// Conversion from AudioInfo: copies sample rate, channels and bits
  MiniAudioConfig(const AudioInfo &in) {
    sample_rate = in.sample_rate;
    channels = in.channels;
    bits_per_sample = in.bits_per_sample;
  }
  /// activate audio capture (recording)
  bool is_input = false;
  /// activate audio playback
  bool is_output = true;
  /// wait time in ms when the ring buffer is full
  int delay_ms_if_buffer_full = MA_DELAY;
  /// size of a single buffer chunk in bytes
  int buffer_size = MA_BUFFER_SIZE;
  /// number of chunks in the ring buffer
  int buffer_count = MA_BUFFER_COUNT;
  /// number of filled chunks required before playback starts
  int buffer_start_count = MA_START_COUNT;
  /// Automatically restart after buffer underrun
  bool auto_restart_on_underrun = true;
  /// Number of empty reads before stopping playback
  int underrun_tolerance = 5;
};
/**
* @brief MiniAudio: https://miniaud.io/
* @ingroup io
* @author Phil Schatzmann
* @copyright GPLv3
*/
class MiniAudioStream : public AudioStream {
public:
MiniAudioStream() = default;
~MiniAudioStream() { end(); };
MiniAudioConfig defaultConfig(RxTxMode mode = RXTX_MODE) {
MiniAudioConfig info;
info.sample_rate = 44100;
info.channels = 2;
info.bits_per_sample = 16;
switch (mode) {
case RX_MODE:
info.is_input = true;
info.is_output = false;
break;
case TX_MODE:
info.is_input = false;
info.is_output = true;
break;
case RXTX_MODE:
info.is_input = true;
info.is_output = true;
break;
default:
info.is_input = false;
info.is_output = false;
break;
}
return info;
}
void setAudioInfo(AudioInfo in) override {
AudioStream::setAudioInfo(in);
if (in.sample_rate != config.sample_rate ||
in.channels != config.channels ||
in.bits_per_sample != config.bits_per_sample) {
config.copyFrom(in);
if (is_active.load()) {
is_active.store(false);
is_playing.store(false);
// This will stop the device so no need to do that manually.
ma_device_uninit(&device_ma);
begin();
}
}
}
bool begin(MiniAudioConfig info) {
AudioStream::setAudioInfo(info);
this->config = info;
return begin();
}
bool begin() override {
TRACEI();
setupBuffers(config.buffer_size);
if (config.is_output && !config.is_input)
config_ma = ma_device_config_init(ma_device_type_playback);
else if (!config.is_output && config.is_input)
config_ma = ma_device_config_init(ma_device_type_capture);
else if (config.is_output && config.is_input)
config_ma = ma_device_config_init(ma_device_type_duplex);
else if (!config.is_output && !config.is_input)
config_ma = ma_device_config_init(ma_device_type_loopback);
config_ma.pUserData = this;
config_ma.playback.channels = config.channels;
config_ma.sampleRate = config.sample_rate;
config_ma.dataCallback = data_callback;
switch (config.bits_per_sample) {
case 8:
config_ma.playback.format = ma_format_u8;
break;
case 16:
config_ma.playback.format = ma_format_s16;
break;
case 24:
config_ma.playback.format = ma_format_s24;
break;
case 32:
config_ma.playback.format = ma_format_s32;
break;
default:
LOGE("Invalid format");
return false;
}
if (ma_device_init(NULL, &config_ma, &device_ma) != MA_SUCCESS) {
// Failed to initialize the device.
return false;
}
// The device is sleeping by default so you'll need to start it manually.
if (ma_device_start(&device_ma) != MA_SUCCESS) {
// Failed to initialize the device.
ma_device_uninit(&device_ma);
return false;
}
is_active.store(true);
return is_active.load();
}
void end() override {
is_active.store(false);
is_playing.store(false);
// This will stop the device so no need to do that manually.
ma_device_uninit(&device_ma);
// release buffer memory
buffer_in.resize(0);
buffer_out.resize(0);
is_buffers_setup.store(false);
}
int availableForWrite() override {
return buffer_out.size() == 0 ? 0 : DEFAULT_BUFFER_SIZE;
}
size_t write(const uint8_t *data, size_t len) override {
// Input validation
if (!data || len == 0) {
LOGW("Invalid write parameters: data=%p, len=%zu", data, len);
return 0;
}
if (buffer_out.size() == 0) {
LOGW("Output buffer not initialized");
return 0;
}
if (!is_active.load()) {
LOGW("Stream not active");
return 0;
}
LOGD("write: %zu", len);
// write data to buffer
int open = len;
int written = 0;
int retry_count = 0;
const int max_retries = 1000; // Prevent infinite loops
while (open > 0 && retry_count < max_retries) {
size_t result = 0;
{
std::lock_guard<std::mutex> guard(write_mtx);
result = buffer_out.writeArray(data + written, open);
open -= result;
written += result;
}
if (result == 0) {
retry_count++;
doWait();
} else {
retry_count = 0; // Reset on successful write
}
}
if (retry_count >= max_retries) {
LOGE("Write timeout after %d retries, written %d of %zu bytes", max_retries, written, len);
}
// activate playing
// if (!is_playing && buffer_out.bufferCountFilled()>=MA_START_COUNT) {
int current_buffer_size = buffer_size.load();
bool should_start_playing = false;
// Start playing if we have enough data and either:
// 1. We're not playing yet, or
// 2. We stopped due to buffer underrun but now have data again
if (current_buffer_size > 0) {
int available_data = buffer_out.available();
int threshold = config.buffer_start_count * current_buffer_size;
if (!is_playing.load() && available_data >= threshold) {
should_start_playing = true;
} else if (is_playing.load() && available_data == 0) {
// Stop playing if buffer is completely empty (helps with long delays)
LOGW("Buffer empty, pausing playback");
is_playing.store(false);
}
}
if (should_start_playing) {
LOGI("starting audio playback");
is_playing.store(true);
}
// std::this_thread::yield();
return written;
}
int available() override {
return buffer_in.size() == 0 ? 0 : buffer_in.available();
}
size_t readBytes(uint8_t *data, size_t len) override {
if (!data || len == 0) {
LOGW("Invalid read parameters: data=%p, len=%zu", data, len);
return 0;
}
if (buffer_in.size() == 0) {
LOGW("Input buffer not initialized");
return 0;
}
if (!is_active.load()) {
LOGW("Stream not active");
return 0;
}
LOGD("read: %zu", len);
std::lock_guard<std::mutex> guard(read_mtx);
return buffer_in.readArray(data, len);
}
/// Manually restart playback (useful after long delays)
void restartPlayback() {
if (!is_active.load()) {
LOGW("Cannot restart playback - stream not active");
return;
}
int current_buffer_size = buffer_size.load();
if (current_buffer_size > 0 && buffer_out.available() > 0) {
LOGI("Manually restarting playback");
is_playing.store(true);
} else {
LOGW("Cannot restart playback - no data available");
}
}
/// Check if playback is currently active
bool isPlaying() const {
return is_playing.load();
}
protected:
MiniAudioConfig config;
ma_device_config config_ma;
ma_device device_ma;
std::atomic<bool> is_playing{false};
std::atomic<bool> is_active{false};
std::atomic<bool> is_buffers_setup{false};
RingBuffer<uint8_t> buffer_out{0};
RingBuffer<uint8_t> buffer_in{0};
std::mutex write_mtx;
std::mutex read_mtx;
std::atomic<int> buffer_size{0};
// In playback mode copy data to pOutput. In capture mode read data from
// pInput. In full-duplex mode, both pOutput and pInput will be valid and
// you can move data from pInput into pOutput. Never process more than
// frameCount frames.
void setupBuffers(int size = MA_BUFFER_SIZE) {
std::lock_guard<std::mutex> guard(write_mtx);
if (is_buffers_setup.load()) return;
// Validate buffer size
if (size <= 0 || size > 1024 * 1024) { // Max 1MB per buffer chunk
LOGE("Invalid buffer size: %d", size);
return;
}
buffer_size.store(size);
int buffer_count = config.buffer_count;
// Validate total buffer size to prevent excessive memory allocation
size_t total_size = static_cast<size_t>(size) * buffer_count;
if (total_size > 100 * 1024 * 1024) { // Max 100MB total
LOGE("Buffer size too large: %zu bytes", total_size);
return;
}
LOGI("setupBuffers: %d * %d = %zu bytes", size, buffer_count, total_size);
if (buffer_out.size() == 0 && config.is_output) {
if (!buffer_out.resize(size * buffer_count)) {
LOGE("Failed to resize output buffer");
return;
}
}
if (buffer_in.size() == 0 && config.is_input) {
if (!buffer_in.resize(size * buffer_count)) {
LOGE("Failed to resize input buffer");
return;
}
}
is_buffers_setup.store(true);
}
void doWait() {
//std::this_thread::yield();
delay(config.delay_ms_if_buffer_full);
//std::this_thread::sleep_for (std::chrono::milliseconds(MA_DELAY));
}
static void data_callback(ma_device *pDevice, void *pOutput,
const void *pInput, ma_uint32 frameCount) {
MiniAudioStream *self = (MiniAudioStream *)pDevice->pUserData;
if (!self || !self->is_active.load()) {
return; // Safety check
}
AudioInfo cfg = self->audioInfo();
if (cfg.channels == 0 || cfg.bits_per_sample == 0) {
LOGE("Invalid audio configuration in callback");
return;
}
int bytes = frameCount * cfg.channels * cfg.bits_per_sample / 8;
if (bytes <= 0 || bytes > 1024 * 1024) { // Sanity check
LOGE("Invalid byte count in callback: %d", bytes);
return;
}
self->setupBuffers(bytes);
if (pInput && self->buffer_in.size() > 0) {
int open = bytes;
int processed = 0;
int retry_count = 0;
const int max_retries = 100;
while (open > 0 && retry_count < max_retries && self->is_active.load()) {
int len = 0;
{
std::unique_lock<std::mutex> guard(self->read_mtx);
len = self->buffer_in.writeArray((uint8_t *)pInput + processed, open);
open -= len;
processed += len;
}
if (len == 0) {
retry_count++;
self->doWait();
} else {
retry_count = 0;
}
}
}
if (pOutput) {
memset(pOutput, 0, bytes);
if (self->is_playing.load() && self->buffer_out.size() > 0) {
int open = bytes;
int processed = 0;
int consecutive_failures = 0;
const int max_failures = self->config.underrun_tolerance;
while (open > 0 && self->is_active.load()) {
size_t len = 0;
{
std::lock_guard<std::mutex> guard(self->write_mtx);
len = self->buffer_out.readArray((uint8_t *)pOutput + processed, open);
open -= len;
processed += len;
}
if (len == 0) {
consecutive_failures++;
// If we can't get data for too long, stop playing to prevent issues
if (consecutive_failures >= max_failures && self->config.auto_restart_on_underrun) {
LOGW("Buffer underrun detected, stopping playback");
self->is_playing.store(false);
break;
}
// Don't wait in callback for too long - just output silence
break;
} else {
consecutive_failures = 0;
}
}
}
}
}
};
} // namespace audio_tools

Some files were not shown because too many files have changed in this diff Show More