snapshot
This commit is contained in:
43
libraries/audio-tools/src/AudioTools/AudioCodecs/All.h
Normal file
43
libraries/audio-tools/src/AudioTools/AudioCodecs/All.h
Normal file
@@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
|
||||
// All codecs, so that we can find any compile errors easily
|
||||
// This only works, when you have all codecs installed!
|
||||
#include "AudioTools/AudioCodecs/AudioEncoded.h"
|
||||
#include "AudioTools/AudioCodecs/ContainerOgg.h"
|
||||
#include "AudioTools/AudioCodecs/CodecOpus.h"
|
||||
#include "AudioTools/AudioCodecs/CodecOpusOgg.h"
|
||||
#include "AudioTools/AudioCodecs/CodecFLAC.h"
|
||||
#include "AudioTools/AudioCodecs/CodecVorbis.h"
|
||||
#include "AudioTools/AudioCodecs/CodecADPCM.h"
|
||||
//#include "AudioTools/AudioCodecs/CodecCodec2.h"
|
||||
#include "AudioTools/AudioCodecs/CodecGSM.h"
|
||||
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
|
||||
#include "AudioTools/AudioCodecs/CodecAACHelix.h"
|
||||
#include "AudioTools/AudioCodecs/ContainerBinary.h"
|
||||
#include "AudioTools/AudioCodecs/CodecADPCMXQ.h"
|
||||
#include "AudioTools/AudioCodecs/CodecCopy.h"
|
||||
#include "AudioTools/AudioCodecs/CodecHelix.h"
|
||||
#include "AudioTools/AudioCodecs/CodecMP3LAME.h"
|
||||
#include "AudioTools/AudioCodecs/CodecSBC.h"
|
||||
#include "AudioTools/AudioCodecs/ContainerM4A.h"
|
||||
#include "AudioTools/AudioCodecs/AudioFormat.h"
|
||||
#include "AudioTools/AudioCodecs/CodecADTS.h"
|
||||
#include "AudioTools/AudioCodecs/CodecILBC.h"
|
||||
#include "AudioTools/AudioCodecs/CodecMP3MAD.h"
|
||||
#include "AudioTools/AudioCodecs/CodecAACFAAD.h"
|
||||
#include "AudioTools/AudioCodecs/CodecAPTX.h"
|
||||
#include "AudioTools/AudioCodecs/CodecFloat.h"
|
||||
#include "AudioTools/AudioCodecs/CodecL16.h"
|
||||
#include "AudioTools/AudioCodecs/CodecWAV.h"
|
||||
#include "AudioTools/AudioCodecs/CodecAACFDK.h"
|
||||
#include "AudioTools/AudioCodecs/CodecBase64.h"
|
||||
#include "AudioTools/AudioCodecs/CodecG722.h"
|
||||
#include "AudioTools/AudioCodecs/CodecL8.h"
|
||||
#include "AudioTools/AudioCodecs/CodecMTS.h"
|
||||
#include "AudioTools/AudioCodecs/CodecWavIMA.h"
|
||||
#include "AudioTools/AudioCodecs/CodecBasic.h"
|
||||
#include "AudioTools/AudioCodecs/CodecG7xx.h"
|
||||
#include "AudioTools/AudioCodecs/CodecLC3.h"
|
||||
#include "AudioTools/AudioCodecs/ContainerAVI.h"
|
||||
#include "AudioTools/AudioCodecs/StreamingDecoder.h"
|
||||
//#include "AudioTools/AudioCodecs/CodecMP3Mini.h"
|
||||
@@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
/**
|
||||
* @defgroup codecs Codecs
|
||||
* @ingroup main
|
||||
* @brief Audio Coder and Decoder
|
||||
**/
|
||||
|
||||
/**
|
||||
* @defgroup encoder Encoder
|
||||
* @ingroup codecs
|
||||
* @brief Audio Encoder
|
||||
**/
|
||||
|
||||
/**
|
||||
* @defgroup decoder Decoder
|
||||
* @ingroup codecs
|
||||
* @brief Audio Decoder
|
||||
**/
|
||||
|
||||
// codecs that do not require any additional library
|
||||
#include "AudioTools/AudioCodecs/CodecWAV.h"
|
||||
#include "AudioTools/AudioCodecs/CodecCopy.h"
|
||||
#include "AudioTools/AudioCodecs/CodecL8.h"
|
||||
#include "AudioTools/AudioCodecs/CodecFloat.h"
|
||||
#include "AudioTools/AudioCodecs/CodecBase64.h"
|
||||
#include "AudioTools/AudioCodecs/CodecMTS.h"
|
||||
#include "AudioTools/AudioCodecs/CodecADTS.h"
|
||||
#include "AudioTools/AudioCodecs/CodecNetworkFormat.h"
|
||||
#include "AudioTools/AudioCodecs/CodecFactory.h"
|
||||
#include "AudioTools/AudioCodecs/StreamingDecoder.h"
|
||||
#include "AudioTools/AudioCodecs/MultiDecoder.h"
|
||||
@@ -0,0 +1,166 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioLogger.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
|
||||
#include "AudioTools/CoreAudio/AudioTypes.h"
|
||||
#include "AudioTools/CoreAudio/BaseStream.h"
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoding of encoded audio into PCM data
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioDecoder : public AudioWriter, public AudioInfoSource {
|
||||
public:
|
||||
AudioDecoder() = default;
|
||||
virtual ~AudioDecoder() = default;
|
||||
AudioDecoder(AudioDecoder const &) = delete;
|
||||
AudioDecoder &operator=(AudioDecoder const &) = delete;
|
||||
|
||||
AudioInfo audioInfo() override { return info; };
|
||||
|
||||
/// for most decoders this is not needed
|
||||
void setAudioInfo(AudioInfo from) override {
|
||||
TRACED();
|
||||
if (info != from) {
|
||||
info = from;
|
||||
notifyAudioChange(from);
|
||||
}
|
||||
}
|
||||
/// Defines where the decoded result is written to
|
||||
virtual void setOutput(AudioStream &out_stream) {
|
||||
Print *p_print = &out_stream;
|
||||
setOutput(*p_print);
|
||||
addNotifyAudioChange(out_stream);
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
virtual void setOutput(AudioOutput &out_stream) {
|
||||
Print *p_print = &out_stream;
|
||||
setOutput(*p_print);
|
||||
addNotifyAudioChange(out_stream);
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
virtual void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
/// Returns true to indicate that the decoding result is PCM data
|
||||
virtual bool isResultPCM() { return true; }
|
||||
virtual bool begin(AudioInfo info) override {
|
||||
setAudioInfo(info);
|
||||
return begin();
|
||||
}
|
||||
bool begin() override { return true; }
|
||||
void end() override {}
|
||||
|
||||
/// custom id to be used by application
|
||||
int id;
|
||||
|
||||
Print* getOutput(){
|
||||
return p_print;
|
||||
}
|
||||
|
||||
/// Some decoders need e.g. a magic cookie to provide the relevant info for decoding
|
||||
virtual bool setCodecConfig(const uint8_t* data, size_t len){
|
||||
LOGE("not implemented");
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
AudioInfo info;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Parent class for all container formats
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class ContainerDecoder : public AudioDecoder {
|
||||
bool isResultPCM() override { return true; }
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Encoding of PCM data
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioEncoder : public AudioWriter {
|
||||
public:
|
||||
AudioEncoder() = default;
|
||||
virtual ~AudioEncoder() = default;
|
||||
AudioEncoder(AudioEncoder const &) = delete;
|
||||
AudioEncoder &operator=(AudioEncoder const &) = delete;
|
||||
/// Provides the mime type of the encoded result
|
||||
virtual const char *mime() = 0;
|
||||
/// Defines the sample rate, number of channels and bits per sample
|
||||
void setAudioInfo(AudioInfo from) override { info = from; }
|
||||
AudioInfo audioInfo() override { return info; }
|
||||
/// Default output assignment (encoders may override to store Print reference)
|
||||
virtual void setOutput(Print &out_stream) override { (void)out_stream; }
|
||||
/// Optional rtsp function: provide the frame duration in microseconds
|
||||
virtual uint32_t frameDurationUs() { return 0;};
|
||||
/// Optional rtsp function: provide samples per the frame
|
||||
virtual uint16_t samplesPerFrame() { return 0;};
|
||||
|
||||
protected:
|
||||
AudioInfo info;
|
||||
};
|
||||
|
||||
/// Extended decoder interface: adds block-size control for decoders that
/// process the input in fixed-size chunks.
class AudioDecoderExt : public AudioDecoder {
 public:
  /// Defines the size (in bytes) of one encoded block
  virtual void setBlockSize(int blockSize) = 0;
};
|
||||
|
||||
/// Extended encoder interface: exposes the encoder's block size for callers
/// that need to feed data in properly sized chunks.
class AudioEncoderExt : public AudioEncoder {
 public:
  /// Reports the size (in bytes) of one encoded block
  virtual int blockSize() = 0;
};
|
||||
|
||||
/**
|
||||
 * @brief Dummy no implementation Codec. This is used so that we can initialize
|
||||
* some pointers to decoders and encoders to make sure that they do not point to
|
||||
* null.
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class CodecNOP : public AudioDecoder, public AudioEncoder {
|
||||
public:
|
||||
static CodecNOP *instance() {
|
||||
static CodecNOP self;
|
||||
return &self;
|
||||
}
|
||||
|
||||
virtual bool begin() { return true; }
|
||||
virtual void end() {}
|
||||
virtual void setOutput(Print &out_stream) {}
|
||||
virtual void addNotifyAudioChange(AudioInfoSupport &bi) {}
|
||||
virtual void setAudioInfo(AudioInfo info) {}
|
||||
|
||||
virtual AudioInfo audioInfo() {
|
||||
AudioInfo info;
|
||||
return info;
|
||||
}
|
||||
virtual operator bool() { return false; }
|
||||
virtual int readStream(Stream &in) { return 0; };
|
||||
|
||||
// just output silence
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
memset((void *)data, 0, len);
|
||||
return len;
|
||||
}
|
||||
|
||||
virtual const char *mime() { return nullptr; }
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
508
libraries/audio-tools/src/AudioTools/AudioCodecs/AudioEncoded.h
Normal file
508
libraries/audio-tools/src/AudioTools/AudioCodecs/AudioEncoded.h
Normal file
@@ -0,0 +1,508 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioCodecsBase.h"
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/CoreAudio/AudioLogger.h"
|
||||
#include "AudioTools/CoreAudio/AudioIO.h"
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "AudioTools/CoreAudio/AudioTypes.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief A more natural Print class to process encoded data (aac, wav,
|
||||
* mp3...). Just define the output and the decoder and write the encoded
|
||||
* data.
|
||||
* @ingroup transform
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncodedAudioOutput : public ModifyingOutput {
 public:
  // all constructors start inactive: begin() must be called before writing
  EncodedAudioOutput() { active = false; }

  EncodedAudioOutput(AudioDecoder *decoder) {
    setDecoder(decoder);
    active = false;
  }

  EncodedAudioOutput(AudioEncoder *encoder) {
    setEncoder(encoder);
    active = false;
  }

  EncodedAudioOutput(AudioStream *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(AudioOutput *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(Print *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(Print *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(AudioOutput *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(outputStream);
    active = false;
  }

  EncodedAudioOutput(AudioStream *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(outputStream);
    active = false;
  }

  virtual ~EncodedAudioOutput() { end(); }

  /// Define object which needs to be notified if the base info is changing.
  /// Notifications are forwarded from the decoder (not the encoder).
  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    TRACEI();
    decoder_ptr->addNotifyAudioChange(bi);
  }

  /// Provides a default (empty) configuration
  AudioInfo defaultConfig() {
    AudioInfo cfg;
    return cfg;
  }

  /// Updates the config and forwards it to decoder and encoder; invalid
  /// (falsy) or unchanged info is ignored
  virtual void setAudioInfo(AudioInfo newInfo) override {
    TRACED();
    if (this->cfg != newInfo && newInfo) {
      this->cfg = newInfo;
      decoder_ptr->setAudioInfo(cfg);
      encoder_ptr->setAudioInfo(cfg);
    }
  }

  /// Provide audio info from decoder if relevant
  AudioInfo audioInfo() override {
    // return info from decoder if available (a real decoder is set and valid)
    if (decoder_ptr != undefined && *decoder_ptr){
      AudioInfo info = decoder_ptr->audioInfo();
      if (info) return info;
    }
    // fall back to the info stored on the base class
    return ModifyingOutput::audioInfo();
  }

  /// Defines the output: forwarded to decoder and encoder when they are real
  /// (i.e. not the CodecNOP placeholder)
  void setOutput(Print *outputStream) {
    ptr_out = outputStream;
    if (decoder_ptr != undefined) {
      decoder_ptr->setOutput(*ptr_out);
    }
    if (encoder_ptr != undefined) {
      encoder_ptr->setOutput(*ptr_out);
    }
  }

  /// Defines the output; additionally remembers the stream so it can be
  /// registered for audio-change notifications in begin()
  void setOutput(AudioStream* out) {
    setOutput((Print*)out);
    to_notify = out;
  }

  /// Defines the output; additionally remembers it for notifications
  void setOutput(AudioOutput*out){
    setOutput((Print*)out);
    to_notify = out;
  }

  void setOutput(Print &outputStream) override { setOutput(&outputStream); }
  void setOutput(AudioOutput &outputStream) { setOutput(&outputStream); }
  void setOutput(AudioStream &outputStream) { setOutput(&outputStream); }

  /// Defines the encoder; nullptr is replaced by the CodecNOP placeholder so
  /// later calls never need a null check
  void setEncoder(AudioEncoder *encoder) {
    if (encoder == nullptr) {
      encoder = undefined;
    }
    encoder_ptr = encoder;
    writer_ptr = encoder;
    if (ptr_out != nullptr) {
      encoder_ptr->setOutput(*ptr_out);
    }
  }

  AudioEncoder *getEncoder() { return encoder_ptr; }

  /// Defines the decoder; nullptr is replaced by the CodecNOP placeholder
  void setDecoder(AudioDecoder *decoder) {
    if (decoder == nullptr) {
      decoder = undefined;
    }
    decoder_ptr = decoder;
    writer_ptr = decoder;
    if (ptr_out != nullptr) {
      decoder_ptr->setOutput(*ptr_out);
    }
  }

  AudioDecoder *getDecoder() { return decoder_ptr; }

  /// Starts the processing - sets the status to active
  bool begin() override {
    TRACED();
    if (!active) {
      TRACED();
      // Setup notification: the target stream learns about format changes
      if (to_notify != nullptr) {
        decoder_ptr->addNotifyAudioChange(*to_notify);
      }
      // Get notifications from decoder
      decoder_ptr->addNotifyAudioChange(*this);
      if (decoder_ptr != undefined || encoder_ptr != undefined) {
        active = true;
        // either call may veto activation; both are safe because
        // unset slots hold the CodecNOP placeholder
        if (!decoder_ptr->begin(cfg)) active = false;
        if (!encoder_ptr->begin(cfg)) active = false;
      } else {
        LOGW("no decoder or encoder defined");
      }
    }
    return active;
  }

  /// Starts the processing - sets the status to active
  virtual bool begin(AudioInfo newInfo) override {
    setAudioInfo(newInfo);
    return begin();
  }

  /// Ends the processing
  void end() override {
    if (active) {
      TRACEI();
      decoder_ptr->end();
      encoder_ptr->end();
      active = false;
    }
  }

  /// encode or decode the data: forwarded to whichever of decoder/encoder
  /// was set last (writer_ptr)
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (len == 0) {
      // LOGI("write: %d", 0);
      return 0;
    }
    LOGD("EncodedAudioOutput::write: %d", (int)len);

    if (writer_ptr == nullptr || data == nullptr) {
      LOGE("NPE");
      return 0;
    }

    // optional back-pressure check (off by default)
    if (check_available_for_write && availableForWrite() == 0) {
      return 0;
    }

    size_t result = writer_ptr->write(data, len);
    LOGD("EncodedAudioOutput::write: %d -> %d", (int)len, (int)result);
    return result;
  }

  /// Reports frame_size, or the smaller of the sink's capacity and
  /// frame_size when the availability check is enabled
  int availableForWrite() override {
    if (!check_available_for_write) return frame_size;
    return min(ptr_out->availableForWrite(), frame_size);
  }

  /// Returns true if status is active and we still have data to be processed
  operator bool() override { return active; }

  /// Provides the initialized decoder
  AudioDecoder &decoder() { return *decoder_ptr; }

  /// Provides the initialized encoder
  AudioEncoder &encoder() { return *encoder_ptr; }

  /// Is Available for Write check activated ?
  bool isCheckAvailableForWrite() { return check_available_for_write; }

  /// defines the size of the decoded frame in bytes
  void setFrameSize(int size) { frame_size = size; }

  /// Shallow copy: decoder/encoder/output pointers are shared, not cloned
  EncodedAudioOutput& operator=(EncodedAudioOutput const& src) {
    decoder_ptr = src.decoder_ptr;
    encoder_ptr = src.encoder_ptr;
    ptr_out = src.ptr_out;
    active = src.active;
    check_available_for_write = src.check_available_for_write;
    frame_size = src.frame_size;
    cfg = src.cfg;
    // NOTE(review): 'is_active' is not declared in this class - presumably
    // inherited from ModifyingOutput/AudioOutput; verify, and note that
    // writer_ptr/to_notify are NOT copied here
    is_active = src.is_active;
    return *this;
  }

 protected:
  // AudioInfo info;
  CodecNOP* undefined = CodecNOP::instance();  // shared null-object placeholder
  AudioDecoder *decoder_ptr = undefined;  // decoder
  AudioEncoder *encoder_ptr = undefined;  // encoder
  AudioWriter *writer_ptr = nullptr;      // active writer (decoder or encoder)
  Print *ptr_out = nullptr;               // final sink
  AudioInfoSupport *to_notify = nullptr;  // registered in begin()
  bool active = false;
  bool check_available_for_write = false;
  int frame_size = DEFAULT_BUFFER_SIZE;
};
|
||||
|
||||
/// @brief Legacy alias for EncodedAudioOutput
|
||||
/// @ingroup codecs
|
||||
using EncodedAudioPrint = EncodedAudioOutput;
|
||||
|
||||
/**
|
||||
* @brief A more natural Stream class to process encoded data (aac, wav,
|
||||
* mp3...) which also supports the decoding by calling readBytes().
|
||||
* @ingroup transform
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncodedAudioStream : public ReformatBaseStream {
 public:
  EncodedAudioStream() = default;

  EncodedAudioStream(AudioStream *ioStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setStream(*ioStream);
  }

  EncodedAudioStream(Stream *ioStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setStream(*ioStream);
  }

  EncodedAudioStream(AudioOutput *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(*outputStream);
  }

  EncodedAudioStream(Print *outputStream, AudioDecoder *decoder) {
    setDecoder(decoder);
    setOutput(*outputStream);
  }

  EncodedAudioStream(Print *outputStream, AudioEncoder *encoder) {
    setEncoder(encoder);
    setOutput(*outputStream);
  }

  EncodedAudioStream(AudioDecoder *decoder) { setDecoder(decoder); }

  EncodedAudioStream(AudioEncoder *encoder) { setEncoder(encoder); }

  virtual ~EncodedAudioStream() { end(); }

  // codec setup is delegated to the embedded EncodedAudioOutput
  void setEncoder(AudioEncoder *encoder) { enc_out.setEncoder(encoder); }

  void setDecoder(AudioDecoder *decoder) { enc_out.setDecoder(decoder); }

  AudioEncoder *getEncoder() { return enc_out.getEncoder(); }

  AudioDecoder *getDecoder() { return enc_out.getDecoder(); }

  /// Provides the initialized decoder
  AudioDecoder &decoder() { return *getDecoder(); }

  /// Provides the initialized encoder
  AudioEncoder &encoder() { return *getEncoder(); }

  // pointer convenience overloads forwarding to the reference versions
  void setStream(Stream *stream) { setStream(*stream); }

  void setStream(AudioStream *stream) { setStream(*stream); }

  void setOutput(AudioOutput *stream) { setOutput(*stream); }

  void setOutput(Print *stream) { setOutput(*stream); }

  /// Registers the stream with the base class AND uses it as codec output
  void setStream(AudioStream &stream) override {
    ReformatBaseStream::setStream(stream);
    enc_out.setOutput(&stream);
  }

  void setStream(Stream &stream) override {
    ReformatBaseStream::setStream(stream);
    enc_out.setOutput(&stream);
  }

  void setOutput(AudioOutput &stream) override {
    ReformatBaseStream::setOutput(stream);
    enc_out.setOutput(&stream);
  }

  void setOutput(Print &out) override {
    ReformatBaseStream::setOutput(out);
    enc_out.setOutput(&out);
  }

  /// Provides a default (empty) configuration
  AudioInfo defaultConfig() {
    AudioInfo ai;
    return ai;
  }

  /// Starts the processing with the indicated audio info
  bool begin(AudioInfo info) {
    setAudioInfo(info);
    return begin();
  }

  /// Starts the processing: the read path (setupReader/base begin) must be
  /// set up before the codec chain is started
  bool begin() override {
    // is_output_notify = false;
    setupReader();
    ReformatBaseStream::begin();
    enc_out.addNotifyAudioChange(*this);
    return enc_out.begin(audioInfo());
  }

  /// Stops codec chain and reader
  void end() override {
    enc_out.end();
    reader.end();
  }

  int availableForWrite() override { return enc_out.availableForWrite(); }

  /// Write path: encoded data in -> codec -> output
  size_t write(const uint8_t *data, size_t len) override {
    // addNotifyOnFirstWrite();
    return enc_out.write(data, len);
  }

  /// Read path: decoding is driven by the base-class reader
  size_t readBytes(uint8_t *data, size_t len) override {
    return reader.readBytes(data, len);
  }

  void addNotifyAudioChange(AudioInfoSupport &bi) override {
    enc_out.addNotifyAudioChange(bi);
  }

  /// approx compression factor: e.g. mp3 is around 4
  float getByteFactor() override { return byte_factor; }
  void setByteFactor(float factor) { byte_factor = factor; }

  /// defines the size of the decoded frame in bytes
  void setFrameSize(int size) { enc_out.setFrameSize(size); }

  /// Shallow copy: shares codec/stream pointers with src
  EncodedAudioStream& operator=(EncodedAudioStream const& src) {
    enc_out = src.enc_out;
    byte_factor = src.byte_factor;
    p_stream = src.p_stream;
    p_print = src.p_print;
    info = src.info;
    return *this;
  };

  AudioInfo audioInfo() override {
    return enc_out.audioInfo();;
  }

  /// Updates the info on the base class and the codec chain
  void setAudioInfo(AudioInfo newInfo) override {
    ReformatBaseStream::setAudioInfo(newInfo);
    enc_out.setAudioInfo(newInfo);
  }

 protected:
  EncodedAudioOutput enc_out;   // does the actual encode/decode work
  float byte_factor = 2.0f;     // encoded-to-PCM size ratio estimate
};
|
||||
|
||||
/**
|
||||
* @brief Adapter class which lets an AudioWriter behave like a Print
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*
|
||||
*/
|
||||
|
||||
class AudioWriterToAudioOutput : public AudioOutputAdapter {
|
||||
public:
|
||||
void setWriter(AudioWriter *writer) { p_writer = writer; }
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
return p_writer->write(data, len);
|
||||
};
|
||||
|
||||
protected:
|
||||
AudioWriter *p_writer = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief ContainerTarget: forwards requests to both the output and the
|
||||
* encoder/decoder and sets up the output chain for Containers. We also
|
||||
* manage the proper sequence of the output classes
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ContainerTarget {
|
||||
public:
|
||||
virtual bool begin() = 0;
|
||||
virtual void end() = 0;
|
||||
virtual void setAudioInfo(AudioInfo info) {
|
||||
if (this->info != info && info.channels != 0 && info.sample_rate != 0) {
|
||||
this->info = info;
|
||||
if (p_writer1 != nullptr) p_writer1->setAudioInfo(info);
|
||||
if (p_writer2 != nullptr) p_writer2->setAudioInfo(info);
|
||||
}
|
||||
}
|
||||
virtual size_t write(uint8_t *data, size_t size) = 0;
|
||||
|
||||
protected:
|
||||
AudioInfo info;
|
||||
AudioWriter *p_writer1 = nullptr;
|
||||
AudioWriter *p_writer2 = nullptr;
|
||||
AudioWriterToAudioOutput print2;
|
||||
bool active = false;
|
||||
};
|
||||
|
||||
class ContainerTargetPrint : public ContainerTarget {
|
||||
public:
|
||||
void setupOutput(AudioWriter *writer1, AudioWriter *writer2, Print &print) {
|
||||
p_print = &print;
|
||||
p_writer1 = writer1;
|
||||
p_writer2 = writer2;
|
||||
print2.setWriter(p_writer2);
|
||||
}
|
||||
|
||||
void setupOutput(AudioWriter *writer1, Print &print) {
|
||||
p_print = &print;
|
||||
p_writer1 = writer1;
|
||||
}
|
||||
|
||||
virtual bool begin() {
|
||||
if (!active) {
|
||||
active = true;
|
||||
if (p_writer2 != nullptr) {
|
||||
p_writer1->setOutput(print2);
|
||||
p_writer2->setOutput(*p_print);
|
||||
p_writer1->begin();
|
||||
p_writer2->begin();
|
||||
} else {
|
||||
p_writer1->setOutput(*p_print);
|
||||
p_writer1->begin();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
virtual void end() {
|
||||
if (active) {
|
||||
if (p_writer1 != nullptr) p_writer1->end();
|
||||
if (p_writer2 != nullptr) p_writer2->end();
|
||||
}
|
||||
active = false;
|
||||
}
|
||||
virtual size_t write(uint8_t *data, size_t size) {
|
||||
TRACED();
|
||||
return p_writer1->write(data, size);
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
AudioWriterToAudioOutput print2;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
292
libraries/audio-tools/src/AudioTools/AudioCodecs/AudioFormat.h
Normal file
292
libraries/audio-tools/src/AudioTools/AudioCodecs/AudioFormat.h
Normal file
@@ -0,0 +1,292 @@
|
||||
/**
|
||||
* @file AudioFormat.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief WAV Audio Formats used by Microsoft e.g. in AVI video files
|
||||
* @version 0.1
|
||||
* @date 2023-05-06
|
||||
*
|
||||
* @copyright Copyright (c) 2023
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Audio format codes used by Microsoft e.g. in avi or wav files
|
||||
* @ingroup video
|
||||
*/
|
||||
enum class AudioFormat : uint16_t {
|
||||
UNKNOWN = 0x0000, /* Microsoft Corporation */
|
||||
PCM = 0x0001,
|
||||
ADPCM = 0x0002, /* Microsoft Corporation */
|
||||
IEEE_FLOAT = 0x0003, /* Microsoft Corporation */
|
||||
// VSELP = 0x0004, /* Compaq Computer Corp. */
|
||||
// IBM_CVSD = 0x0005, /* IBM Corporation */
|
||||
ALAW = 0x0006, /* Microsoft Corporation */
|
||||
MULAW = 0x0007, /* Microsoft Corporation */
|
||||
// DTS = 0x0008, /* Microsoft Corporation */
|
||||
// DRM = 0x0009, /* Microsoft Corporation */
|
||||
// WMAVOICE9 = 0x000A, /* Microsoft Corporation */
|
||||
// WMAVOICE10 = 0x000B, /* Microsoft Corporation */
|
||||
OKI_ADPCM = 0x0010, /* OKI */
|
||||
DVI_ADPCM = 0x0011, /* Intel Corporation */
|
||||
//IMA_ADPCM(DVI_ADPCM), /* Intel Corporation */
|
||||
MEDIASPACE_ADPCM = 0x0012, /* Videologic */
|
||||
SIERRA_ADPCM = 0x0013, /* Sierra Semiconductor Corp */
|
||||
G723_ADPCM = 0x0014, /* Antex Electronics Corporation */
|
||||
// DIGISTD = 0x0015, /* DSP Solutions, Inc. */
|
||||
// DIGIFIX = 0x0016, /* DSP Solutions, Inc. */
|
||||
DIALOGIC_OKI_ADPCM = 0x0017, /* Dialogic Corporation */
|
||||
MEDIAVISION_ADPCM = 0x0018, /* Media Vision, Inc. */
|
||||
// CU_CODEC = 0x0019, /* Hewlett-Packard Company */
|
||||
// HP_DYN_VOICE = 0x001A, /* Hewlett-Packard Company */
|
||||
YAMAHA_ADPCM = 0x0020, /* Yamaha Corporation of America */
|
||||
// SONARC = 0x0021, /* Speech Compression */
|
||||
// DSPGROUP_TRUESPEECH = 0x0022, /* DSP Group, Inc */
|
||||
// ECHOSC1 = 0x0023, /* Echo Speech Corporation */
|
||||
// AUDIOFILE_AF36 = 0x0024, /* Virtual Music, Inc. */
|
||||
// APTX = 0x0025, /* Audio Processing Technology */
|
||||
// AUDIOFILE_AF10 = 0x0026, /* Virtual Music, Inc. */
|
||||
// PROSODY_1612 = 0x0027, /* Aculab plc */
|
||||
// LRC = 0x0028, /* Merging Technologies S.A. */
|
||||
// DOLBY_AC2 = 0x0030, /* Dolby Laboratories */
|
||||
// GSM610 = 0x0031, /* Microsoft Corporation */
|
||||
// MSNAUDIO = 0x0032, /* Microsoft Corporation */
|
||||
ANTEX_ADPCME = 0x0033, /* Antex Electronics Corporation */
|
||||
// CONTROL_RES_VQLPC = 0x0034, /* Control Resources Limited */
|
||||
// DIGIREAL = 0x0035, /* DSP Solutions, Inc. */
|
||||
DIGIADPCM = 0x0036, /* DSP Solutions, Inc. */
|
||||
// CONTROL_RES_CR10 = 0x0037, /* Control Resources Limited */
|
||||
NMS_VBXADPCM = 0x0038, /* Natural MicroSystems */
|
||||
CS_IMAADPCM = 0x0039, /* Crystal Semiconductor IMA ADPCM */
|
||||
// ECHOSC3 = 0x003A, /* Echo Speech Corporation */
|
||||
ROCKWELL_ADPCM = 0x003B, /* Rockwell International */
|
||||
// ROCKWELL_DIGITALK = 0x003C, /* Rockwell International */
|
||||
// XEBEC = 0x003D, /* Xebec Multimedia Solutions Limited */
|
||||
G721_ADPCM = 0x0040, /* Antex Electronics Corporation */
|
||||
// G728_CELP = 0x0041, /* Antex Electronics Corporation */
|
||||
// MSG723 = 0x0042, /* Microsoft Corporation */
|
||||
// INTEL_G723_1 = 0x0043, /* Intel Corp. */
|
||||
// INTEL_G729 = 0x0044, /* Intel Corp. */
|
||||
// SHARP_G726 = 0x0045, /* Sharp */
|
||||
// MPEG = 0x0050, /* Microsoft Corporation */
|
||||
// RT24 = 0x0052, /* InSoft, Inc. */
|
||||
// PAC = 0x0053, /* InSoft, Inc. */
|
||||
// MPEGLAYER3 = 0x0055, /* ISO/MPEG Layer3 Format Tag */
|
||||
// LUCENT_G723 = 0x0059, /* Lucent Technologies */
|
||||
// CIRRUS = 0x0060, /* Cirrus Logic */
|
||||
// ESPCM = 0x0061, /* ESS Technology */
|
||||
// VOXWARE = 0x0062, /* Voxware Inc */
|
||||
// CANOPUS_ATRAC = 0x0063, /* Canopus, co., Ltd. */
|
||||
G726_ADPCM = 0x0064, /* APICOM */
|
||||
G722_ADPCM = 0x0065, /* APICOM */
|
||||
// DSAT = 0x0066, /* Microsoft Corporation */
|
||||
// DSAT_DISPLAY = 0x0067, /* Microsoft Corporation */
|
||||
// VOXWARE_BYTE_ALIGNED = 0x0069, /* Voxware Inc */
|
||||
// VOXWARE_AC8 = 0x0070, /* Voxware Inc */
|
||||
// VOXWARE_AC10 = 0x0071, /* Voxware Inc */
|
||||
// VOXWARE_AC16 = 0x0072, /* Voxware Inc */
|
||||
// VOXWARE_AC20 = 0x0073, /* Voxware Inc */
|
||||
// VOXWARE_RT24 = 0x0074, /* Voxware Inc */
|
||||
// VOXWARE_RT29 = 0x0075, /* Voxware Inc */
|
||||
// VOXWARE_RT29HW = 0x0076, /* Voxware Inc */
|
||||
// VOXWARE_VR12 = 0x0077, /* Voxware Inc */
|
||||
// VOXWARE_VR18 = 0x0078, /* Voxware Inc */
|
||||
// VOXWARE_TQ40 = 0x0079, /* Voxware Inc */
|
||||
// VOXWARE_SC3 = 0x007A, /* Voxware Inc */
|
||||
// VOXWARE_SC3_1 = 0x007B, /* Voxware Inc */
|
||||
// SOFTSOUND = 0x0080, /* Softsound, Ltd. */
|
||||
// VOXWARE_TQ60 = 0x0081, /* Voxware Inc */
|
||||
// MSRT24 = 0x0082, /* Microsoft Corporation */
|
||||
// G729A = 0x0083, /* AT&T Labs, Inc. */
|
||||
// MVI_MVI2 = 0x0084, /* Motion Pixels */
|
||||
// DF_G726 = 0x0085, /* DataFusion Systems (Pty) (Ltd) */
|
||||
// DF_GSM610 = 0x0086, /* DataFusion Systems (Pty) (Ltd) */
|
||||
// ISIAUDIO = 0x0088, /* Iterated Systems, Inc. */
|
||||
// ONLIVE = 0x0089, /* OnLive! Technologies, Inc. */
|
||||
// MULTITUDE_FT_SX20 = 0x008A, /* Multitude Inc. */
|
||||
INFOCOM_ITS_G721_ADPCM = 0x008B, /* Infocom */
|
||||
// CONVEDIA_G729 = 0x008C, /* Convedia Corp. */
|
||||
// CONGRUENCY = 0x008D, /* Congruency Inc. */
|
||||
// SBC24 = 0x0091, /* Siemens Business Communications Sys */
|
||||
// DOLBY_AC3_SPDIF = 0x0092, /* Sonic Foundry */
|
||||
// MEDIASONIC_G723 = 0x0093, /* MediaSonic */
|
||||
// PROSODY_8KBPS = 0x0094, /* Aculab plc */
|
||||
ZYXEL_ADPCM = 0x0097, /* ZyXEL Communications, Inc. */
|
||||
// PHILIPS_LPCBB = 0x0098, /* Philips Speech Processing */
|
||||
// PACKED = 0x0099, /* Studer Professional Audio AG */
|
||||
// MALDEN_PHONYTALK = 0x00A0, /* Malden Electronics Ltd. */
|
||||
// RACAL_RECORDER_GSM = 0x00A1, /* Racal recorders */
|
||||
// RACAL_RECORDER_G720_A = 0x00A2, /* Racal recorders */
|
||||
// RACAL_RECORDER_G723_1 = 0x00A3, /* Racal recorders */
|
||||
// RACAL_RECORDER_TETRA_ACELP = 0x00A4, /* Racal recorders */
|
||||
// NEC_AAC = 0x00B0, /* NEC Corp. */
|
||||
// RAW_AAC1 = 0x00FF, /* For Raw AAC, with format block
|
||||
RHETOREX_ADPCM = 0x0100, /* Rhetorex Inc. */
|
||||
// IRAT = 0x0101, /* BeCubed Software Inc. */
|
||||
// VIVO_G723 = 0x0111, /* Vivo Software */
|
||||
// VIVO_SIREN = 0x0112, /* Vivo Software */
|
||||
// PHILIPS_CELP = 0x0120, /* Philips Speech Processing */
|
||||
// PHILIPS_GRUNDIG = 0x0121, /* Philips Speech Processing */
|
||||
// DIGITAL_G723 = 0x0123, /* Digital Equipment Corporation */
|
||||
SANYO_LD_ADPCM = 0x0125, /* Sanyo Electric Co., Ltd. */
|
||||
// SIPROLAB_ACEPLNET = 0x0130, /* Sipro Lab Telecom Inc. */
|
||||
// SIPROLAB_ACELP4800 = 0x0131, /* Sipro Lab Telecom Inc. */
|
||||
// SIPROLAB_ACELP8V3 = 0x0132, /* Sipro Lab Telecom Inc. */
|
||||
// SIPROLAB_G729 = 0x0133, /* Sipro Lab Telecom Inc. */
|
||||
// SIPROLAB_G729A = 0x0134, /* Sipro Lab Telecom Inc. */
|
||||
// SIPROLAB_KELVIN = 0x0135, /* Sipro Lab Telecom Inc. */
|
||||
// VOICEAGE_AMR = 0x0136, /* VoiceAge Corp. */
|
||||
G726ADPCM = 0x0140, /* Dictaphone Corporation */
|
||||
// DICTAPHONE_CELP68 = 0x0141, /* Dictaphone Corporation */
|
||||
// DICTAPHONE_CELP54 = 0x0142, /* Dictaphone Corporation */
|
||||
// QUALCOMM_PUREVOICE = 0x0150, /* Qualcomm, Inc. */
|
||||
// QUALCOMM_HALFRATE = 0x0151, /* Qualcomm, Inc. */
|
||||
// TUBGSM = 0x0155, /* Ring Zero Systems, Inc. */
|
||||
// MSAUDIO1 = 0x0160, /* Microsoft Corporation */
|
||||
// WMAUDIO2 = 0x0161, /* Microsoft Corporation */
|
||||
// WMAUDIO3 = 0x0162, /* Microsoft Corporation */
|
||||
// WMAUDIO_LOSSLESS = 0x0163, /* Microsoft Corporation */
|
||||
// WMASPDIF = 0x0164, /* Microsoft Corporation */
|
||||
UNISYS_NAP_ADPCM = 0x0170, /* Unisys Corp. */
|
||||
// UNISYS_NAP_ULAW = 0x0171, /* Unisys Corp. */
|
||||
// UNISYS_NAP_ALAW = 0x0172, /* Unisys Corp. */
|
||||
// UNISYS_NAP_16K = 0x0173, /* Unisys Corp. */
|
||||
// SYCOM_ACM_SYC008 = 0x0174, /* SyCom Technologies */
|
||||
// SYCOM_ACM_SYC701_G726L = 0x0175, /* SyCom Technologies */
|
||||
// SYCOM_ACM_SYC701_CELP54 = 0x0176, /* SyCom Technologies */
|
||||
// SYCOM_ACM_SYC701_CELP68 = 0x0177, /* SyCom Technologies */
|
||||
// KNOWLEDGE_ADVENTURE_ADPCM = 0x0178, /* Knowledge Adventure, Inc.*/
|
||||
// FRAUNHOFER_IIS_MPEG2_AAC = 0x0180, /* Fraunhofer IIS */
|
||||
// DTS_DS = 0x0190, /* Digital Theatre Systems, Inc. */
|
||||
// CREATIVE_ADPCM = 0x0200, /* Creative Labs, Inc */
|
||||
// CREATIVE_FASTSPEECH8 = 0x0202, /* Creative Labs, Inc */
|
||||
// CREATIVE_FASTSPEECH10 = 0x0203, /* Creative Labs, Inc */
|
||||
// UHER_ADPCM = 0x0210, /* UHER informatic GmbH */
|
||||
// ULEAD_DV_AUDIO = 0x0215, /* Ulead Systems, Inc. */
|
||||
// ULEAD_DV_AUDIO_1 = 0x0216, /* Ulead Systems, Inc. */
|
||||
// QUARTERDECK = 0x0220, /* Quarterdeck Corporation */
|
||||
// ILINK_VC = 0x0230, /* I-link Worldwide */
|
||||
// RAW_SPORT = 0x0240, /* Aureal Semiconductor */
|
||||
// ESST_AC3 = 0x0241, /* ESS Technology, Inc. */
|
||||
// GENERIC_PASSTHRU = 0x0249,
|
||||
// IPI_HSX = 0x0250, /* Interactive Products, Inc. */
|
||||
// IPI_RPELP = 0x0251, /* Interactive Products, Inc. */
|
||||
// CS2 = 0x0260, /* Consistent Software */
|
||||
// SONY_SCX = 0x0270, /* Sony Corp. */
|
||||
// SONY_SCY = 0x0271, /* Sony Corp. */
|
||||
// SONY_ATRAC3 = 0x0272, /* Sony Corp. */
|
||||
// SONY_SPC = 0x0273, /* Sony Corp. */
|
||||
// TELUM_AUDIO = 0x0280, /* Telum Inc. */
|
||||
// TELUM_IA_AUDIO = 0x0281, /* Telum Inc. */
|
||||
// NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285, /* Norcom Electronics Corp.*/
|
||||
// FM_TOWNS_SND = 0x0300, /* Fujitsu Corp. */
|
||||
// MICRONAS = 0x0350, /* Micronas Semiconductors, Inc. */
|
||||
// MICRONAS_CELP833 = 0x0351, /* Micronas Semiconductors, Inc. */
|
||||
// BTV_DIGITAL = 0x0400, /* Brooktree Corporation */
|
||||
// INTEL_MUSIC_CODER = 0x0401, /* Intel Corp. */
|
||||
// INDEO_AUDIO = 0x0402, /* Ligo */
|
||||
// QDESIGN_MUSIC = 0x0450, /* QDesign Corporation */
|
||||
// ON2_VP7_AUDIO = 0x0500, /* On2 Technologies */
|
||||
// ON2_VP6_AUDIO = 0x0501, /* On2 Technologies */
|
||||
// VME_VMPCM = 0x0680, /* AT&T Labs,Inc. */
|
||||
// TPC = 0x0681, /* AT&T Labs, Inc. *
|
||||
// LIGHTWAVE_LOSSLESS = 0x08AE, /* Clerjump */
|
||||
// OLIGSM = 0x1000, /* Ing C. Olivetti C., S.p.A. */
|
||||
// OLIADPCM = 0x1001, /* Ing C. Olivetti& C., S.p.A.*/
|
||||
// OLICELP = 0x1002, /* Ing C. Olivetti &C., S.p.A. */
|
||||
// OLISBC = 0x1003, /* Ing C. Olivetti & C, S.p.A. */
|
||||
// OLIOPR = 0x1004, /* Ing C. Olivetti & C. S.p.A. */
|
||||
// LH_CODEC = 0x1100, /* Lernout & Hauspie *
|
||||
// LH_CODEC_CELP = 0x1101, /* Lernout & Hauspie *
|
||||
// LH_CODEC_SBC8 = 0x1102, /* Lernout & Hauspie */
|
||||
// LH_CODEC_SBC12 = 0x1103, /* Lernout & Hauspie */
|
||||
// LH_CODEC_SBC16 = 0x1104, /* Lernout & Hauspie */
|
||||
// NORRIS = 0x1400, /* Norris Communications, Inc. */
|
||||
// ISIAUDIO_2 = 0x1401, /* ISIAudio */
|
||||
// SOUNDSPACE_MUSICOMPRESS = 0x1500, /* AT&T Labs, Inc. */
|
||||
// MPEG_ADTS_AAC = 0x1600, /* Microsoft Corporation */
|
||||
// MPEG_RAW_AAC = 0x1601, /* Microsoft Corporation */
|
||||
// MPEG_LOAS = 0x1602, /* Microsoft Corporation (MPEG-4 Audio Transport Streams (LOAS/LATM)
|
||||
// NOKIA_MPEG_ADTS_AAC = 0x1608, /* Microsoft Corporation */
|
||||
// NOKIA_MPEG_RAW_AAC = 0x1609, /* Microsoft Corporation */
|
||||
// VODAFONE_MPEG_ADTS_AAC = 0x160A, /* Microsoft Corporation */
|
||||
// VODAFONE_MPEG_RAW_AAC = 0x160B, /* Microsoft Corporation */
|
||||
// MPEG_HEAAC =
|
||||
// 0x1610, /* Microsoft Corporation (MPEG-2 AAC or MPEG-4 HE-AAC
|
||||
// v1/v2 streams with any payload (ADTS, ADIF, LOAS/LATM, RAW). Format block
|
||||
// includes MP4 AudioSpecificConfig() -- see HEAACWAVEFORMAT below
|
||||
// */
|
||||
// VOXWARE_RT24_SPEECH = 0x181C, /* Voxware Inc. */
|
||||
// SONICFOUNDRY_LOSSLESS = 0x1971, /* Sonic Foundry */
|
||||
// INNINGS_TELECOM_ADPCM = 0x1979, /* Innings Telecom Inc. */
|
||||
// LUCENT_SX8300P = 0x1C07, /* Lucent Technologies */
|
||||
// LUCENT_SX5363S = 0x1C0C, /* Lucent Technologies */
|
||||
// CUSEEME = 0x1F03, /* CUSeeMe */
|
||||
// NTCSOFT_ALF2CM_ACM = 0x1FC4, /* NTCSoft */
|
||||
// DVM = 0x2000, /* FAST Multimedia AG */
|
||||
// DTS2 = 0x2001,
|
||||
// MAKEAVIS = 0x3313,
|
||||
// DIVIO_MPEG4_AAC = 0x4143, /* Divio, Inc. */
|
||||
// NOKIA_ADAPTIVE_MULTIRATE = 0x4201, /* Nokia */
|
||||
// DIVIO_G726 = 0x4243, /* Divio, Inc. */
|
||||
// LEAD_SPEECH = 0x434C, /* LEAD Technologies */
|
||||
// LEAD_VORBIS = 0x564C, /* LEAD Technologies */
|
||||
// WAVPACK_AUDIO = 0x5756, /* xiph.org */
|
||||
// ALAC = 0x6C61, /* Apple Lossless */
|
||||
// OGG_VORBIS_MODE_1 = 0x674F, /* Ogg Vorbis */
|
||||
// OGG_VORBIS_MODE_2 = 0x6750, /* Ogg Vorbis */
|
||||
// OGG_VORBIS_MODE_3 = 0x6751, /* Ogg Vorbis */
|
||||
// OGG_VORBIS_MODE_1_PLUS = 0x676F, /* Ogg Vorbis */
|
||||
// OGG_VORBIS_MODE_2_PLUS = 0x6770, /* Ogg Vorbis */
|
||||
// OGG_VORBIS_MODE_3_PLUS = 0x6771, /* Ogg Vorbis */
|
||||
// F3COM_NBX = 0x7000, /* 3COM Corp. */
|
||||
// OPUS = 0x704F, /* Opus */
|
||||
// FAAD_AAC = 0x706D,
|
||||
// AMR_NB = 0x7361, /* AMR Narrowband */
|
||||
// AMR_WB = 0x7362, /* AMR Wideband */
|
||||
// AMR_WP = 0x7363, /* AMR Wideband Plus */
|
||||
// GSM_AMR_CBR = 0x7A21, /* GSMA/3GPP */
|
||||
// GSM_AMR_VBR_SID = 0x7A22, /* GSMA/3GPP */
|
||||
// COMVERSE_INFOSYS_G723_1 = 0xA100, /* Comverse Infosys */
|
||||
// COMVERSE_INFOSYS_AVQSBC = 0xA101, /* Comverse Infosys */
|
||||
// COMVERSE_INFOSYS_SBC = 0xA102, /* Comverse Infosys */
|
||||
// SYMBOL_G729_A = 0xA103, /* Symbol Technologies */
|
||||
// VOICEAGE_AMR_WB = 0xA104, /* VoiceAge Corp. */
|
||||
// INGENIENT_G726 = 0xA105, /* Ingenient Technologies, Inc. */
|
||||
// MPEG4_AAC = 0xA106, /* ISO/MPEG-4 */
|
||||
// ENCORE_G726 = 0xA107, /* Encore Software */
|
||||
// ZOLL_ASAO = 0xA108, /* ZOLL Medical Corp. */
|
||||
// SPEEX_VOICE = 0xA109, /* xiph.org */
|
||||
// VIANIX_MASC = 0xA10A, /* Vianix LLC */
|
||||
// WM9_SPECTRUM_ANALYZER = 0xA10B, /* Microsoft */
|
||||
// WMF_SPECTRUM_ANAYZER = 0xA10C, /* Microsoft */
|
||||
// GSM_610 = 0xA0D,
|
||||
// GSM_620 = 0xA1E,
|
||||
// GSM_660 = 0xA10,
|
||||
// GSM_690 = 0xA110,
|
||||
// GSM_ADAPTIVE_MULTIRATE_WB = 0xA111,
|
||||
// POLYCOM_G722 = 0xA112 ,/* Polycom */
|
||||
// POLYCOM_G728 = 0xA113, /* Polycom */
|
||||
// POLYCOM_G729_A = 0xA114, /* Polycom */
|
||||
// POLYCOM_SIREN = 0xA115, /* Polycom */
|
||||
// GLOBAL_IP_ILBC = 0xA116, /* Global IP */
|
||||
// RADIOTIME_TIME_SHIFT_RADIO = 0xA117, /* RadioTime */
|
||||
// NICE_ACA = 0xA118, /* Nice Systems */
|
||||
// NICE_ADPCM = 0xA119, /* Nice Systems */
|
||||
// VOCORD_G721 = 0xA11A, /* Vocord Telecom */
|
||||
// VOCORD_G726 = 0xA11B, /* Vocord Telecom */
|
||||
// VOCORD_G722_1 = 0xA11C, /* Vocord Telecom */
|
||||
// VOCORD_G728 = 0xA11D, /* Vocord Telecom */
|
||||
// VOCORD_G729 = 0xA11E, /* Vocord Telecom */
|
||||
// VOCORD_G729_A = 0xA11F, /* Vocord Telecom */
|
||||
// VOCORD_G723_1 = 0xA120, /* Vocord Telecom */
|
||||
// VOCORD_LBC = 0xA121, /* Vocord Telecom */
|
||||
// NICE_G728 = 0xA122, /* Nice Systems */
|
||||
// FRACE_TELECOM_G729 = 0xA123, /* France Telecom */
|
||||
// CODIAN = 0xA124, /* CODIAN */
|
||||
// FLAC = 0xF1AC, /* flac.sourceforge.net */
|
||||
};
|
||||
|
||||
}
|
||||
191
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAACFAAD.h
Normal file
191
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAACFAAD.h
Normal file
@@ -0,0 +1,191 @@
|
||||
#pragma once
|
||||
|
||||
// #include "Stream.h"
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "faad.h"
|
||||
|
||||
#ifndef FAAD_INPUT_BUFFER_SIZE
|
||||
#define FAAD_INPUT_BUFFER_SIZE 1024*2
|
||||
#endif
|
||||
|
||||
// to prevent Decoding error: Maximum number of bitstream elements exceeded
|
||||
#ifndef FAAD_UNDERFLOW_LIMIT
|
||||
#define FAAD_UNDERFLOW_LIMIT 500
|
||||
#endif
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief AAC Decoder using faad: https://github.com/pschatzmann/arduino-libfaad
|
||||
* This needs a stack of around 60000 and you need to make sure that memory is allocated on PSRAM.
|
||||
* See https://www.pschatzmann.ch/home/2023/09/12/arduino-audio-tools-faat-aac-decoder/
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AACDecoderFAAD : public AudioDecoder {
|
||||
public:
|
||||
AACDecoderFAAD() {
|
||||
info.channels = 2;
|
||||
info.sample_rate = 44100;
|
||||
info.bits_per_sample = 16;
|
||||
};
|
||||
|
||||
~AACDecoderFAAD() { end(); }
|
||||
|
||||
/// Starts the processing
|
||||
bool begin() {
|
||||
TRACED();
|
||||
|
||||
unsigned long cap = NeAACDecGetCapabilities();
|
||||
// Check if decoder has the needed capabilities
|
||||
|
||||
if (!cap & FIXED_POINT_CAP) {
|
||||
LOGE("Fixed Point");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Open the library
|
||||
hAac = NeAACDecOpen();
|
||||
|
||||
// // Get the current config
|
||||
conf = NeAACDecGetCurrentConfiguration(hAac);
|
||||
|
||||
// // If needed change some of the values in conf
|
||||
conf->outputFormat = FAAD_FMT_16BIT;
|
||||
//conf->defObjectType = LC;
|
||||
conf->defSampleRate = info.sample_rate;
|
||||
conf->downMatrix = true; // 5.1 channel downmatrixed to 2 channel
|
||||
conf->useOldADTSFormat = false;
|
||||
conf->dontUpSampleImplicitSBR = false;
|
||||
|
||||
// Set the new configuration
|
||||
if (!NeAACDecSetConfiguration(hAac, conf)) {
|
||||
LOGE("NeAACDecSetConfiguration");
|
||||
return false;
|
||||
}
|
||||
|
||||
// setup input buffer
|
||||
if (input_buffer.size() != buffer_size_input){
|
||||
input_buffer.resize(buffer_size_input);
|
||||
}
|
||||
is_init = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Releases the reserved memory
|
||||
virtual void end() {
|
||||
TRACED();
|
||||
flush();
|
||||
if (hAac != nullptr) {
|
||||
NeAACDecClose(hAac);
|
||||
hAac = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/// Write AAC data to decoder
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
// Write supplied data to input buffer
|
||||
size_t result = input_buffer.writeArray((uint8_t *)data, len);
|
||||
// Decode from input buffer
|
||||
decode(underflow_limit);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void flush() {
|
||||
decode(0);
|
||||
}
|
||||
|
||||
/// Defines the input buffer size
|
||||
void setInputBufferSize(int len){
|
||||
buffer_size_input = len;
|
||||
}
|
||||
|
||||
/// Defines the min number of bytes that are submitted to the decoder
|
||||
void setUnderflowLimit(int len){
|
||||
underflow_limit = len;
|
||||
}
|
||||
|
||||
/// checks if the class is active
|
||||
virtual operator bool() { return hAac != nullptr; }
|
||||
|
||||
protected:
|
||||
int buffer_size_input = FAAD_INPUT_BUFFER_SIZE;
|
||||
int underflow_limit = FAAD_UNDERFLOW_LIMIT;
|
||||
NeAACDecHandle hAac = nullptr;
|
||||
NeAACDecConfigurationPtr conf;
|
||||
SingleBuffer<uint8_t> input_buffer{0};
|
||||
bool is_init = false;
|
||||
|
||||
void init(uint8_t *data, size_t len) {
|
||||
TRACEI();
|
||||
// Initialise the library using one of the initialization functions
|
||||
unsigned long samplerate = info.sample_rate;
|
||||
unsigned char channels = info.channels;
|
||||
|
||||
if (NeAACDecInit(hAac, data, len, &samplerate, &channels)==-1) {
|
||||
LOGE("NeAACDecInit");
|
||||
}
|
||||
info.sample_rate = samplerate;
|
||||
info.channels = channels;
|
||||
is_init = true;
|
||||
}
|
||||
|
||||
void decode(int minBufferSize) {
|
||||
TRACED();
|
||||
NeAACDecFrameInfo hInfo;
|
||||
|
||||
// decode until we do not conume any bytes
|
||||
while (input_buffer.available()>minBufferSize) {
|
||||
int eff_len = input_buffer.available();
|
||||
|
||||
if (!is_init) {
|
||||
init(input_buffer.data(), eff_len);
|
||||
}
|
||||
|
||||
uint8_t *sample_buffer=(uint8_t *)NeAACDecDecode(hAac, &hInfo, input_buffer.address(), eff_len);
|
||||
|
||||
LOGD("bytesconsumed: %d of %d", (int)hInfo.bytesconsumed, (int)eff_len);
|
||||
if (hInfo.error != 0) {
|
||||
LOGW("Decoding error: %s", NeAACDecGetErrorMessage(hInfo.error));
|
||||
}
|
||||
|
||||
if (hInfo.bytesconsumed == 0 ) {
|
||||
break;
|
||||
}
|
||||
|
||||
LOGD("Decoded %lu samples", hInfo.samples);
|
||||
LOGD(" bytesconsumed: %lu", hInfo.bytesconsumed);
|
||||
LOGD(" channels: %d", hInfo.channels);
|
||||
LOGD(" samplerate: %lu", hInfo.samplerate);
|
||||
LOGD(" sbr: %u", hInfo.sbr);
|
||||
LOGD(" object_type: %u", hInfo.object_type);
|
||||
LOGD(" header_type: %u", hInfo.header_type);
|
||||
LOGD(" num_front_channels: %u", hInfo.num_front_channels);
|
||||
LOGD(" num_side_channels: %u", hInfo.num_side_channels);
|
||||
LOGD(" num_back_channels: %u", hInfo.num_back_channels);
|
||||
LOGD(" num_lfe_channels: %u", hInfo.num_lfe_channels);
|
||||
LOGD(" ps: %u", hInfo.ps);
|
||||
|
||||
// removed consumed data
|
||||
input_buffer.clearArray(hInfo.bytesconsumed);
|
||||
|
||||
// check for changes in config
|
||||
AudioInfo tmp{(sample_rate_t)hInfo.samplerate, hInfo.channels, 16};
|
||||
if (tmp != info) {
|
||||
setAudioInfo(tmp);
|
||||
}
|
||||
|
||||
int bytes = hInfo.samples * sizeof(int16_t);
|
||||
size_t len = p_print->write(sample_buffer, bytes);
|
||||
if (len != bytes) {
|
||||
TRACEE();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
319
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAACFDK.h
Normal file
319
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAACFDK.h
Normal file
@@ -0,0 +1,319 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AACDecoderFDK.h"
|
||||
#include "AACEncoderFDK.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
// audio change notification target
// NOTE(review): this is a non-inline global defined in a header; including
// this file from more than one translation unit causes duplicate-symbol link
// errors — consider C++17 `inline` (or moving the definition to a .cpp) once
// the supported language level is confirmed. It is also shared by all
// AACDecoderFDK instances, so only one notification target can be active.
AudioInfoSupport *audioChangeFDK = nullptr;
|
||||
|
||||
/**
|
||||
* @brief Audio Decoder which decodes AAC into a PCM stream
|
||||
* This is basically just a wrapper using https://github.com/pschatzmann/arduino-fdk-aac
|
||||
* which uses AudioInfo and provides the handlig of AudioInfo changes.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AACDecoderFDK : public AudioDecoder {
|
||||
public:
|
||||
AACDecoderFDK(int output_buffer_size = FDK_OUT_BUFFER_DEFAULT_SIZE){
|
||||
TRACED();
|
||||
dec = new aac_fdk::AACDecoderFDK(output_buffer_size);
|
||||
}
|
||||
|
||||
AACDecoderFDK(Print &out_stream, int output_buffer_size = 2048){
|
||||
TRACED();
|
||||
dec = new aac_fdk::AACDecoderFDK(out_stream, output_buffer_size);
|
||||
}
|
||||
|
||||
virtual ~AACDecoderFDK(){
|
||||
delete dec;
|
||||
}
|
||||
|
||||
/// Defines the output stream
|
||||
void setOutput(Print &out_stream) override {
|
||||
dec->setOutput(out_stream);
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
return dec->begin(TT_MP4_ADTS, 1);
|
||||
}
|
||||
|
||||
// opens the decoder
|
||||
bool begin(TRANSPORT_TYPE transportType, UINT nrOfLayers){
|
||||
return dec->begin(transportType, nrOfLayers);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Explicitly configure the decoder by passing a raw AudioSpecificConfig (ASC) or a StreamMuxConfig
|
||||
* (SMC), contained in a binary buffer. This is required for MPEG-4 and Raw Packets file format bitstreams
|
||||
* as well as for LATM bitstreams with no in-band SMC. If the transport format is LATM with or without
|
||||
* LOAS, configuration is assumed to be an SMC, for all other file formats an ASC.
|
||||
*
|
||||
**/
|
||||
AAC_DECODER_ERROR configure(uint8_t *conf, const uint32_t &length) {
|
||||
return dec->configure(conf, length);
|
||||
}
|
||||
|
||||
// write AAC data to be converted to PCM data
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
return dec->write(data, len);
|
||||
}
|
||||
|
||||
// provides detailed information about the stream
|
||||
CStreamInfo audioInfoEx(){
|
||||
return dec->audioInfo();
|
||||
}
|
||||
|
||||
// provides common information
|
||||
AudioInfo audioInfo() override {
|
||||
AudioInfo result;
|
||||
CStreamInfo i = audioInfoEx();
|
||||
result.channels = i.numChannels;
|
||||
result.sample_rate = i.sampleRate;
|
||||
result.bits_per_sample = 16;
|
||||
return result;
|
||||
}
|
||||
|
||||
// release the resources
|
||||
void end() override {
|
||||
TRACED();
|
||||
dec->end();
|
||||
}
|
||||
|
||||
virtual operator bool() override {
|
||||
return (bool)*dec;
|
||||
}
|
||||
|
||||
aac_fdk::AACDecoderFDK *driver() {
|
||||
return dec;
|
||||
}
|
||||
|
||||
static void audioChangeCallback(CStreamInfo &info){
|
||||
if (audioChangeFDK!=nullptr){
|
||||
AudioInfo base;
|
||||
base.channels = info.numChannels;
|
||||
base.sample_rate = info.sampleRate;
|
||||
base.bits_per_sample = 16;
|
||||
// notify audio change
|
||||
audioChangeFDK->setAudioInfo(base);
|
||||
}
|
||||
}
|
||||
|
||||
void addNotifyAudioChange(AudioInfoSupport &bi) override {
|
||||
audioChangeFDK = &bi;
|
||||
// register audio change handler
|
||||
dec->setInfoCallback(audioChangeCallback);
|
||||
}
|
||||
|
||||
protected:
|
||||
aac_fdk::AACDecoderFDK *dec=nullptr;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Encodes PCM data to the AAC format and writes the result to a stream
|
||||
* This is basically just a wrapper using https://github.com/pschatzmann/arduino-fdk-aac
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AACEncoderFDK : public AudioEncoder {
|
||||
|
||||
public:
|
||||
|
||||
AACEncoderFDK(){
|
||||
enc = new aac_fdk::AACEncoderFDK();
|
||||
}
|
||||
|
||||
AACEncoderFDK(Print &out_stream){
|
||||
enc = new aac_fdk::AACEncoderFDK();
|
||||
enc->setOutput(out_stream);
|
||||
}
|
||||
|
||||
~AACEncoderFDK(){
|
||||
delete enc;
|
||||
}
|
||||
|
||||
/// Defines the output
|
||||
void setOutput(Print &out_stream) override {
|
||||
enc->setOutput(out_stream);
|
||||
}
|
||||
|
||||
/** @brief Total encoder bitrate. This parameter is
|
||||
mandatory and interacts with ::AACENC_BITRATEMODE.
|
||||
- CBR: Bitrate in bits/second.
|
||||
- VBR: Variable bitrate. Bitrate argument will
|
||||
be ignored. See \ref suppBitrates for details. */
|
||||
virtual void setBitrate(int bitrate){
|
||||
enc->setBitrate(bitrate);
|
||||
}
|
||||
|
||||
/** @brief Audio object type. See ::AUDIO_OBJECT_TYPE in FDK_audio.h.
|
||||
- 2: MPEG-4 AAC Low Complexity.
|
||||
- 5: MPEG-4 AAC Low Complexity with Spectral Band Replication
|
||||
(HE-AAC).
|
||||
- 29: MPEG-4 AAC Low Complexity with Spectral Band
|
||||
Replication and Parametric Stereo (HE-AAC v2). This
|
||||
configuration can be used only with stereo input audio data.
|
||||
- 23: MPEG-4 AAC Low-Delay.
|
||||
- 39: MPEG-4 AAC Enhanced Low-Delay. Since there is no
|
||||
::AUDIO_OBJECT_TYPE for ELD in combination with SBR defined,
|
||||
enable SBR explicitely by ::AACENC_SBR_MODE parameter. The ELD
|
||||
v2 212 configuration can be configured by ::AACENC_CHANNELMODE
|
||||
parameter.
|
||||
- 129: MPEG-2 AAC Low Complexity.
|
||||
- 132: MPEG-2 AAC Low Complexity with Spectral Band
|
||||
Replication (HE-AAC).
|
||||
|
||||
Please note that the virtual MPEG-2 AOT's basically disables
|
||||
non-existing Perceptual Noise Substitution tool in AAC encoder
|
||||
and controls the MPEG_ID flag in adts header. The virtual
|
||||
MPEG-2 AOT doesn't prohibit specific transport formats. */
|
||||
virtual void setAudioObjectType(int aot){
|
||||
enc->setAudioObjectType(aot);
|
||||
}
|
||||
|
||||
/** @brief This parameter controls the use of the afterburner feature.
|
||||
The afterburner is a type of analysis by synthesis algorithm
|
||||
which increases the audio quality but also the required
|
||||
processing power. It is recommended to always activate this if
|
||||
additional memory consumption and processing power consumption
|
||||
is not a problem. If increased MHz and memory consumption are
|
||||
an issue then the MHz and memory cost of this optional module
|
||||
need to be evaluated against the improvement in audio quality
|
||||
on a case by case basis.
|
||||
- 0: Disable afterburner (default).
|
||||
- 1: Enable afterburner. */
|
||||
virtual void setAfterburner(bool afterburner){
|
||||
enc->setAfterburner(afterburner);
|
||||
}
|
||||
|
||||
/** @brief Configure SBR independently of the chosen Audio
|
||||
Object Type ::AUDIO_OBJECT_TYPE. This parameter
|
||||
is for ELD audio object type only.
|
||||
- -1: Use ELD SBR auto configurator (default).
|
||||
- 0: Disable Spectral Band Replication.
|
||||
- 1: Enable Spectral Band Replication. */
|
||||
virtual void setSpectralBandReplication(int eld_sbr){
|
||||
enc->setSpectralBandReplication(eld_sbr);
|
||||
}
|
||||
|
||||
/** @brief Bitrate mode. Configuration can be different
|
||||
kind of bitrate configurations:
|
||||
- 0: Constant bitrate, use bitrate according
|
||||
to ::AACENC_BITRATE. (default) Within none
|
||||
LD/ELD ::AUDIO_OBJECT_TYPE, the CBR mode makes
|
||||
use of full allowed bitreservoir. In contrast,
|
||||
at Low-Delay ::AUDIO_OBJECT_TYPE the
|
||||
bitreservoir is kept very small.
|
||||
- 1: Variable bitrate mode, \ref vbrmode
|
||||
"very low bitrate".
|
||||
- 2: Variable bitrate mode, \ref vbrmode
|
||||
"low bitrate".
|
||||
- 3: Variable bitrate mode, \ref vbrmode
|
||||
"medium bitrate".
|
||||
- 4: Variable bitrate mode, \ref vbrmode
|
||||
"high bitrate".
|
||||
- 5: Variable bitrate mode, \ref vbrmode
|
||||
"very high bitrate". */
|
||||
virtual void setVariableBitrateMode(int vbr){
|
||||
enc->setVariableBitrateMode(vbr);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set the Output Buffer Size object
|
||||
*
|
||||
* @param outbuf_size
|
||||
*/
|
||||
virtual void setOutputBufferSize(int outbuf_size){
|
||||
enc->setOutputBufferSize(outbuf_size);
|
||||
}
|
||||
|
||||
/// Defines the Audio Info
|
||||
void setAudioInfo(AudioInfo from) override {
|
||||
TRACED();
|
||||
AudioEncoder::setAudioInfo(from);
|
||||
aac_fdk::AudioInfo info;
|
||||
info.channels = from.channels;
|
||||
info.sample_rate = from.sample_rate;
|
||||
info.bits_per_sample = from.bits_per_sample;
|
||||
enc->setAudioInfo(info);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Opens the encoder
|
||||
*
|
||||
* @param info
|
||||
* @return int
|
||||
*/
|
||||
virtual bool begin(AudioInfo info) override {
|
||||
TRACED();
|
||||
return enc->begin(info.channels,info.sample_rate, info.bits_per_sample);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Opens the encoder
|
||||
*
|
||||
* @param input_channels
|
||||
* @param input_sample_rate
|
||||
* @param input_bits_per_sample
|
||||
* @return int 0 => ok; error with negative number
|
||||
*/
|
||||
virtual bool begin(int input_channels=2, int input_sample_rate=44100, int input_bits_per_sample=16) {
|
||||
TRACED();
|
||||
return enc->begin(input_channels,input_sample_rate, input_bits_per_sample);
|
||||
}
|
||||
|
||||
// starts the processing
|
||||
bool begin() override {
|
||||
enc->begin();
|
||||
return true;
|
||||
}
|
||||
|
||||
// convert PCM data to AAC
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write %d bytes", (int)len);
|
||||
return enc->write((uint8_t*)data, len);
|
||||
}
|
||||
|
||||
// release resources
|
||||
void end() override {
|
||||
TRACED();
|
||||
enc->end();
|
||||
}
|
||||
|
||||
UINT getParameter(const AACENC_PARAM param) {
|
||||
return enc->getParameter(param);
|
||||
}
|
||||
|
||||
int setParameter(AACENC_PARAM param, uint32_t value){
|
||||
return enc->setParameter(param, value);
|
||||
}
|
||||
|
||||
aac_fdk::AACEncoderFDK *driver() {
|
||||
return enc;
|
||||
}
|
||||
|
||||
const char *mime() override {
|
||||
return "audio/aac";
|
||||
}
|
||||
|
||||
operator bool() override {
|
||||
return (bool) *enc;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
aac_fdk::AACEncoderFDK *enc=nullptr;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
182
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAACHelix.h
Normal file
182
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAACHelix.h
Normal file
@@ -0,0 +1,182 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#ifndef HELIX_PRINT
|
||||
#define HELIX_PRINT
|
||||
#endif
|
||||
#include "AACDecoderHelix.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief AAC Decoder using libhelix:
|
||||
* https://github.com/pschatzmann/arduino-libhelix This is basically just a
|
||||
* simple wrapper to provide AudioInfo and AudioInfoSupport
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AACDecoderHelix : public AudioDecoder {
|
||||
public:
|
||||
AACDecoderHelix() {
|
||||
TRACED();
|
||||
aac = new libhelix::AACDecoderHelix();
|
||||
if (aac != nullptr) {
|
||||
aac->setReference(this);
|
||||
} else {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @brief Construct a new AACDecoderMini object
|
||||
*
|
||||
* @param out_stream
|
||||
*/
|
||||
AACDecoderHelix(Print &out_stream) {
|
||||
TRACED();
|
||||
aac = new libhelix::AACDecoderHelix(out_stream);
|
||||
if (aac != nullptr) {
|
||||
aac->setReference(this);
|
||||
} else {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new AACDecoderMini object. The decoded output will go to
|
||||
* the print object.
|
||||
*
|
||||
* @param out_stream
|
||||
* @param bi
|
||||
*/
|
||||
AACDecoderHelix(Print &out_stream, AudioInfoSupport &bi) {
|
||||
TRACED();
|
||||
aac = new libhelix::AACDecoderHelix(out_stream);
|
||||
if (aac != nullptr) {
|
||||
aac->setReference(this);
|
||||
} else {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
}
|
||||
addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the AACDecoderMini object
|
||||
*
|
||||
*/
|
||||
~AACDecoderHelix() {
|
||||
TRACED();
|
||||
if (aac != nullptr) delete aac;
|
||||
}
|
||||
|
||||
// void setRaw(bool flag){
|
||||
// if (aac!=nullptr) aac->setRaw(flag);
|
||||
// }
|
||||
|
||||
/// Defines the output Stream
|
||||
virtual void setOutput(Print &out_stream) override {
|
||||
TRACED();
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
if (aac != nullptr) aac->setOutput(out_stream);
|
||||
}
|
||||
|
||||
/// Starts the processing
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
if (aac != nullptr) {
|
||||
// aac->setDelay(CODEC_DELAY_MS);
|
||||
aac->setInfoCallback(infoCallback, this);
|
||||
aac->begin();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Releases the reserved memory
|
||||
virtual void end() override {
|
||||
TRACED();
|
||||
if (aac != nullptr) aac->end();
|
||||
}
|
||||
|
||||
virtual _AACFrameInfo audioInfoEx() { return aac->audioInfo(); }
|
||||
|
||||
AudioInfo audioInfo() override {
|
||||
AudioInfo result;
|
||||
auto i = audioInfoEx();
|
||||
if (i.nChans != 0 && i.bitsPerSample != 0 && i.sampRateOut != 0) {
|
||||
result.channels = i.nChans;
|
||||
result.sample_rate = i.sampRateOut;
|
||||
result.bits_per_sample = i.bitsPerSample;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo info) override {
|
||||
this->info = info;
|
||||
if (info_notifications_active) {
|
||||
notifyAudioChange(info);
|
||||
}
|
||||
}
|
||||
|
||||
/// Write AAC data to decoder
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("AACDecoderHelix::write: %d", (int)len);
|
||||
if (aac == nullptr) return 0;
|
||||
int open = len;
|
||||
int processed = 0;
|
||||
uint8_t *data8 = (uint8_t *)data;
|
||||
while (open > 0) {
|
||||
int act_write =
|
||||
aac->write(data8 + processed, min(open, DEFAULT_BUFFER_SIZE));
|
||||
open -= act_write;
|
||||
processed += act_write;
|
||||
}
|
||||
return processed;
|
||||
}
|
||||
|
||||
/// checks if the class is active
|
||||
virtual operator bool() override { return aac != nullptr && (bool)*aac; }
|
||||
|
||||
void flush() {
|
||||
// aac->flush();
|
||||
}
|
||||
|
||||
/// notifies the subscriber about a change
|
||||
static void infoCallback(_AACFrameInfo &i, void *ref) {
|
||||
AACDecoderHelix *p_helix = (AACDecoderHelix *)ref;
|
||||
if (p_helix != nullptr) {
|
||||
TRACED();
|
||||
AudioInfo baseInfo;
|
||||
baseInfo.channels = i.nChans;
|
||||
baseInfo.sample_rate = i.sampRateOut;
|
||||
baseInfo.bits_per_sample = i.bitsPerSample;
|
||||
// p_helix->audioChangeAACHelix->setAudioInfo(baseInfo);
|
||||
LOGW("sample_rate: %d", i.sampRateOut);
|
||||
p_helix->setAudioInfo(baseInfo);
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides the maximum frame size - this is allocated on the heap and you
|
||||
/// can reduce the heap size my minimizing this value
|
||||
size_t maxFrameSize() { return aac->maxFrameSize(); }
|
||||
|
||||
/// Define your optimized maximum frame size
|
||||
void setMaxFrameSize(size_t len) { aac->setMaxFrameSize(len); }
|
||||
|
||||
void setAudioInfoNotifications(bool active) {
|
||||
info_notifications_active = active;
|
||||
}
|
||||
|
||||
/// Provides the maximum pwm buffer size - this is allocated on the heap and
|
||||
/// you can reduce the heap size my minimizing this value
|
||||
size_t maxPCMSize() { return aac->maxPCMSize(); }
|
||||
|
||||
/// Define your optimized maximum pwm buffer size
|
||||
void setMaxPCMSize(size_t len) { aac->setMaxPCMSize(len); }
|
||||
|
||||
protected:
|
||||
libhelix::AACDecoderHelix *aac = nullptr;
|
||||
bool info_notifications_active = true;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
334
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecADPCM.h
Normal file
334
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecADPCM.h
Normal file
@@ -0,0 +1,334 @@
|
||||
#pragma once
|
||||
#include "ADPCM.h" // https://github.com/pschatzmann/adpcm
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for ADPCM. Depends on https://github.com/pschatzmann/adpcm
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ADPCMDecoder : public AudioDecoderExt {
|
||||
public:
|
||||
ADPCMDecoder() = default;
|
||||
|
||||
ADPCMDecoder(AVCodecID id, int blockSize = ADAPCM_DEFAULT_BLOCK_SIZE) {
|
||||
setBlockSize(blockSize);
|
||||
setId(id);
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
~ADPCMDecoder() {
|
||||
if (p_decoder) delete p_decoder;
|
||||
}
|
||||
|
||||
// (re) defines the codec id: set the block size first
|
||||
void setId(AVCodecID id) {
|
||||
codec_id = id;
|
||||
if (p_decoder != nullptr) {
|
||||
setImplementation();
|
||||
}
|
||||
}
|
||||
|
||||
// defines the block size (= size of encoded frame)
|
||||
void setBlockSize(int blockSize) override {
|
||||
block_size = blockSize;
|
||||
if (p_decoder == nullptr) return;
|
||||
p_decoder->setBlockSize(blockSize);
|
||||
}
|
||||
|
||||
/// Provides the block size (= size of encoded frame) (only available after
|
||||
/// calling begin)
|
||||
int blockSize() {
|
||||
if (p_decoder == nullptr) return block_size;
|
||||
return p_decoder->blockSize();
|
||||
}
|
||||
|
||||
/// Provides the frame size (size of decoded frame) (only available after
|
||||
/// calling begin)
|
||||
int frameSize() {
|
||||
if (p_decoder == nullptr) return 0;
|
||||
return p_decoder->frameSize() * 2;
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
if (p_decoder == nullptr) {
|
||||
setImplementation();
|
||||
}
|
||||
if (is_started) return true;
|
||||
current_byte = 0;
|
||||
LOGI("sample_rate: %d, channels: %d", info.sample_rate, info.channels);
|
||||
p_decoder->begin(info.sample_rate, info.channels);
|
||||
LOGI("frameSize: %d", (int)frameSize());
|
||||
LOGI("blockSize: %d", (int)blockSize());
|
||||
block_size = p_decoder->blockSize();
|
||||
assert(block_size > 0);
|
||||
assert(p_decoder->frameSize() > 0);
|
||||
adpcm_block.resize(block_size);
|
||||
|
||||
notifyAudioChange(info);
|
||||
is_started = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
if (p_decoder != nullptr) p_decoder->end();
|
||||
adpcm_block.resize(0);
|
||||
is_started = false;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
TRACED();
|
||||
|
||||
uint8_t *input_buffer8 = (uint8_t *)data;
|
||||
LOGD("write: %d", (int)len);
|
||||
for (int j = 0; j < len; j++) {
|
||||
decode(input_buffer8[j]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
void flush() {
|
||||
if (p_decoder != nullptr) p_decoder->flush();
|
||||
}
|
||||
|
||||
operator bool() override { return is_started; }
|
||||
|
||||
protected:
|
||||
adpcm_ffmpeg::ADPCMDecoder *p_decoder = nullptr;
|
||||
Vector<uint8_t> adpcm_block;
|
||||
Print *p_print = nullptr;
|
||||
int current_byte = 0;
|
||||
int block_size = ADAPCM_DEFAULT_BLOCK_SIZE;
|
||||
AVCodecID codec_id = AV_CODEC_ID_ADPCM_MS;
|
||||
bool is_started = false;
|
||||
|
||||
virtual bool decode(uint8_t byte) {
|
||||
if (p_decoder == nullptr) return false;
|
||||
adpcm_block[current_byte++] = byte;
|
||||
|
||||
if (current_byte >= block_size) {
|
||||
TRACED();
|
||||
adpcm_ffmpeg::AVFrame &frame =
|
||||
p_decoder->decode(&adpcm_block[0], block_size);
|
||||
// print the result
|
||||
int16_t *data = (int16_t *)frame.data[0];
|
||||
size_t byte_count = frame.nb_samples * sizeof(int16_t) * info.channels;
|
||||
size_t written = p_print->write((uint8_t *)data, byte_count);
|
||||
if (written != byte_count) {
|
||||
LOGE("decode %d -> %d -> %d", block_size, (int)byte_count,
|
||||
(int)written);
|
||||
} else {
|
||||
LOGD("decode %d -> %d -> %d", block_size, (int)byte_count,
|
||||
(int)written);
|
||||
}
|
||||
|
||||
// restart from array begin
|
||||
current_byte = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// change the decoder implementation
|
||||
void setImplementation() {
|
||||
// delete the old decoder
|
||||
if (p_decoder != nullptr) {
|
||||
p_decoder->end();
|
||||
delete p_decoder;
|
||||
p_decoder = nullptr;
|
||||
}
|
||||
|
||||
if (codec_id == AV_CODEC_ID_ADPCM_IMA_AMV) {
|
||||
info.sample_rate = 22050;
|
||||
info.channels = 1;
|
||||
info.bits_per_sample = 16;
|
||||
}
|
||||
p_decoder = adpcm_ffmpeg::ADPCMDecoderFactory::create(codec_id);
|
||||
if (p_decoder != nullptr) {
|
||||
p_decoder->setCodecID(codec_id);
|
||||
p_decoder->setBlockSize(block_size);
|
||||
} else {
|
||||
LOGE("Decoder not implemented");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for ADPCM - Depends on https://github.com/pschatzmann/adpcm
|
||||
* @ingroup codecs
|
||||
* @ingroup p_encoder->
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ADPCMEncoder : public AudioEncoderExt {
|
||||
public:
|
||||
ADPCMEncoder() = default;
|
||||
|
||||
ADPCMEncoder(AVCodecID id, int blockSize = ADAPCM_DEFAULT_BLOCK_SIZE) {
|
||||
setId(id);
|
||||
setBlockSize(blockSize);
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
~ADPCMEncoder() {
|
||||
if (p_encoder != nullptr) delete p_encoder;
|
||||
}
|
||||
|
||||
/// (re) defines the codec id
|
||||
void setId(AVCodecID id) {
|
||||
codec_id = id;
|
||||
if (p_encoder != nullptr) {
|
||||
setImplementation();
|
||||
}
|
||||
}
|
||||
|
||||
/// (re) defines the block size
|
||||
void setBlockSize(int blockSize) {
|
||||
block_size = blockSize;
|
||||
if (p_encoder == nullptr) return;
|
||||
p_encoder->setBlockSize(blockSize);
|
||||
}
|
||||
|
||||
/// Provides the block size (size of encoded frame) (only available after
|
||||
/// calling begin)
|
||||
int blockSize() override {
|
||||
if (p_encoder == nullptr) return 0;
|
||||
return p_encoder->blockSize();
|
||||
}
|
||||
|
||||
/// Provides the frame size (size of decoded frame) (only available after
|
||||
/// calling begin)
|
||||
int frameSize() {
|
||||
if (p_encoder == nullptr) return 0;
|
||||
return p_encoder->frameSize() * 2;
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
if (p_encoder == nullptr) {
|
||||
setImplementation();
|
||||
};
|
||||
if (is_started) return true;
|
||||
LOGI("sample_rate: %d, channels: %d", info.sample_rate, info.channels);
|
||||
p_encoder->begin(info.sample_rate, info.channels);
|
||||
LOGI("frameSize: %d", (int)frameSize());
|
||||
LOGI("blockSize: %d", (int)blockSize());
|
||||
assert(info.sample_rate != 0);
|
||||
assert(p_encoder->frameSize() != 0);
|
||||
total_samples = p_encoder->frameSize() * info.channels;
|
||||
pcm_block.resize(total_samples);
|
||||
current_sample = 0;
|
||||
|
||||
is_started = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
pcm_block.resize(0);
|
||||
if (p_encoder == nullptr) return;
|
||||
p_encoder->end();
|
||||
is_started = false;
|
||||
}
|
||||
|
||||
const char *mime() override { return "audio/adpcm"; }
|
||||
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
operator bool() override { return is_started; }
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", (int)len);
|
||||
int16_t *data16 = (int16_t *)data;
|
||||
for (int j = 0; j < len / 2; j++) {
|
||||
encode(data16[j]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// provides the frame duration in us (for rtsp)
|
||||
virtual uint32_t frameDurationUs() override {
|
||||
if (p_encoder == nullptr || info.sample_rate == 0) {
|
||||
return 20000; // Default 20ms if not initialized
|
||||
}
|
||||
|
||||
// Get the number of samples per frame from the encoder
|
||||
int samplesPerFrame = p_encoder->frameSize();
|
||||
if (samplesPerFrame <= 0) {
|
||||
return 20000; // Default 20ms if invalid frame size
|
||||
}
|
||||
|
||||
// Calculate frame duration: (samples_per_frame / sample_rate) * 1000000 us
|
||||
uint32_t durationUs = (samplesPerFrame * 1000000) / info.sample_rate;
|
||||
return durationUs;
|
||||
}
|
||||
|
||||
protected:
|
||||
AVCodecID codec_id = AV_CODEC_ID_ADPCM_MS;
|
||||
adpcm_ffmpeg::ADPCMEncoder *p_encoder = nullptr;
|
||||
Vector<int16_t> pcm_block;
|
||||
Print *p_print = nullptr;
|
||||
bool is_started = false;
|
||||
int current_sample = 0;
|
||||
int total_samples = 0;
|
||||
int current_id = -1;
|
||||
int block_size = ADAPCM_DEFAULT_BLOCK_SIZE;
|
||||
|
||||
virtual bool encode(int16_t sample) {
|
||||
if (p_encoder == nullptr) return false;
|
||||
pcm_block[current_sample++] = sample;
|
||||
if (current_sample >= total_samples) {
|
||||
TRACED();
|
||||
adpcm_ffmpeg::AVPacket &packet =
|
||||
p_encoder->encode(&pcm_block[0], total_samples);
|
||||
if (packet.size > 0) {
|
||||
size_t written = p_print->write(packet.data, packet.size);
|
||||
if (written != packet.size) {
|
||||
LOGE("encode %d->%d->%d", 2 * total_samples, (int)packet.size,
|
||||
(int)written);
|
||||
} else {
|
||||
LOGD("encode %d->%d->%d", 2 * total_samples, (int)packet.size,
|
||||
(int)written);
|
||||
}
|
||||
}
|
||||
// restart from array begin
|
||||
current_sample = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// change the encoder implementation
|
||||
bool setImplementation() {
|
||||
bool rc = true;
|
||||
// delete the old encoder
|
||||
if (p_encoder != nullptr) {
|
||||
p_encoder->end();
|
||||
delete p_encoder;
|
||||
p_encoder = nullptr;
|
||||
}
|
||||
|
||||
if (codec_id == AV_CODEC_ID_ADPCM_IMA_AMV) {
|
||||
info.sample_rate = 22050;
|
||||
info.channels = 1;
|
||||
info.bits_per_sample = 16;
|
||||
}
|
||||
p_encoder = adpcm_ffmpeg::ADPCMEncoderFactory::create(codec_id);
|
||||
if (p_encoder != nullptr) {
|
||||
p_encoder->setCodecID(codec_id);
|
||||
p_encoder->setBlockSize(block_size);
|
||||
} else {
|
||||
LOGE("Encoder not implemented");
|
||||
rc = false;
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
266
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecADPCMXQ.h
Normal file
266
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecADPCMXQ.h
Normal file
@@ -0,0 +1,266 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "adpcm-lib.h" // https://github.com/pschatzmann/arduino-adpcm-xq
|
||||
|
||||
#define DEFAULT_NOISE_SHAPING NOISE_SHAPING_OFF
|
||||
#define DEFAULT_LOOKAHEAD 0
|
||||
#define DEFAULT_BLOCKSIZE_POW2 0
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
enum class ADPCMNoiseShaping {
|
||||
AD_NOISE_SHAPING_OFF = 0, // flat noise (no shaping)
|
||||
AD_NOISE_SHAPING_STATIC = 1, // first-order highpass shaping
|
||||
AD_NOISE_SHAPING_DYNAMIC = 2
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Decoder for ADPCM-XQ. Depends on
|
||||
* https://github.com/pschatzmann/arduino-adpcm-xq
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ADPCMDecoderXQ : public AudioDecoder {
|
||||
public:
|
||||
ADPCMDecoderXQ() {
|
||||
info.sample_rate = 44100;
|
||||
info.channels = 2;
|
||||
info.bits_per_sample = 16;
|
||||
}
|
||||
|
||||
/// set bocksizes as 2^pow: range from 8 to 15
|
||||
void setBlockSizePower(int pow) {
|
||||
if (pow >= 8 && pow >= 15) {
|
||||
block_size_pow2 = pow;
|
||||
}
|
||||
}
|
||||
|
||||
/// Set look ahead bytes from 0 to 8
|
||||
void setLookahead(int value) {
|
||||
if (value <= 8) {
|
||||
lookahead = value;
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines the noise shaping
|
||||
void setNoiseShaping(ADPCMNoiseShaping ns) { noise_shaping = (int)ns; }
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
current_byte = 0;
|
||||
if (adpcm_cnxt == nullptr) {
|
||||
adpcm_cnxt = adpcm_create_context(info.channels, lookahead, noise_shaping,
|
||||
initial_deltas);
|
||||
|
||||
if (block_size_pow2)
|
||||
block_size = 1 << block_size_pow2;
|
||||
else
|
||||
block_size = 256 * info.channels *
|
||||
(info.sample_rate < 11000 ? 1 : info.sample_rate / 11000);
|
||||
|
||||
samples_per_block =
|
||||
(block_size - info.channels * 4) * (info.channels ^ 3) + 1;
|
||||
|
||||
pcm_block.resize(samples_per_block * info.channels);
|
||||
adpcm_block.resize(block_size);
|
||||
}
|
||||
|
||||
notifyAudioChange(info);
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
if (adpcm_cnxt != nullptr) {
|
||||
adpcm_free_context(adpcm_cnxt);
|
||||
adpcm_cnxt = nullptr;
|
||||
}
|
||||
pcm_block.resize(0);
|
||||
adpcm_block.resize(0);
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() override { return adpcm_cnxt != nullptr; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
uint8_t *input_buffer8 = (uint8_t *)data;
|
||||
LOGD("write: %d", (int)len);
|
||||
for (int j = 0; j < len; j++) {
|
||||
adpcm_block[current_byte++] = input_buffer8[j];
|
||||
if (current_byte == block_size) {
|
||||
decode(current_byte);
|
||||
current_byte = 0;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
int current_byte = 0;
|
||||
void *adpcm_cnxt = nullptr;
|
||||
Vector<int16_t> pcm_block;
|
||||
Vector<uint8_t> adpcm_block;
|
||||
int32_t initial_deltas[2] = {0};
|
||||
Print *p_print = nullptr;
|
||||
int samples_per_block = 0, lookahead = DEFAULT_LOOKAHEAD,
|
||||
noise_shaping = (int)DEFAULT_NOISE_SHAPING,
|
||||
block_size_pow2 = DEFAULT_BLOCKSIZE_POW2, block_size = 0;
|
||||
|
||||
bool decode(int this_block_adpcm_samples) {
|
||||
int result = adpcm_decode_block(pcm_block.data(), adpcm_block.data(),
|
||||
block_size, info.channels);
|
||||
if (result != samples_per_block) {
|
||||
LOGE("adpcm_decode_block: %d instead %d", result,
|
||||
this_block_adpcm_samples);
|
||||
return false;
|
||||
}
|
||||
int write_size = samples_per_block * info.channels * 2;
|
||||
p_print->write((uint8_t *)pcm_block.data(), write_size);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for ADPCM-XQ - Depends on
|
||||
* https://github.com/pschatzmann/arduino-adpcm-xq
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ADPCMEncoderXQ : public AudioEncoder {
|
||||
public:
|
||||
ADPCMEncoderXQ() {
|
||||
info.sample_rate = 44100;
|
||||
info.channels = 2;
|
||||
info.bits_per_sample = 16;
|
||||
}
|
||||
|
||||
/// set bocksizes as 2^pow: range from 8 to 15
|
||||
void setBlockSizePower(int pow) {
|
||||
if (pow >= 8 && pow >= 15) {
|
||||
block_size_pow2 = pow;
|
||||
}
|
||||
}
|
||||
|
||||
/// Set look ahead bytes from 0 to 8
|
||||
void setLookahead(int value) {
|
||||
if (value <= 8) {
|
||||
lookahead = value;
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines the noise shaping
|
||||
void setNoiseShaping(ADPCMNoiseShaping ns) { noise_shaping = (int)ns; }
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
|
||||
if (block_size_pow2)
|
||||
block_size = 1 << block_size_pow2;
|
||||
else
|
||||
block_size = 256 * info.channels *
|
||||
(info.sample_rate < 11000 ? 1 : info.sample_rate / 11000);
|
||||
|
||||
samples_per_block =
|
||||
(block_size - info.channels * 4) * (info.channels ^ 3) + 1;
|
||||
|
||||
pcm_block.resize(samples_per_block * info.channels);
|
||||
adpcm_block.resize(block_size);
|
||||
current_sample = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
if (adpcm_cnxt != nullptr) {
|
||||
adpcm_free_context(adpcm_cnxt);
|
||||
adpcm_cnxt = nullptr;
|
||||
}
|
||||
pcm_block.resize(0);
|
||||
adpcm_block.resize(0);
|
||||
}
|
||||
|
||||
const char *mime() override { return "audio/adpcm"; }
|
||||
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
operator bool() override { return adpcm_cnxt != nullptr; }
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", (int)len);
|
||||
int16_t *input_buffer = (int16_t *)data;
|
||||
pcm_block_size = samples_per_block * info.channels;
|
||||
for (int j = 0; j < len / 2; j++) {
|
||||
pcm_block[current_sample++] = input_buffer[j];
|
||||
if (current_sample == samples_per_block * info.channels) {
|
||||
encode();
|
||||
current_sample = 0;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
int current_sample = 0;
|
||||
void *adpcm_cnxt = nullptr;
|
||||
Vector<int16_t> pcm_block;
|
||||
Vector<uint8_t> adpcm_block;
|
||||
Print *p_print = nullptr;
|
||||
int samples_per_block = 0, lookahead = DEFAULT_LOOKAHEAD,
|
||||
noise_shaping = (int)DEFAULT_NOISE_SHAPING,
|
||||
block_size_pow2 = DEFAULT_BLOCKSIZE_POW2, block_size = 0, pcm_block_size;
|
||||
bool is_first = true;
|
||||
|
||||
bool encode() {
|
||||
// if this is the first block, compute a decaying average (in reverse) so
|
||||
// that we can let the encoder know what kind of initial deltas to expect
|
||||
// (helps initializing index)
|
||||
|
||||
if (adpcm_cnxt == nullptr) {
|
||||
is_first = false;
|
||||
int32_t average_deltas[2];
|
||||
|
||||
average_deltas[0] = average_deltas[1] = 0;
|
||||
|
||||
for (int i = samples_per_block * info.channels; i -= info.channels;) {
|
||||
average_deltas[0] -= average_deltas[0] >> 3;
|
||||
average_deltas[0] +=
|
||||
abs((int32_t)pcm_block[i] - pcm_block[i - info.channels]);
|
||||
|
||||
if (info.channels == 2) {
|
||||
average_deltas[1] -= average_deltas[1] >> 3;
|
||||
average_deltas[1] +=
|
||||
abs((int32_t)pcm_block[i - 1] - pcm_block[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
average_deltas[0] >>= 3;
|
||||
average_deltas[1] >>= 3;
|
||||
|
||||
adpcm_cnxt = adpcm_create_context(info.channels, lookahead, noise_shaping,
|
||||
average_deltas);
|
||||
}
|
||||
|
||||
size_t num_bytes;
|
||||
adpcm_encode_block(adpcm_cnxt, adpcm_block.data(), &num_bytes,
|
||||
pcm_block.data(), samples_per_block);
|
||||
|
||||
if (num_bytes != block_size) {
|
||||
LOGE(
|
||||
"adpcm_encode_block() did not return expected value "
|
||||
"(expected %d, got %d)!\n",
|
||||
block_size, (int)num_bytes);
|
||||
return false;
|
||||
}
|
||||
|
||||
p_print->write(adpcm_block.data(), block_size);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
|
||||
342
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecADTS.h
Normal file
342
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecADTS.h
Normal file
@@ -0,0 +1,342 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
#ifndef SYNCWORDH
|
||||
#define SYNCWORDH 0xff
|
||||
#define SYNCWORDL 0xf0
|
||||
#endif
|
||||
|
||||
#define ERROR_FMT_CHANGE "- Invalid ADTS change: %s"
|
||||
#define ERROR_FMT "- Invalid ADTS: %s (0x%x)"
|
||||
|
||||
/**
|
||||
* @brief Structure to hold ADTS header field values
|
||||
*/
|
||||
|
||||
class ADTSParser {
|
||||
public:
|
||||
struct ADTSHeader {
|
||||
uint16_t syncword = 0;
|
||||
uint8_t id = 0;
|
||||
uint8_t layer = 0;
|
||||
uint8_t protection_absent = 0;
|
||||
uint8_t profile = 0;
|
||||
uint8_t sampling_freq_idx = 0;
|
||||
uint8_t private_bit = 0;
|
||||
uint8_t channel_cfg = 0;
|
||||
uint8_t original_copy = 0;
|
||||
uint8_t home = 0;
|
||||
uint8_t copyright_id_bit = 0;
|
||||
uint8_t copyright_id_start = 0;
|
||||
uint16_t frame_length = 0;
|
||||
uint8_t adts_buf_fullness = 0;
|
||||
uint8_t num_rawdata_blocks = 0;
|
||||
};
|
||||
|
||||
bool begin() {
|
||||
is_first = true;
|
||||
is_valid = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool parse(uint8_t *hdr) {
|
||||
header.syncword = (hdr[0] << 4) | (hdr[1] >> 4);
|
||||
// parse fixed header
|
||||
header.id = (hdr[1] >> 3) & 0b1;
|
||||
header.layer = (hdr[1] >> 1) & 0b11;
|
||||
header.protection_absent = (hdr[1]) & 0b1;
|
||||
header.profile = (hdr[2] >> 6) & 0b11;
|
||||
header.sampling_freq_idx = (hdr[2] >> 2) & 0b1111;
|
||||
header.private_bit = (hdr[2] >> 1) & 0b1;
|
||||
header.channel_cfg = ((hdr[2] & 0x01) << 2) | ((hdr[3] & 0xC0) >> 6);
|
||||
header.original_copy = (hdr[3] >> 5) & 0b1;
|
||||
header.home = (hdr[3] >> 4) & 0b1;
|
||||
// parse variable header
|
||||
header.copyright_id_bit = (hdr[3] >> 3) & 0b1;
|
||||
header.copyright_id_start = (hdr[3] >> 2) & 0b1;
|
||||
header.frame_length = ((((unsigned int)hdr[3] & 0x3)) << 11) |
|
||||
(((unsigned int)hdr[4]) << 3) | (hdr[5] >> 5);
|
||||
header.adts_buf_fullness = ((hdr[5] & 0b11111) << 6) | (hdr[6] >> 2);
|
||||
header.num_rawdata_blocks = (hdr[6]) & 0b11;
|
||||
|
||||
LOGD("id:%d layer:%d profile:%d freq:%d channel:%d frame_length:%d",
|
||||
header.id, header.layer, header.profile, getSampleRate(),
|
||||
header.channel_cfg, header.frame_length);
|
||||
|
||||
// check
|
||||
is_valid = check();
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
uint32_t getFrameLength() { return header.frame_length; };
|
||||
|
||||
void log() {
|
||||
LOGI("%s id:%d layer:%d profile:%d freq:%d channel:%d frame_length:%d",
|
||||
is_valid ? "+" : "-", header.id, header.layer, header.profile,
|
||||
getSampleRate(), header.channel_cfg, header.frame_length);
|
||||
}
|
||||
|
||||
int getSampleRate() {
|
||||
return header.sampling_freq_idx > 12
|
||||
? header.sampling_freq_idx
|
||||
: (int)adtsSamplingRates[header.sampling_freq_idx];
|
||||
}
|
||||
|
||||
bool isSyncWord(const uint8_t *buf) {
|
||||
return ((buf[0] & SYNCWORDH) == SYNCWORDH &&
|
||||
(buf[1] & SYNCWORDL) == SYNCWORDL);
|
||||
}
|
||||
|
||||
int findSyncWord(const uint8_t *buf, int nBytes, int start = 0) {
|
||||
/* find byte-aligned syncword (12 bits = 0xFFF) */
|
||||
for (int i = start; i < nBytes - 1; i++) {
|
||||
if (isSyncWord(buf + i)) return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
ADTSHeader &data() { return header; }
|
||||
|
||||
protected:
|
||||
const int adtsSamplingRates[13] = {96000, 88200, 64000, 48000, 44100,
|
||||
32000, 24000, 22050, 16000, 12000,
|
||||
11025, 8000, 7350};
|
||||
|
||||
ADTSHeader header;
|
||||
ADTSHeader header_ref;
|
||||
bool is_first = true;
|
||||
bool is_valid = false;
|
||||
|
||||
bool check() {
|
||||
if (header.syncword != 0b111111111111) {
|
||||
LOGW(ERROR_FMT, "sync", (int)header.syncword);
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.id > 6) {
|
||||
LOGW(ERROR_FMT, "id", (int)header.id);
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.sampling_freq_idx > 0xb) {
|
||||
LOGW(ERROR_FMT, "freq", (int)header.sampling_freq_idx);
|
||||
is_valid = false;
|
||||
}
|
||||
// valid value 0-7
|
||||
// if (header.channel_cfg == 0 || header.channel_cfg > 7) {
|
||||
if (header.channel_cfg > 7) {
|
||||
LOGW(ERROR_FMT, "channels", (int)header.channel_cfg);
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.frame_length > 8191) { // tymically <= 768
|
||||
LOGW(ERROR_FMT, "frame_length", (int)header.frame_length);
|
||||
is_valid = false;
|
||||
}
|
||||
// on subsequent checks we need to compare with the first header
|
||||
if (!is_first) {
|
||||
is_valid = checkRef();
|
||||
}
|
||||
if (is_valid) {
|
||||
is_first = false;
|
||||
header_ref = header;
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
bool checkRef() {
|
||||
char msg[200] = "";
|
||||
bool is_valid = true;
|
||||
if (header.id != header_ref.id) {
|
||||
strcat(msg, "id ");
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.layer != header_ref.layer) {
|
||||
strcat(msg, "layer ");
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.profile != header_ref.profile) {
|
||||
strcat(msg, "profile ");
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.sampling_freq_idx != header_ref.sampling_freq_idx) {
|
||||
strcat(msg, "freq ");
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.channel_cfg != header_ref.channel_cfg) {
|
||||
strcat(msg, "channel ");
|
||||
is_valid = false;
|
||||
}
|
||||
if (header.adts_buf_fullness != header_ref.adts_buf_fullness) {
|
||||
strcat(msg, "fullness");
|
||||
is_valid = false;
|
||||
}
|
||||
if (!is_valid) {
|
||||
LOGW(ERROR_FMT_CHANGE, msg);
|
||||
}
|
||||
return is_valid;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Audio Data Transport Stream (ADTS) is a format similar to Audio Data
|
||||
* Interchange Format (ADIF), used by MPEG TS or Shoutcast to stream audio
|
||||
* defined in MPEG-2 Part 7, usually AAC. This parser extracts all valid ADTS
|
||||
* frames from the data stream ignoring other data.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ADTSDecoder : public AudioDecoder {
|
||||
public:
|
||||
ADTSDecoder() = default;
|
||||
ADTSDecoder(AudioDecoder &dec) { p_dec = &dec; };
|
||||
|
||||
bool begin() override {
|
||||
parser.begin();
|
||||
if (p_dec) p_dec->begin();
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
parseBuffer();
|
||||
writeData(out_buffer.data(), out_buffer.available());
|
||||
out_buffer.reset();
|
||||
buffer.resize(0);
|
||||
if (p_dec) p_dec->end();
|
||||
}
|
||||
|
||||
/// Write AAC data to decoder
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGI("AACDecoderADTS::write: %d", (int)len);
|
||||
|
||||
parseBuffer();
|
||||
|
||||
// write data to buffer
|
||||
size_t result = buffer.writeArray(data, len);
|
||||
// assert(result == len);
|
||||
LOGD("buffer size: %d", buffer.available());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// checks if the class is active
|
||||
operator bool() override { return true; }
|
||||
|
||||
/// By default we write the parsed frames directly to the output:
|
||||
/// alternatively you can activate a buffer here
|
||||
void setOutputBufferSize(int size) { out_buffer.resize(size); }
|
||||
|
||||
/// Defines the parse buffer size: default is 1024
|
||||
void setParseBufferSize(int size) { buffer.resize(size); }
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
void setOutput(AudioStream &out_stream) override {
|
||||
if (p_dec) {
|
||||
p_dec->setOutput(out_stream);
|
||||
} else {
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
void setOutput(AudioOutput &out_stream) override {
|
||||
if (p_dec) {
|
||||
p_dec->setOutput(out_stream);
|
||||
} else {
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
void setOutput(Print &out_stream) override {
|
||||
if (p_dec) {
|
||||
p_dec->setOutput(out_stream);
|
||||
} else {
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
SingleBuffer<uint8_t> buffer{DEFAULT_BUFFER_SIZE};
|
||||
SingleBuffer<uint8_t> out_buffer;
|
||||
ADTSParser parser;
|
||||
AudioDecoder *p_dec = nullptr;
|
||||
|
||||
void parseBuffer() {
|
||||
TRACED();
|
||||
|
||||
// Need at least 7 bytes for a valid ADTS header
|
||||
while (true) {
|
||||
if (buffer.available() <= 5) return;
|
||||
// Needs to contain sync word
|
||||
int syncPos = parser.findSyncWord(buffer.data(), buffer.available());
|
||||
if (syncPos < 0) {
|
||||
return;
|
||||
}
|
||||
// buffer needs to start with sync word
|
||||
if (syncPos > 0) {
|
||||
buffer.clearArray(syncPos);
|
||||
LOGI("Cleared %d bytes", syncPos);
|
||||
}
|
||||
// assert(parser.findSyncWord(buffer.data(), buffer.available()) == 0);
|
||||
// Try to parse the header
|
||||
if (parser.parse(buffer.data())) {
|
||||
// Get the frame length which includes the header
|
||||
uint16_t frameLength = parser.getFrameLength();
|
||||
if (frameLength > buffer.available()) {
|
||||
// not enough data
|
||||
return;
|
||||
}
|
||||
// write data to decoder
|
||||
if (out_buffer.size() > 0) {
|
||||
writeDataBuffered(buffer.data(), frameLength);
|
||||
} else {
|
||||
writeData(buffer.data(), frameLength);
|
||||
}
|
||||
buffer.clearArray(frameLength);
|
||||
} else {
|
||||
LOGI("Invalid ADTS header");
|
||||
// ignore data and move to next synch word
|
||||
int pos = parser.findSyncWord(buffer.data(), buffer.available(), 5);
|
||||
if (pos < 0) {
|
||||
// no more sync word found
|
||||
buffer.reset();
|
||||
} else {
|
||||
buffer.clearArray(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t writeDataBuffered(uint8_t *data, size_t size) {
|
||||
LOGI("writeDataBuffered: %d", (int)size);
|
||||
for (int j = 0; j < size; j++) {
|
||||
out_buffer.write(data[j]);
|
||||
if (out_buffer.isFull()) {
|
||||
writeData(out_buffer.data(), out_buffer.available());
|
||||
out_buffer.reset();
|
||||
}
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t writeData(uint8_t *data, size_t size) {
|
||||
LOGI("writeData: %d", (int)size);
|
||||
if (p_print) {
|
||||
size_t len = audio_tools::writeData<uint8_t>(p_print, data, size);
|
||||
assert(len == size);
|
||||
return (len == size);
|
||||
}
|
||||
if (p_dec) {
|
||||
LOGI("write to decoder: %d", (int)size);
|
||||
size_t len = audio_tools::writeDataT<uint8_t, AudioDecoder>(p_dec, data, size);
|
||||
assert(len == size);
|
||||
return (len == size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
383
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecALAC.h
Normal file
383
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecALAC.h
Normal file
@@ -0,0 +1,383 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "ALAC.h" // https://github.com/pschatzmann/codec-alac
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// Magic Cookie
|
||||
class ALACBinaryConfig {
|
||||
public:
|
||||
void setChannels(int inNumChannels) {
|
||||
int size = (inNumChannels > 2)
|
||||
? sizeof(ALACSpecificConfig) + kChannelAtomSize +
|
||||
sizeof(ALACAudioChannelLayout)
|
||||
: sizeof(ALACSpecificConfig);
|
||||
vector.resize(size);
|
||||
}
|
||||
|
||||
uint32_t size() { return vector.size(); }
|
||||
uint8_t* data() { return vector.data(); }
|
||||
|
||||
protected:
|
||||
Vector<uint8_t> vector;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief ALAC (Apple Lossless Audio Codec) decoder. This class depends on
|
||||
* https://github.com/pschatzmann/codec-alac. This implementaion is based on
|
||||
* https://github.com/macosforge/alac
|
||||
* @note Please note that this codec usually needs a container (usually MP4):
|
||||
* The write() method expects a complete frame to be written!
|
||||
* The decoder also expects to get the config from the encoder, however we have
|
||||
* some fallback functionality that uses the AudioInfo and the frame size
|
||||
* defined in the constructor.
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
*/
|
||||
class DecoderALAC : public AudioDecoder {
|
||||
public:
|
||||
/// Default constructor: you can define your own optimized frame size
|
||||
DecoderALAC(int frameSize = kALACDefaultFrameSize) {
|
||||
// this is used when setCodecConfig() is not called with encoder info
|
||||
setFrameSize(frameSize);
|
||||
//setDefaultConfig();
|
||||
}
|
||||
|
||||
// define ALACSpecificConfig
|
||||
bool setCodecConfig(ALACSpecificConfig config) {
|
||||
return setCodecConfig((uint8_t*)&config, sizeof(config));
|
||||
}
|
||||
|
||||
/// write Magic Cookie (ALACSpecificConfig)
|
||||
bool setCodecConfig(ALACBinaryConfig cfg) {
|
||||
size_t result = setCodecConfig(cfg.data(), cfg.size());
|
||||
is_init = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
/// write Magic Cookie (ALACSpecificConfig)
|
||||
bool setCodecConfig(const uint8_t* data, size_t len) override {
|
||||
LOGI("DecoderALAC::setCodecConfig: %d", (int)len);
|
||||
// Call Init() to set up the decoder
|
||||
int32_t rc = dec.Init((void*)data, len);
|
||||
if (rc != 0) {
|
||||
LOGE("Init failed");
|
||||
return false;
|
||||
}
|
||||
LOGI("ALAC Decoder Setup - SR: %d, Channels: %d, Bits: %d, Frame Size: %d",
|
||||
(int)dec.mConfig.sampleRate, (int)dec.mConfig.numChannels,
|
||||
(int)dec.mConfig.bitDepth, (int)dec.mConfig.frameLength);
|
||||
AudioInfo tmp;
|
||||
tmp.bits_per_sample = dec.mConfig.bitDepth;
|
||||
tmp.channels = dec.mConfig.numChannels;
|
||||
tmp.sample_rate = dec.mConfig.sampleRate;
|
||||
setAudioInfo(tmp);
|
||||
is_init = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Update the global decoder info
|
||||
void setAudioInfo(AudioInfo from) override {
|
||||
AudioDecoder::setAudioInfo(from);
|
||||
dec.mConfig.sampleRate = from.sample_rate;
|
||||
dec.mConfig.numChannels = from.channels;
|
||||
dec.mConfig.bitDepth = from.bits_per_sample;
|
||||
}
|
||||
|
||||
|
||||
/// we expect the write is called for a complete frame!
|
||||
size_t write(const uint8_t* encodedFrame, size_t encodedLen) override {
|
||||
LOGD("DecoderALAC::write: %d", (int)encodedLen);
|
||||
// Make sure we have a config: we can't do this in begin because the setConfig()
|
||||
// might be called after begin()
|
||||
if (!is_init) setDefaultConfig();
|
||||
|
||||
// Make sure we have the output buffer set up
|
||||
if (result_buffer.size() != outputBufferSize()) {
|
||||
result_buffer.resize(outputBufferSize());
|
||||
}
|
||||
|
||||
// Init bit buffer
|
||||
BitBufferInit(&bits, (uint8_t*)encodedFrame, encodedLen);
|
||||
|
||||
// Decode
|
||||
uint32_t outNumSamples = 0;
|
||||
int32_t status =
|
||||
dec.Decode(&bits, result_buffer.data(), dec.mConfig.frameLength,
|
||||
dec.mConfig.numChannels, &outNumSamples);
|
||||
|
||||
if (status != 0) {
|
||||
LOGE("Decode failed with error: %d", status);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Process result
|
||||
size_t outputSize =
|
||||
outNumSamples * dec.mConfig.numChannels * dec.mConfig.bitDepth / 8;
|
||||
LOGI("DecoderALAC::write-pcm: %d", (int)outputSize);
|
||||
|
||||
// Output the result in chunks of 1k
|
||||
int open = outputSize;
|
||||
int processed = 0;
|
||||
while (open > 0) {
|
||||
int writeSize = MIN(1024, open);
|
||||
size_t written =
|
||||
p_print->write(result_buffer.data() + processed, writeSize);
|
||||
if (writeSize != written) {
|
||||
LOGE("write error: %d -> %d", (int)outputSize, (int)written);
|
||||
}
|
||||
open -= written;
|
||||
processed += written;
|
||||
}
|
||||
return encodedLen;
|
||||
}
|
||||
|
||||
operator bool() { return true; }
|
||||
|
||||
/// Set the default frame size: this will be overwritten if you call
|
||||
/// setCodecConfig()
|
||||
void setFrameSize(int frames) { dec.mConfig.frameLength = frames; }
|
||||
|
||||
/// Provides the actual frame size
|
||||
int frameSize() { return dec.mConfig.frameLength; }
|
||||
|
||||
protected:
|
||||
ALACDecoder dec;
|
||||
Vector<uint8_t> result_buffer;
|
||||
bool is_init = false;
|
||||
struct BitBuffer bits;
|
||||
|
||||
void setDefaultConfig() {
|
||||
// LOGW("Setting up default ALAC config")
|
||||
AudioInfo info = audioInfo();
|
||||
ALACSpecificConfig tmp;
|
||||
// Essential parameters for ALAC compression
|
||||
tmp.frameLength = frameSize();
|
||||
tmp.compatibleVersion = 0;
|
||||
tmp.bitDepth = info.bits_per_sample;
|
||||
tmp.pb = 40; // Rice parameter limit
|
||||
tmp.mb = 10; // Maximum prefix length for Rice coding
|
||||
tmp.kb = 14; // History multiplier
|
||||
tmp.numChannels = info.channels;
|
||||
tmp.maxRun = 255; // Maximum run length supported
|
||||
tmp.avgBitRate = 0;
|
||||
|
||||
tmp.sampleRate = info.sample_rate;
|
||||
|
||||
// Calculate max frame bytes - must account for:
|
||||
// 1. Uncompressed frame size
|
||||
// 2. ALAC frame headers
|
||||
// 3. Potential compression inefficiency
|
||||
uint32_t bytesPerSample = info.bits_per_sample / 8;
|
||||
uint32_t uncompressedFrameSize =
|
||||
frameSize() * info.channels * bytesPerSample;
|
||||
|
||||
// Add safety margins:
|
||||
// - ALAC header (~50 bytes)
|
||||
// - Worst case compression overhead (50%)
|
||||
// - Alignment padding (64 bytes)
|
||||
tmp.maxFrameBytes =
|
||||
uncompressedFrameSize + (uncompressedFrameSize / 2) + 64 + 50;
|
||||
|
||||
convertToNetworkFormat(tmp);
|
||||
setCodecConfig(tmp);
|
||||
}
|
||||
|
||||
/// Calculate the output buffer size based on the current configuration
|
||||
int outputBufferSize() {
|
||||
return dec.mConfig.frameLength * dec.mConfig.numChannels *
|
||||
dec.mConfig.bitDepth / 8;
|
||||
}
|
||||
|
||||
/// Convert to big endian so that we can use it in Init()
|
||||
void convertToNetworkFormat(ALACSpecificConfig& config) {
|
||||
config.frameLength = Swap32NtoB(config.frameLength);
|
||||
config.maxRun = Swap16NtoB((uint16_t)config.maxRun);
|
||||
config.maxFrameBytes = Swap32NtoB(config.maxFrameBytes);
|
||||
config.avgBitRate = Swap32NtoB(config.avgBitRate);
|
||||
config.sampleRate = Swap32NtoB(config.sampleRate);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief ALAC (Apple Lossless Audio Codec) encoder. This class is responsible
|
||||
* for encoding audio data into ALAC format.
|
||||
* The implementaion is based on https://github.com/macosforge/alac
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
*/
|
||||
class EncoderALAC : public AudioEncoder {
|
||||
public:
|
||||
/// Default constructor: you can define your own optimized frame size
|
||||
EncoderALAC(int frameSize = kALACDefaultFrameSize) {
|
||||
setFrameSize(frameSize);
|
||||
}
|
||||
void setOutput(Print& out_stream) override { p_print = &out_stream; };
|
||||
|
||||
bool begin() override {
|
||||
if (p_print == nullptr) {
|
||||
LOGE("No output stream set");
|
||||
return false;
|
||||
}
|
||||
// define input format
|
||||
input_format = getInputFormat();
|
||||
out_format = getOutputFormat();
|
||||
|
||||
// Setup Encoder
|
||||
enc.SetFrameSize(frame_size);
|
||||
int rc = enc.InitializeEncoder(out_format);
|
||||
|
||||
// Calculate exact buffer sizes based on frame settings
|
||||
uint32_t bytesPerSample = info.bits_per_sample / 8;
|
||||
uint32_t inputBufferSize = frame_size * info.channels * bytesPerSample;
|
||||
// Calculate output buffer size
|
||||
uint32_t outputBufferSize = inputBufferSize * 2; // Ensure enough space
|
||||
|
||||
LOGI(
|
||||
"ALAC Encoder: frame_size=%d, inputBuf=%d, outputBuf=%d, channels=%d, "
|
||||
"bits=%d",
|
||||
frame_size, inputBufferSize, outputBufferSize, info.channels,
|
||||
info.bits_per_sample);
|
||||
|
||||
in_buffer.resize(inputBufferSize);
|
||||
out_buffer.resize(outputBufferSize);
|
||||
is_started = rc == 0;
|
||||
return is_started;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
enc.Finish();
|
||||
is_started = false;
|
||||
}
|
||||
|
||||
/// Encode the audio samples into ALAC format
|
||||
size_t write(const uint8_t* data, size_t len) override {
|
||||
if (!is_started) return 0;
|
||||
LOGD("EncoderALAC::write: %d", (int)len);
|
||||
for (int j = 0; j < len; j++) {
|
||||
in_buffer.write(data[j]);
|
||||
if (in_buffer.isFull()) {
|
||||
// provide available encoded data length
|
||||
int32_t ioNumBytes = in_buffer.size();
|
||||
int rc = enc.Encode(input_format, out_format, (uint8_t*)in_buffer.data(),
|
||||
out_buffer.data(), &ioNumBytes);
|
||||
// Output encoded data
|
||||
size_t written = p_print->write(out_buffer.data(), ioNumBytes);
|
||||
if (ioNumBytes != written) {
|
||||
LOGE("write error: %d -> %d", (int)ioNumBytes, (int)written);
|
||||
}
|
||||
in_buffer.reset();
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Provide the configuration of the encoder
|
||||
ALACSpecificConfig config() {
|
||||
enc.GetConfig(cfg);
|
||||
return cfg;
|
||||
}
|
||||
|
||||
/// Provide the magic coookie for the decoder
|
||||
ALACBinaryConfig& binaryConfig() {
|
||||
bin.setChannels(info.channels);
|
||||
uint32_t size = bin.size();
|
||||
enc.GetMagicCookie(bin.data(), &size);
|
||||
return bin;
|
||||
}
|
||||
|
||||
/// Check if the encoder is ready to encode
|
||||
operator bool() { return is_started && p_print != nullptr; }
|
||||
|
||||
/// Mime type: returns audio/alac
|
||||
const char* mime() override { return "audio/alac"; }
|
||||
|
||||
/// Defines if the encoder should use fast mode
|
||||
void setFastMode(bool fast) {
|
||||
enc.SetFastMode(fast);
|
||||
}
|
||||
|
||||
/// Defines the frame size for the decoder: default is 4096 frames
|
||||
void setFrameSize(int frames) {
|
||||
if (is_started) {
|
||||
LOGE("Can't change frame size on started encoder")
|
||||
return;
|
||||
}
|
||||
frame_size = frames;
|
||||
}
|
||||
|
||||
/// Determins the actually defined number of frames
|
||||
int frameSize() { return frame_size; }
|
||||
|
||||
protected:
|
||||
int frame_size = kALACDefaultFrameSize;
|
||||
ALACEncoder enc;
|
||||
SingleBuffer<uint8_t> in_buffer;
|
||||
Vector<uint8_t> out_buffer;
|
||||
AudioFormatDescription input_format;
|
||||
AudioFormatDescription out_format;
|
||||
ALACSpecificConfig cfg;
|
||||
ALACBinaryConfig bin;
|
||||
Print* p_print = nullptr;
|
||||
bool is_started = false;
|
||||
|
||||
AudioFormatDescription getInputFormat() {
|
||||
AudioFormatDescription result;
|
||||
memset(&result, 0, sizeof(AudioFormatDescription));
|
||||
result.mSampleRate = info.sample_rate;
|
||||
result.mFormatID = kALACFormatLinearPCM;
|
||||
result.mFormatFlags =
|
||||
kALACFormatFlagIsSignedInteger |
|
||||
kALACFormatFlagIsPacked; // Native endian, signed integer
|
||||
result.mBytesPerPacket = info.channels * (info.bits_per_sample / 8);
|
||||
result.mFramesPerPacket = 1;
|
||||
result.mBytesPerFrame = info.channels * (info.bits_per_sample / 8);
|
||||
result.mChannelsPerFrame = info.channels;
|
||||
result.mBitsPerChannel = info.bits_per_sample;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
AudioFormatDescription getOutputFormat() {
|
||||
AudioFormatDescription result;
|
||||
memset(&result, 0, sizeof(AudioFormatDescription));
|
||||
result.mSampleRate = info.sample_rate;
|
||||
result.mFormatID = kALACCodecFormat;
|
||||
result.mFormatFlags = getOutputFormatFlags(info.bits_per_sample); // or 0 ?
|
||||
result.mBytesPerPacket = 0; // Variable for compressed format
|
||||
result.mFramesPerPacket = frame_size; // Common ALAC frame size
|
||||
result.mBytesPerFrame = 0; // Variable for compressed format
|
||||
result.mChannelsPerFrame = info.channels;
|
||||
result.mBitsPerChannel = info.bits_per_sample;
|
||||
return result;
|
||||
}
|
||||
|
||||
// Adapted from CoreAudioTypes.h
|
||||
enum {
|
||||
kFormatFlag_16BitSourceData = 1,
|
||||
kFormatFlag_20BitSourceData = 2,
|
||||
kFormatFlag_24BitSourceData = 3,
|
||||
kFormatFlag_32BitSourceData = 4
|
||||
};
|
||||
|
||||
uint32_t getOutputFormatFlags(uint32_t bits) {
|
||||
switch (bits) {
|
||||
case 16:
|
||||
return kFormatFlag_16BitSourceData;
|
||||
case 20:
|
||||
return kFormatFlag_20BitSourceData;
|
||||
case 24:
|
||||
return kFormatFlag_24BitSourceData;
|
||||
case 32:
|
||||
return kFormatFlag_32BitSourceData;
|
||||
break;
|
||||
default:
|
||||
LOGE("Unsupported bit depth: %d", bits);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
174
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAMRNB.h
Normal file
174
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAMRNB.h
Normal file
@@ -0,0 +1,174 @@
|
||||
#pragma once
|
||||
#include "AMRNB.h" // https://github.com/pschatzmann/codec-amr
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief AMR Narrowband Decoder
|
||||
* See https://github.com/pschatzmann/codec-amr
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class AMRNBDecoder : public AudioDecoder {
|
||||
public:
|
||||
/// Default Constructor with valid mode values:
|
||||
/// NB_475,NB_515,NB_59,NB_67,NB_74,NB_795,NB_102,NB_122 (e.g.
|
||||
/// AMRNB::Mode::NB_475)
|
||||
AMRNBDecoder(AMRNB::Mode mode) {
|
||||
setMode(mode);
|
||||
info.channels = 1;
|
||||
info.sample_rate = 8000;
|
||||
}
|
||||
|
||||
~AMRNBDecoder() override = default;
|
||||
|
||||
void setMode(AMRNB::Mode mode) {
|
||||
this->mode = mode;
|
||||
amr.setMode(mode);
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
notifyAudioChange(audioInfo());
|
||||
buffer.resize(amr.getEncodedFrameSizeBytes());
|
||||
return getOutput() != nullptr;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo from) {
|
||||
if (from.bits_per_sample != 16) {
|
||||
LOGE("Invalid bits per sample: %d", from.bits_per_sample);
|
||||
}
|
||||
if (from.sample_rate != 8000) {
|
||||
LOGE("Invalid sample rate: %d", from.sample_rate);
|
||||
}
|
||||
if (from.channels != 1) {
|
||||
LOGE("Invalid channels: %d", from.channels);
|
||||
}
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
buffer.write(data[j]);
|
||||
if (buffer.isFull()) {
|
||||
int result_samples = amr.getFrameSizeSamples();
|
||||
int16_t result[result_samples];
|
||||
int size =
|
||||
amr.decode(buffer.data(), buffer.size(), result, result_samples);
|
||||
if (size > 0) {
|
||||
if (getOutput() != nullptr) {
|
||||
getOutput()->write((uint8_t *)result, size * sizeof(int16_t));
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Provides the block size (size of encoded frame)
|
||||
int blockSize() {
|
||||
amr.setMode(mode);
|
||||
return amr.getEncodedFrameSizeBytes();
|
||||
}
|
||||
|
||||
/// Provides the frame size (size of decoded frame)
|
||||
int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }
|
||||
|
||||
operator bool() override { return getOutput() != nullptr; }
|
||||
|
||||
protected:
|
||||
AMRNB amr;
|
||||
AMRNB::Mode mode;
|
||||
SingleBuffer<uint8_t> buffer{0};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief AMR NB Encoder
|
||||
* See https://github.com/pschatzmann/codec-amr
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class AMRNBEncoder : public AudioEncoder {
|
||||
public:
|
||||
/// Default Constructor with valid mode values:
|
||||
/// NB_475,NB_515,NB_59,NB_67,NB_74,NB_795,NB_102,NB_122 (e.g.
|
||||
/// AMRNB::Mode::NB_475) AMRNBDecoder(AMRNB::Mode mode) {
|
||||
AMRNBEncoder(AMRNB::Mode mode) {
|
||||
setMode(mode);
|
||||
info.channels = 1;
|
||||
info.sample_rate = 8000;
|
||||
}
|
||||
|
||||
~AMRNBEncoder() override = default;
|
||||
|
||||
void setMode(AMRNB::Mode mode) {
|
||||
this->mode = mode;
|
||||
amr.setMode(mode);
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
buffer.resize(frameSize());
|
||||
return getOutput() != nullptr;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo from) {
|
||||
if (from.bits_per_sample != 16) {
|
||||
LOGE("Invalid bits per sample: %d", from.bits_per_sample);
|
||||
}
|
||||
if (from.sample_rate != 8000) {
|
||||
LOGE("Invalid sample rate: %d", from.sample_rate);
|
||||
}
|
||||
if (from.channels != 1) {
|
||||
LOGE("Invalid channels: %d", from.channels);
|
||||
}
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
buffer.write(data[j]);
|
||||
if (buffer.isFull()) {
|
||||
int result_bytes = blockSize();
|
||||
uint8_t result[result_bytes];
|
||||
int size =
|
||||
amr.encode((int16_t *)buffer.data(),
|
||||
buffer.size() / sizeof(int16_t), result, result_bytes);
|
||||
if (size > 0) {
|
||||
if (getOutput() != nullptr) {
|
||||
getOutput()->write(result, size);
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Provides the block size (size of encoded frame)
|
||||
int blockSize() {
|
||||
amr.setMode(mode);
|
||||
return amr.getEncodedFrameSizeBytes();
|
||||
}
|
||||
|
||||
/// Provides the frame size (size of decoded frame)
|
||||
int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }
|
||||
|
||||
const char *mime() { return "audio/amr"; }
|
||||
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
Print *getOutput() { return p_print; }
|
||||
|
||||
protected:
|
||||
AMRNB amr;
|
||||
AMRNB::Mode mode;
|
||||
SingleBuffer<uint8_t> buffer{0};
|
||||
Print *p_print = nullptr;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
169
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAMRWB.h
Normal file
169
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAMRWB.h
Normal file
@@ -0,0 +1,169 @@
|
||||
#pragma once
|
||||
#include "AMRWB.h" // https://github.com/pschatzmann/codec-amr
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief AMR Wideband Decoder
|
||||
* See https://github.com/pschatzmann/codec-amr
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class AMRWBDecoder : public AudioDecoder {
|
||||
public:
|
||||
/// Default constructor with valid mode values: WB_6_60,WB_8_85,WB_12_65,WB_14_25,
|
||||
/// WB_15_85,WB_18_25,WB_19_85,WB_23_05,WB_23_85 (e.g. AMRWB::Mode::WB_6_60)
|
||||
AMRWBDecoder(AMRWB::Mode mode) {
|
||||
setMode(mode);
|
||||
info.channels = 1;
|
||||
info.sample_rate = 16000;
|
||||
}
|
||||
|
||||
~AMRWBDecoder() override = default;
|
||||
|
||||
bool begin() {
|
||||
notifyAudioChange(audioInfo());
|
||||
buffer.resize(amr.getEncodedFrameSizeBytes());
|
||||
return getOutput() != nullptr;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo from) {
|
||||
if (from.bits_per_sample != 16) {
|
||||
LOGE("Invalid bits per sample: %d", from.bits_per_sample);
|
||||
}
|
||||
if (from.sample_rate != 8000) {
|
||||
LOGE("Invalid sample rate: %d", from.sample_rate);
|
||||
}
|
||||
if (from.channels != 1) {
|
||||
LOGE("Invalid channels: %d", from.channels);
|
||||
}
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
buffer.write(data[j]);
|
||||
if (buffer.isFull()) {
|
||||
int result_samples = amr.getFrameSizeSamples();
|
||||
int16_t result[result_samples];
|
||||
int size =
|
||||
amr.decode(buffer.data(), buffer.size(), result, result_samples);
|
||||
if (size > 0) {
|
||||
if (getOutput() != nullptr) {
|
||||
getOutput()->write((uint8_t *)result, size * sizeof(int16_t));
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Provides the block size (size of encoded frame)
|
||||
int blickSize() { return amr.getEncodedFrameSizeBytes(); }
|
||||
|
||||
/// Provides the frame size (size of decoded frame)
|
||||
int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }
|
||||
|
||||
void setMode(AMRWB::Mode mode) {
|
||||
this->mode = mode;
|
||||
amr.setMode(mode);
|
||||
}
|
||||
|
||||
operator bool() override { return getOutput() != nullptr; }
|
||||
|
||||
protected:
|
||||
AMRWB amr;
|
||||
AMRWB::Mode mode;
|
||||
SingleBuffer<uint8_t> buffer{0};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief AMR Wideband Encoder
|
||||
* See https://github.com/pschatzmann/codec-amr
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class AMRWBEncoder : public AudioEncoder {
|
||||
public:
|
||||
/// Default constructor with valid mode values: WB_6_60,WB_8_85,WB_12_65,WB_14_25,
|
||||
/// WB_15_85,WB_18_25,WB_19_85,WB_23_05,WB_23_85 (e.g. AMRWB::Mode::WB_6_60)
|
||||
AMRWBEncoder(AMRWB::Mode mode) {
|
||||
setMode(mode);
|
||||
info.channels = 1;
|
||||
info.sample_rate = 16000;
|
||||
}
|
||||
|
||||
~AMRWBEncoder() override = default;
|
||||
|
||||
void setMode(AMRWB::Mode mode) {
|
||||
this->mode = mode;
|
||||
amr.setMode(mode);
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
buffer.resize(frameSize());
|
||||
return getOutput() != nullptr;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo from) {
|
||||
if (from.bits_per_sample != 16) {
|
||||
LOGE("Invalid bits per sample: %d", from.bits_per_sample);
|
||||
}
|
||||
if (from.sample_rate != 8000) {
|
||||
LOGE("Invalid sample rate: %d", from.sample_rate);
|
||||
}
|
||||
if (from.channels != 1) {
|
||||
LOGE("Invalid channels: %d", from.channels);
|
||||
}
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
buffer.write(data[j]);
|
||||
if (buffer.isFull()) {
|
||||
int result_bytes = blockSize();
|
||||
uint8_t result[result_bytes];
|
||||
int size =
|
||||
amr.encode((int16_t *)buffer.data(),
|
||||
buffer.size() / sizeof(int16_t), result, result_bytes);
|
||||
if (size > 0) {
|
||||
if (getOutput() != nullptr) {
|
||||
getOutput()->write(result, size);
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Provides the block size (size of encoded frame)
|
||||
int blockSize() {
|
||||
amr.setMode(mode);
|
||||
return amr.getEncodedFrameSizeBytes();
|
||||
}
|
||||
|
||||
/// Provides the frame size (size of decoded frame)
|
||||
int frameSize() { return amr.getFrameSizeSamples() * sizeof(int16_t); }
|
||||
|
||||
const char *mime() { return "audio/amr"; }
|
||||
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
Print *getOutput() { return p_print; }
|
||||
|
||||
protected:
|
||||
AMRWB amr;
|
||||
AMRWB::Mode mode;
|
||||
SingleBuffer<uint8_t> buffer{0};
|
||||
Print *p_print = nullptr;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
300
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAPTX.h
Normal file
300
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecAPTX.h
Normal file
@@ -0,0 +1,300 @@
|
||||
/**
|
||||
* @file CodecAptx.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief Codec for aptx using https://github.com/pschatzmann/libopenaptx
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "openaptx.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for OpenAptx. Depends on
|
||||
* https://github.com/pschatzmann/libopenaptx
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class APTXDecoder : public AudioDecoder {
|
||||
public:
|
||||
APTXDecoder(bool isHd = false) {
|
||||
is_hd = isHd;
|
||||
info.sample_rate = 44100;
|
||||
info.channels = 2;
|
||||
info.bits_per_sample = isHd ? 24 : 16;
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
ctx = aptx_init(is_hd);
|
||||
is_first_write = true;
|
||||
notifyAudioChange(info);
|
||||
return ctx != nullptr;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
bool dropped = aptx_decode_sync_finish(ctx);
|
||||
aptx_finish(ctx);
|
||||
ctx = nullptr;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return ctx != nullptr; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGI("write: %d", len);
|
||||
bool is_ok = true;
|
||||
size_t dropped;
|
||||
int synced;
|
||||
|
||||
if (is_first_write) {
|
||||
is_first_write = false;
|
||||
if (!checkPrefix(data, len)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
output_buffer.resize(len * 10);
|
||||
memset(output_buffer.data(), 0, output_buffer.size());
|
||||
processed = aptx_decode_sync(ctx, (const uint8_t *)data, len,
|
||||
output_buffer.data(), output_buffer.size(),
|
||||
&written, &synced, &dropped);
|
||||
|
||||
checkSync(synced, dropped, is_ok);
|
||||
|
||||
// If we have not decoded all supplied samples then decoding unrecoverable
|
||||
// failed
|
||||
if (processed != len) {
|
||||
LOGE("aptX decoding reqested: %d eff: %d", len, processed);
|
||||
is_ok = false;
|
||||
}
|
||||
|
||||
writeData(written, is_ok);
|
||||
|
||||
return is_ok ? len : 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
struct aptx_context *ctx = nullptr;
|
||||
Print *p_print = nullptr;
|
||||
bool is_first_write = true;
|
||||
Vector<uint8_t> output_buffer;
|
||||
bool is_hd;
|
||||
size_t processed;
|
||||
size_t written;
|
||||
bool syncing;
|
||||
|
||||
/// Converts the data to 16 bit and writes it to final output
|
||||
void writeData(size_t written, bool &is_ok) {
|
||||
if (written > 0) {
|
||||
int samples = written / 3;
|
||||
LOGI("written: %d", written);
|
||||
LOGI("samples: %d", samples);
|
||||
int24_t *p_int24 = (int24_t *)output_buffer.data();
|
||||
int16_t *p_int16 = (int16_t *)output_buffer.data();
|
||||
for (int j = 0; j < samples; j++) {
|
||||
p_int16[j] = p_int24[j].getAndScale16();
|
||||
}
|
||||
|
||||
if (p_print->write((uint8_t *)output_buffer.data(), samples * 2) !=
|
||||
samples * 2) {
|
||||
LOGE("aptX decoding failed to write decoded data");
|
||||
is_ok = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the syncronization
|
||||
void checkSync(bool synced, bool dropped, bool &is_ok) {
|
||||
/* Check all possible states of synced, syncing and dropped status */
|
||||
if (!synced) {
|
||||
if (!syncing) {
|
||||
LOGE("aptX decoding failed, synchronizing");
|
||||
syncing = true;
|
||||
is_ok = false;
|
||||
}
|
||||
if (dropped) {
|
||||
LOGE("aptX synchronization successful, dropped %lu byte%s",
|
||||
(unsigned long)dropped, (dropped != 1) ? "s" : "");
|
||||
syncing = false;
|
||||
is_ok = true;
|
||||
}
|
||||
if (!syncing) {
|
||||
LOGE("aptX decoding failed, synchronizing");
|
||||
syncing = true;
|
||||
is_ok = false;
|
||||
}
|
||||
} else {
|
||||
if (dropped) {
|
||||
if (!syncing) LOGE("aptX decoding failed, synchronizing");
|
||||
LOGE("aptX synchronization successful, dropped %lu byte%s",
|
||||
(unsigned long)dropped, (dropped != 1) ? "s" : "");
|
||||
syncing = false;
|
||||
is_ok = false;
|
||||
} else if (syncing) {
|
||||
LOGI("aptX synchronization successful");
|
||||
syncing = false;
|
||||
is_ok = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the prefix of the received data
|
||||
bool checkPrefix(const void *input_buffer, size_t length) {
|
||||
bool result = true;
|
||||
if (length >= 4 && memcmp(input_buffer, "\x4b\xbf\x4b\xbf", 4) == 0) {
|
||||
if (is_hd) {
|
||||
LOGE("aptX audio stream (not aptX HD)");
|
||||
result = false;
|
||||
}
|
||||
} else if (length >= 6 &&
|
||||
memcmp(input_buffer, "\x73\xbe\xff\x73\xbe\xff", 6) == 0) {
|
||||
if (!is_hd) {
|
||||
LOGE("aptX HD audio stream");
|
||||
result = false;
|
||||
}
|
||||
} else {
|
||||
if (length >= 4 && memcmp(input_buffer, "\x6b\xbf\x6b\xbf", 4) == 0) {
|
||||
LOGE("standard aptX audio stream - not supported");
|
||||
result = false;
|
||||
} else {
|
||||
LOGE("No aptX nor aptX HD audio stream");
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for OpenAptx - Depends on
|
||||
* https://github.com/pschatzmann/libopenaptx
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class APTXEncoder : public AudioEncoder {
|
||||
public:
|
||||
APTXEncoder(bool isHd = false) {
|
||||
is_hd = isHd;
|
||||
info.sample_rate = 44100;
|
||||
info.channels = 2;
|
||||
info.bits_per_sample = isHd ? 24 : 16;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
input_buffer.resize(4 * 2);
|
||||
output_buffer.resize(100 * (is_hd ? 6 : 4));
|
||||
|
||||
LOGI("input_buffer.size: %d", input_buffer.size());
|
||||
LOGI("output_buffer.size: %d", output_buffer.size());
|
||||
LOGI("is_hd: %s", is_hd ? "true" : "false");
|
||||
ctx = aptx_init(is_hd);
|
||||
return ctx!=nullptr;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
if (ctx != nullptr) {
|
||||
size_t output_written = 0;
|
||||
aptx_encode_finish(ctx, output_buffer.data(), output_buffer.size(),
|
||||
&output_written);
|
||||
if (output_written > 0) {
|
||||
// write result to final output
|
||||
int written = p_print->write((const uint8_t *)output_buffer.data(),
|
||||
output_written);
|
||||
if (written != output_written) {
|
||||
LOGE("write requested: %d eff: %d", output_written, written);
|
||||
}
|
||||
}
|
||||
aptx_finish(ctx);
|
||||
ctx = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
virtual const char *mime() { return "audio/aptx"; }
|
||||
|
||||
virtual void setAudioInfo(AudioInfo info) {
|
||||
AudioEncoder::setAudioInfo(info);
|
||||
switch (info.bits_per_sample) {
|
||||
case 16:
|
||||
is_hd = false;
|
||||
break;
|
||||
case 24:
|
||||
is_hd = true;
|
||||
break;
|
||||
default:
|
||||
LOGE("invalid bits_per_sample: %d", info.bits_per_sample);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return ctx != nullptr; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGI("write: %d", len);
|
||||
if (ctx == nullptr) return 0;
|
||||
size_t output_written = 0;
|
||||
|
||||
// process all bytes
|
||||
int16_t *in_ptr16 = (int16_t *)data;
|
||||
int in_samples = len / 2;
|
||||
for (int j = 0; j < in_samples; j++) {
|
||||
input_buffer[input_pos++].setAndScale16(in_ptr16[j]);
|
||||
|
||||
// if input_buffer is full we encode
|
||||
if (input_pos >= input_buffer.size()) {
|
||||
size_t result = aptx_encode(
|
||||
ctx, (const uint8_t *)input_buffer.data(), input_buffer.size() * 3,
|
||||
output_buffer.data() + output_pos,
|
||||
output_buffer.size() - output_pos, &output_written);
|
||||
|
||||
output_pos += output_written;
|
||||
|
||||
if (result != input_buffer.size() * 3) {
|
||||
LOGW("encode requested: %d, eff: %d", input_buffer.size() * 3,
|
||||
result);
|
||||
}
|
||||
|
||||
// if output buffer is full we write the result
|
||||
if (output_pos + output_pos >= output_buffer.size()) {
|
||||
int written =
|
||||
p_print->write((const uint8_t *)output_buffer.data(), output_pos);
|
||||
if (written != output_pos) {
|
||||
LOGE("write requested: %d eff: %d", output_pos, written);
|
||||
}
|
||||
// restart at beginning of output buffer
|
||||
output_pos = 0;
|
||||
}
|
||||
// restart at beginning of input buffer
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool is_hd;
|
||||
Vector<int24_t> input_buffer{4 * 2};
|
||||
Vector<uint8_t> output_buffer;
|
||||
int input_pos = 0;
|
||||
int output_pos = 0;
|
||||
Print *p_print = nullptr;
|
||||
struct aptx_context *ctx = nullptr;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
301
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecBase64.h
Normal file
301
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecBase64.h
Normal file
@@ -0,0 +1,301 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
|
||||
// Controls where newline characters are emitted/expected in the Base64 stream.
// NOTE(review): "Base46" looks like a typo for "Base64", but the name is part
// of the public API, so it is kept unchanged.
enum Base46Logic { NoCR, CRforFrame, CRforWrite };

// Standard Base64 alphabet (RFC 4648 table) used by the encoder
static char encoding_table[] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'};
// Number of padding '=' characters for each (input_length % 3) value
static int mod_table[] = {0, 2, 1};

// Reverse lookup: maps an ASCII character to its 6-bit Base64 value.
// Unmapped characters decode to 0. The duplicate 62/63 entries presumably
// also accept URL-safe alphabet variants ('-', '_') — verify before relying
// on strict input validation.
static const int B64index[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  62, 63, 62, 62, 63, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61, 0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,
    7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 0,  0,  0,  0,  63, 0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
|
||||
|
||||
/**
|
||||
* @brief DecoderBase64 - Converts a Base64 encoded Stream into the original
|
||||
* data stream. Decoding only gives a valid result if we start at a limit of 4
|
||||
* bytes. We therefore use by default a newline to determine a valid start
|
||||
* boundary.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderBase64 : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor for a new DecoderBase64 object
|
||||
*/
|
||||
|
||||
DecoderBase64() { TRACED(); }
|
||||
|
||||
/**
|
||||
* @brief Constructor for a new DecoderBase64 object
|
||||
*
|
||||
* @param out_buffeream Output Stream to which we write the decoded result
|
||||
*/
|
||||
DecoderBase64(Print &out) {
|
||||
TRACED();
|
||||
setOutput(out);
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out) override { p_print = &out; }
|
||||
|
||||
/// We expect new lines to delimit the individual lines
|
||||
void setNewLine(Base46Logic logic) { newline_logic = logic; }
|
||||
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
is_valid = newline_logic == NoCR;
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACED();
|
||||
// deconde ramaining bytes
|
||||
int len = buffer.available();
|
||||
uint8_t tmp[len];
|
||||
buffer.readArray(tmp, len);
|
||||
decodeLine(tmp, len);
|
||||
|
||||
active = false;
|
||||
buffer.resize(0);
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
if (p_print == nullptr) return 0;
|
||||
TRACED();
|
||||
addToBuffer((uint8_t *)data, len);
|
||||
int decode_size = 4; // maybe we should increase this ?
|
||||
while (buffer.available() >= decode_size) {
|
||||
uint8_t tmp[decode_size];
|
||||
buffer.readArray(tmp, decode_size);
|
||||
decodeLine(tmp, decode_size);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
operator bool() override { return active; }
|
||||
|
||||
protected:
|
||||
bool active = false;
|
||||
bool is_valid = false;
|
||||
Base46Logic newline_logic = CRforFrame;
|
||||
Vector<uint8_t> result;
|
||||
RingBuffer<uint8_t> buffer{1500};
|
||||
AudioInfo info;
|
||||
|
||||
void decodeLine(uint8_t *data, size_t byteCount) {
|
||||
LOGD("decode: %d", (int)byteCount);
|
||||
int len = byteCount;
|
||||
|
||||
unsigned char *p = (unsigned char *)data;
|
||||
int pad = len > 0 && (len % 4 || p[len - 1] == '=');
|
||||
const size_t L = ((len + 3) / 4 - pad) * 4;
|
||||
result.resize(L / 4 * 3 + pad);
|
||||
memset(result.data(), 0, result.size());
|
||||
|
||||
for (size_t i = 0, j = 0; i < L; i += 4) {
|
||||
int32_t n = static_cast<int32_t>(B64index[p[i]]) << 18 | B64index[p[i + 1]] << 12 |
|
||||
B64index[p[i + 2]] << 6 | B64index[p[i + 3]];
|
||||
result[j++] = n >> 16;
|
||||
result[j++] = n >> 8 & 0xFF;
|
||||
result[j++] = n & 0xFF;
|
||||
}
|
||||
if (pad) {
|
||||
int32_t n = static_cast<int32_t>(B64index[p[L]]) << 18 | B64index[p[L + 1]] << 12;
|
||||
result[result.size() - 1] = n >> 16;
|
||||
|
||||
if (len > L + 2 && p[L + 2] != '=') {
|
||||
n |= B64index[p[L + 2]] << 6;
|
||||
result.push_back(n >> 8 & 0xFF);
|
||||
}
|
||||
}
|
||||
writeBlocking(p_print, result.data(), result.size());
|
||||
}
|
||||
|
||||
void addToBuffer(uint8_t *data, size_t len) {
|
||||
TRACED();
|
||||
if (buffer.size() < len) {
|
||||
buffer.resize(len);
|
||||
}
|
||||
// syncronize to find a valid start position
|
||||
int start = 0;
|
||||
if (!is_valid) {
|
||||
for (int j = 0; j < len; j++) {
|
||||
if (data[j] == '\n') {
|
||||
start = j;
|
||||
is_valid = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (is_valid) {
|
||||
// remove white space
|
||||
for (int j = start; j < len; j++) {
|
||||
if (!isspace(data[j])) {
|
||||
buffer.write(data[j]);
|
||||
} else if (data[j] == '\n') {
|
||||
int offset = buffer.available() % 4;
|
||||
if (offset > 0) {
|
||||
LOGW("Resync %d (-%d)...", buffer.available(), offset);
|
||||
uint8_t tmp[4];
|
||||
buffer.readArray(tmp, offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
LOGD("buffer: %d, is_valid: %s", buffer.available(),
|
||||
is_valid ? "true" : "false");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
 * @brief EncoderBase64 - Encodes the input data into a Base64 string.
|
||||
* By default each audio frame is followed by a new line, so that we can
|
||||
* easily resynchronize the reading of a data stream. The generation
|
||||
* of the new line can be configured with the setNewLine() method.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncoderBase64 : public AudioEncoder {
|
||||
public:
|
||||
// Empty Conbuffeructor - the output buffeream must be provided with begin()
|
||||
EncoderBase64() {}
|
||||
|
||||
// Conbuffeructor providing the output buffeream
|
||||
EncoderBase64(Print &out) { p_print = &out; }
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_buffeream) override {
|
||||
p_print = &out_buffeream;
|
||||
}
|
||||
|
||||
/// Provides "text/base64"
|
||||
const char *mime() override { return "text/base64"; }
|
||||
|
||||
/// We add a new line after each write
|
||||
void setNewLine(Base46Logic flag) { newline_logic = flag; }
|
||||
|
||||
/// starts the processing using the actual RAWAudioInfo
|
||||
virtual bool begin() override {
|
||||
is_open = true;
|
||||
frame_size = info.bits_per_sample * info.channels / 8;
|
||||
if (newline_logic != NoCR) {
|
||||
if (frame_size==0){
|
||||
LOGW("AudioInfo not defined");
|
||||
// assume frame size
|
||||
frame_size = 4;
|
||||
}
|
||||
p_print->write('\n');
|
||||
flush();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override { is_open = false; }
|
||||
|
||||
/// Writes PCM data to be encoded as RAW
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("EncoderBase64::write: %d", (int)len);
|
||||
|
||||
switch (newline_logic) {
|
||||
case NoCR:
|
||||
case CRforWrite:
|
||||
encodeLine(data, len);
|
||||
break;
|
||||
case CRforFrame: {
|
||||
int frames = len / frame_size;
|
||||
int open = len;
|
||||
int offset = 0;
|
||||
while (open > 0) {
|
||||
int write_size = min(frame_size, open);
|
||||
encodeLine(data + offset, write_size);
|
||||
open -= write_size;
|
||||
offset += write_size;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
operator bool() override { return is_open; }
|
||||
|
||||
bool isOpen() { return is_open; }
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
bool is_open;
|
||||
Base46Logic newline_logic = CRforFrame;
|
||||
Vector<uint8_t> ret;
|
||||
AudioInfo info;
|
||||
int frame_size;
|
||||
|
||||
void flush() {
|
||||
#if defined(ESP32)
|
||||
# if ESP_IDF_VERSION > ESP_IDF_VERSION_VAL(3, 3, 5)
|
||||
p_print->flush();
|
||||
# endif
|
||||
#else
|
||||
p_print->flush();
|
||||
#endif
|
||||
}
|
||||
|
||||
void encodeLine(const uint8_t *data, size_t input_length) {
|
||||
LOGD("EncoderBase64::encodeLine: %d", (int)input_length);
|
||||
int output_length = 4 * ((input_length + 2) / 3);
|
||||
if (ret.size() < output_length + 1) {
|
||||
ret.resize(output_length + 1);
|
||||
}
|
||||
|
||||
for (int i = 0, j = 0; i < input_length;) {
|
||||
uint32_t octet_a = i < input_length ? (unsigned char)data[i++] : 0;
|
||||
uint32_t octet_b = i < input_length ? (unsigned char)data[i++] : 0;
|
||||
uint32_t octet_c = i < input_length ? (unsigned char)data[i++] : 0;
|
||||
|
||||
uint32_t triple = (octet_a << 0x10) + (octet_b << 0x08) + octet_c;
|
||||
|
||||
ret[j++] = encoding_table[(triple >> 3 * 6) & 0x3F];
|
||||
ret[j++] = encoding_table[(triple >> 2 * 6) & 0x3F];
|
||||
ret[j++] = encoding_table[(triple >> 1 * 6) & 0x3F];
|
||||
ret[j++] = encoding_table[(triple >> 0 * 6) & 0x3F];
|
||||
}
|
||||
|
||||
for (int i = 0; i < mod_table[input_length % 3]; i++)
|
||||
ret[output_length - 1 - i] = '=';
|
||||
|
||||
// add a new line to the end
|
||||
if (newline_logic != NoCR) {
|
||||
ret[output_length] = '\n';
|
||||
output_length++;
|
||||
}
|
||||
|
||||
writeBlocking(p_print, ret.data(), output_length);
|
||||
flush();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
133
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecBasic.h
Normal file
133
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecBasic.h
Normal file
@@ -0,0 +1,133 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/CodecG7xx.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief DecoderBasic - supports mime type audio/basic
|
||||
* Requires https://github.com/pschatzmann/arduino-libg7xx
|
||||
* The content of the "audio/basic" subtype is single channel audio
|
||||
* encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderBasic : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new DecoderBasic object
|
||||
*/
|
||||
|
||||
DecoderBasic() { TRACED(); }
|
||||
|
||||
/**
|
||||
* @brief Construct a new DecoderBasic object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
*/
|
||||
DecoderBasic(Print &out_stream, bool active = true) {
|
||||
TRACED();
|
||||
setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new DecoderBasic object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
* @param bi Object that will be notified about the Audio Formt (Changes)
|
||||
*/
|
||||
|
||||
DecoderBasic(Print &out_stream, AudioInfoSupport &bi) {
|
||||
TRACED();
|
||||
setOutput(out_stream);
|
||||
addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override {
|
||||
decoder.setOutput(out_stream);
|
||||
}
|
||||
|
||||
void addNotifyAudioChange(AudioInfoSupport &bi) override {
|
||||
decoder.addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
AudioInfo audioInfo() override { return decoder.audioInfo(); }
|
||||
|
||||
bool begin(AudioInfo info) {
|
||||
decoder.setAudioInfo(info);
|
||||
return decoder.begin();
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
return decoder.begin();
|
||||
}
|
||||
|
||||
void end() override { decoder.end(); }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
return decoder.write((uint8_t *)data, len);
|
||||
}
|
||||
|
||||
virtual operator bool() override { return decoder; }
|
||||
|
||||
protected:
|
||||
G711_ULAWDecoder decoder;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief EncoderBasic - supports mime type audio/basic.
|
||||
* The content of the "audio/basic" subtype is single channel audio
|
||||
* encoded using 8bit ISDN mu-law [PCM] at a sample rate of 8000 Hz.
|
||||
* Requires https://github.com/pschatzmann/arduino-libg7xx
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncoderBasic : public AudioEncoder {
|
||||
public:
|
||||
// Empty Constructor - the output stream must be provided with begin()
|
||||
EncoderBasic() {}
|
||||
|
||||
// Constructor providing the output stream
|
||||
EncoderBasic(Print &out) { setOutput(out); }
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out) override { encoder.setOutput(out); }
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char *mime() override { return "audio/basic"; }
|
||||
|
||||
/// We actually do nothing with this
|
||||
virtual void setAudioInfo(AudioInfo from) override {
|
||||
AudioEncoder::setAudioInfo(from);
|
||||
encoder.setAudioInfo(from);
|
||||
}
|
||||
|
||||
/// starts the processing using the actual RAWAudioInfo
|
||||
bool begin() override { return encoder.begin(); }
|
||||
|
||||
/// stops the processing
|
||||
void end() override { encoder.end(); }
|
||||
|
||||
/// Writes PCM data to be encoded as RAW
|
||||
virtual size_t write(const uint8_t *in_ptr, size_t in_size) override {
|
||||
return encoder.write((uint8_t *)in_ptr, in_size);
|
||||
}
|
||||
|
||||
operator bool() override {
|
||||
return encoder;
|
||||
}
|
||||
|
||||
bool isOpen() { return encoder; }
|
||||
|
||||
protected:
|
||||
G711_ULAWEncoder encoder;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,99 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/AudioEncoded.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief CodecChain - allows to chain multiple decoders and encoders together
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class CodecChain : public AudioDecoder, AudioEncoder {
|
||||
public:
|
||||
CodecChain() = default;
|
||||
CodecChain(AudioDecoder &decoder) { addDecoder(decoder); }
|
||||
CodecChain(AudioEncoder &encoder) { addEncoder(encoder); }
|
||||
|
||||
/// Adds a decoder to the chain
|
||||
void addDecoder(AudioDecoder &decoder) {
|
||||
EncodedAudioStream stream;
|
||||
stream.setDecoder(&decoder);
|
||||
streams.push_back(stream);
|
||||
if (streams.size() > 1) {
|
||||
streams[streams.size() - 2].setOutput(streams[streams.size() - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds an encoder to the chain
|
||||
void addEncoder(AudioEncoder &encoder) {
|
||||
EncodedAudioStream stream;
|
||||
stream.setEncoder(&encoder);
|
||||
streams.push_back(stream);
|
||||
if (streams.size() > 1) {
|
||||
streams[streams.size() - 2].setOutput(streams[streams.size() - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
void setOutput(Print &out_stream) override {
|
||||
p_print = &out_stream;
|
||||
if (streams.size() > 0) streams[streams.size() - 1].setOutput(out_stream);
|
||||
}
|
||||
|
||||
void setOutput(AudioStream &out_stream) override {
|
||||
p_print = &out_stream;
|
||||
if (streams.size() > 0) streams[streams.size() - 1].setOutput(out_stream);
|
||||
}
|
||||
|
||||
void setOutput(AudioOutput &out_stream) override {
|
||||
p_print = &out_stream;
|
||||
if (streams.size() > 0) streams[streams.size() - 1].setOutput(out_stream);
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo from) override {
|
||||
AudioDecoder::setAudioInfo(from);
|
||||
for (auto &stream : streams) {
|
||||
stream.setAudioInfo(from);
|
||||
}
|
||||
}
|
||||
|
||||
void addNotifyAudioChange(AudioInfoSupport &bi) override {
|
||||
for (auto &stream : streams) {
|
||||
stream.addNotifyAudioChange(bi);
|
||||
}
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
if (streams.size() == 0) return 0;
|
||||
return streams[0].write(data, len);
|
||||
}
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
bool begin() {
|
||||
is_active = true;
|
||||
for (auto &stream : streams) {
|
||||
stream.begin();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
is_active = false;
|
||||
for (auto &stream : streams) {
|
||||
stream.end();
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns nullptr
|
||||
const char *mime() { return nullptr; }
|
||||
|
||||
protected:
|
||||
Vector<EncodedAudioStream> streams;
|
||||
bool is_active = false;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
301
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecCodec2.h
Normal file
301
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecCodec2.h
Normal file
@@ -0,0 +1,301 @@
|
||||
/**
|
||||
* @file CodecCodec2.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief Codec2 Codec using https://github.com/pschatzmann/arduino-codec2
|
||||
* The codec was developed by David Grant Rowe, with support and cooperation of
|
||||
* other researchers (e.g., Jean-Marc Valin from Opus). Codec 2 consists of
|
||||
* 3200, 2400, 1600, 1400, 1300, 1200, 700 and 450 bit/s codec modes. It
|
||||
* outperforms most other low-bitrate speech codecs. For example, it uses half
|
||||
* the bandwidth of Advanced Multi-Band Excitation to encode speech with similar
|
||||
* quality. The speech codec uses 16-bit PCM sampled audio, and outputs packed
|
||||
* digital bytes. When sent packed digital bytes, it outputs PCM sampled audio.
|
||||
* The audio sample rate is fixed at 8 kHz.
|
||||
*
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "codec2.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// Convert bits per sample to Codec2 mode
|
||||
int getCodec2Mode(int bits_per_second) {
|
||||
switch (bits_per_second) {
|
||||
case 3200:
|
||||
return CODEC2_MODE_3200;
|
||||
case 2400:
|
||||
return CODEC2_MODE_2400;
|
||||
case 1600:
|
||||
return CODEC2_MODE_1600;
|
||||
case 1400:
|
||||
return CODEC2_MODE_1400;
|
||||
case 1300:
|
||||
return CODEC2_MODE_1300;
|
||||
case 1200:
|
||||
return CODEC2_MODE_1200;
|
||||
case 700:
|
||||
return CODEC2_MODE_700C;
|
||||
case 450:
|
||||
return CODEC2_MODE_450;
|
||||
default:
|
||||
LOGE(
|
||||
"Unsupported sample rate: use 3200, 2400, 1600, 1400, 1300, 1200, "
|
||||
"700 or 450");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Decoder for Codec2. Depends on
|
||||
* https://github.com/pschatzmann/arduino-libcodec2.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class Codec2Decoder : public AudioDecoder {
|
||||
public:
|
||||
Codec2Decoder(int bps = 3200) {
|
||||
info.sample_rate = 8000;
|
||||
info.channels = 1;
|
||||
info.bits_per_sample = 16;
|
||||
setBitsPerSecond(bps);
|
||||
}
|
||||
/// sets bits per second: 3200, 2400, 1600, 1400, 1300, 1200, 700 and 450
|
||||
/// bit/s
|
||||
virtual void setBitsPerSecond(int bps) { bits_per_second = bps; }
|
||||
|
||||
int bitsPerSecond() { return bits_per_second; }
|
||||
|
||||
virtual bool begin() {
|
||||
TRACEI();
|
||||
|
||||
int mode = getCodec2Mode(bits_per_second);
|
||||
if (mode == -1) {
|
||||
LOGE("invalid bits_per_second")
|
||||
return false;
|
||||
}
|
||||
if (info.channels != 1) {
|
||||
LOGE("Only 1 channel supported")
|
||||
return false;
|
||||
}
|
||||
if (info.bits_per_sample != 16) {
|
||||
LOGE("Only 16 bps are supported")
|
||||
return false;
|
||||
}
|
||||
if (info.sample_rate != 8000) {
|
||||
LOGW("Sample rate should be 8000: %d", info.sample_rate);
|
||||
}
|
||||
|
||||
p_codec2 = codec2_create(mode);
|
||||
if (p_codec2 == nullptr) {
|
||||
LOGE("codec2_create");
|
||||
return false;
|
||||
}
|
||||
|
||||
result_buffer.resize(bytesCompressed());
|
||||
input_buffer.resize(bytesCompressed() );
|
||||
|
||||
assert(input_buffer.size()>0);
|
||||
assert(result_buffer.size()>0);
|
||||
|
||||
notifyAudioChange(info);
|
||||
LOGI("bytesCompressed:%d", bytesCompressed());
|
||||
LOGI("bytesUncompressed:%d", bytesUncompressed());
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
int bytesCompressed() {
|
||||
return p_codec2 != nullptr ? codec2_bytes_per_frame(p_codec2) : 0;
|
||||
}
|
||||
|
||||
int bytesUncompressed() {
|
||||
return p_codec2 != nullptr
|
||||
? codec2_samples_per_frame(p_codec2) * sizeof(int16_t)
|
||||
: 0;
|
||||
}
|
||||
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
codec2_destroy(p_codec2);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t *p_byte = (uint8_t *)data;
|
||||
for (int j = 0; j < len; j++) {
|
||||
processByte(p_byte[j]);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
struct CODEC2 *p_codec2;
|
||||
bool is_active = false;
|
||||
Vector<uint8_t> input_buffer;
|
||||
Vector<uint8_t> result_buffer;
|
||||
int input_pos = 0;
|
||||
int bits_per_second = 0;
|
||||
|
||||
/// Build decoding buffer and decode when frame is full
|
||||
void processByte(uint8_t byte) {
|
||||
// add byte to buffer
|
||||
input_buffer[input_pos++] = byte;
|
||||
|
||||
// decode if buffer is full
|
||||
if (input_pos >= input_buffer.size()) {
|
||||
codec2_decode(p_codec2, (short*)result_buffer.data(), input_buffer.data());
|
||||
int written = p_print->write((uint8_t *)result_buffer.data(), result_buffer.size());
|
||||
if (written != result_buffer.size()){
|
||||
LOGE("write: %d written: %d", result_buffer.size(), written);
|
||||
} else {
|
||||
LOGD("write: %d written: %d", result_buffer.size(), written);
|
||||
}
|
||||
delay(2);
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for Codec2 - Depends on
|
||||
* https://github.com/pschatzmann/arduino-libcodec2.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class Codec2Encoder : public AudioEncoder {
|
||||
public:
|
||||
Codec2Encoder(int bps = 3200) {
|
||||
info.sample_rate = 8000;
|
||||
info.channels = 1;
|
||||
info.bits_per_sample = 16;
|
||||
setBitsPerSecond(bps);
|
||||
}
|
||||
|
||||
/// sets bits per second: 3200, 2400, 1600, 1400, 1300, 1200, 700 and 450
|
||||
/// bit/s
|
||||
virtual void setBitsPerSecond(int bps) { bits_per_second = bps; }
|
||||
|
||||
int bitsPerSecond() { return bits_per_second; }
|
||||
|
||||
int bytesCompressed() {
|
||||
return p_codec2 != nullptr ? codec2_bytes_per_frame(p_codec2) : 0;
|
||||
}
|
||||
|
||||
int bytesUncompressed() {
|
||||
return p_codec2 != nullptr
|
||||
? codec2_samples_per_frame(p_codec2) * sizeof(int16_t)
|
||||
: 0;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
|
||||
int mode = getCodec2Mode(bits_per_second);
|
||||
if (mode == -1) {
|
||||
LOGE("invalid bits_per_second")
|
||||
return false;
|
||||
}
|
||||
if (info.channels != 1) {
|
||||
LOGE("Only 1 channel supported")
|
||||
return false;
|
||||
}
|
||||
if (info.bits_per_sample != 16) {
|
||||
LOGE("Only 16 bps are supported")
|
||||
return false;
|
||||
}
|
||||
if (info.sample_rate != 8000) {
|
||||
LOGW("Sample rate should be 8000: %d", info.sample_rate);
|
||||
}
|
||||
|
||||
p_codec2 = codec2_create(mode);
|
||||
if (p_codec2 == nullptr) {
|
||||
LOGE("codec2_create");
|
||||
return false;
|
||||
}
|
||||
|
||||
input_buffer.resize(bytesCompressed());
|
||||
result_buffer.resize(bytesUncompressed());
|
||||
assert(input_buffer.size()>0);
|
||||
assert(result_buffer.size()>0);
|
||||
LOGI("bytesCompressed:%d", bytesCompressed());
|
||||
LOGI("bytesUncompressed:%d", bytesUncompressed());
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
codec2_destroy(p_codec2);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual const char *mime() { return "audio/codec2"; }
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
size_t write(const uint8_t *in_ptr, size_t in_size) override {
|
||||
LOGD("write: %d", in_size);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
// encode bytes
|
||||
uint8_t *p_byte = (uint8_t *)in_ptr;
|
||||
for (int j = 0; j < in_size; j++) {
|
||||
processByte(p_byte[j]);
|
||||
}
|
||||
return in_size;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
struct CODEC2 *p_codec2 = nullptr;
|
||||
bool is_active = false;
|
||||
int buffer_pos = 0;
|
||||
Vector<uint8_t> input_buffer;
|
||||
Vector<uint8_t> result_buffer;
|
||||
int bits_per_second = 0;
|
||||
|
||||
// add byte to decoding buffer and decode if buffer is full
|
||||
void processByte(uint8_t byte) {
|
||||
input_buffer[buffer_pos++] = byte;
|
||||
if (buffer_pos >= input_buffer.size()) {
|
||||
// encode
|
||||
codec2_encode(p_codec2, result_buffer.data(),
|
||||
(short*)input_buffer.data());
|
||||
int written = p_print->write(result_buffer.data(), result_buffer.size());
|
||||
if(written!=result_buffer.size()){
|
||||
LOGE("write: %d written: %d", result_buffer.size(), written);
|
||||
} else {
|
||||
LOGD("write: %d written: %d", result_buffer.size(), written);
|
||||
}
|
||||
buffer_pos = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
113
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecCopy.h
Normal file
113
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecCopy.h
Normal file
@@ -0,0 +1,113 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#if defined(ARDUINO) && !defined(IS_MIN_DESKTOP)
|
||||
#include "Print.h"
|
||||
#endif
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Dummy Decoder which just copies the provided data to the output.
|
||||
* You can define if it is PCM data.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class CopyDecoder : public AudioDecoder {
|
||||
public:
|
||||
|
||||
CopyDecoder(bool isPcm = false){
|
||||
is_pcm = isPcm;
|
||||
}
|
||||
|
||||
CopyDecoder(Print &out_stream) { TRACED(); pt_print=&out_stream; }
|
||||
|
||||
CopyDecoder(Print &out_stream, AudioInfoSupport &bi) {pt_print=&out_stream;}
|
||||
|
||||
~CopyDecoder() {}
|
||||
|
||||
virtual void setOutput(Print &out_stream) {pt_print=&out_stream;}
|
||||
|
||||
bool begin() { return true; }
|
||||
|
||||
void end() {}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
TRACED();
|
||||
if (pt_print == nullptr) {
|
||||
LOGE("No output stream defined for CopyDecoder");
|
||||
return 0;
|
||||
}
|
||||
return pt_print->write((uint8_t*)data,len);
|
||||
}
|
||||
|
||||
operator bool() { return true; }
|
||||
|
||||
/// The result is encoded data - by default this is false
|
||||
virtual bool isResultPCM() { return is_pcm;}
|
||||
|
||||
/// Defines that the source and therefor the result is also PCM data
|
||||
void setResultPCM(bool pcm){ is_pcm = pcm;}
|
||||
|
||||
protected:
|
||||
Print *pt_print=nullptr;
|
||||
bool is_pcm = false;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Dummy Encoder which just copies the provided data to the output
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class CopyEncoder : public AudioEncoder {
|
||||
public:
|
||||
CopyEncoder() { TRACED(); }
|
||||
|
||||
CopyEncoder(Print &out_stream) { TRACED(); pt_print=&out_stream; }
|
||||
|
||||
CopyEncoder(Print &out_stream, AudioInfoSupport &bi) {pt_print=&out_stream;}
|
||||
|
||||
~CopyEncoder() {}
|
||||
|
||||
virtual void setOutput(Print &out_stream) {pt_print=&out_stream;}
|
||||
|
||||
bool begin() { return true;}
|
||||
|
||||
void end() {}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
if (pt_print == nullptr) {
|
||||
LOGE("No output stream defined for CopyDecoder");
|
||||
return 0;
|
||||
}
|
||||
return pt_print->write((uint8_t*)data,len);
|
||||
}
|
||||
|
||||
operator bool() { return true; }
|
||||
|
||||
/// Provides the mime type of the encoded data
|
||||
const char *mime() {return mime_type;}
|
||||
|
||||
/// Defines the mime type
|
||||
void setMime(const char *mime) { mime_type = mime; }
|
||||
|
||||
|
||||
protected:
|
||||
Print *pt_print=nullptr;
|
||||
const char *mime_type = "audio/pcm";
|
||||
};
|
||||
|
||||
/// @brief Alias for CopyEncoder to handle PCM audio encoding (no actual encoding)
|
||||
/// @ingroup codecs
|
||||
using PCMEncoder = CopyEncoder;
|
||||
|
||||
/// @brief Alias for CopyDecoder to handle PCM audio decoding (no actual decoding)
|
||||
/// @ingroup codecs
|
||||
using PCMDecoder = CopyDecoder;
|
||||
|
||||
} // namespace audio_tools
|
||||
|
||||
654
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecDSF.h
Normal file
654
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecDSF.h
Normal file
@@ -0,0 +1,654 @@
|
||||
/**
|
||||
* @file CodecDSF.h
|
||||
* @brief DSF (DSD Stream File) format decoder implementation
|
||||
* @author pschatzmann
|
||||
* @copyright GPLv3
|
||||
*
|
||||
* This file contains the implementation of a DSF decoder that converts Direct
|
||||
* Stream Digital (DSD) audio data to Pulse Code Modulation (PCM) format. The
|
||||
* decoder supports the DSF file format which is commonly used for
|
||||
* high-resolution audio distribution.
|
||||
*
|
||||
* Key features:
|
||||
* - DSF file header parsing and validation
|
||||
* - DSD bitstream to PCM conversion with configurable decimation
|
||||
* - BiQuad low-pass filtering for anti-aliasing
|
||||
* - Streaming-compatible operation for real-time processing
|
||||
* - Support for stereo DSD files (DSD64 and higher sample rates)
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
// #pragma GCC optimize("Ofast")
|
||||
#pragma GCC optimize("O3")
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioFilter/Filter.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
|
||||
/**
|
||||
* @defgroup dsd DSD Audio
|
||||
* @ingroup codecs
|
||||
* @brief Direct Stream Digital (DSD) audio format support
|
||||
*/
|
||||
|
||||
/// Buffer size for DSD data processing - must accommodate decimation step
/// (parenthesized so the macro expands safely inside larger expressions)
#define DSD_BUFFER_SIZE (1024 * 2)
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Metadata structure for DSF (DSD Stream File) format
|
||||
* @ingroup dsd
|
||||
* @author pschatzmann
|
||||
*
|
||||
* Contains format information and metadata extracted from DSF file headers,
|
||||
* including DSD sample rates, data sizes, and calculated PCM conversion
|
||||
* parameters.
|
||||
*/
|
||||
// Extends AudioInfo (which describes the PCM output) with DSD-specific
// information extracted from the DSF header and conversion tuning parameters.
struct DSFMetadata : public AudioInfo {
  DSFMetadata() = default;
  // Convenience constructor: defines the target PCM sample rate
  DSFMetadata(int rate) { sample_rate = rate; }
  uint32_t dsd_sample_rate =
      0; ///< DSD sample rate (e.g. 2822400 Hz for DSD64)
  uint64_t dsd_data_bytes = 0; ///< Size of DSD bitstream data in bytes
  uint8_t dsd_bits = 1; ///< BitSize always 1!
  uint64_t pcm_frames = 0; ///< Estimated number of PCM frames after conversion
  float duration_sec = 0; ///< Approximate audio duration in seconds
  uint32_t dsd_buffer_size =
      DSD_BUFFER_SIZE; ///< Internal buffer size for DSD processing
  float filter_q = 0.5f; ///< Q of the anti-aliasing low-pass (was 1.41f)
  float filter_cutoff = 0.4f; ///< Cutoff frequency as fraction of Nyquist
  int output_buffer_size = 1024; ///< PCM output buffer size; presumably in bytes - TODO confirm unit
};
|
||||
|
||||
/**
|
||||
* @brief Header structures for DSF (DSD Stream File) format
|
||||
* @ingroup dsd
|
||||
*
|
||||
* These packed structures define the binary layout of DSF file headers,
|
||||
* allowing direct parsing of the file format without manual byte manipulation.
|
||||
*/
|
||||
|
||||
/// DSF file prefix containing file identification and basic information.
/// NOTE(review): integer fields appear to follow the DSF spec, which stores
/// all values little-endian - confirm before use on big-endian targets.
struct __attribute__((packed)) DSDPrefix {
  char id[4]; // "DSD "
  uint64_t chunkSize; // 28 (size of this chunk)
  uint64_t fileSize; // total file size
  uint64_t metadataOffset; // offset to "ID3 " chunk (0 if none)
};
|
||||
|
||||
/// DSF format chunk containing audio format parameters.
/// The layout mirrors the on-disk "fmt " chunk, hence the packed attribute.
struct __attribute__((packed)) DSFFormat {
  char id[4]; // "fmt "
  uint64_t chunkSize; // 52
  uint32_t formatVersion; // 1
  uint32_t formatID; // 0
  uint32_t channelType; // e.g., 2 for stereo
  uint32_t channelNum; // number of channels
  uint32_t samplingFrequency; // e.g., 2822400
  uint32_t bitsPerSample; // 1
  uint64_t sampleCount; // total samples per channel
  uint32_t blockSizePerChannel; // e.g., 4096
  uint32_t reserved; // 0
};
|
||||
|
||||
/// DSF data chunk header containing audio data size information.
/// Only the fixed-size header is modeled; the DSD bitstream follows it
/// directly in the file.
struct __attribute__((packed)) DSFDataHeader {
  char id[4]; // "data"
  uint64_t chunkSize; // size of DSD data
  // followed by: uint8_t rawData[chunkSize];
};
|
||||
|
||||
/**
|
||||
* @brief DSF (DSD Stream File) format decoder
|
||||
* @ingroup dsd
|
||||
* @author pschatzmann
|
||||
*
|
||||
* Decodes DSF files containing Direct Stream Digital (DSD) audio data and
|
||||
* converts it to PCM format. DSF is a file format that stores DSD audio
|
||||
* streams, commonly used for high-resolution audio. This decoder:
|
||||
*
|
||||
* - Parses DSF file headers to extract format information
|
||||
* - Buffers incoming DSD bitstream data
|
||||
* - Applies decimation and low-pass filtering for anti-aliasing
|
||||
* - Outputs converted PCM audio samples
|
||||
*
|
||||
* The decoder uses BiQuad low-pass filters for high-quality anti-aliasing
|
||||
* during the DSD to PCM conversion process, replacing traditional FIR filter
|
||||
* implementations for better performance and modularity.
|
||||
*
|
||||
* @note Supports mono and stereo DSD files with sample rates >= 2.8224 MHz
|
||||
* (DSD64)
|
||||
*
|
||||
*/
|
||||
class DSFDecoder : public AudioDecoder {
|
||||
public:
|
||||
DSFDecoder() = default;
|
||||
DSFDecoder(DSFMetadata metaData) { setMetaData(metaData); };
|
||||
|
||||
AudioInfo audioInfo() override { return meta; }
|
||||
|
||||
/// Can be used to set up alternative sample rate (default is 44100 Hz) and
|
||||
/// bits
|
||||
void setAudioInfo(AudioInfo from) override {
|
||||
TRACED();
|
||||
AudioDecoder::setAudioInfo(from);
|
||||
meta.copyFrom(from);
|
||||
if (isHeaderAvailable()){
|
||||
// Ensure PCM buffer is allocated based on the new audio info
|
||||
int buffer_size = getOutputBufferSize();
|
||||
pcmBuffer.resize(buffer_size);
|
||||
channelAccum.resize(meta.channels);
|
||||
channelIntegrator.resize(meta.channels);
|
||||
|
||||
setupTargetPCMRate();
|
||||
setupDecimationStep();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initialize the decoder
|
||||
* @return true if initialization successful
|
||||
*
|
||||
* Sets up the decoder state, initializes buffers, and configures the low-pass
|
||||
* filters with default parameters. The filters are initialized with a cutoff
|
||||
* frequency of 40% of the Nyquist frequency to provide effective
|
||||
* anti-aliasing.
|
||||
*/
|
||||
bool begin() {
|
||||
TRACED();
|
||||
dsdBuffer.resize(meta.dsd_buffer_size);
|
||||
dsdBuffer.reset();
|
||||
headerParsed = false;
|
||||
headerSize = 0;
|
||||
dataSize = 0;
|
||||
filePos = 0;
|
||||
decimationStep = 64;
|
||||
max_value = 0;
|
||||
|
||||
// update decimaten step & filter parameters
|
||||
isActive = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override { isActive = false; }
|
||||
|
||||
/**
|
||||
* @brief Get DSF file metadata
|
||||
* @return Reference to DSFMetadata structure containing format information
|
||||
*
|
||||
* Returns metadata extracted from the DSF file header, including DSD sample
|
||||
* rate, data size, estimated PCM frames, and calculated duration.
|
||||
*/
|
||||
const DSFMetadata getMetadata() { return meta; }
|
||||
|
||||
void setMetaData(DSFMetadata metaData) {
|
||||
meta = metaData;
|
||||
AudioDecoder::setAudioInfo(meta);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if decoder is ready
|
||||
* @return true if DSF header has been successfully parsed
|
||||
*
|
||||
* Indicates whether the decoder has successfully parsed the DSF file header
|
||||
* and is ready to process audio data.
|
||||
*/
|
||||
bool isHeaderAvailable() { return headerParsed; }
|
||||
|
||||
operator bool() { return isActive; }
|
||||
|
||||
/**
|
||||
* @brief Main entry point for processing incoming DSF data
|
||||
* @param data Incoming DSF file data bytes
|
||||
* @param len Number of bytes in data buffer
|
||||
* @return Number of bytes consumed (always returns len for streaming
|
||||
* compatibility)
|
||||
*
|
||||
* Processes incoming DSF file data in two phases:
|
||||
* 1. Header parsing: Extracts format information from DSF file header
|
||||
* 2. Audio processing: Buffers DSD data and converts to PCM output
|
||||
*
|
||||
* The method is designed for streaming operation and always reports full
|
||||
* consumption of input data for compatibility with streaming frameworks.
|
||||
*/
|
||||
size_t write(const uint8_t* data, size_t len) {
|
||||
LOGD("write: %u", (unsigned)len);
|
||||
size_t i = 0;
|
||||
|
||||
// Phase 1: Parse DSF header to extract format information
|
||||
i += processHeader(data, len, i);
|
||||
|
||||
// Phase 2: Process audio data (buffer DSD + convert to PCM)
|
||||
if (headerParsed && i < len) {
|
||||
i += processDSDData(data, len, i);
|
||||
}
|
||||
|
||||
return len; // Always report full consumption for streaming compatibility
|
||||
}
|
||||
|
||||
protected:
|
||||
// Header parsing state
|
||||
size_t headerSize; ///< Current size of accumulated header data
|
||||
bool headerParsed = false; ///< Flag indicating if header parsing is complete
|
||||
bool isActive = false; ///< Flag indicating if decoder is active and ready
|
||||
uint64_t dataSize; ///< Size of audio data section in bytes
|
||||
size_t filePos; ///< Current position in DSF file
|
||||
|
||||
// Processing buffers and state
|
||||
SingleBuffer<uint8_t> pcmBuffer{0}; ///< Buffer for PCM output samples -
|
||||
///< supports multi-channel up to 32-bit
|
||||
Vector<float> channelAccum; ///< Accumulator for each channel during DSD to
|
||||
///< PCM conversion
|
||||
Vector<LowPassFilter<float>>
|
||||
channelFilters; ///< Anti-aliasing filters for each channel
|
||||
RingBuffer<uint8_t> dsdBuffer{0}; ///< Ring buffer for DSD data
|
||||
uint32_t decimationStep; ///< Decimation factor for DSD to PCM conversion
|
||||
Vector<float> channelIntegrator; ///< Integrator state for each channel (for
|
||||
///< better DSD conversion)
|
||||
|
||||
// Metadata
|
||||
DSFMetadata meta; ///< Extracted DSF file metadata
|
||||
float max_value = 0.0f;
|
||||
|
||||
/// The buffer size is defined in the metadata: it must be at least 1 frame
|
||||
int getOutputBufferSize() {
|
||||
int frame_size = meta.bits_per_sample / 8 * meta.channels;
|
||||
if (meta.bits_per_sample == 24) frame_size = 4 * meta.channels;
|
||||
int buffer_size = frame_size;
|
||||
if (meta.output_buffer_size > buffer_size)
|
||||
buffer_size = meta.output_buffer_size;
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Process header data until header is complete or data is exhausted
|
||||
* @param data Input data buffer
|
||||
* @param len Length of input data
|
||||
* @param startPos Starting position in input buffer
|
||||
* @return Number of bytes processed for header parsing
|
||||
*
|
||||
* Accumulates header bytes and attempts to parse the DSF file header.
|
||||
* When a complete and valid header is found, sets headerParsed flag and
|
||||
* updates decimation parameters.
|
||||
*/
|
||||
size_t processHeader(const uint8_t* data, size_t len, size_t startPos) {
|
||||
if (headerParsed) return 0;
|
||||
LOGI("processHeader: %u (%u)", (unsigned)len, (unsigned)startPos);
|
||||
|
||||
// Check for DSD header magic
|
||||
if (memcmp(data, "DSD ", 4) != 0) {
|
||||
LOGE("Invalid DSF header magic");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dataPos = findTag("data", data, len);
|
||||
int fmtPos = findTag("fmt ", data, len);
|
||||
if (dataPos < 0 || fmtPos < 0) {
|
||||
LOGE("DSF header not found in data (fmt: %d, data: %d)", fmtPos, dataPos);
|
||||
return 0; // No valid header found
|
||||
}
|
||||
// parse the data
|
||||
parseFMT(data + fmtPos, len - fmtPos);
|
||||
parseData(data + dataPos, len - dataPos);
|
||||
headerParsed = true;
|
||||
|
||||
// update audio info and initialize filters
|
||||
setAudioInfo(meta);
|
||||
|
||||
return dataPos + sizeof(DSFDataHeader);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Process DSD audio data: buffer it and convert to PCM when possible
|
||||
* @param data Input data buffer containing DSD audio data
|
||||
* @param len Length of input data
|
||||
* @param startPos Starting position in input buffer
|
||||
* @return Number of bytes processed for audio data
|
||||
*
|
||||
* Buffers incoming DSD data and triggers PCM conversion when sufficient
|
||||
* data is available for processing.
|
||||
*/
|
||||
size_t processDSDData(const uint8_t* data, size_t len, size_t startPos) {
|
||||
LOGD("processDSDData: %u (%u)", (unsigned)len, (unsigned)startPos);
|
||||
size_t bytesProcessed = 0;
|
||||
|
||||
// Buffer as much DSD data as possible
|
||||
bytesProcessed += bufferDSDData(data, len, startPos);
|
||||
|
||||
// Convert buffered DSD data to PCM output
|
||||
convertDSDToPCM();
|
||||
|
||||
return bytesProcessed;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Buffer incoming DSD data into ring buffer
|
||||
* @param data Input data buffer
|
||||
* @param len Length of input data
|
||||
* @param startPos Starting position in input buffer
|
||||
* @return Number of bytes successfully buffered
|
||||
*
|
||||
* Copies DSD data bytes into the internal ring buffer until either all
|
||||
* data is consumed or the buffer becomes full.
|
||||
*/
|
||||
size_t bufferDSDData(const uint8_t* data, size_t len, size_t startPos) {
|
||||
int write_len = len - startPos;
|
||||
if (write_len > dsdBuffer.availableForWrite()) {
|
||||
write_len = dsdBuffer.availableForWrite();
|
||||
}
|
||||
dsdBuffer.writeArray(data + startPos, write_len);
|
||||
filePos += write_len;
|
||||
|
||||
return write_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Convert buffered DSD data to PCM samples and output them
|
||||
*
|
||||
* Performs the core DSD to PCM conversion process using integrator-based
|
||||
* approach:
|
||||
* 1. Integrates DSD bits over the decimation period for each channel
|
||||
* 2. Converts DSD bits to analog values (-1 or +1) with proper delta-sigma
|
||||
* handling
|
||||
* 3. Applies low-pass filtering to remove high-frequency noise
|
||||
* 4. Converts filtered values to PCM samples
|
||||
* 5. Outputs PCM samples for all channels
|
||||
*
|
||||
* The conversion uses BiQuad low-pass filters for anti-aliasing, providing
|
||||
* better audio quality than simple decimation.
|
||||
*
|
||||
* DSF format uses byte interleaving: each byte contains 8 DSD samples for one
|
||||
* channel, and channels are interleaved at the byte level (not bit level).
|
||||
*/
|
||||
void convertDSDToPCM() {
|
||||
while (hasEnoughData()) {
|
||||
// Initialize accumulators
|
||||
for (int ch = 0; ch < meta.channels; ch++) {
|
||||
channelAccum[ch] = 0.0f;
|
||||
}
|
||||
// Initialize integrator states
|
||||
for (int ch = 0; ch < meta.channels; ch++) {
|
||||
channelIntegrator[ch] = 0.0f;
|
||||
}
|
||||
|
||||
// Accumulate DSD samples over decimation period
|
||||
// DSF uses byte interleaving: bytes alternate between channels
|
||||
int bytesPerDecimationStep = decimationStep / 8;
|
||||
int samplesProcessed = 0;
|
||||
|
||||
for (int i = 0; i < bytesPerDecimationStep && !dsdBuffer.isEmpty(); i++) {
|
||||
for (int ch = 0; ch < meta.channels && !dsdBuffer.isEmpty(); ch++) {
|
||||
uint8_t dsdByte;
|
||||
if (dsdBuffer.read(dsdByte)) {
|
||||
// Each byte contains 8 DSD samples for the current channel
|
||||
// Use integrator-based approach for better DSD conversion
|
||||
for (int bit = 0; bit < 8; bit++) {
|
||||
int channelBit = (dsdByte >> (7 - bit)) & 1; // MSB first in DSF
|
||||
|
||||
// Delta-sigma integration: accumulate the difference
|
||||
channelIntegrator[ch] += channelBit ? 1.0f : -1.0f;
|
||||
|
||||
// Apply decay to prevent DC buildup
|
||||
channelIntegrator[ch] *= 0.9999f;
|
||||
}
|
||||
|
||||
// Add integrated value to channel accumulator
|
||||
channelAccum[ch] += channelIntegrator[ch];
|
||||
samplesProcessed += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float samplesPerChannel = samplesProcessed / meta.channels;
|
||||
|
||||
if (samplesPerChannel > 0) {
|
||||
for (int ch = 0; ch < meta.channels; ch++) {
|
||||
// Normalize by sample count and apply scaling factor
|
||||
channelAccum[ch] = channelAccum[ch] / samplesPerChannel * 0.8f;
|
||||
if (meta.filter_cutoff > 0.0f &&
|
||||
meta.filter_q > 0.0f) { // Only apply filter if configured
|
||||
// Apply low-pass filter to remove high-frequency noise
|
||||
channelAccum[ch] = channelFilters[ch].process(channelAccum[ch]);
|
||||
}
|
||||
//Serial.print(channelAccum[ch]);
|
||||
//Serial.print(" ");
|
||||
|
||||
// Convert to PCM sample and store in buffer
|
||||
writePCMSample(clip(channelAccum[ch]));
|
||||
}
|
||||
}
|
||||
|
||||
//Serial.println();
|
||||
|
||||
// Output the PCM samples for all channels
|
||||
if (pcmBuffer.isFull()) {
|
||||
size_t frameSize = pcmBuffer.available();
|
||||
size_t written =
|
||||
getOutput()->write((uint8_t*)pcmBuffer.data(), frameSize);
|
||||
if (written != frameSize) {
|
||||
LOGE(
|
||||
"Failed to write PCM samples: expected %zu bytes, wrote %zu "
|
||||
"bytes",
|
||||
frameSize, written);
|
||||
}
|
||||
pcmBuffer.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Clips audio values to valid range
|
||||
* @param value Input audio value
|
||||
* @return Clipped value in range [-1.0, 1.0]
|
||||
*
|
||||
* Ensures that filtered audio values stay within the valid range to
|
||||
* prevent clipping artifacts in the final PCM output.
|
||||
*/
|
||||
float clip(float value) {
|
||||
if (value > 1.0f) return 1.0f;
|
||||
if (value < -1.0f) return -1.0f;
|
||||
return value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Set up low-pass filters for all channels
|
||||
*
|
||||
* Initializes anti-aliasing filters for each audio channel with appropriate
|
||||
* cutoff frequency (40% of Nyquist frequency) for the current sample rate.
|
||||
* This ensures proper anti-aliasing performance during DSD to PCM
|
||||
* conversion.
|
||||
*/
|
||||
void setupTargetPCMRate() {
|
||||
TRACEI();
|
||||
|
||||
// Initialize filters for the correct number of channels
|
||||
if (meta.sample_rate > 0 && meta.channels > 0) {
|
||||
float cutoffFreq =
|
||||
meta.sample_rate * meta.filter_cutoff; // 40% of Nyquist frequency
|
||||
channelFilters.resize(meta.channels);
|
||||
for (int i = 0; i < meta.channels; i++) {
|
||||
channelFilters[i].begin(cutoffFreq, meta.sample_rate, meta.filter_q);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Calculate optimal decimation step for DSD to PCM conversion
|
||||
*
|
||||
* Calculates the decimation factor as the ratio of DSD sample rate to
|
||||
* target PCM sample rate. Clamps the value between 64 and 512 to ensure
|
||||
* reasonable processing efficiency and audio quality while maintaining good
|
||||
* anti-aliasing performance.
|
||||
*/
|
||||
void setupDecimationStep() {
|
||||
TRACEI();
|
||||
if (meta.sample_rate == 0 || meta.dsd_sample_rate == 0) {
|
||||
LOGE("Invalid sample rates: DSD=%u, PCM=%u",
|
||||
(unsigned)meta.dsd_sample_rate, (unsigned)meta.sample_rate);
|
||||
return;
|
||||
}
|
||||
|
||||
decimationStep = meta.dsd_sample_rate / meta.sample_rate;
|
||||
if (decimationStep < 64) {
|
||||
LOGW("Decimation step %u too low, setting to 64",
|
||||
(unsigned)decimationStep);
|
||||
decimationStep = 64;
|
||||
}
|
||||
if (decimationStep > 512) {
|
||||
LOGW("Decimation step %u too high, setting to 512",
|
||||
(unsigned)decimationStep);
|
||||
decimationStep = 512;
|
||||
}
|
||||
|
||||
// Ensure decimation step is multiple of 8 for clean byte processing
|
||||
decimationStep = (decimationStep / 8) * 8;
|
||||
if (decimationStep < 64) decimationStep = 64;
|
||||
|
||||
LOGI("Decimation step set to %u for DSD rate %u and target PCM rate %u",
|
||||
(unsigned)decimationStep, (unsigned)meta.dsd_sample_rate,
|
||||
(unsigned)meta.sample_rate);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if sufficient DSD data is available for conversion
|
||||
* @return true if enough data is buffered for one decimation step
|
||||
*
|
||||
* Determines if the DSD buffer contains enough data to perform one
|
||||
* decimation step of DSD to PCM conversion. For DSF format with byte
|
||||
* interleaving, we need enough bytes for all channels over the decimation
|
||||
* period.
|
||||
*/
|
||||
bool hasEnoughData() {
|
||||
// DSF uses byte interleaving: each decimation step needs enough bytes
|
||||
// to cover all channels. Each byte contains 8 DSD samples for one
|
||||
// channel.
|
||||
int bytesPerDecimationStep = (decimationStep / 8) * meta.channels;
|
||||
if (bytesPerDecimationStep < meta.channels)
|
||||
bytesPerDecimationStep = meta.channels;
|
||||
|
||||
return dsdBuffer.available() >= bytesPerDecimationStep;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Convert filtered DSD value to PCM sample in the buffer
|
||||
* @param filteredValue The filtered DSD value (range -1.0 to 1.0)
|
||||
* @param channel Channel index (0 for left/mono, 1 for right)
|
||||
*/
|
||||
void writePCMSample(float filteredValue) {
|
||||
switch (meta.bits_per_sample) {
|
||||
case 8: {
|
||||
int8_t buffer8 = static_cast<int8_t>(filteredValue * 127.0f);
|
||||
pcmBuffer.write(buffer8);
|
||||
break;
|
||||
}
|
||||
case 16: {
|
||||
int16_t buffer16 = static_cast<int16_t>(filteredValue * 32767.0f);
|
||||
pcmBuffer.writeArray((uint8_t*)&buffer16, sizeof(int16_t));
|
||||
break;
|
||||
}
|
||||
case 24: {
|
||||
int24_t buffer24 =
|
||||
static_cast<int24_t>(filteredValue * 8388607.0f); // 2^23 - 1
|
||||
pcmBuffer.writeArray((uint8_t*)&buffer24, sizeof(int24_t));
|
||||
break;
|
||||
}
|
||||
case 32: {
|
||||
int32_t buffer32 =
|
||||
static_cast<int32_t>(filteredValue * 2147483647.0f); // 2^31 -
|
||||
pcmBuffer.writeArray((uint8_t*)&buffer32, sizeof(int32_t));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOGE("Unsupported bits per sample: %d", meta.bits_per_sample);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Find a specific tag within binary data
|
||||
* @param tag The tag string to search for (e.g., "fmt ", "data")
|
||||
* @param data The binary data buffer to search in
|
||||
* @param len The length of the data buffer
|
||||
* @return The position of the tag if found, -1 if not found
|
||||
*
|
||||
* Searches for DSF chunk identifiers within the file data. Used to locate
|
||||
* format and data sections within the DSF file structure.
|
||||
*/
|
||||
int findTag(const char* tag, const uint8_t* data, size_t len) {
|
||||
int taglen = strlen(tag);
|
||||
uint32_t* pt;
|
||||
for (int j = 0; j < len - taglen; j++) {
|
||||
if (memcmp(tag, data + j, taglen) == 0) {
|
||||
return j; // Found the tag at position j
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parse DSF format chunk to extract audio parameters
|
||||
* @param data Pointer to the fmt chunk data
|
||||
* @param len Length of available data
|
||||
* @return true if parsing was successful, false otherwise
|
||||
*
|
||||
* Extracts essential audio format information from the DSF format chunk,
|
||||
* including channel count, DSD sample rate, and validates the parameters
|
||||
* are within acceptable ranges for processing.
|
||||
*/
|
||||
bool parseFMT(const uint8_t* data, size_t len) {
|
||||
TRACEI();
|
||||
if (len < sizeof(DSFFormat)) {
|
||||
LOGE("FMT section too short to parse DSF format header");
|
||||
return false; // Not enough data to parse
|
||||
}
|
||||
DSFFormat* fmt = (DSFFormat*)data;
|
||||
meta.channels = fmt->channelNum;
|
||||
// Fallback to channel type if channels is 0
|
||||
if (meta.channels == 0) meta.channels = fmt->channelType;
|
||||
meta.dsd_sample_rate = fmt->samplingFrequency;
|
||||
|
||||
// Validate channel count
|
||||
if (meta.channels == 0 || meta.channels > 8) {
|
||||
LOGE("Invalid channel count: %u (must be 1-8)", (unsigned)meta.channels);
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGI("channels: %u, DSD sample rate: %u", (unsigned)meta.channels,
|
||||
(unsigned)meta.dsd_sample_rate);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parse DSF data chunk to extract audio data information
|
||||
* @param data Pointer to the data chunk
|
||||
* @param len Length of available data
|
||||
* @return true if parsing was successful, false otherwise
|
||||
*
|
||||
* Extracts audio data size information and calculates estimated playback
|
||||
* duration and total PCM frames that will be produced after DSD to PCM
|
||||
* conversion is complete.
|
||||
*/
|
||||
bool parseData(const uint8_t* data, size_t len) {
|
||||
TRACEI();
|
||||
if (len < sizeof(DSFDataHeader)) {
|
||||
LOGE("Data section too short to parse DSF data header");
|
||||
return false; // Not enough data to parse
|
||||
}
|
||||
DSFDataHeader* header = (DSFDataHeader*)data;
|
||||
dataSize = header->chunkSize;
|
||||
meta.dsd_data_bytes = dataSize;
|
||||
|
||||
uint64_t totalBits = dataSize * 8;
|
||||
uint64_t totalDSDSamples = totalBits / meta.channels;
|
||||
uint64_t totalPCMFrames =
|
||||
totalDSDSamples / (meta.dsd_sample_rate / meta.sample_rate);
|
||||
meta.pcm_frames = totalPCMFrames;
|
||||
meta.duration_sec = (float)totalPCMFrames / meta.sample_rate;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
444
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecFLAC.h
Normal file
444
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecFLAC.h
Normal file
@@ -0,0 +1,444 @@
|
||||
/**
|
||||
* @file CodecFLAC.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief FLAC Codec using https://github.com/pschatzmann/arduino-libflac
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Net.h"
|
||||
#include "flac.h"
|
||||
|
||||
#ifndef FLAC_READ_TIMEOUT_MS
|
||||
#define FLAC_READ_TIMEOUT_MS 10000
|
||||
#endif
|
||||
|
||||
#ifndef FLAC_BUFFER_SIZE
|
||||
#define FLAC_BUFFER_SIZE (8 * 1024)
|
||||
#endif
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for FLAC. Depends on https://github.com/pschatzmann/arduino-libflac. We support an efficient streaming API and an very memory intensitiv standard interface. So
|
||||
* you should prefer the streaming interface where you call setOutput() before the begin and copy() in the loop.
|
||||
* Validated with http://www.2l.no/hires/
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FLACDecoder : public StreamingDecoder {
|
||||
public:
|
||||
/// Default Constructor
|
||||
FLACDecoder(bool isOgg=false) {
|
||||
is_ogg = isOgg;
|
||||
}
|
||||
|
||||
/// Destructor - calls end();
|
||||
~FLACDecoder() { end(); }
|
||||
|
||||
void setTimeout(uint64_t readTimeout=FLAC_READ_TIMEOUT_MS) {
|
||||
read_timeout_ms = readTimeout;
|
||||
}
|
||||
void setOgg(bool isOgg) {
|
||||
is_ogg = isOgg;
|
||||
}
|
||||
|
||||
AudioInfo audioInfo() {
|
||||
AudioInfo info;
|
||||
info.sample_rate = FLAC__stream_decoder_get_sample_rate(decoder);
|
||||
info.channels = FLAC__stream_decoder_get_channels(decoder);
|
||||
info.bits_per_sample = 16; // only 16 is supported
|
||||
return info;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
is_active = false;
|
||||
if (decoder == nullptr) {
|
||||
if ((decoder = FLAC__stream_decoder_new()) == NULL) {
|
||||
LOGE("ERROR: allocating decoder");
|
||||
is_active = false;
|
||||
return false;
|
||||
}
|
||||
LOGI("FLAC__stream_decoder_new");
|
||||
}
|
||||
|
||||
// if it is already active we close it
|
||||
auto state = FLAC__stream_decoder_get_state(decoder);
|
||||
if (state != FLAC__STREAM_DECODER_UNINITIALIZED){
|
||||
FLAC__stream_decoder_finish(decoder);
|
||||
}
|
||||
|
||||
// deactivate md5 checking
|
||||
FLAC__stream_decoder_set_md5_checking(decoder, is_md5_checing);
|
||||
|
||||
// init decoder
|
||||
if (is_ogg){
|
||||
init_status = FLAC__stream_decoder_init_ogg_stream( decoder, read_callback, nullptr, nullptr, nullptr, nullptr, write_callback, nullptr, error_callback, this);
|
||||
} else {
|
||||
init_status = FLAC__stream_decoder_init_stream( decoder, read_callback, nullptr, nullptr, nullptr, nullptr, write_callback, nullptr, error_callback, this);
|
||||
}
|
||||
|
||||
if (init_status != FLAC__STREAM_DECODER_INIT_STATUS_OK) {
|
||||
LOGE("ERROR: initializing decoder: %s", FLAC__StreamDecoderInitStatusString[init_status]);
|
||||
is_active = false;
|
||||
return false;
|
||||
}
|
||||
LOGI("FLAC is open");
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() {
|
||||
TRACEI();
|
||||
if (decoder != nullptr){
|
||||
flush();
|
||||
FLAC__stream_decoder_delete(decoder);
|
||||
decoder = nullptr;
|
||||
}
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
/// Process all data in the buffer
|
||||
void flush() {
|
||||
while(FLAC__stream_decoder_process_single(decoder));
|
||||
}
|
||||
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
|
||||
/// Stream Interface: Process a single frame - only relevant when input stream has been defined
|
||||
bool copy() {
|
||||
LOGD("copy");
|
||||
if (!is_active) {
|
||||
LOGW("FLAC not active");
|
||||
return false;
|
||||
}
|
||||
if (p_input == nullptr) {
|
||||
LOGE("setInput was not called");
|
||||
return false;
|
||||
}
|
||||
if (!FLAC__stream_decoder_process_single(decoder)) {
|
||||
LOGE("FLAC__stream_decoder_process_single");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Activate/deactivate md5 checking: call this before calling begin()
|
||||
void setMD5(bool flag){
|
||||
is_md5_checing = flag;
|
||||
}
|
||||
|
||||
/// returns true of the stream is ogg
|
||||
bool isOgg() const { return is_ogg; }
|
||||
|
||||
/// Provides "audio/flac" or "audio/ogg"
|
||||
const char *mime() override { return is_ogg ? "audio/ogg; codecs=flac" : "audio/flac"; }
|
||||
|
||||
|
||||
protected:
|
||||
bool is_active = false;
|
||||
bool is_ogg = false;
|
||||
bool is_md5_checing = false;
|
||||
AudioInfo info;
|
||||
FLAC__StreamDecoder *decoder = nullptr;
|
||||
FLAC__StreamDecoderInitStatus init_status;
|
||||
uint64_t time_last_read = 0;
|
||||
uint64_t read_timeout_ms = FLAC_READ_TIMEOUT_MS;
|
||||
|
||||
|
||||
/// Check if input is directly from stream - instead of writes
|
||||
bool isInputFromStream() { return p_input != nullptr; }
|
||||
|
||||
/// Error callback
|
||||
static void error_callback(const FLAC__StreamDecoder *decoder,
|
||||
FLAC__StreamDecoderErrorStatus status,
|
||||
void *client_data) {
|
||||
LOGE(FLAC__StreamDecoderErrorStatusString[status]);
|
||||
}
|
||||
|
||||
size_t readBytes(uint8_t *data, size_t len) override {
|
||||
return p_input->readBytes(data, len);
|
||||
}
|
||||
|
||||
/// Callback which reads from stream
|
||||
static FLAC__StreamDecoderReadStatus read_callback(const FLAC__StreamDecoder *decoder, FLAC__byte result_buffer[],size_t *bytes, void *client_data) {
|
||||
FLAC__StreamDecoderReadStatus result = FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
|
||||
LOGD("read_callback: %d", (int) *bytes);
|
||||
FLACDecoder *self = (FLACDecoder *)client_data;
|
||||
if (self == nullptr || !self->is_active) {
|
||||
return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
|
||||
}
|
||||
|
||||
// get data directly from stream
|
||||
*bytes = self->readBytes(result_buffer, *bytes);
|
||||
LOGD("-> %d", (int) *bytes);
|
||||
if (self->isEof(*bytes)){
|
||||
result = FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM;
|
||||
self->is_active = false;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We return eof when we were subsequently getting 0 bytes for the timeout period.
|
||||
bool isEof(int bytes) {
|
||||
bool result = false;
|
||||
if (bytes==0){
|
||||
delay(5);
|
||||
} else {
|
||||
time_last_read=millis();
|
||||
}
|
||||
if (millis() - time_last_read >= read_timeout_ms){
|
||||
result = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Output decoded result to final output stream
|
||||
static FLAC__StreamDecoderWriteStatus write_callback(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame,const FLAC__int32 *const buffer[], void *client_data) {
|
||||
LOGD("write_callback: %u", (unsigned)frame->header.blocksize);
|
||||
FLACDecoder *self = (FLACDecoder *)client_data;
|
||||
|
||||
AudioInfo actual_info = self->audioInfo();
|
||||
if (self->info != actual_info){
|
||||
self->info = actual_info;
|
||||
self->info.logInfo();
|
||||
int bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
|
||||
if (bps!=16){
|
||||
LOGI("Converting from %d bits", bps);
|
||||
}
|
||||
self->info = actual_info;
|
||||
self->notifyAudioChange(self->info);
|
||||
}
|
||||
|
||||
// write audio data
|
||||
int bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
|
||||
int16_t result_frame[actual_info.channels];
|
||||
|
||||
switch(bps){
|
||||
case 8:
|
||||
for (int j = 0; j < frame->header.blocksize; j++) {
|
||||
for (int i = 0; i < actual_info.channels; i++) {
|
||||
//self->output_buffer[j*actual_info.channels + i] = buffer[i][j]<<8;
|
||||
result_frame[i] = buffer[i][j]<<8;
|
||||
}
|
||||
self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
|
||||
}
|
||||
break;
|
||||
case 16:
|
||||
for (int j = 0; j < frame->header.blocksize; j++) {
|
||||
for (int i = 0; i < actual_info.channels; i++) {
|
||||
result_frame[i] = buffer[i][j];
|
||||
}
|
||||
self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
|
||||
}
|
||||
break;
|
||||
case 24:
|
||||
for (int j = 0; j < frame->header.blocksize; j++) {
|
||||
for (int i = 0; i < actual_info.channels; i++) {
|
||||
result_frame[i] = buffer[i][j] >> 8;
|
||||
}
|
||||
self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
|
||||
}
|
||||
break;
|
||||
case 32:
|
||||
for (int j = 0; j < frame->header.blocksize; j++) {
|
||||
for (int i = 0; i < actual_info.channels; i++) {
|
||||
result_frame[i] = buffer[i][j] >> 16;
|
||||
}
|
||||
self->p_print->write((uint8_t *)result_frame, sizeof(result_frame));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
LOGE("Unsupported bps: %d", bps);
|
||||
}
|
||||
|
||||
return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief FLACEncoder
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FLACEncoder : public AudioEncoder {
|
||||
public:
|
||||
/// Default Constructor
|
||||
FLACEncoder(bool isOgg = false) {
|
||||
setOgg(isOgg);
|
||||
}
|
||||
|
||||
/// Destructor - calls end();
|
||||
~FLACEncoder() { end(); }
|
||||
|
||||
void setOgg(bool isOgg) {
|
||||
is_ogg = isOgg;
|
||||
}
|
||||
|
||||
bool isOgg() {return is_ogg;}
|
||||
|
||||
void setBlockSize(int size){
|
||||
flac_block_size = size;
|
||||
}
|
||||
|
||||
int blockSize() {return flac_block_size; }
|
||||
|
||||
void setCompressionLevel(int level){
|
||||
flac_compression_level = level;
|
||||
}
|
||||
|
||||
int compressionLevel() {return flac_compression_level;}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char *mime() override { return "audio/flac"; }
|
||||
|
||||
/// We update the audio information which will be used in the begin method
|
||||
virtual void setAudioInfo(AudioInfo from) override {
|
||||
cfg = from;
|
||||
cfg.logInfo();
|
||||
}
|
||||
|
||||
/// starts the processing using the actual AudioInfo
|
||||
virtual bool begin() override {
|
||||
TRACED();
|
||||
if (p_encoder==nullptr){
|
||||
p_encoder = FLAC__stream_encoder_new();
|
||||
if (p_encoder==nullptr){
|
||||
LOGE("FLAC__stream_encoder_new");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
is_open = false;
|
||||
|
||||
FLAC__stream_encoder_set_channels(p_encoder, cfg.channels);
|
||||
FLAC__stream_encoder_set_bits_per_sample(p_encoder, cfg.bits_per_sample);
|
||||
FLAC__stream_encoder_set_sample_rate(p_encoder, cfg.sample_rate);
|
||||
FLAC__stream_encoder_set_blocksize(p_encoder, flac_block_size);
|
||||
FLAC__stream_encoder_set_compression_level(p_encoder, flac_compression_level);
|
||||
|
||||
// setup stream
|
||||
FLAC__StreamEncoderInitStatus status;
|
||||
if (is_ogg){
|
||||
status = FLAC__stream_encoder_init_ogg_stream(p_encoder, nullptr, write_callback, nullptr, nullptr, nullptr, this);
|
||||
} else {
|
||||
status = FLAC__stream_encoder_init_stream(p_encoder, write_callback, nullptr, nullptr, nullptr, this);
|
||||
}
|
||||
if (status != FLAC__STREAM_ENCODER_INIT_STATUS_OK) {
|
||||
LOGE("ERROR: initializing decoder: %s", FLAC__StreamEncoderInitStatusString[status]);
|
||||
if (status==FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR){
|
||||
LOGE(" -> %s", FLAC__StreamEncoderStateString[FLAC__stream_encoder_get_state(p_encoder)]);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
is_open = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// starts the processing
|
||||
bool begin(Print &out) {
|
||||
p_print = &out;
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override {
|
||||
TRACED();
|
||||
if (p_encoder != nullptr) {
|
||||
FLAC__stream_encoder_delete(p_encoder);
|
||||
p_encoder = nullptr;
|
||||
is_open = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes FLAC Packet
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (!is_open || p_print == nullptr) return 0;
|
||||
LOGD("write: %zu", len);
|
||||
size_t result = 0;
|
||||
int samples=0;
|
||||
int frames=0;
|
||||
int32_t *data32=nullptr;
|
||||
switch(cfg.bits_per_sample){
|
||||
case 16:
|
||||
samples = len / sizeof(int16_t);
|
||||
frames = samples / cfg.channels;
|
||||
writeBuffer((int16_t*)data, samples);
|
||||
data32 = buffer.data();
|
||||
break;
|
||||
|
||||
case 24:
|
||||
case 32:
|
||||
samples = len / sizeof(int32_t);
|
||||
frames = samples / cfg.channels;
|
||||
data32 = (int32_t*) data;
|
||||
break;
|
||||
|
||||
default:
|
||||
LOGE("bits_per_sample not supported: %d", (int) cfg.bits_per_sample);
|
||||
break;
|
||||
}
|
||||
|
||||
if (frames>0){
|
||||
if (FLAC__stream_encoder_process_interleaved(p_encoder, data32, frames)){
|
||||
result = len;
|
||||
} else {
|
||||
LOGE("FLAC__stream_encoder_process_interleaved");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
operator bool() override { return is_open; }
|
||||
|
||||
bool isOpen() { return is_open; }
|
||||
|
||||
protected:
|
||||
AudioInfo cfg;
|
||||
Vector<FLAC__int32> buffer;
|
||||
Print *p_print = nullptr;
|
||||
FLAC__StreamEncoder *p_encoder=nullptr;
|
||||
bool is_open = false;
|
||||
bool is_ogg = false;
|
||||
int flac_block_size = 512; // small value to minimize allocated memory
|
||||
int flac_compression_level = 8;
|
||||
|
||||
static FLAC__StreamEncoderWriteStatus write_callback(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, uint32_t samples, uint32_t current_frame, void *client_data){
|
||||
FLACEncoder *self = (FLACEncoder *)client_data;
|
||||
if (self->p_print!=nullptr){
|
||||
size_t written = self->p_print->write((uint8_t*)buffer, bytes);
|
||||
if (written!=bytes){
|
||||
LOGE("write_callback %zu -> %zu", bytes, written);
|
||||
return FLAC__STREAM_ENCODER_WRITE_STATUS_FATAL_ERROR;
|
||||
}
|
||||
}
|
||||
return FLAC__STREAM_ENCODER_WRITE_STATUS_OK;
|
||||
}
|
||||
|
||||
void writeBuffer(int16_t * data, size_t samples) {
|
||||
buffer.resize(samples);
|
||||
for (int j=0;j<samples;j++){
|
||||
buffer[j] = data[j];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
|
||||
@@ -0,0 +1,203 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
#include "foxen-flac.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
#define FOXEN_IN_BUFFER_SIZE 1024 * 2
|
||||
#define FOXEN_OUT_BUFFER_SIZE 1024 * 4
|
||||
|
||||
/**
 * @brief Foxen FLAC Decoder using https://github.com/astoeckel/libfoxenflac
 * Unlike FLACDecoder which is a streaming decoder, this is a simple
 * AudioDecoder implementation.
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FLACDecoderFoxen : public AudioDecoder {
 public:
  FLACDecoderFoxen() = default;

  /// Default Constructor
  /// @param maxBlockSize maximum FLAC block size: drives buffer allocation
  /// @param maxChannels maximum number of channels: drives buffer allocation
  /// @param convertTo16Bits if true the 32 bit decoder output is reduced to 16 bits
  /// @param releaseOnEnd if true end() releases all allocated buffers
  FLACDecoderFoxen(int maxBlockSize, int maxChannels,
                   bool convertTo16Bits = true, bool releaseOnEnd = false) {
    is_convert_to_16 = convertTo16Bits;
    max_block_size = maxBlockSize;
    max_channels = maxChannels;
    is_release_memory_on_end = releaseOnEnd;
  };

  /// Destructor - calls end();
  ~FLACDecoderFoxen() { end(); }

  bool begin() {
    TRACEI();
    is_active = false;
    // fx_flac_size() reports the memory needed for the given limits
    size_t foxen_size = fx_flac_size(max_block_size, max_channels);
    foxen_data.resize(foxen_size);
    flac = fx_flac_init(foxen_data.data(), max_block_size, max_channels);

    if (flac != nullptr) {
      is_active = true;
      write_buffer.resize(in_buffer_size);
      out.resize(out_buffer_size);
    } else {
      LOGE("not enough memory");
      if (is_stop_on_error) stop();
    }

    return is_active;
  }

  void end() {
    TRACEI();
    // drain any data still pending in the input buffer
    flush();
    if (flac != nullptr && is_release_memory_on_end) {
      foxen_data.resize(0);
      write_buffer.resize(0);
      out.resize(0);
    }
    is_active = false;
  }

  /// Buffers the encoded input and decodes as many frames as possible
  size_t write(const uint8_t *data, size_t len) override {
    LOGD("write: %d", len);
    // no processing if not active
    if (!is_active) return 0;

    size_t result = write_buffer.writeArray(data, len);
    LOGD("write_buffer availabe: %d", write_buffer.available());

    while (write_buffer.available() > 0) {
      if (!decode()) break;
    }

    // if the buffer is full we could not decode anything
    if (write_buffer.available() == write_buffer.size()) {
      LOGE("Decoder did not consume any data");
      if (is_stop_on_error) stop();
    }

    LOGD("write: %d -> %d", len, result);
    return result;
  }

  void flush() { decode(); }

  operator bool() override { return is_active; }

  /// Defines the input buffer size (default is 2k)
  void setInBufferSize(int size) { in_buffer_size = size; }

  /// Defines the number of 32 bit samples for providing the result (default is
  /// 4k)
  void setOutBufferSize(int size) { out_buffer_size = size; }

  /// Defines the maximum FLAC blocksize: drives the buffer allocation
  void setMaxBlockSize(int size) { max_block_size = size; }

  /// Defines the maximum number of channels: drives the buffer allocation
  void setMaxChannels(int ch) { max_channels = ch; }

  /// Select between 16 and 32 bit output: the default is 16 bits
  void set32Bit(bool flag) { is_convert_to_16 = !flag; }

 protected:
  fx_flac_t *flac = nullptr;            // foxen decoder handle (lives in foxen_data)
  SingleBuffer<uint8_t> write_buffer{0};  // buffered encoded input
  Vector<int32_t> out;                  // decoded 32 bit sample output
  Vector<uint8_t> foxen_data{0};        // memory arena for the decoder
  bool is_active = false;
  bool is_convert_to_16 = true;
  bool is_stop_on_error = true;
  bool is_release_memory_on_end = false;
  int bits_eff = 0;                     // effective bits per sample from STREAMINFO
  int max_block_size = 5 * 1024;
  int max_channels = 2;
  int in_buffer_size = FOXEN_IN_BUFFER_SIZE;
  int out_buffer_size = FOXEN_OUT_BUFFER_SIZE;

  /// Runs one fx_flac_process() step.
  /// @return true while progress is made (bytes consumed or samples produced)
  bool decode() {
    TRACED();
    if (!is_active) return false;
    uint32_t out_len = out.size();
    uint32_t buf_len = write_buffer.available();
    uint32_t buf_len_result = buf_len;  // in: available, out: bytes consumed
    int rc = fx_flac_process(flac, write_buffer.data(), &buf_len_result,
                             out.data(), &out_len);
    // assert(out_len <= FOXEN_OUT_BUFFER_SIZE);

    switch (rc) {
      case FLAC_END_OF_METADATA: {
        // STREAMINFO is complete: publish the audio format
        processMetadata();
      } break;

      case FLAC_ERR: {
        LOGE("FLAC decoder in error state!");
        if (is_stop_on_error) stop();
      } break;

      default: {
        if (out_len > 0) {
          LOGD("Providing data: %d samples", out_len);
          if (is_convert_to_16) {
            write16BitData(out_len);
          } else {
            write32BitData(out_len);
          }
        }
      } break;
    }
    LOGD("processed: %d bytes of %d -> %d samples", buf_len_result, buf_len,
         out_len);
    // removed processed bytes from buffer
    write_buffer.clearArray(buf_len_result);
    return buf_len_result > 0 || out_len > 0;
  }

  /// Forwards the decoded samples unchanged (32 bit containers)
  void write32BitData(int out_len) {
    TRACED();
    // write the result to the output destination
    writeBlocking(p_print, (uint8_t *)out.data(), out_len * sizeof(int32_t));
  }

  /// Reduces the 32 bit samples to 16 bits in place, then forwards them.
  /// Safe in place: the 16 bit write offset never passes the 32 bit read offset.
  void write16BitData(int out_len) {
    TRACED();
    // in place convert to 16 bits
    int16_t *out16 = (int16_t *)out.data();
    for (int j = 0; j < out_len; j++) {
      out16[j] = out.data()[j] >> 16; // 65538;
    }
    // write the result to the output destination
    LOGI("writeBlocking: %d", out_len * sizeof(int16_t));
    writeBlocking(p_print, (uint8_t *)out.data(), out_len * sizeof(int16_t));
  }

  /// Reads the STREAMINFO values, validates them against the configured
  /// limits and notifies listeners about the audio format
  void processMetadata() {
    bits_eff = fx_flac_get_streaminfo(flac, FLAC_KEY_SAMPLE_SIZE);
    int info_blocksize = fx_flac_get_streaminfo(flac, FLAC_KEY_MAX_BLOCK_SIZE);

    LOGI("bits: %d", bits_eff);
    LOGI("blocksize: %d", info_blocksize);
    // assert(bits_eff == 32);
    info.sample_rate = fx_flac_get_streaminfo(flac, FLAC_KEY_SAMPLE_RATE);
    info.channels = fx_flac_get_streaminfo(flac, FLAC_KEY_N_CHANNELS);
    info.bits_per_sample = is_convert_to_16 ? 16 : bits_eff;
    info.logInfo();
    if (info.channels > max_channels) {
      LOGE("max channels too low: %d -> %d", max_channels, info.channels);
      if (is_stop_on_error) stop();
    }
    if (info_blocksize > max_block_size) {
      LOGE("max channels too low: %d -> %d", max_block_size, info_blocksize);
      if (is_stop_on_error) stop();
    }
    notifyAudioChange(info);
  }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
/**
|
||||
* @brief Factory for creating new decoders based on the mime type or id
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
*/
|
||||
class CodecFactory {
|
||||
public:
|
||||
bool addDecoder(const char* id, AudioDecoder* (*cb)()) {
|
||||
if (id == nullptr || cb == nullptr) return false;
|
||||
DecoderFactoryLine line;
|
||||
line.id = id;
|
||||
line.cb = cb;
|
||||
decoders.push_back(line);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool addEncoder(const char* id, AudioEncoder* (*cb)()) {
|
||||
if (id == nullptr || cb == nullptr) return false;
|
||||
EncoderFactoryLine line;
|
||||
line.id = id;
|
||||
line.cb = cb;
|
||||
encoders.push_back(line);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// create a new decoder instance
|
||||
AudioDecoder* createDecoder(const char* str) {
|
||||
for (auto& line : decoders) {
|
||||
if (line.id.equals(str)) {
|
||||
return line.cb();
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
/// create a new encoder instance
|
||||
AudioEncoder* createEncoder(const char* str) {
|
||||
for (auto& line : encoders) {
|
||||
if (line.id.equals(str)) {
|
||||
return line.cb();
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
struct DecoderFactoryLine {
|
||||
Str id;
|
||||
AudioDecoder* (*cb)() = nullptr;
|
||||
};
|
||||
struct EncoderFactoryLine {
|
||||
Str id;
|
||||
AudioEncoder* (*cb)() = nullptr;
|
||||
};
|
||||
Vector<DecoderFactoryLine> decoders;
|
||||
Vector<EncoderFactoryLine> encoders;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
142
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecFloat.h
Normal file
142
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecFloat.h
Normal file
@@ -0,0 +1,142 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief DecoderFloat - Converts Stream of floats into 2 byte integers
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderFloat : public AudioDecoder {
|
||||
public:
|
||||
/// Empty Constructor
|
||||
DecoderFloat() = default;
|
||||
/**
|
||||
* @brief Construct a new DecoderFloat object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
*/
|
||||
DecoderFloat(Print &out_stream, bool active=true){
|
||||
TRACED();
|
||||
p_print = &out_stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new DecoderFloat object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
* @param bi Object that will be notified about the Audio Formt (Changes)
|
||||
*/
|
||||
|
||||
DecoderFloat(Print &out_stream, AudioInfoSupport &bi){
|
||||
TRACED();
|
||||
p_print = &out_stream;
|
||||
addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override {
|
||||
p_print = &out_stream;
|
||||
}
|
||||
|
||||
/// Converts data from float to int16_t
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (p_print==nullptr) return 0;
|
||||
int samples = len/sizeof(float);
|
||||
buffer.resize(samples);
|
||||
float* p_float = (float*) data;
|
||||
for (int j=0;j<samples;j++){
|
||||
buffer[j] = p_float[j]*32767;
|
||||
}
|
||||
return p_print->write((uint8_t*)buffer.data(), samples*sizeof(int16_t)) * 2;
|
||||
}
|
||||
|
||||
virtual operator bool() override {
|
||||
return p_print!=nullptr;;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print=nullptr;
|
||||
Vector<int16_t> buffer;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief EncoderFloats - Encodes 16 bit PCM data stream to floats
|
||||
* data.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncoderFloat : public AudioEncoder {
|
||||
public:
|
||||
/// Empty Constructor
|
||||
EncoderFloat() = default;
|
||||
|
||||
/// Constructor providing the output stream
|
||||
EncoderFloat(Print &out){
|
||||
p_print = &out;
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override {
|
||||
p_print = &out_stream;
|
||||
}
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char* mime() override{
|
||||
return mime_pcm;
|
||||
}
|
||||
|
||||
/// starts the processing using the actual RAWAudioInfo
|
||||
virtual bool begin() override{
|
||||
is_open = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// starts the processing
|
||||
bool begin(Print &out) {
|
||||
p_print = &out;
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override {
|
||||
is_open = false;
|
||||
}
|
||||
|
||||
/// Converts data from int16_t to float
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (p_print==nullptr) return 0;
|
||||
int16_t *pt16 = (int16_t*)data;
|
||||
size_t samples = len / sizeof(int16_t);
|
||||
buffer.resize(samples);
|
||||
for (size_t j=0;j<samples;j++){
|
||||
buffer[j] = static_cast<float>(pt16[j]) / 32768.0;
|
||||
}
|
||||
return p_print->write((uint8_t*)buffer.data(), samples*sizeof(float));
|
||||
}
|
||||
|
||||
operator bool() override {
|
||||
return is_open;
|
||||
}
|
||||
|
||||
bool isOpen(){
|
||||
return is_open;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print* p_print=nullptr;;
|
||||
volatile bool is_open;
|
||||
Vector<float> buffer;
|
||||
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
197
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecG722.h
Normal file
197
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecG722.h
Normal file
@@ -0,0 +1,197 @@
|
||||
/**
|
||||
* @file CodecG.722.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief G.722 Codec using https://github.com/pschatzmann/arduino-libg722
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "g722_codec.h"
|
||||
|
||||
// size in bytes
|
||||
#define G722_PCM_SIZE 80
|
||||
#define G722_ENC_SIZE 40
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for G.722. Depends on
|
||||
* https://github.com/pschatzmann/arduino-libg722.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G722Decoder : public AudioDecoder {
|
||||
public:
|
||||
G722Decoder() = default;
|
||||
|
||||
/// Defines the options for the G.722 Codec: G722_SAMPLE_RATE_8000,G722_PACKED
|
||||
void setOptions(int options){
|
||||
this->options = options;
|
||||
}
|
||||
|
||||
virtual bool begin() {
|
||||
TRACEI();
|
||||
input_buffer.resize(10);
|
||||
result_buffer.resize(40);
|
||||
|
||||
g722_dctx = g722_decoder_new(info.sample_rate, options);
|
||||
if (g722_dctx == nullptr) {
|
||||
LOGE("g722_decoder_new");
|
||||
return false;
|
||||
}
|
||||
|
||||
notifyAudioChange(info);
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
g722_decoder_destroy(g722_dctx);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t *p_byte = (uint8_t *)data;
|
||||
for (int j = 0; j < len; j++) {
|
||||
processByte(p_byte[j]);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
G722_DEC_CTX *g722_dctx=nullptr;
|
||||
Vector<uint8_t> input_buffer;
|
||||
Vector<uint8_t> result_buffer;
|
||||
int options = G722_SAMPLE_RATE_8000;
|
||||
int input_pos = 0;
|
||||
bool is_active = false;
|
||||
|
||||
/// Build decoding buffer and decode when frame is full
|
||||
void processByte(uint8_t byte) {
|
||||
// add byte to buffer
|
||||
input_buffer[input_pos++] = byte;
|
||||
|
||||
// decode if buffer is full
|
||||
if (input_pos >= input_buffer.size()) {
|
||||
int result_samples = g722_decode(g722_dctx, input_buffer.data(), input_buffer.size(),
|
||||
(int16_t *)result_buffer.data());
|
||||
|
||||
if (result_samples*2>result_buffer.size()){
|
||||
LOGE("Decoder:Result buffer too small: %d -> %d",result_buffer.size(),result_samples*2);
|
||||
}
|
||||
|
||||
p_print->write(result_buffer.data(), result_samples);
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for G.722 - Depends on
|
||||
* https://github.com/pschatzmann/arduino-libg722.
|
||||
* Inspired by g722enc.c
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G722Encoder : public AudioEncoder {
|
||||
public:
|
||||
G722Encoder() = default;
|
||||
|
||||
/// Defines the options for the G.722 Codec: G722_SAMPLE_RATE_8000,G722_PACKED
|
||||
void setOptions(int options){
|
||||
this->options = options;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
if (info.channels != 1) {
|
||||
LOGW("1 channel expected, was: %d", info.channels);
|
||||
}
|
||||
|
||||
g722_ectx = g722_encoder_new(info.sample_rate, options);
|
||||
if (g722_ectx == NULL) {
|
||||
LOGE("g722_encoder_new");
|
||||
return false;
|
||||
}
|
||||
|
||||
input_buffer.resize(G722_PCM_SIZE);
|
||||
result_buffer.resize(G722_ENC_SIZE);
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
g722_encoder_destroy(g722_ectx);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual const char *mime() { return "audio/g722"; }
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
// encode bytes
|
||||
uint8_t *p_byte = (uint8_t *)data;
|
||||
for (int j = 0; j < len; j++) {
|
||||
processByte(p_byte[j]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
G722_ENC_CTX *g722_ectx = nullptr;
|
||||
Vector<uint8_t> input_buffer;
|
||||
Vector<uint8_t> result_buffer;
|
||||
int options = G722_SAMPLE_RATE_8000;
|
||||
int buffer_pos = 0;
|
||||
bool is_active = false;
|
||||
|
||||
// add byte to decoding buffer and decode if buffer is full
|
||||
void processByte(uint8_t byte) {
|
||||
input_buffer[buffer_pos++] = byte;
|
||||
if (buffer_pos >= input_buffer.size()) {
|
||||
// convert for little endian
|
||||
int samples = input_buffer.size() / 2;
|
||||
// encode
|
||||
int result_len = g722_encode(g722_ectx,(const int16_t*) input_buffer.data(), samples,
|
||||
result_buffer.data());
|
||||
if (result_len>result_buffer.size()){
|
||||
LOGE("Encoder:Result buffer too small: %d -> %d",result_buffer.size(),result_len);
|
||||
}
|
||||
p_print->write(result_buffer.data(), result_len);
|
||||
buffer_pos = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
411
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecG7xx.h
Normal file
411
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecG7xx.h
Normal file
@@ -0,0 +1,411 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
extern "C"{
|
||||
#include "g72x.h"
|
||||
}
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Supported codecs by G7xxDecoder and G7xxEncoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*
|
||||
*/
|
||||
enum G7xxCODEC_e {g723_24, g721, g723_40, others};
|
||||
|
||||
/**
|
||||
* @brief g723_24, g721, g723_40 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class G7xxDecoder : public AudioDecoder {
|
||||
public:
|
||||
G7xxDecoder(G7xxCODEC_e codec) {
|
||||
info.channels = 1;
|
||||
info.sample_rate = 8000;
|
||||
info.bits_per_sample = 16;
|
||||
|
||||
switch(codec){
|
||||
case g723_24:
|
||||
dec_routine = g723_24_decoder;
|
||||
dec_bits = 3;
|
||||
break;
|
||||
|
||||
case g721:
|
||||
dec_routine = g721_decoder;
|
||||
dec_bits = 4;
|
||||
break;
|
||||
|
||||
case g723_40:
|
||||
dec_routine = g723_40_decoder;
|
||||
dec_bits = 5;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo info) override {
|
||||
bool ok = true;
|
||||
if (info.channels!=1){
|
||||
LOGE("channels must be 1 instead of %d", info.channels);
|
||||
ok = false;
|
||||
}
|
||||
if (info.sample_rate!=8000){
|
||||
LOGE("sample_rate must be 8000 instead of %d", info.sample_rate);
|
||||
ok = false;
|
||||
}
|
||||
if (info.bits_per_sample!=16){
|
||||
LOGE("bits_per_sample must be 16 instead of %d", info.bits_per_sample);
|
||||
ok = false;
|
||||
}
|
||||
if (ok) AudioDecoder::setAudioInfo(info);
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
in_buffer = 0;
|
||||
in_bits = 0;
|
||||
out_size = sizeof(int16_t);
|
||||
g72x_init_state(&state);
|
||||
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t *p_byte = (uint8_t *)data;
|
||||
for (int j = 0; j < len; j++) {
|
||||
sample = (*dec_routine)(p_byte[j], AUDIO_ENCODING_LINEAR, &state);
|
||||
p_print->write((uint8_t*)&sample, out_size);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
int input_pos = 0;
|
||||
bool is_active = false;
|
||||
int16_t sample;
|
||||
unsigned char code;
|
||||
int n;
|
||||
struct g72x_state state;
|
||||
int out_size;
|
||||
int (*dec_routine)(int code, int out_coding, struct g72x_state* state_ptr);
|
||||
int dec_bits;
|
||||
unsigned int in_buffer = 0;
|
||||
int in_bits = 0;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief g723_24, g721, g723_40 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G7xxEncoder : public AudioEncoder {
|
||||
public:
|
||||
G7xxEncoder(G7xxCODEC_e codec) {
|
||||
info.channels = 1;
|
||||
info.sample_rate = 8000;
|
||||
info.bits_per_sample = 16;
|
||||
|
||||
switch(codec){
|
||||
|
||||
case g721:
|
||||
enc_routine = g721_encoder;
|
||||
enc_bits = 4;
|
||||
p_mime = "audio/g721";
|
||||
break;
|
||||
|
||||
case g723_24:
|
||||
enc_routine = g723_24_encoder;
|
||||
enc_bits = 3;
|
||||
p_mime = "audio/g723_24";
|
||||
break;
|
||||
|
||||
case g723_40:
|
||||
enc_routine = g723_40_encoder;
|
||||
enc_bits = 5;
|
||||
p_mime = "audio/g723_40";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
g72x_init_state(&state);
|
||||
out_buffer = 0;
|
||||
out_bits = 0;
|
||||
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACEI();
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
const char *mime() override { return p_mime; }
|
||||
|
||||
virtual void setAudioInfo(AudioInfo info) {
|
||||
bool ok = true;
|
||||
if (info.channels!=1){
|
||||
LOGE("channels must be 1 instead of %d", info.channels);
|
||||
ok = false;
|
||||
}
|
||||
if (info.sample_rate!=8000){
|
||||
LOGE("sample_rate must be 8000 instead of %d", info.sample_rate);
|
||||
ok = false;
|
||||
}
|
||||
if (info.bits_per_sample!=16){
|
||||
LOGE("bits_per_sample must be 16 instead of %d", info.bits_per_sample);
|
||||
ok = false;
|
||||
}
|
||||
if (ok) AudioEncoder::setAudioInfo(info);
|
||||
}
|
||||
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
// encode bytes
|
||||
int16_t *p_16 = (int16_t *)data;
|
||||
int samples = len / sizeof(int16_t);
|
||||
for (int j = 0; j < samples; j++) {
|
||||
code = (*enc_routine)(p_16[j], AUDIO_ENCODING_LINEAR, &state);
|
||||
p_print->write(&code, 1);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
bool is_active = false;
|
||||
const char *p_mime = nullptr;
|
||||
int resid;
|
||||
struct g72x_state state;
|
||||
unsigned char sample_char;
|
||||
int16_t sample_int16;
|
||||
unsigned char code;
|
||||
int (*enc_routine)(int sample, int in_coding, struct g72x_state* state_ptr);
|
||||
int enc_bits;
|
||||
unsigned int out_buffer = 0;
|
||||
int out_bits = 0;
|
||||
|
||||
};
|
||||
|
||||
/**
 * @brief 32Kbps G721 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G721Decoder : public G7xxDecoder {
 public:
  /// Selects the G.721 decoding routine (4 bits per code word)
  G721Decoder() : G7xxDecoder(g721) {};
};
|
||||
/**
 * @brief 32Kbps G721 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G721Encoder : public G7xxEncoder {
 public:
  /// Selects the G.721 encoding routine (4 bits per code word)
  G721Encoder() : G7xxEncoder(g721) {};
};
|
||||
/**
 * @brief 24Kbps G723 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_24Decoder : public G7xxDecoder {
 public:
  /// Selects the G.723 24kbps decoding routine (3 bits per code word)
  G723_24Decoder() : G7xxDecoder(g723_24) {};
};
|
||||
/**
 * @brief 24Kbps G723 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_24Encoder : public G7xxEncoder {
 public:
  /// Selects the G.723 24kbps encoding routine (3 bits per code word)
  G723_24Encoder() : G7xxEncoder(g723_24) {};
};
|
||||
/**
 * @brief 40Kbps G723 Decoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_40Decoder : public G7xxDecoder {
 public:
  /// Selects the G.723 40kbps decoding routine (5 bits per code word)
  G723_40Decoder() : G7xxDecoder(g723_40) {};
};
|
||||
/**
 * @brief 40Kbps G723 Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G723_40Encoder : public G7xxEncoder {
 public:
  /// Selects the G.723 40kbps encoding routine (5 bits per code word)
  G723_40Encoder() : G7xxEncoder(g723_40) {};
};
|
||||
|
||||
/**
|
||||
* @brief 64 kbit/s g711 ULOW Encoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* Supported encoder parameters: linear2alaw2, linear2ulaw
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G711Encoder : public G7xxEncoder {
|
||||
public:
|
||||
G711Encoder(uint8_t(*enc)(int)) : G7xxEncoder(others) {
|
||||
this->enc = enc;
|
||||
assert(this->enc!=nullptr);
|
||||
};
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
// encode bytes
|
||||
int samples = len/2;
|
||||
int16_t *p_16 = (int16_t *)data;
|
||||
uint8_t buffer[samples];
|
||||
for (int j = 0; j < samples; j++) {
|
||||
buffer[j] = enc(p_16[j]);
|
||||
}
|
||||
p_print->write(buffer,samples);
|
||||
return len;
|
||||
}
|
||||
protected:
|
||||
uint8_t(*enc)(int)=nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 64 kbit/s g711 ULOW Decoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* Supported decoder parameters: alaw2linear, ulaw2linear
|
||||
* @author Phil Schatzmann
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G711Decoder : public G7xxDecoder {
|
||||
public:
|
||||
G711Decoder(int (*dec)(uint8_t a_val)) : G7xxDecoder(others) {
|
||||
this->dec = dec;
|
||||
assert(this->dec!=nullptr);
|
||||
};
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
// decode bytes
|
||||
uint8_t *p_8 = (uint8_t *)data;
|
||||
for (int j = 0; j < len; j++) {
|
||||
int16_t result = dec(p_8[j]);
|
||||
p_print->write((uint8_t*)&result,sizeof(int16_t));
|
||||
}
|
||||
return len;
|
||||
}
|
||||
protected:
|
||||
int (*dec)(uint8_t a_val)=nullptr;
|
||||
};
|
||||
|
||||
|
||||
/**
 * @brief 64 kbit/s g711 ALOW Encoder based on https://github.com/pschatzmann/arduino-libg7xx
 * @ingroup codecs
 * @ingroup encoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class G711_ALAWEncoder : public G711Encoder {
 public:
  /// Uses the linear2alaw conversion routine
  G711_ALAWEncoder() : G711Encoder(linear2alaw) {};
};
|
||||
|
||||
/**
|
||||
* @brief 64 kbit/s G.711 A-law decoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G711_ALAWDecoder : public G711Decoder {
|
||||
public:
|
||||
G711_ALAWDecoder() : G711Decoder(alaw2linear) {};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 64 kbit/s G.711 u-law encoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G711_ULAWEncoder : public G711Encoder {
|
||||
public:
|
||||
G711_ULAWEncoder() : G711Encoder(linear2ulaw) {};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 64 kbit/s G.711 u-law decoder based on https://github.com/pschatzmann/arduino-libg7xx
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class G711_ULAWDecoder : public G711Decoder {
|
||||
public:
|
||||
G711_ULAWDecoder() : G711Decoder(ulaw2linear) {};
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
236
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecGSM.h
Normal file
236
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecGSM.h
Normal file
@@ -0,0 +1,236 @@
|
||||
/**
|
||||
* @file CodecGSM.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief GSM Codec using https://github.com/pschatzmann/arduino-libgsm
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "gsm.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for GSM. Depends on
|
||||
* https://github.com/pschatzmann/arduino-libgsm.
|
||||
* Inspired by gsmdec.c
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class GSMDecoder : public AudioDecoder {
|
||||
public:
|
||||
GSMDecoder() {
|
||||
info.sample_rate = 8000;
|
||||
info.channels = 1;
|
||||
}
|
||||
|
||||
virtual bool begin() {
|
||||
TRACEI();
|
||||
// 160 13-bit samples
|
||||
result_buffer.resize(160 * sizeof(int16_t));
|
||||
// gsm_frame of 33 bytes
|
||||
input_buffer.resize(33);
|
||||
|
||||
v_gsm = gsm_create();
|
||||
notifyAudioChange(info);
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
gsm_destroy(v_gsm);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (int j = 0; j < len; j++) {
|
||||
processByte(data[j]);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
gsm v_gsm;
|
||||
bool is_active = false;
|
||||
Vector<uint8_t> input_buffer;
|
||||
Vector<uint8_t> result_buffer;
|
||||
int input_pos = 0;
|
||||
|
||||
/// Build decoding buffer and decode when frame is full
|
||||
void processByte(uint8_t byte) {
|
||||
// add byte to buffer
|
||||
input_buffer[input_pos++] = byte;
|
||||
|
||||
// decode if buffer is full
|
||||
if (input_pos >= input_buffer.size()) {
|
||||
if (gsm_decode(v_gsm, input_buffer.data(), (gsm_signal*)result_buffer.data())!=0){
|
||||
LOGE("gsm_decode");
|
||||
}
|
||||
|
||||
//fromBigEndian(result_buffer);
|
||||
// scale to 13 to 16-bit samples
|
||||
scale(result_buffer);
|
||||
|
||||
p_print->write(result_buffer.data(), result_buffer.size());
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void scale(Vector<uint8_t> &vector){
|
||||
int16_t *pt16 = (int16_t *)vector.data();
|
||||
for (int j = 0; j < vector.size() / 2; j++) {
|
||||
if (abs(pt16[j])<=4095){
|
||||
pt16[j] = pt16[j] * 8;
|
||||
} else if(pt16[j]<0){
|
||||
pt16[j] = -32767;
|
||||
} else if(pt16[j]>0){
|
||||
pt16[j] = 32767;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void fromBigEndian(Vector<uint8_t> &vector){
|
||||
int size = vector.size() / 2;
|
||||
int16_t *data16 = (int16_t*) vector.data();
|
||||
for (int i=0; i<size; i++){
|
||||
data16[i] = ntohs(data16[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for GSM - Depends on
|
||||
* https://github.com/pschatzmann/arduino-libgsm.
|
||||
* Inspired by gsmenc.c
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class GSMEncoder : public AudioEncoder {
|
||||
public:
|
||||
GSMEncoder(bool scaling=true) {
|
||||
info.sample_rate = 8000;
|
||||
info.channels = 1;
|
||||
scaling_active = scaling;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
|
||||
if (info.sample_rate != 8000) {
|
||||
LOGW("Sample rate is supposed to be 8000 - it was %d", info.sample_rate);
|
||||
}
|
||||
if (info.channels != 1) {
|
||||
LOGW("channels is supposed to be 1 - it was %d", info.channels);
|
||||
}
|
||||
|
||||
v_gsm = gsm_create();
|
||||
// 160 13-bit samples
|
||||
input_buffer.resize(160 * sizeof(int16_t));
|
||||
// gsm_frame of 33 bytes
|
||||
result_buffer.resize(33);
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
gsm_destroy(v_gsm);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual const char *mime() { return "audio/gsm"; }
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
// encode bytes
|
||||
for (int j = 0; j < len; j++) {
|
||||
processByte(data[j]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
gsm v_gsm;
|
||||
bool is_active = false;
|
||||
int buffer_pos = 0;
|
||||
bool scaling_active;
|
||||
Vector<uint8_t> input_buffer;
|
||||
Vector<uint8_t> result_buffer;
|
||||
|
||||
// add byte to decoding buffer and decode if buffer is full
|
||||
void processByte(uint8_t byte) {
|
||||
input_buffer[buffer_pos++] = byte;
|
||||
if (buffer_pos >= input_buffer.size()) {
|
||||
scaleValues(input_buffer);
|
||||
// toBigEndian(input_buffer);
|
||||
// encode
|
||||
gsm_encode(v_gsm, (gsm_signal*)input_buffer.data(), result_buffer.data());
|
||||
size_t written = p_print->write(result_buffer.data(), result_buffer.size());
|
||||
assert(written == result_buffer.size());
|
||||
buffer_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void toBigEndian(Vector<uint8_t> &vector){
|
||||
int size = vector.size() / 2;
|
||||
int16_t *data16 = (int16_t*) vector.data();
|
||||
for (int i=0; i<size; i++){
|
||||
data16[i] = htons(data16[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void scaleValues(Vector<uint8_t> &vector) {
|
||||
int16_t *pt16 = (int16_t *)vector.data();
|
||||
int size = vector.size() / 2;
|
||||
if (scaling_active){
|
||||
// scale to 16 to 13-bit samples
|
||||
for (int j = 0; j < size; j++) {
|
||||
pt16[j] = pt16[j] / 8;
|
||||
}
|
||||
} else {
|
||||
// clip value to 13-bits
|
||||
for (int j = 0; j < size; j++) {
|
||||
if ( pt16[j]>4095){
|
||||
pt16[j] = 4095;
|
||||
}
|
||||
if ( pt16[j]<-4095){
|
||||
pt16[j] = -4095;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/CodecAACHelix.h"
|
||||
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
|
||||
#include "AudioTools/AudioCodecs/CodecWAV.h"
|
||||
#include "AudioTools/AudioCodecs/MultiDecoder.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief MP3 and AAC Decoder using libhelix:
|
||||
* https://github.com/pschatzmann/arduino-libhelix. We dynamically create a MP3
|
||||
* or AAC decoder dependent on the provided audio format. In addition WAV files
|
||||
* are also supported
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class DecoderHelix : public MultiDecoder {
|
||||
public:
|
||||
DecoderHelix() {
|
||||
// register supported codecs with their mime type
|
||||
addDecoder(mp3, "audio/mpeg");
|
||||
addDecoder(aac, "audio/aac");
|
||||
addDecoder(wav, "audio/vnd.wave");
|
||||
}
|
||||
|
||||
protected:
|
||||
MP3DecoderHelix mp3;
|
||||
AACDecoderHelix aac;
|
||||
WAVDecoder wav;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
192
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecILBC.h
Normal file
192
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecILBC.h
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
|
||||
* @file CodecILBC.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief Codec for ilbc using https://github.com/pschatzmann/libilbc
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "iLBC.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for iLBC. Depends on
|
||||
* https://github.com/pschatzmann/libilbc
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ILBCDecoder : public AudioDecoder {
|
||||
public:
|
||||
ILBCDecoder(EnumLBCFrameSize frameSize = ms30, bool useEnhancer = true) {
|
||||
info.sample_rate = 8000;
|
||||
info.channels = 1;
|
||||
info.bits_per_sample = 16;
|
||||
frame_size = frameSize;
|
||||
use_enhancer = useEnhancer;
|
||||
}
|
||||
|
||||
~ILBCDecoder(){
|
||||
end();
|
||||
}
|
||||
|
||||
virtual bool begin() {
|
||||
TRACEI();
|
||||
if (p_print==nullptr){
|
||||
LOGE("Output not defined");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (p_ilbc==nullptr){
|
||||
p_ilbc = new iLBCDecode(frame_size, use_enhancer);
|
||||
}
|
||||
|
||||
// setup buffer
|
||||
decoded_buffer.resize(p_ilbc->getSamples());
|
||||
encoded_buffer.resize(p_ilbc->getEncodedBytes());
|
||||
|
||||
// update audio information
|
||||
notifyAudioChange(info);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
delete p_ilbc;
|
||||
p_ilbc = nullptr;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return p_ilbc != nullptr; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
if (p_ilbc==nullptr) return 0;
|
||||
LOGI("write: %d", len);
|
||||
int samples = len / sizeof(int16_t);
|
||||
int16_t *p_samples = (int16_t *)data;
|
||||
for (int j=0;j<samples;j++){
|
||||
encoded_buffer[encoded_buffer_pos++]=p_samples[j];
|
||||
if (encoded_buffer_pos>=encoded_buffer.size()){
|
||||
memset(decoded_buffer.data(),0,decoded_buffer.size()*sizeof(int16_t));
|
||||
p_ilbc->decode(encoded_buffer.data(), decoded_buffer.data());
|
||||
if (p_print!=nullptr){
|
||||
p_print->write((uint8_t*)decoded_buffer.data(), decoded_buffer.size()*sizeof(int16_t));
|
||||
delay(2);
|
||||
}
|
||||
encoded_buffer_pos = 0;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
iLBCDecode *p_ilbc = nullptr;
|
||||
Vector<int16_t> decoded_buffer{0};
|
||||
Vector<uint8_t> encoded_buffer{0};
|
||||
int16_t encoded_buffer_pos = 0;
|
||||
EnumLBCFrameSize frame_size;
|
||||
bool use_enhancer;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for iLBC - Depends on
|
||||
* https://github.com/pschatzmann/libilbc
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ILBCEncoder : public AudioEncoder {
 public:
  /// @param frameSize 20 or 30 ms iLBC frame
  ILBCEncoder(EnumLBCFrameSize frameSize = ms30) {
    // iLBC is defined for 8 kHz mono, 16 bit
    info.sample_rate = 8000;
    info.channels = 1;
    info.bits_per_sample = 16;
    frame_size = frameSize;
  }

  ~ILBCEncoder(){
    end();
  }

  /// Validates the audio info, creates the libilbc encoder and sizes the buffers
  bool begin() {
    TRACEI();
    if (p_print==nullptr){
      LOGE("Output not defined");
      return false;
    }
    if (info.bits_per_sample!=16){
      LOGE("bits_per_sample must be 16: %d",info.bits_per_sample);
      return false;
    }
    if (info.sample_rate!=8000){
      LOGW("The sample rate should be 8000: %d", info.sample_rate);
    }
    if (info.channels!=1){
      LOGW("channels should be 1: %d", info.channels);
    }
    if (p_ilbc==nullptr){
      p_ilbc = new iLBCEncode(frame_size);
    }
    // one frame of input samples / one encoded frame
    decoded_buffer.resize(p_ilbc->getSamples());
    encoded_buffer.resize(p_ilbc->getEncodedBytes());
    decoded_buffer_pos = 0;
    return true;
  }

  /// Releases the libilbc encoder
  virtual void end() {
    TRACEI();
    if (p_ilbc != nullptr) {
      delete p_ilbc;
      p_ilbc = nullptr;
    }
  }

  virtual const char *mime() { return "audio/ilbc"; }

  virtual void setOutput(Print &out_stream) { p_print = &out_stream; }

  operator bool() { return p_ilbc != nullptr; }

  /// Consumes 16-bit PCM; complete encoded iLBC frames go to the output
  virtual size_t write(const uint8_t *data, size_t len) {
    if (p_ilbc==nullptr) return 0;
    LOGI("write: %d", len);

    int samples = len / sizeof(int16_t);
    int16_t *p_samples = (int16_t *)data;

    for (int j=0;j<samples;j++){
      // NOTE(review): decoded_buffer is Vector<float>, so each int16 sample is
      // implicitly converted to float without rescaling - presumably
      // iLBCEncode::encode() expects raw-valued float samples; verify against
      // the libilbc API
      decoded_buffer[decoded_buffer_pos++]=p_samples[j];
      if (decoded_buffer_pos>=decoded_buffer.size()){
        memset(encoded_buffer.data(),0,encoded_buffer.size());
        p_ilbc->encode(decoded_buffer.data(), encoded_buffer.data());
        if (p_print!=nullptr){
          p_print->write(encoded_buffer.data(), encoded_buffer.size());
        }
        decoded_buffer_pos = 0;
      }
    }
    return len;
  }

 protected:
  Print *p_print = nullptr;
  iLBCEncode *p_ilbc = nullptr;
  Vector<float> decoded_buffer{0};    // one frame of input samples (as float)
  Vector<uint8_t> encoded_buffer{0};  // one encoded iLBC frame
  int16_t decoded_buffer_pos = 0;     // fill level of decoded_buffer
  EnumLBCFrameSize frame_size;
};
|
||||
|
||||
} // namespace audio_tools
|
||||
123
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecL16.h
Normal file
123
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecL16.h
Normal file
@@ -0,0 +1,123 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief DecoderL16 - Converts an 16 Bit Stream into 16Bits network byte order.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderL16 : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new DecoderL16 object
|
||||
*/
|
||||
|
||||
DecoderL16() { TRACED(); }
|
||||
|
||||
/**
|
||||
* @brief Construct a new DecoderL16 object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
*/
|
||||
DecoderL16(Print &out_stream, bool active = true) {
|
||||
TRACED();
|
||||
p_print = &out_stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new DecoderL16 object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
* @param bi Object that will be notified about the Audio Formt (Changes)
|
||||
*/
|
||||
|
||||
DecoderL16(Print &out_stream, AudioInfoSupport &bi) {
|
||||
TRACED();
|
||||
setOutput(out_stream);
|
||||
addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (p_print == nullptr)
|
||||
return 0;
|
||||
int16_t *data16 = (int16_t *)data;
|
||||
for (int j = 0; j < len / 2; j++) {
|
||||
data16[j] = ntohs(data16[j]);
|
||||
}
|
||||
return p_print->write((uint8_t *)data, len);
|
||||
}
|
||||
|
||||
virtual operator bool() override { return p_print!=nullptr; }
|
||||
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief EncoderL16s - Condenses 16 bit PCM data stream to 8 bits
|
||||
* data.
|
||||
* Most microcontrollers can not process 8 bit audio data directly. 8 bit data
|
||||
* however is very memory efficient and helps if you need to store audio on
|
||||
* constrained resources. This encoder translates 16bit data into 8bit data.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncoderL16 : public AudioEncoder {
|
||||
public:
|
||||
// Empty Constructor - the output stream must be provided with begin()
|
||||
EncoderL16() {}
|
||||
|
||||
// Constructor providing the output stream
|
||||
EncoderL16(Print &out) { p_print = &out; }
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char *mime() override { return "audio/l16"; }
|
||||
|
||||
/// starts the processing using the actual RAWAudioInfo
|
||||
virtual bool begin() override { is_open = true; return true;}
|
||||
|
||||
/// starts the processing
|
||||
bool begin(Print &out) {
|
||||
p_print = &out;
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override { is_open = false; }
|
||||
|
||||
/// Writes PCM data to be encoded as RAW
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (p_print == nullptr)
|
||||
return 0;
|
||||
|
||||
int16_t *data16 = (int16_t *)data;
|
||||
for (int j = 0; j < len / 2; j++) {
|
||||
data16[j] = htons(data16[j]);
|
||||
}
|
||||
|
||||
return p_print->write((uint8_t *)data, len);
|
||||
}
|
||||
|
||||
operator bool() override { return is_open; }
|
||||
|
||||
bool isOpen() { return is_open; }
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
bool is_open;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
193
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecL8.h
Normal file
193
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecL8.h
Normal file
@@ -0,0 +1,193 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief DecoderL8 - Converts an 8 Bit Stream into 16Bits
|
||||
* Most microcontrollers can not output 8 bit data directly. 8 bit data however
|
||||
* is very memory efficient and helps if you need to store audio on constrained
|
||||
* resources. This decoder translates 8bit data into 16bit data.
|
||||
* By default the encoded data is represented as uint8_t, so the values are from
|
||||
* 0 to 255.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderL8 : public AudioDecoder {
 public:
  /**
   * @brief Construct a new DecoderL8 object
   */

  DecoderL8(bool isSigned = false) {
    TRACED();
    setSigned(isSigned);
  }

  /**
   * @brief Construct a new DecoderL8 object
   *
   * @param out_stream Output Stream to which we write the decoded result
   */
  DecoderL8(Print &out_stream, bool active = true) {
    TRACED();
    p_print = &out_stream;
  }

  /**
   * @brief Construct a new DecoderL8 object
   *
   * @param out_stream Output Stream to which we write the decoded result
   * @param bi Object that will be notified about the Audio Formt (Changes)
   */

  DecoderL8(Print &out_stream, AudioInfoSupport &bi) {
    TRACED();
    setOutput(out_stream);
    addNotifyAudioChange(bi);
  }

  /// By default the encoded values are unsigned, but you can change them to
  /// signed
  void setSigned(bool isSigned) { is_signed = isSigned; }

  /// for most decoders this is not needed; the output is always published
  /// as 16 bit regardless of the requested format
  virtual void setAudioInfo(AudioInfo from) override {
    TRACED();
    if (from.bits_per_sample!=16){
      LOGE("Bits per sample not supported: %d", from.bits_per_sample);
    }
    from.bits_per_sample = 16;
    // only notify listeners when the format actually changed
    if (info != from) {
      notifyAudioChange(from);
    }
    info = from;
  }

  /// Expands each 8-bit input byte to a 16-bit sample on the output
  /// @return number of samples forwarded (written bytes / 2)
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (p_print == nullptr) return 0;
    // one int16 output sample per input byte
    buffer.resize(len);
    memset(buffer.data(), 0, len * 2);
    if (is_signed) {
      int8_t *pt8 = (int8_t *)data;
      for (size_t j = 0; j < len; j++) {
        buffer[j] = convertSample(pt8[j]);
      }
    } else {
      uint8_t *pt8 = (uint8_t *)data;
      for (size_t j = 0; j < len; j++) {
        buffer[j] = convertSample(pt8[j]);
      }
    }
    int write_byte_count = len * sizeof(int16_t);
    size_t result = p_print->write((uint8_t *)buffer.data(), write_byte_count);
    LOGD("DecoderL8 %d -> %d -> %d", (int)len, write_byte_count, (int)result);
    return result / sizeof(int16_t);
  }

  /// Expands one 8-bit value to 16 bits; mirrors EncoderL8::convertSample
  /// (offset 129 and factor 258 invert the encoder's /258 and +129)
  int16_t convertSample(int16_t in) {
    int32_t tmp = in;
    if (!is_signed) {
      // remove the unsigned-offset applied by the encoder
      tmp -= 129;
    }
    // scale to the 16-bit range with clipping
    return NumberConverter::clipT<int16_t>(tmp * 258);
  }

  virtual operator bool() override { return p_print!=nullptr; }

 protected:
  bool is_signed = false;       // input sample representation
  Vector<int16_t> buffer;       // conversion scratch buffer
};
|
||||
|
||||
/**
|
||||
* @brief EncoderL8s - Condenses 16 bit PCM data stream to 8 bits
|
||||
* data.
|
||||
* Most microcontrollers can not process 8 bit audio data directly. 8 bit data
|
||||
* however is very memory efficient and helps if you need to store audio on
|
||||
* constrained resources. This encoder translates 16bit data into 8bit data.
|
||||
* By default the encoded data is represented as uint8_t, so the values are from
|
||||
* 0 to 255.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncoderL8 : public AudioEncoder {
|
||||
public:
|
||||
// Empty Constructor - the output stream must be provided with begin()
|
||||
EncoderL8(bool isSigned = false) {
|
||||
TRACED();
|
||||
setSigned(isSigned);
|
||||
}
|
||||
|
||||
// Constructor providing the output stream
|
||||
EncoderL8(Print &out) { p_print = &out; }
|
||||
|
||||
/// By default the encoded values are unsigned, but can change them to signed
|
||||
void setSigned(bool isSigned) { is_signed = isSigned; }
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char *mime() override { return "audio/l8"; }
|
||||
|
||||
/// starts the processing using the actual RAWAudioInfo
|
||||
bool begin() override { is_open = true; return true;}
|
||||
|
||||
/// starts the processing
|
||||
bool begin(Print &out) {
|
||||
p_print = &out;
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override { is_open = false; }
|
||||
|
||||
/// Writes PCM data to be encoded as RAW
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
if (p_print == nullptr) return 0;
|
||||
int16_t *pt16 = (int16_t *)data;
|
||||
size_t samples = len / 2;
|
||||
buffer.resize(samples);
|
||||
memset(buffer.data(), 0, samples);
|
||||
for (size_t j = 0; j < samples; j++) {
|
||||
buffer[j] = convertSample(pt16[j]);
|
||||
}
|
||||
|
||||
size_t result = p_print->write((uint8_t *)buffer.data(), samples);
|
||||
LOGD("EncoderL8 %d -> %d -> %d", (int)len,(int) samples, (int)result);
|
||||
return result * sizeof(int16_t);
|
||||
}
|
||||
|
||||
operator bool() override { return is_open; }
|
||||
|
||||
int16_t convertSample(int16_t sample) {
|
||||
int16_t tmp = NumberConverter::clipT<int8_t>(sample / 258);
|
||||
if (!is_signed) {
|
||||
tmp += 129;
|
||||
// clip to range
|
||||
if (tmp < 0) {
|
||||
tmp = 0;
|
||||
} else if (tmp > 255) {
|
||||
tmp = 255;
|
||||
}
|
||||
}
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool isOpen() { return is_open; }
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
bool is_open;
|
||||
bool is_signed = false;
|
||||
Vector<int8_t> buffer;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
318
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecLC3.h
Normal file
318
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecLC3.h
Normal file
@@ -0,0 +1,318 @@
|
||||
/**
|
||||
* @file CodecLC3.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief Codec for lc3 using https://github.com/pschatzmann/arduino-liblc3
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "lc3.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
// 20 to 400
|
||||
#define DEFAULT_BYTE_COUNT 40
|
||||
// 7500 or 10000
|
||||
#define LC3_DEFAULT_DT_US 7500
|
||||
|
||||
/**
|
||||
* @brief Decoder for LC3. Depends on
|
||||
* https://github.com/pschatzmann/arduino-liblc3
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class LC3Decoder : public AudioDecoder {
|
||||
public:
|
||||
LC3Decoder(AudioInfo info, int dt_us = LC3_DEFAULT_DT_US,
|
||||
uint16_t inputByteCount = DEFAULT_BYTE_COUNT) {
|
||||
this->dt_us = dt_us;
|
||||
this->info = info;
|
||||
this->input_byte_count = inputByteCount;
|
||||
}
|
||||
|
||||
LC3Decoder(int dt_us = LC3_DEFAULT_DT_US,
|
||||
uint16_t inputByteCount = DEFAULT_BYTE_COUNT) {
|
||||
this->dt_us = dt_us;
|
||||
this->input_byte_count = inputByteCount;
|
||||
info.sample_rate = 32000;
|
||||
info.bits_per_sample = 16;
|
||||
info.channels = 1;
|
||||
}
|
||||
|
||||
virtual bool begin() {
|
||||
TRACEI();
|
||||
|
||||
// Return the number of PCM samples in a frame
|
||||
num_frames = lc3_frame_samples(dt_us, info.sample_rate);
|
||||
dec_size = lc3_decoder_size(dt_us, info.sample_rate);
|
||||
|
||||
LOGI("channels: %d", info.channels);
|
||||
LOGI("sample_rate: %d", info.sample_rate);
|
||||
LOGI("input_byte_count: %d", input_byte_count);
|
||||
LOGI("dt_us: %d", dt_us);
|
||||
LOGI("num_frames: %d", num_frames);
|
||||
LOGI("dec_size: %d", dec_size);
|
||||
|
||||
if (!checkValues()) {
|
||||
LOGE("Invalid Parameters");
|
||||
return false;
|
||||
}
|
||||
|
||||
// setup memory
|
||||
input_buffer.resize(input_byte_count);
|
||||
output_buffer.resize(num_frames * 2);
|
||||
lc3_decoder_memory.resize(dec_size);
|
||||
|
||||
// setup decoder
|
||||
lc3_decoder = lc3_setup_decoder(dt_us, info.sample_rate, 0,
|
||||
(void *)lc3_decoder_memory.data());
|
||||
notifyAudioChange(info);
|
||||
|
||||
input_pos = 0;
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
active = false;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
if (!active) return 0;
|
||||
LOGD("write %u", len);
|
||||
|
||||
uint8_t *p_ptr8 = (uint8_t *)data;
|
||||
|
||||
for (int j = 0; j < len; j++) {
|
||||
input_buffer[input_pos++] = p_ptr8[j];
|
||||
if (input_pos >= input_buffer.size()) {
|
||||
if (lc3_decode(lc3_decoder, input_buffer.data(), input_buffer.size(),
|
||||
pcm_format, (int16_t *)output_buffer.data(), 1) != 0) {
|
||||
LOGE("lc3_decode");
|
||||
}
|
||||
|
||||
// write all data to final output
|
||||
int requested = output_buffer.size();
|
||||
int written =
|
||||
p_print->write((const uint8_t *)output_buffer.data(), requested);
|
||||
if (written != requested) {
|
||||
LOGE("Decoder Bytes requested: %d - written: %d", requested, written);
|
||||
}
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
lc3_decoder_t lc3_decoder = nullptr;
|
||||
lc3_pcm_format pcm_format;
|
||||
Vector<uint8_t> lc3_decoder_memory;
|
||||
Vector<uint16_t> output_buffer;
|
||||
Vector<uint8_t> input_buffer;
|
||||
size_t input_pos = 0;
|
||||
int dt_us;
|
||||
uint16_t input_byte_count = 20; // up to 400
|
||||
uint16_t num_frames;
|
||||
unsigned dec_size;
|
||||
bool active = false;
|
||||
|
||||
bool checkValues() {
|
||||
if (p_print == nullptr) {
|
||||
LOGE("Output is not defined");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!LC3_CHECK_DT_US(dt_us)) {
|
||||
LOGE("dt_us: %d", dt_us);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!LC3_CHECK_SR_HZ(info.sample_rate)) {
|
||||
LOGE("sample_rate: %d", info.sample_rate);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info.channels!=1){
|
||||
LOGE("channels: %d", info.channels);
|
||||
}
|
||||
|
||||
if (num_frames == -1) {
|
||||
LOGE("num_frames could not be determined - using m");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dec_size == 0) {
|
||||
LOGE("dec_size");
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (info.bits_per_sample) {
|
||||
case 16:
|
||||
pcm_format = LC3_PCM_FORMAT_S16;
|
||||
break;
|
||||
case 24:
|
||||
pcm_format = LC3_PCM_FORMAT_S24;
|
||||
break;
|
||||
default:
|
||||
LOGE("Bits per sample not supported: %d", info.bits_per_sample);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for LC3 - Depends on
|
||||
* https://github.com/pschatzmann/arduino-liblc3
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class LC3Encoder : public AudioEncoder {
|
||||
public:
|
||||
LC3Encoder(int dt_us = LC3_DEFAULT_DT_US,
|
||||
uint16_t outputByteCount = DEFAULT_BYTE_COUNT) {
|
||||
this->dt_us = dt_us;
|
||||
info.sample_rate = 32000;
|
||||
info.bits_per_sample = 16;
|
||||
info.channels = 1;
|
||||
output_byte_count = outputByteCount;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
|
||||
unsigned enc_size = lc3_encoder_size(dt_us, info.sample_rate);
|
||||
num_frames = lc3_frame_samples(dt_us, info.sample_rate);
|
||||
|
||||
LOGI("sample_rate: %d", info.sample_rate);
|
||||
LOGI("channels: %d", info.channels);
|
||||
LOGI("dt_us: %d", dt_us);
|
||||
LOGI("output_byte_count: %d", output_byte_count);
|
||||
LOGI("enc_size: %d", enc_size);
|
||||
LOGI("num_frames: %d", num_frames);
|
||||
|
||||
if (!checkValues()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// setup memory
|
||||
lc3_encoder_memory.resize(enc_size);
|
||||
input_buffer.resize(num_frames * 2);
|
||||
output_buffer.resize(output_byte_count);
|
||||
|
||||
// setup encoder
|
||||
lc3_encoder = lc3_setup_encoder(dt_us, info.sample_rate, 0,
|
||||
lc3_encoder_memory.data());
|
||||
|
||||
input_pos = 0;
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
active = false;
|
||||
}
|
||||
|
||||
virtual const char *mime() { return "audio/lc3"; }
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return lc3_encoder != nullptr; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
if (!active) return 0;
|
||||
LOGD("write %u", len);
|
||||
uint8_t *p_ptr8 = (uint8_t *)data;
|
||||
|
||||
for (int j = 0; j < len; j++) {
|
||||
input_buffer[input_pos++] = p_ptr8[j];
|
||||
if (input_pos >= num_frames * 2) {
|
||||
if (lc3_encode(lc3_encoder, pcm_format,
|
||||
(const int16_t *)input_buffer.data(), 1,
|
||||
output_buffer.size(), output_buffer.data()) != 0) {
|
||||
LOGE("lc3_encode");
|
||||
}
|
||||
|
||||
// write all data to final output
|
||||
int requested = output_buffer.size();
|
||||
int written = p_print->write(output_buffer.data(), requested);
|
||||
if (written != requested) {
|
||||
LOGE("Encoder Bytes requested: %d - written: %d", requested, written);
|
||||
}
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
unsigned dt_us = 1000;
|
||||
uint16_t num_frames;
|
||||
lc3_encoder_t lc3_encoder = nullptr;
|
||||
lc3_pcm_format pcm_format;
|
||||
uint16_t output_byte_count = 20;
|
||||
Vector<uint8_t> lc3_encoder_memory;
|
||||
Vector<uint8_t> output_buffer;
|
||||
Vector<uint8_t> input_buffer;
|
||||
int input_pos = 0;
|
||||
bool active = false;
|
||||
|
||||
  /// Validates the output, the lc3 parameters and determines pcm_format from
  /// bits_per_sample. Returns false when a precondition is violated.
  bool checkValues() {
    if (p_print == nullptr) {
      LOGE("Output is not defined");
      return false;
    }

    if (!LC3_CHECK_DT_US(dt_us)) {
      LOGE("dt_us: %d", dt_us);
      return false;
    }

    if (!LC3_CHECK_SR_HZ(info.sample_rate)) {
      LOGE("sample_rate: %d", info.sample_rate);
      return false;
    }

    // NOTE(review): channels != 1 is logged as an error but does NOT fail
    // the check - presumably mono is only a soft requirement; confirm intent
    if (info.channels!=1){
      LOGE("channels: %d", info.channels);
    }

    // lc3_frame_samples() signals invalid parameters with -1
    if (num_frames == -1) {
      LOGE("Invalid num_frames");
      return false;
    }

    switch (info.bits_per_sample) {
      case 16:
        pcm_format = LC3_PCM_FORMAT_S16;
        break;
      case 24:
        pcm_format = LC3_PCM_FORMAT_S24;
        break;
      default:
        LOGE("Bits per sample not supported: %d", info.bits_per_sample);
        return false;
    }
    return true;
  }
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
164
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3Helix.h
Normal file
164
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3Helix.h
Normal file
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#ifndef HELIX_PRINT
|
||||
#define HELIX_PRINT
|
||||
#endif
|
||||
#include "MP3DecoderHelix.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief MP3 Decoder using libhelix:
|
||||
* https://github.com/pschatzmann/arduino-libhelix This is basically just a
|
||||
* simple wrapper to provide AudioInfo and AudioInfoSupport
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MP3DecoderHelix : public AudioDecoder {
|
||||
public:
|
||||
MP3DecoderHelix() {
|
||||
TRACED();
|
||||
mp3 = new libhelix::MP3DecoderHelix();
|
||||
if (mp3 != nullptr) {
|
||||
mp3->setReference(this);
|
||||
} else {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* @brief Construct a new MP3DecoderMini object
|
||||
*
|
||||
* @param out_stream
|
||||
*/
|
||||
MP3DecoderHelix(Print &out_stream) {
|
||||
TRACED();
|
||||
mp3 = new libhelix::MP3DecoderHelix();
|
||||
if (mp3 != nullptr) {
|
||||
mp3->setReference(this);
|
||||
} else {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
}
|
||||
setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new MP3DecoderMini object. The decoded output will go to
|
||||
* the print object.
|
||||
*
|
||||
* @param out_stream
|
||||
* @param bi
|
||||
*/
|
||||
MP3DecoderHelix(Print &out_stream, AudioInfoSupport &bi) {
|
||||
TRACED();
|
||||
mp3 = new libhelix::MP3DecoderHelix();
|
||||
if (mp3 != nullptr) {
|
||||
mp3->setReference(this);
|
||||
} else {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
}
|
||||
setOutput(out_stream);
|
||||
addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Destroy the MP3DecoderMini object
|
||||
*
|
||||
*/
|
||||
~MP3DecoderHelix() {
|
||||
if (mp3 != nullptr) delete mp3;
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &outStream) override {
|
||||
AudioDecoder::setOutput(outStream);
|
||||
if (mp3 != nullptr) mp3->setOutput(outStream);
|
||||
}
|
||||
|
||||
/// Starts the processing
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
if (mp3 == nullptr) {
|
||||
LOGE("Not enough memory for libhelix");
|
||||
return false;
|
||||
}
|
||||
mp3->begin();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Releases the reserved memory
|
||||
void end() override {
|
||||
TRACED();
|
||||
if (mp3 != nullptr) mp3->end();
|
||||
}
|
||||
|
||||
MP3FrameInfo audioInfoEx() { return mp3->audioInfo(); }
|
||||
|
||||
AudioInfo audioInfo() override {
|
||||
AudioInfo baseInfo;
|
||||
MP3FrameInfo i = audioInfoEx();
|
||||
if (i.nChans != 0 && i.samprate != 0 && i.bitsPerSample != 0) {
|
||||
baseInfo.channels = i.nChans;
|
||||
baseInfo.sample_rate = i.samprate;
|
||||
baseInfo.bits_per_sample = i.bitsPerSample;
|
||||
}
|
||||
return baseInfo;
|
||||
}
|
||||
|
||||
/// Write mp3 data to decoder
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("%s: %zu", LOG_METHOD, len);
|
||||
if (mp3 == nullptr) return 0;
|
||||
return mp3->write((uint8_t *)data, len);
|
||||
}
|
||||
|
||||
/// checks if the class is active
|
||||
operator bool() override { return mp3 != nullptr && (bool)*mp3; }
|
||||
|
||||
libhelix::MP3DecoderHelix *driver() { return mp3; }
|
||||
|
||||
/// Defines the callback object to which the Audio information change is
|
||||
/// provided
|
||||
void addNotifyAudioChange(AudioInfoSupport &bi) override {
|
||||
TRACED();
|
||||
AudioDecoder::addNotifyAudioChange(bi);
|
||||
if (mp3 != nullptr) mp3->setInfoCallback(infoCallback, this);
|
||||
}
|
||||
|
||||
/// notifies the subscriber about a change
|
||||
static void infoCallback(MP3FrameInfo &i, void *ref) {
|
||||
MP3DecoderHelix *p_helix = (MP3DecoderHelix *)ref;
|
||||
if (p_helix != nullptr) {
|
||||
TRACED();
|
||||
AudioInfo baseInfo;
|
||||
baseInfo.channels = i.nChans;
|
||||
baseInfo.sample_rate = i.samprate;
|
||||
baseInfo.bits_per_sample = i.bitsPerSample;
|
||||
baseInfo.logInfo("MP3DecoderHelix");
|
||||
p_helix->notifyAudioChange(baseInfo);
|
||||
} else {
|
||||
LOGE("Wrong Libhelix Version");
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides the maximum frame size - this is allocated on the heap and you
|
||||
/// can reduce the heap size my minimizing this value
|
||||
size_t maxFrameSize() { return mp3->maxFrameSize(); }
|
||||
|
||||
/// Define your optimized maximum frame size
|
||||
void setMaxFrameSize(size_t len) { mp3->setMaxFrameSize(len); }
|
||||
|
||||
/// Provides the maximum pwm buffer size - this is allocated on the heap and
|
||||
/// you can reduce the heap size my minimizing this value
|
||||
size_t maxPCMSize() { return mp3->maxPCMSize(); }
|
||||
|
||||
/// Define your optimized maximum pwm buffer size
|
||||
void setMaxPCMSize(size_t len) { mp3->setMaxPCMSize(len); }
|
||||
|
||||
protected:
|
||||
libhelix::MP3DecoderHelix *mp3 = nullptr;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
150
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3LAME.h
Normal file
150
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3LAME.h
Normal file
@@ -0,0 +1,150 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "MP3EncoderLAME.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief LAME parameters
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
struct AudioInfoLAME : public liblame::AudioInfo {
  // Defaults: 44.1 kHz, stereo, 16 bit samples
  AudioInfoLAME () {
    sample_rate = 44100;
    channels = 2;
    bits_per_sample = 16;
  };
  AudioInfoLAME (const AudioInfoLAME &) = default;

  // LAME encoding quality
  int quality = 7; // 0..9. 0=best (very slow). 9=worst.
};
|
||||
|
||||
/**
|
||||
* @brief Encodes PCM data to the MP3 format and writes the result to a stream
|
||||
* This is basically just a wrapper using https://github.com/pschatzmann/arduino-liblame
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MP3EncoderLAME : public AudioEncoder {
|
||||
|
||||
public:
|
||||
MP3EncoderLAME(){
|
||||
TRACED();
|
||||
}
|
||||
|
||||
MP3EncoderLAME(Print &out_stream){
|
||||
TRACED();
|
||||
p_print = &out_stream;
|
||||
}
|
||||
|
||||
~MP3EncoderLAME(){
|
||||
TRACED();
|
||||
end();
|
||||
}
|
||||
|
||||
/// Defines the output stream
|
||||
void setOutput(Print &out_stream){
|
||||
TRACED();
|
||||
p_print = &out_stream;
|
||||
if (enc!=nullptr){
|
||||
enc->setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines the Audio Info
|
||||
void setAudioInfo(AudioInfo from) {
|
||||
TRACED();
|
||||
AudioEncoder::setAudioInfo(from);
|
||||
lame_info.channels = from.channels;
|
||||
lame_info.sample_rate = from.sample_rate;
|
||||
lame_info.bits_per_sample = from.bits_per_sample;
|
||||
}
|
||||
|
||||
/// Defines the Audio Info
|
||||
void setAudioInfo(AudioInfoLAME from) {
|
||||
TRACED();
|
||||
lame_info = from;
|
||||
}
|
||||
|
||||
bool begin(AudioInfoLAME from) {
|
||||
setAudioInfo(from);
|
||||
return begin();
|
||||
}
|
||||
|
||||
// starts the processing
|
||||
bool begin() {
|
||||
createEnc();
|
||||
if (enc==nullptr) return false;
|
||||
enc->begin();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
AudioInfoLAME &audioInfoExt(){
|
||||
return lame_info;
|
||||
}
|
||||
|
||||
AudioInfoLAME defaultConfig(){
|
||||
AudioInfoLAME def;
|
||||
return def;
|
||||
}
|
||||
|
||||
// convert PCM data to convert into MP3
|
||||
size_t write(const uint8_t *data, size_t len){
|
||||
if (enc==nullptr) return 0;
|
||||
LOGD("write %d bytes", (int) len);
|
||||
return enc->write((uint8_t*)data, len);
|
||||
}
|
||||
|
||||
// release resources
|
||||
void end(){
|
||||
TRACED();
|
||||
if (enc!=nullptr){
|
||||
enc->end();
|
||||
delete enc;
|
||||
enc = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
liblame::MP3EncoderLAME *driver() {
|
||||
return enc;
|
||||
}
|
||||
|
||||
const char *mime() {
|
||||
return "audio/mp3";
|
||||
}
|
||||
|
||||
virtual operator bool() {
|
||||
return enc!=nullptr && (bool)(*enc);
|
||||
}
|
||||
|
||||
protected:
|
||||
liblame::MP3EncoderLAME *enc=nullptr;
|
||||
AudioInfoLAME lame_info;
|
||||
Print *p_print=nullptr;
|
||||
|
||||
// Create enc only at begin so that we can use psram
|
||||
void createEnc(){
|
||||
TRACED();
|
||||
if (enc==nullptr){
|
||||
enc = new liblame::MP3EncoderLAME();
|
||||
if (p_print!=nullptr){
|
||||
setOutput(*p_print);
|
||||
} else {
|
||||
LOGE("Output undefined");
|
||||
}
|
||||
LOGI("LibLAME channels: %d", lame_info.channels);
|
||||
LOGI("LibLAME sample_rate: %d", lame_info.sample_rate);
|
||||
LOGI("LibLAME bits_per_sample: %d", lame_info.bits_per_sample);
|
||||
LOGI("LibLAME quality: %d", lame_info.quality);
|
||||
enc->setAudioInfo(lame_info);
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
140
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3MAD.h
Executable file
140
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3MAD.h
Executable file
@@ -0,0 +1,140 @@
|
||||
#pragma once
|
||||
|
||||
#define MINIMP3_IMPLEMENTATION
|
||||
#define MINIMP3_NO_STDIO
|
||||
#define LOGGING_ACTIVE true
|
||||
|
||||
#include "Stream.h"
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "MP3DecoderMAD.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
// forward audio changes
|
||||
static AudioInfoSupport *audioChangeMAD=nullptr;
|
||||
|
||||
/**
|
||||
* @brief MP3 Decoder using https://github.com/pschatzmann/arduino-libmad
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MP3DecoderMAD : public AudioDecoder {
 public:
  /// Default constructor: allocates the libmad driver
  MP3DecoderMAD(){
    TRACED();
    mad = new libmad::MP3DecoderMAD();
  }

  /// Constructor which provides the decoded data via the indicated callback
  MP3DecoderMAD(libmad::MP3DataCallback dataCallback, libmad::MP3InfoCallback infoCB=nullptr){
    TRACED();
    mad = new libmad::MP3DecoderMAD(dataCallback, infoCB);
  }

  /// Constructor which writes the decoded data to the indicated output
  MP3DecoderMAD(Print &mad_output_streamput, libmad::MP3InfoCallback infoCB = nullptr){
    TRACED();
    mad = new libmad::MP3DecoderMAD(mad_output_streamput, infoCB);
  }

  ~MP3DecoderMAD(){
    TRACED();
    delete mad;
  }

  /// Defines the output for the decoded PCM data
  void setOutput(Print &out) override {
    TRACED();
    mad->setOutput(out);
  }

  /// Defines the callback which receives the decoded data
  void setAudioDataCallback(libmad::MP3DataCallback cb){
    TRACED();
    mad->setDataCallback(cb);
  }

  /// Defines the callback which receives the Info changes
  void setInfoCallback(libmad::MP3InfoCallback cb){
    TRACED();
    mad->setInfoCallback(cb);
  }

  /// Starts the processing
  bool begin() override {
    TRACED();
    mad->begin();
    return true;
  }

  /// Releases the reserved memory
  void end() override{
    TRACED();
    mad->end();
  }

  /// Provides the last valid audio information (libmad specific)
  libmad::MadAudioInfo audioInfoEx(){
    TRACED();
    return mad->audioInfo();
  }

  /// Provides the last valid audio information converted to AudioInfo
  AudioInfo audioInfo() override {
    TRACED();
    libmad::MadAudioInfo info = audioInfoEx();
    AudioInfo base;
    base.channels = info.channels;
    base.sample_rate = info.sample_rate;
    base.bits_per_sample = info.bits_per_sample;
    return base;
  }

  /// Makes the mp3 data available for decoding: however we recommend to provide the data via a callback or input stream
  size_t write(const uint8_t *data, size_t len) override {
    TRACED();
    return mad->write(data,len);
  }

  /// Makes the mp3 data available for decoding: however we recommend to provide the data via a callback or input stream
  size_t write(void *data, size_t len){
    TRACED();
    return mad->write(data,len);
  }

  /// Returns true as long as we are processing data
  operator bool() override{
    return (bool)*mad;
  }

  /// Provides access to the underlying libmad driver
  libmad::MP3DecoderMAD *driver() {
    return mad;
  }

  /// Forwards a libmad info change to the subscriber registered via the
  /// file-level static audioChangeMAD pointer
  static void audioChangeCallback(libmad::MadAudioInfo &info){
    if (audioChangeMAD!=nullptr){
      TRACED();
      AudioInfo base;
      base.channels = info.channels;
      base.sample_rate = info.sample_rate;
      base.bits_per_sample = info.bits_per_sample;
      // notify audio change
      audioChangeMAD->setAudioInfo(base);
    }
  }

  /// Defines the object which is notified about audio format changes.
  // NOTE(review): the subscriber is stored in a single file-level static, so
  // only one MP3DecoderMAD instance can deliver notifications - confirm
  virtual void addNotifyAudioChange(AudioInfoSupport &bi) override {
    TRACED();
    audioChangeMAD = &bi;
    // register audio change handler
    mad->setInfoCallback(audioChangeCallback);
  }

 protected:
  // NOTE(review): raw owning pointer; unlike MP3DecoderHelix the allocation
  // result is never checked before use - confirm this is acceptable here
  libmad::MP3DecoderMAD *mad;

};
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
177
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3Mini.h
Normal file
177
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMP3Mini.h
Normal file
@@ -0,0 +1,177 @@
|
||||
#pragma once
|
||||
|
||||
#define MINIMP3_NO_STDIO
|
||||
//#define MINIMP3_NO_SIMD
|
||||
//#define MINIMP3_IMPLEMENTATION
|
||||
//#define MINIMP3_ONLY_MP3
|
||||
//#define MINIMP3_FLOAT_OUTPUT
|
||||
|
||||
#ifndef MINIMP3_MAX_SAMPLE_RATE
|
||||
#define MINIMP3_MAX_SAMPLE_RATE 44100
|
||||
#endif
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "minimp3.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief MP3 Decoder using https://github.com/pschatzmann/minimp3.
|
||||
* This decoder does not provide any good results and it is not suited to decode any audio above 32000 on an ESP32. So the
|
||||
* sample rate is limited by the MINIMP3_MAX_SAMPLE_RATE variable.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MP3DecoderMini : public AudioDecoder {
|
||||
public:
|
||||
MP3DecoderMini() = default;
|
||||
|
||||
/// Destroy the MP3DecoderMini object
|
||||
~MP3DecoderMini() {
|
||||
if (active) {
|
||||
end();
|
||||
}
|
||||
}
|
||||
|
||||
void setBufferLength(int len) { buffer_size = len; }
|
||||
|
||||
/// Starts the processing
|
||||
bool begin() {
|
||||
TRACED();
|
||||
//esp_task_wdt_delete(nullptr);
|
||||
::mp3dec_init(&mp3d);
|
||||
buffer.resize(buffer_size);
|
||||
pcm.resize(MINIMP3_MAX_SAMPLES_PER_FRAME);
|
||||
buffer_pos = 0;
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Releases the reserved memory
|
||||
void end() {
|
||||
TRACED();
|
||||
flush();
|
||||
active = false;
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &outStream) { this->out = &outStream; }
|
||||
|
||||
/// Write mp3 data to decoder
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %zu", len);
|
||||
if (active) {
|
||||
if (buffer_pos+len>=buffer.size()){
|
||||
decode(len);
|
||||
}
|
||||
assert(buffer_pos+len<buffer.size());
|
||||
memcpy(buffer.data()+buffer_pos, data, len);
|
||||
buffer_pos += len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Decodes the last outstanding data
|
||||
void flush() {
|
||||
// decode the full buffer
|
||||
decode(0);
|
||||
buffer_pos = 0;
|
||||
}
|
||||
|
||||
/// checks if the class is active
|
||||
virtual operator bool() { return active; }
|
||||
|
||||
void setSampleRateLimit(int limit){
|
||||
sample_rate_limit = limit;
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *out = nullptr;
|
||||
mp3dec_t mp3d;
|
||||
mp3dec_frame_info_t mp3dec_info;
|
||||
size_t buffer_size = 5 * 1024;
|
||||
size_t buffer_pos = 0;
|
||||
Vector<uint8_t> buffer;
|
||||
Vector<mp3d_sample_t> pcm;
|
||||
#ifdef MINIMP3_FLOAT_OUTPUT
|
||||
Vector<int16_t> pcm16;
|
||||
#endif
|
||||
bool active;
|
||||
int sample_rate_limit = MINIMP3_MAX_SAMPLE_RATE; //32000;
|
||||
|
||||
/// Process single bytes so that we can decode a full frame when it is available
|
||||
void decode(int write_len) {
|
||||
LOGD("decode: %zd ", buffer_pos);
|
||||
int open = buffer_pos;
|
||||
int processed = 0;
|
||||
int samples;
|
||||
do {
|
||||
// decode data
|
||||
samples = ::mp3dec_decode_frame(&mp3d, buffer.data()+processed, open,
|
||||
pcm.data(), &mp3dec_info);
|
||||
LOGD("frame_offset: %d - frame_bytes: %d -> samples %d", mp3dec_info.frame_offset, mp3dec_info.frame_bytes, samples);
|
||||
open -= mp3dec_info.frame_bytes;
|
||||
processed += mp3dec_info.frame_bytes;
|
||||
// output decoding result
|
||||
if (samples > 0) {
|
||||
provideResult(samples);
|
||||
}
|
||||
// process until we have space for the next write
|
||||
} while(processed < write_len);
|
||||
|
||||
// save unprocessed data
|
||||
buffer_pos = open;
|
||||
memmove(buffer.data(),buffer.data()+processed, open);
|
||||
}
|
||||
|
||||
/// Provides Metadata and PCM data
|
||||
void provideResult(int samples) {
|
||||
LOGD("provideResult: %d samples", samples);
|
||||
AudioInfo tmp;
|
||||
tmp.sample_rate = mp3dec_info.hz>sample_rate_limit ? sample_rate_limit : mp3dec_info.hz;
|
||||
tmp.channels = mp3dec_info.channels;
|
||||
tmp.bits_per_sample = 16;
|
||||
|
||||
// notify about audio changes
|
||||
if (tmp != info) {
|
||||
tmp.logInfo();
|
||||
notifyAudioChange(tmp);
|
||||
}
|
||||
// store last info so that we can detect any changes
|
||||
info = info;
|
||||
|
||||
// provide result pwm data
|
||||
if (out != nullptr) {
|
||||
#ifdef MINIMP3_FLOAT_OUTPUT
|
||||
pcm16.resize(samples);
|
||||
f32_to_s16(pcm.data(), pcm16.data(), samples);
|
||||
out->write((uint8_t *)pcm16.data(), samples * sizeof(int16_t));
|
||||
#else
|
||||
out->write((uint8_t *)pcm.data(), samples * sizeof(mp3d_sample_t));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void f32_to_s16(float *in, int16_t *out, int num_samples) {
|
||||
int i = 0;
|
||||
for(; i < num_samples; i++){
|
||||
float sample = in[i] * 32768.0f;
|
||||
if (sample >= 32766.5f)
|
||||
out[i] = (int16_t) 32767;
|
||||
else if (sample <= -32767.5f)
|
||||
out[i] = (int16_t)-32768;
|
||||
else {
|
||||
int16_t s = (int16_t)(sample + .5f);
|
||||
s -= (s < 0); /* away from zero, to be compliant */
|
||||
out[i] = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
492
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMTS.h
Normal file
492
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecMTS.h
Normal file
@@ -0,0 +1,492 @@
|
||||
#pragma once
|
||||
|
||||
#define TS_PACKET_SIZE 188
|
||||
|
||||
#ifndef MTS_WRITE_BUFFER_SIZE
|
||||
#define MTS_WRITE_BUFFER_SIZE 2000
|
||||
#endif
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioTypes.h"
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "stdlib.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief PMT Program Element Stream Types
|
||||
* @ingroup basic
|
||||
*/
|
||||
enum class MTSStreamType {
|
||||
VIDEO = 0x01,
|
||||
VIDEO_H262 = 0x02,
|
||||
AUDIO_MP3 = 0x03,
|
||||
AUDIO_MP3_LOW_BITRATE = 0x04,
|
||||
PRV_SECTIONS = 0x05,
|
||||
PES_PRV = 0x06,
|
||||
MHEG = 0x07,
|
||||
H222_0_DSM_CC = 0x08,
|
||||
H222_1 = 0x09,
|
||||
A = 0x0A,
|
||||
B = 0x0B,
|
||||
C = 0x0C,
|
||||
D = 0x0D,
|
||||
H222_0_AUX = 0x0E,
|
||||
AUDIO_AAC = 0x0F,
|
||||
VISUAL = 0x10,
|
||||
AUDIO_AAC_LATM = 0x11,
|
||||
SL_PES = 0x12,
|
||||
SL_SECTIONS = 0x13,
|
||||
SYNC_DOWNLOAD = 0x14,
|
||||
PES_METADATA = 0x15,
|
||||
METDATA_SECTIONS = 0x16,
|
||||
METADATA_DATA_CAROUSEL = 0x17,
|
||||
METADATA_OBJ_CAROUSEL = 0x18,
|
||||
METADATA_SYNC_DOWNLOAD = 0x19,
|
||||
IPMP = 0x1A,
|
||||
VIDEO_AVC = 0X1B,
|
||||
VIDEO_H222_0 = 0x1C,
|
||||
DCII_VIDEO = 0x80,
|
||||
AUDIO_A53 = 0x81,
|
||||
SCTE_STD_SUBTITLE = 0x82,
|
||||
SCTE_ISOCH_DATA = 0x83,
|
||||
ATSC_PROG_ID = 0x85,
|
||||
SCTE_25 = 0x86,
|
||||
AUDIO_EAC3 = 0x87,
|
||||
AUDIO_DTS_HD = 0x88,
|
||||
DVB_MPE_FEC = 0x90,
|
||||
ULE = 0x91,
|
||||
VEI = 0x92,
|
||||
ATSC_DATA_SERVICE_TABLE = 0x95,
|
||||
SCTE_IP_DATA = 0xA0,
|
||||
DCII_TEXT = 0xC0,
|
||||
ATSC_SYNC_DATA = 0xC2,
|
||||
SCTE_AYSNC_DATA = 0xC3,
|
||||
ATSC_USER_PRIV_PROG_ELEMENTS = 0xC4,
|
||||
VC1 = 0xEA,
|
||||
ATSC_USER_PRIV = 0xEB,
|
||||
};
|
||||
|
||||
// enum class AACProfile : uint8_t {
|
||||
// MAIN = 0, // AAC Main (High complexity, rarely used)
|
||||
// LC = 1, // AAC Low Complexity (Most common)
|
||||
// SSR = 2, // AAC Scalable Sample Rate (Rare)
|
||||
// LTP = 3 // AAC Long Term Prediction (Not widely supported)
|
||||
// };
|
||||
|
||||
/**
|
||||
* @brief MPEG-TS (MTS) decoder. Extracts (demuxes) the indicated audio/video
|
||||
* data from a MPEG-TS (MTS) data stream. You can define the relevant stream
|
||||
* types via the API: addStreamType(MTSStreamType). By default, the
|
||||
* decoder selects the AUDIO_AAC, AUDIO_AAC_LATM stream types.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
**/
|
||||
|
||||
class MTSDecoder : public AudioDecoder {
|
||||
public:
|
||||
/// Default constructor
|
||||
MTSDecoder() = default;
|
||||
/// Provide the AAC decoder (or MP3 Decoder) to receive the extracted content
|
||||
MTSDecoder(AudioDecoder &dec) { p_dec = &dec; };
|
||||
  /// Starts the processor: resets the demuxer state, registers the default
  /// stream types (AAC / AAC-LATM) when none were defined and starts the
  /// optional decoder
  bool begin() override {
    TRACED();
    pmt_pid = 0xFFFF; // undefined
    pes_count = 0;
    is_adts_missing = false;
    open_pes_data_size = 0;
    frame_length = 0;

    // default supported stream types
    if (stream_types.empty()) {
      addStreamType(MTSStreamType::AUDIO_AAC);
      addStreamType(MTSStreamType::AUDIO_AAC_LATM);
    }

    // automatically close when called multiple times
    if (is_active) {
      end();
    }

    if (p_dec) p_dec->begin();
    is_active = true;
    return true;
  }
|
||||
|
||||
  /// Stops the processing and ends the optional decoder
  void end() override {
    TRACED();
    if (p_dec) p_dec->end();
    is_active = false;
  }
|
||||
|
||||
virtual operator bool() override { return is_active; }
|
||||
|
||||
/// Provides the mime type: "video/MP2T";
|
||||
const char *mime() { return "video/MP2T"; }
|
||||
|
||||
  /// Buffers the provided MPEG-TS data and demuxes complete packets.
  /// Returns 0 (after attempting a demux) when the internal buffer cannot
  /// accept len bytes, so the caller should retry the write.
  size_t write(const uint8_t *data, size_t len) override {
    // only process when open
    if (!is_active) {
      TRACEE();
      return 0;
    }

    // wait until we have enough data
    if (buffer.availableForWrite() < len) {
      LOGI("MTSDecoder::write: Buffer full");
      demux();
      return 0;
    }
    LOGI("MTSDecoder::write: %d", (int)len);
    size_t result = buffer.writeArray((uint8_t *)data, len);
    demux();
    return result;
  }
|
||||
|
||||
/// Set a new write buffer size (default is 2000)
|
||||
void resizeBuffer(int size) { buffer.resize(size); }
|
||||
|
||||
/// Clears the stream type filter
|
||||
void clearStreamTypes() {
|
||||
TRACED();
|
||||
stream_types.clear();
|
||||
}
|
||||
|
||||
/// Defines the stream type that should be extracted
|
||||
void addStreamType(MTSStreamType type) {
|
||||
TRACED();
|
||||
stream_types.push_back(type);
|
||||
}
|
||||
|
||||
/// Checks if the stream type is active
|
||||
bool isStreamTypeActive(MTSStreamType type) {
|
||||
for (int j = 0; j < stream_types.size(); j++) {
|
||||
if (stream_types[j] == type) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
void setOutput(AudioStream &out_stream) override {
|
||||
if (p_dec) {
|
||||
p_dec->setOutput(out_stream);
|
||||
} else {
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
void setOutput(AudioOutput &out_stream) override {
|
||||
if (p_dec) {
|
||||
p_dec->setOutput(out_stream);
|
||||
} else {
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines where the decoded result is written to
|
||||
void setOutput(Print &out_stream) override {
|
||||
if (p_dec) {
|
||||
p_dec->setOutput(out_stream);
|
||||
} else {
|
||||
AudioDecoder::setOutput(out_stream);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
bool is_active = false;
|
||||
SingleBuffer<uint8_t> buffer{MTS_WRITE_BUFFER_SIZE};
|
||||
Vector<MTSStreamType> stream_types;
|
||||
Vector<int> pids{0};
|
||||
AudioDecoder *p_dec = nullptr;
|
||||
uint16_t pmt_pid = 0xFFFF;
|
||||
// AACProfile aac_profile = AACProfile::LC;
|
||||
MTSStreamType selected_stream_type;
|
||||
int open_pes_data_size = 0;
|
||||
int frame_length = 0;
|
||||
bool is_adts_missing = false;
|
||||
size_t pes_count = 0;
|
||||
|
||||
/// Add the PID for which we want to extract the audio data from the PES
|
||||
/// packets
|
||||
void addPID(uint16_t pid) {
|
||||
if (pid == 0) return;
|
||||
for (int j = 0; j < pids.size(); j++) {
|
||||
if (pids[j] == pid) return;
|
||||
}
|
||||
LOGI("-> PMT PID: 0x%04X(%d)", pid, pid);
|
||||
pids.push_back(pid);
|
||||
}
|
||||
|
||||
/// demux the available data
|
||||
  /// demux the available data: parses packets until no complete TS packet is
  /// left in the buffer
  void demux() {
    TRACED();
    int count = 0;
    while (parse()) {
      LOGI("demux: step #%d with PES #%d", ++count, (int)pes_count);
    }
    LOGI("Number of demux calls: %d", count);
  }
|
||||
|
||||
  /// Find the position of the next sync byte (0x47): Usually on position 0.
  /// Returns -1 when less than one full TS packet is buffered or no sync
  /// byte is found.
  int syncPos() {
    int len = buffer.available();
    if (len < TS_PACKET_SIZE) return -1;
    for (int j = 0; j < len; j++) {
      if (buffer.data()[j] == 0x47) {
        return j;
      }
    }
    return -1;
  }
|
||||
|
||||
  /// Parse a single packet and remove the processed data
  bool parse() {
    int pos = syncPos();
    if (pos < 0) return false;
    if (pos != 0) {
      LOGW("Sync byte not found at position 0. Skipping %d bytes", pos);
      buffer.clearArray(pos);
      // NOTE(review): after skipping, fewer than TS_PACKET_SIZE bytes may
      // remain but the packet is still parsed below - confirm intended
    }
    // parse data
    uint8_t *packet = buffer.data();
    // 13 bit PID from bytes 1..2 of the TS header
    int pid = ((packet[1] & 0x1F) << 8) | (packet[2] & 0xFF);
    LOGI("PID: 0x%04X(%d)", pid, pid);

    // PES contains the audio data
    if (!is_adts_missing && pids.contains(pid)) {
      parsePES(packet, pid);
    } else {
      parsePacket(packet, pid);
    }

    // remove processed data
    buffer.clearArray(TS_PACKET_SIZE);
    return true;
  }
|
||||
|
||||
  /// Detailed processing for parsing a single non-PES packet (PAT / PMT)
  void parsePacket(uint8_t *packet, int pid) {
    TRACEI();
    bool payloadUnitStartIndicator = false;

    int payloadStart =
        getPayloadStart(packet, false, payloadUnitStartIndicator);
    int len = TS_PACKET_SIZE - payloadStart;

    // if we are at the beginning we start with a pat
    if (pid == 0 && payloadUnitStartIndicator) {
      pids.clear();
    }

    // PID 0 is for PAT
    if (pid == 0) {
      parsePAT(&packet[payloadStart], len);
    } else if (pid == pmt_pid && packet[payloadStart] == 0x02) {
      // table_id 0x02 identifies a PMT section
      parsePMT(&packet[payloadStart], len);
    } else {
      LOGE("-> Packet ignored for PID 0x%x", pid);
    }
  }
|
||||
|
||||
  /// Determines the byte offset of the payload within a TS packet: skips the
  /// 4 byte header, an optional adaptation field and - for non-PES packets -
  /// the pointer field. Sets payloadUnitStartIndicator from the PUSI bit.
  int getPayloadStart(uint8_t *packet, bool isPES,
                      bool &payloadUnitStartIndicator) {
    uint8_t adaptionField = (packet[3] & 0x30) >> 4;
    int adaptationSize = 0;
    int offset = 4; // Start after TS header (4 bytes)

    // Check for adaptation field
    // 00 (0) → Invalid (should never happen).
    // 01 (1) → Payload only (no adaptation field).
    // 10 (2) → Adaptation field only (no payload).
    // 11 (3) → Adaptation field + payload.
    if (adaptionField == 0b11) { // Adaptation field exists
      adaptationSize = packet[4] + 1;
      offset += adaptationSize;
    }

    // If PUSI is set, there's a pointer field (skip it)
    if (packet[1] & 0x40) {
      if (!isPES) offset += packet[offset] + 1;
      payloadUnitStartIndicator = true;
    }

    LOGI("Payload Unit Start Indicator (PUSI): %d", payloadUnitStartIndicator);
    LOGI("Adaption Field Control: 0x%x / size: %d", adaptionField,
         adaptationSize);

    return offset;
  }
|
||||
|
||||
void parsePAT(uint8_t *pat, int len) {
|
||||
TRACEI();
|
||||
assert(pat[0] == 0); // Program Association section
|
||||
int startOfProgramNums = 8;
|
||||
int lengthOfPATValue = 4;
|
||||
int sectionLength = ((pat[1] & 0x0F) << 8) | (pat[2] & 0xFF);
|
||||
LOGI("PAT Section Length: %d", sectionLength);
|
||||
if (sectionLength >= len) {
|
||||
LOGE("Unexpected PAT Section Length: %d", sectionLength);
|
||||
sectionLength = len;
|
||||
}
|
||||
int indexOfPids = 0;
|
||||
for (int i = startOfProgramNums; i <= sectionLength;
|
||||
i += lengthOfPATValue) {
|
||||
int program_number = ((pat[i] & 0xFF) << 8) | (pat[i + 1] & 0xFF);
|
||||
int pid = ((pat[i + 2] & 0x1F) << 8) | (pat[i + 3] & 0xFF);
|
||||
LOGI("Program Num: 0x%04X(%d) / PID: 0x%04X(%d) ", program_number,
|
||||
program_number, pid, pid);
|
||||
|
||||
if (pmt_pid == 0xFFFF && pid >= 0x0020 && pid <= 0x1FFE) {
|
||||
pmt_pid = pid;
|
||||
}
|
||||
}
|
||||
LOGI("Using PMT PID: 0x%04X(%d)", pmt_pid, pmt_pid);
|
||||
}
|
||||
|
||||
  /// Parses the Program Map Table section and registers the elementary PIDs
  /// of the requested stream types
  void parsePMT(uint8_t *pmt, int len) {
    TRACEI();
    assert(pmt[0] == 0x02);  // PMT section (table_id 0x02)
    int staticLengthOfPMT = 12;
    int sectionLength = ((pmt[1] & 0x0F) << 8) | (pmt[2] & 0xFF);
    LOGI("- PMT Section Length: %d", sectionLength);
    int programInfoLength = ((pmt[10] & 0x0F) << 8) | (pmt[11] & 0xFF);
    LOGI("- PMT Program Info Length: %d", programInfoLength);

    int cursor = staticLengthOfPMT + programInfoLength;
    while (cursor < sectionLength - 1) {
      MTSStreamType streamType = static_cast<MTSStreamType>(pmt[cursor] & 0xFF);
      int elementaryPID =
          ((pmt[cursor + 1] & 0x1F) << 8) | (pmt[cursor + 2] & 0xFF);
      LOGI("-- Stream Type: 0x%02X(%d) [%s] for Elementary PID: 0x%04X(%d)",
           (int)streamType, (int)streamType, toStr(streamType), elementaryPID,
           elementaryPID);

      if (isStreamTypeActive(streamType)) {
        selected_stream_type = streamType;
        addPID(elementaryPID);
      }

      // each entry: 5 byte header followed by the ES info descriptors
      int esInfoLength =
          ((pmt[cursor + 3] & 0x0F) << 8) | (pmt[cursor + 4] & 0xFF);
      LOGI("-- ES Info Length: 0x%04X(%d)", esInfoLength, esInfoLength);
      cursor += 5 + esInfoLength;
    }
  }
|
||||
|
||||
  /// Parses a PES (Packetized Elementary Stream) TS packet for the given PID:
  /// strips the PES header on payload-start packets, tracks how much of the
  /// announced PES payload is still open, and forwards the audio data to the
  /// configured output and/or decoder.
  /// @param packet full TS packet (TS_PACKET_SIZE bytes)
  /// @param pid the PID this packet belongs to (used for logging only)
  void parsePES(uint8_t *packet, int pid) {
    LOGI("parsePES: %d", pid);
    ++pes_count;

    // calculate payload start
    bool payloadUnitStartIndicator = false;
    int payloadStart = getPayloadStart(packet, true, payloadUnitStartIndicator);

    // PES
    uint8_t *pes = packet + payloadStart;
    int len = TS_PACKET_SIZE - payloadStart;
    // PES (AAC) data
    uint8_t *pesData = nullptr;
    int pesDataSize = 0;

    if (payloadUnitStartIndicator) {
      // a payload start must at least contain the 6 byte PES prefix
      assert(len >= 6);
      // PES header is not alligned correctly
      if (!isPESStartCodeValid(pes)) {
        LOGE("PES header not aligned correctly");
        return;
      }

      // PES_packet_length: bytes remaining after this field (0 = unbounded)
      int pesPacketLength =
          (static_cast<int>(pes[4]) << 8) | static_cast<int>(pes[5]);

      // PES Header size is at least 6 bytes, but can be larger with optional
      // fields
      // NOTE(review): ISO 13818-1 defines the optional header as 3 fixed bytes
      // (flags + PES_header_data_length at pes[8]) followed by
      // PES_header_data_length bytes; the formula below mixes the PTS/DTS
      // lengths into the calculation differently — confirm against real
      // streams. It also reads pes[6..8] although only len >= 6 is asserted.
      int pesHeaderSize = 6;
      if ((pes[6] & 0xC0) != 0) {  // Check for PTS/DTS flags
        pesHeaderSize += 3 + ((pes[7] & 0xC0) == 0xC0 ? 5 : 0);
        pesHeaderSize += pes[8];  // PES header stuffing size
      }
      LOGI("- PES Header Size: %d", pesHeaderSize);
      pesData = pes + pesHeaderSize;
      pesDataSize = len - pesHeaderSize;

      assert(pesHeaderSize < len);
      assert(pesDataSize > 0);

      /// Check for ADTS: on the very first PES packet of an AAC stream detect
      /// whether the payload already carries ADTS sync words
      if (pes_count == 1 && selected_stream_type == MTSStreamType::AUDIO_AAC) {
        is_adts_missing = findSyncWord(pesData, pesDataSize) == -1;
      }

      // remember how many payload bytes the PES header announced
      open_pes_data_size = pesPacketLength;

    } else {
      // continuation packet: the whole TS payload is PES data
      pesData = pes;
      pesDataSize = len;
    }

    // Recalculate the open data; once it goes negative we received more than
    // announced and drop the packet
    open_pes_data_size -= pesDataSize;
    if (open_pes_data_size < 0) {
      return;
    }

    /// Write the data
    LOGI("- writing %d bytes (open: %d)", pesDataSize, open_pes_data_size);
    if (p_print) {
      size_t result = writeData<uint8_t>(p_print, pesData, pesDataSize);
      assert(result == pesDataSize);
    }
    if (p_dec) {
      size_t result =
          writeDataT<uint8_t, AudioDecoder>(p_dec, pesData, pesDataSize);
      assert(result == pesDataSize);
    }
  }
|
||||
|
||||
/// check for PES packet start code prefix
|
||||
bool isPESStartCodeValid(uint8_t *pes) {
|
||||
if (pes[0] != 0) return false;
|
||||
if (pes[1] != 0) return false;
|
||||
if (pes[2] != 0x1) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Convert the relevant MTSStreamType to a string
|
||||
const char *toStr(MTSStreamType type) {
|
||||
switch (type) {
|
||||
case MTSStreamType::AUDIO_MP3:
|
||||
return "AUDIO_MP3";
|
||||
case MTSStreamType::AUDIO_MP3_LOW_BITRATE:
|
||||
return "AUDIO_MP3_LOW_BITRATE";
|
||||
case MTSStreamType::AUDIO_AAC:
|
||||
return "AUDIO_AAC";
|
||||
case MTSStreamType::AUDIO_AAC_LATM:
|
||||
return "AUDIO_AAC_LATM";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the mp3/aac sync word
|
||||
int findSyncWord(const uint8_t *buf, size_t nBytes, uint8_t synch = 0xFF,
|
||||
uint8_t syncl = 0xF0) {
|
||||
for (int i = 0; i < nBytes - 1; i++) {
|
||||
if ((buf[i + 0] & synch) == synch && (buf[i + 1] & syncl) == syncl)
|
||||
return i;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
/// @brief Legacy alias for MPEG Transport Stream decoder
|
||||
/// @ingroup codecs
|
||||
using MPEG_TSDecoder = MTSDecoder;
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,138 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Net.h"
|
||||
#if defined(ARDUINO) && !defined(IS_MIN_DESKTOP)
|
||||
#include "Print.h"
|
||||
#endif
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief PCM decoder which converts from network format to the host format.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderNetworkFormat : public AudioDecoder {
|
||||
public:
|
||||
DecoderNetworkFormat() = default;
|
||||
|
||||
DecoderNetworkFormat(Print &out_stream) {
|
||||
TRACED();
|
||||
pt_print = &out_stream;
|
||||
}
|
||||
|
||||
DecoderNetworkFormat(Print &out_stream, AudioInfoSupport &bi) {
|
||||
pt_print = &out_stream;
|
||||
}
|
||||
|
||||
~DecoderNetworkFormat() {}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { pt_print = &out_stream; }
|
||||
|
||||
bool begin() { return true; }
|
||||
|
||||
void end() {}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
TRACED();
|
||||
switch (audioInfo().bits_per_sample) {
|
||||
case 8:
|
||||
// nothing to do
|
||||
break;
|
||||
case 16: {
|
||||
int16_t *data16 = (int16_t *)data;
|
||||
for (int i = 0; i < len / sizeof(int16_t); i++) {
|
||||
data16[i] = ntohs(data16[i]);
|
||||
}
|
||||
} break;
|
||||
case 24:
|
||||
case 32: {
|
||||
int32_t *data32 = (int32_t *)data;
|
||||
for (int i = 0; i < len / sizeof(int32_t); i++) {
|
||||
data32[i] = ntohl(data32[i]);
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
LOGE("bits_per_sample not supported: %d",
|
||||
(int)audioInfo().bits_per_sample);
|
||||
break;
|
||||
}
|
||||
return pt_print->write((uint8_t *)data, len);
|
||||
}
|
||||
|
||||
operator bool() { return true; }
|
||||
|
||||
/// The result is encoded data - by default this is false
|
||||
virtual bool isResultPCM() { return true; }
|
||||
|
||||
protected:
|
||||
Print *pt_print = nullptr;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder which converts from the host format to the network format.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class EncoderNetworkFormat : public AudioEncoder {
|
||||
public:
|
||||
EncoderNetworkFormat() { TRACED(); }
|
||||
|
||||
EncoderNetworkFormat(Print &out_stream) {
|
||||
TRACED();
|
||||
pt_print = &out_stream;
|
||||
}
|
||||
|
||||
EncoderNetworkFormat(Print &out_stream, AudioInfoSupport &bi) {
|
||||
pt_print = &out_stream;
|
||||
}
|
||||
|
||||
~EncoderNetworkFormat() {}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { pt_print = &out_stream; }
|
||||
|
||||
bool begin() { return true; }
|
||||
|
||||
void end() {}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
TRACED();
|
||||
switch (audioInfo().bits_per_sample) {
|
||||
case 8:
|
||||
// nothing to do
|
||||
break;
|
||||
case 16: {
|
||||
int16_t *data16 = (int16_t *)data;
|
||||
for (int i = 0; i < len / sizeof(int16_t); i++) {
|
||||
data16[i] = htons(data16[i]);
|
||||
}
|
||||
} break;
|
||||
case 24:
|
||||
case 32: {
|
||||
int32_t *data32 = (int32_t *)data;
|
||||
for (int i = 0; i < len / sizeof(int32_t); i++) {
|
||||
data32[i] = htonl(data32[i]);
|
||||
}
|
||||
} break;
|
||||
default:
|
||||
LOGE("bits_per_sample not supported: %d",
|
||||
(int)audioInfo().bits_per_sample);
|
||||
break;
|
||||
}
|
||||
return pt_print->write((uint8_t *)data, len);
|
||||
}
|
||||
|
||||
operator bool() { return true; }
|
||||
|
||||
const char *mime() { return "audio/pcm"; }
|
||||
|
||||
protected:
|
||||
Print *pt_print = nullptr;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
478
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecOpus.h
Normal file
478
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecOpus.h
Normal file
@@ -0,0 +1,478 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "Print.h"
|
||||
#include "opus.h"
|
||||
|
||||
#ifndef OPUS_ENC_MAX_BUFFER_SIZE
|
||||
#define OPUS_ENC_MAX_BUFFER_SIZE 2048
|
||||
#endif
|
||||
|
||||
#ifndef OPUS_DEC_MAX_BUFFER_SIZE
|
||||
#define OPUS_DEC_MAX_BUFFER_SIZE 4 * 1024
|
||||
#endif
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Setting for Opus Decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
struct OpusSettings : public AudioInfo {
  OpusSettings() {
    /// one of 8000, 12000, 16000, 24000, 48000
    sample_rate = 48000;
    /// 1 or 2
    channels = 2;
    /// must be 16!
    bits_per_sample = 16;
  }
  // size of the decoded output buffer in bytes
  int max_buffer_size = OPUS_DEC_MAX_BUFFER_SIZE;
  // maximum number of bytes passed to the output per write call
  int max_buffer_write_size = 512;
};
|
||||
|
||||
/**
|
||||
* @brief Setting for Opus Encoder where the following values are valid:
|
||||
* -1 indicates that the default value should be used and that this codec is not
|
||||
setting the value.
|
||||
*
|
||||
int channels[2] = {1, 2};<br>
|
||||
int applications[3] = {OPUS_APPLICATION_AUDIO, OPUS_APPLICATION_VOIP,
|
||||
OPUS_APPLICATION_RESTRICTED_LOWDELAY};<br>
|
||||
|
||||
int sample_rates[] = {8000,12000,16000 ,24000,48000}<br>
|
||||
|
||||
int bitrates[11] = {6000, 12000, 16000, 24000, 32000, 48000,
|
||||
64000, 96000, 510000, OPUS_AUTO, OPUS_BITRATE_MAX};<br>
|
||||
int force_channels[4] = {OPUS_AUTO, OPUS_AUTO, 1, 2};<br>
|
||||
int use_vbr[3] = {0, 1, 1};<br>
|
||||
int vbr_constraints[3] = {0, 1, 1};<br>
|
||||
int complexities[11] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10};<br>
|
||||
int max_bandwidths[6] = {
|
||||
OPUS_BANDWIDTH_NARROWBAND, OPUS_BANDWIDTH_MEDIUMBAND,
|
||||
OPUS_BANDWIDTH_WIDEBAND, OPUS_BANDWIDTH_SUPERWIDEBAND,
|
||||
OPUS_BANDWIDTH_FULLBAND, OPUS_BANDWIDTH_FULLBAND};<br>
|
||||
|
||||
int signals[4] = {OPUS_AUTO, OPUS_AUTO, OPUS_SIGNAL_VOICE,
|
||||
OPUS_SIGNAL_MUSIC};<br> int inband_fecs[3] = {0, 0, 1};<br> int
|
||||
packet_loss_perc[4] = {0, 1, 2, 5};<br> int lsb_depths[2] = {8, 24};<br> int
|
||||
prediction_disabled[3] = {0, 0, 1};<br> int use_dtx[2] = {0, 1};<br> int
|
||||
frame_sizes_ms_x2[9] =
|
||||
{OPUS_FRAMESIZE_2_5_MS,OPUS_FRAMESIZE_5_MS,OPUS_FRAMESIZE_10_MS,OPUS_FRAMESIZE_20_MS,OPUS_FRAMESIZE_40_MS,OPUS_FRAMESIZE_60_MS,OPUS_FRAMESIZE_80_MS,OPUS_FRAMESIZE_100_MS,OPUS_FRAMESIZE_120_MS}
|
||||
x2 to avoid 2.5 ms <br>
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
**/
|
||||
|
||||
struct OpusEncoderSettings : public OpusSettings {
  OpusEncoderSettings() : OpusSettings() {
    /// encode buffer size; defaults to OPUS_ENC_MAX_BUFFER_SIZE (2048 unless
    /// overridden at compile time)
    max_buffer_size = OPUS_ENC_MAX_BUFFER_SIZE;
  }
  /// OPUS_APPLICATION_AUDIO, OPUS_APPLICATION_VOIP,
  /// OPUS_APPLICATION_RESTRICTED_LOWDELAY
  int application = OPUS_APPLICATION_AUDIO;
  /// 6000, 12000, 16000, 24000, 32000, 48000, 64000, 96000, 510000,
  /// OPUS_AUTO, OPUS_BITRATE_MAX (-1 = leave encoder default)
  int bitrate = -1;
  /// OPUS_AUTO, 1, 2 (-1 = leave encoder default)
  int force_channel = -1;
  /// 0, 1 (-1 = leave encoder default)
  int vbr = -1;
  /// 0, 1 (-1 = leave encoder default)
  int vbr_constraint = -1;
  /// 0 to 10 (-1 = leave encoder default)
  int complexity = -1;
  /// OPUS_BANDWIDTH_NARROWBAND,
  /// OPUS_BANDWIDTH_MEDIUMBAND, OPUS_BANDWIDTH_WIDEBAND,
  /// OPUS_BANDWIDTH_SUPERWIDEBAND, OPUS_BANDWIDTH_FULLBAND
  int max_bandwidth = -1;
  /// OPUS_AUTO, OPUS_SIGNAL_VOICE, OPUS_SIGNAL_MUSIC
  int signal = -1;
  /// 0, 1 (-1 = leave encoder default)
  int inband_fec = -1;
  /// 0, 1, 2, 5 (-1 = leave encoder default)
  int packet_loss_perc = -1;
  /// 8, 24 (-1 = leave encoder default)
  int lsb_depth = -1;
  /// 0, 1 (-1 = leave encoder default)
  int prediction_disabled = -1;
  /// 0, 1 (-1 = leave encoder default)
  int use_dtx = -1;
  /// OPUS_FRAMESIZE_2_5_MS,OPUS_FRAMESIZE_5_MS,OPUS_FRAMESIZE_10_MS,OPUS_FRAMESIZE_20_MS,OPUS_FRAMESIZE_40_MS,OPUS_FRAMESIZE_60_MS,OPUS_FRAMESIZE_80_MS,OPUS_FRAMESIZE_100_MS,OPUS_FRAMESIZE_120_MS
  int frame_sizes_ms_x2 = -1; /* x2 to avoid 2.5 ms */
};
|
||||
|
||||
/**
|
||||
* @brief Decoder for the Opus audio format.
|
||||
* Each Opus frame must be provided with one write() call. Therefore, Opus
|
||||
* is usually encapsulated in a container format (e.g., Ogg) that splits
|
||||
* the stream into frames.
|
||||
*
|
||||
* Depends on https://github.com/pschatzmann/arduino-libopus.git
|
||||
*
|
||||
* @author Phil Schatzmann
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OpusAudioDecoder : public AudioDecoder {
 public:
  /**
   * @brief Construct a new OpusDecoder object
   * @param releaseOnEnd if true the internal buffers are freed in end()
   */
  OpusAudioDecoder(bool releaseOnEnd = false) : release_on_end(releaseOnEnd) {}

  /**
   * @brief Construct a new OpusDecoder object
   *
   * @param out_stream Output Stream to which we write the decoded result
   */
  OpusAudioDecoder(Print &out_stream) {
    TRACED();
    setOutput(out_stream);
  }

  /// Defines the output Stream
  void setOutput(Print &out_stream) override { p_print = &out_stream; }

  /// Returns the current configuration as AudioInfo
  AudioInfo audioInfo() override { return cfg; }

  /// Provides access to the configuration
  OpusSettings &config() { return cfg; }
  OpusSettings &defaultConfig() { return cfg; }

  /// Starts the decoding with the provided settings
  bool begin(OpusSettings settings) {
    TRACED();
    AudioDecoder::setAudioInfo(settings);
    cfg = settings;
    notifyAudioChange(cfg);
    return begin();
  }

  /// Allocates the output buffer and the opus decoder state.
  /// @return false if the sample rate is unsupported or opus init fails
  bool begin() override {
    TRACED();
    if (!isValidRate(cfg.sample_rate)) {
      LOGE("Sample rate not supported: %d", cfg.sample_rate);
      return false;
    }
    outbuf.resize(cfg.max_buffer_size);
    assert(outbuf.data() != nullptr);

    // allocate decoder state in our own buffer and initialize it in place
    size_t size = opus_decoder_get_size(cfg.channels);
    decbuf.resize(size);
    assert(decbuf.data() != nullptr);
    dec = (OpusDecoder *)decbuf.data();
    int err = opus_decoder_init(dec, cfg.sample_rate, cfg.channels);

    if (err != OPUS_OK) {
      LOGE("opus_decoder_create: %s for sample_rate: %d, channels:%d",
           opus_strerror(err), cfg.sample_rate, cfg.channels);
      return false;
    }
    active = true;
    return true;
  }

  /// Stops the decoding; buffers are only released when release_on_end is set
  void end() override {
    TRACED();
    // dec points into decbuf (opus_decoder_init, not _create), so there is
    // nothing to destroy - just drop the pointer
    dec = nullptr;
    if (release_on_end) {
      outbuf.resize(0);
      decbuf.resize(0);
    }
    active = false;
  }

  /// Updates the decoder configuration from the provided AudioInfo
  void setAudioInfo(AudioInfo from) override {
    AudioDecoder::setAudioInfo(from);
    info = from;
    cfg.sample_rate = from.sample_rate;
    cfg.channels = from.channels;
    cfg.bits_per_sample = from.bits_per_sample;
  }

  /// write one full opus frame
  size_t write(const uint8_t *data, size_t len) override {
    if (!active || p_print == nullptr) return 0;
    // decode data
    LOGD("OpusAudioDecoder::write: %d", (int)len);
    int in_band_forward_error_correction = 0;
    // max number of samples per channel that fit into the output buffer
    int frame_count = cfg.max_buffer_size / cfg.channels / sizeof(opus_int16);
    int out_samples =
        opus_decode(dec, (uint8_t *)data, len, (opus_int16 *)outbuf.data(),
                    frame_count, in_band_forward_error_correction);
    if (out_samples < 0) {
      LOGW("opus-decode: %s", opus_strerror(out_samples));
    } else if (out_samples > 0) {
      // write decoded PCM to the final destination in limited chunks
      int out_bytes = out_samples * cfg.channels * sizeof(int16_t);
      LOGD("opus-decode: %d", out_bytes);
      int open = out_bytes;
      int processed = 0;
      // NOTE(review): if the output ever returns 0 permanently this loop
      // spins forever - confirm the sink always makes progress
      while (open > 0) {
        int to_write = std::min(open, cfg.max_buffer_write_size);
        int written = p_print->write(outbuf.data() + processed, to_write);
        open -= written;
        processed += written;
      }
    }
    return len;
  }

  operator bool() override { return active; }

  /// Defines if the resources should be released when the stream is closed
  /// (default: false)
  void setReleaseOnEnd(bool flag) { release_on_end = flag; }

 protected:
  Print *p_print = nullptr;       // output sink for decoded PCM
  OpusDecoder *dec = nullptr;     // points into decbuf while active
  OpusSettings cfg;               // current decoder configuration
  bool active = false;            // true between begin() and end()
  Vector<uint8_t> outbuf{0};      // buffer for decoded PCM
  Vector<uint8_t> decbuf{0};      // storage for the opus decoder state
  const uint32_t valid_rates[5] = {8000, 12000, 16000, 24000, 48000};
  bool release_on_end = false;

  /// Checks whether the rate is one of the rates supported by opus
  bool isValidRate(int rate) {
    for (auto &valid : valid_rates) {
      if (valid == rate) return true;
    }
    return false;
  }
};
|
||||
|
||||
/**
|
||||
* @brief Encode for Opus audio.
|
||||
*
|
||||
* Depends on https://github.com/pschatzmann/arduino-libopus.git
|
||||
* Please note that each fully encoded frame is written to the output stream.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OpusAudioEncoder : public AudioEncoder {
|
||||
public:
|
||||
// Empty Constructor - the output stream must be provided with begin()
|
||||
OpusAudioEncoder() = default;
|
||||
|
||||
// Constructor providing the output stream
|
||||
OpusAudioEncoder(Print &out) { setOutput(out); }
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override { p_print = &out_stream; }
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char *mime() override { return "audio/opus"; }
|
||||
|
||||
/// We actually do nothing with this
|
||||
void setAudioInfo(AudioInfo from) override {
|
||||
AudioEncoder::setAudioInfo(from);
|
||||
cfg.sample_rate = from.sample_rate;
|
||||
cfg.channels = from.channels;
|
||||
cfg.bits_per_sample = from.bits_per_sample;
|
||||
}
|
||||
|
||||
/// starts the processing using the actual OpusAudioInfo
|
||||
bool begin() override {
|
||||
int err;
|
||||
int size = getFrameSizeSamples(cfg.sample_rate) * 2;
|
||||
frame.resize(size);
|
||||
assert(frame.data() != nullptr);
|
||||
enc = opus_encoder_create(cfg.sample_rate, cfg.channels, cfg.application,
|
||||
&err);
|
||||
if (err != OPUS_OK) {
|
||||
LOGE("opus_encoder_create: %s for sample_rate: %d, channels:%d",
|
||||
opus_strerror(err), cfg.sample_rate, cfg.channels);
|
||||
return false;
|
||||
}
|
||||
is_open = settings();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Provides access to the configuration
|
||||
OpusEncoderSettings &config() { return cfg; }
|
||||
|
||||
OpusEncoderSettings &defaultConfig() { return cfg; }
|
||||
|
||||
bool begin(OpusEncoderSettings settings) {
|
||||
cfg = settings;
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override {
|
||||
// flush buffered data
|
||||
encodeFrame();
|
||||
// release memory
|
||||
opus_encoder_destroy(enc);
|
||||
is_open = false;
|
||||
}
|
||||
|
||||
/// Writes PCM data to be encoded as Opus
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
if (!is_open || p_print == nullptr) return 0;
|
||||
LOGD("OpusAudioEncoder::write: %d", (int)len);
|
||||
|
||||
// fill frame
|
||||
for (int j = 0; j < len; j++) {
|
||||
encodeByte(data[j]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
operator bool() override { return is_open; }
|
||||
|
||||
bool isOpen() { return is_open; }
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
OpusEncoder *enc = nullptr;
|
||||
OpusEncoderSettings cfg;
|
||||
bool is_open = false;
|
||||
Vector<uint8_t> frame{0};
|
||||
int frame_pos = 0;
|
||||
|
||||
void encodeByte(uint8_t data) {
|
||||
// add byte to frame
|
||||
frame[frame_pos++] = data;
|
||||
|
||||
// if frame is complete -> encode
|
||||
if (frame_pos >= frame.size()) {
|
||||
encodeFrame();
|
||||
frame_pos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void encodeFrame() {
|
||||
if (frame.size() > 0) {
|
||||
// allocate temp buffer on stack
|
||||
int packet_len =
|
||||
OPUS_ENC_MAX_BUFFER_SIZE > 0 ? OPUS_ENC_MAX_BUFFER_SIZE : 512;
|
||||
uint8_t packet[packet_len];
|
||||
|
||||
int frames = frame.size() / cfg.channels / sizeof(int16_t);
|
||||
LOGD("opus_encode - frame_size: %d", frames);
|
||||
int len = opus_encode(enc, (opus_int16 *)frame.data(), frames, packet,
|
||||
packet_len);
|
||||
if (len < 0) {
|
||||
LOGE("opus_encode: %s", opus_strerror(len));
|
||||
} else if (len > 0) {
|
||||
LOGD("opus-encode: %d", len);
|
||||
int eff = p_print->write(packet, len);
|
||||
if (eff != len) {
|
||||
LOGE("encodeFrame data lost: %d->%d", len, eff);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the frame size in samples
|
||||
int getFrameSizeSamples(int sampling_rate) {
|
||||
switch (cfg.frame_sizes_ms_x2) {
|
||||
case OPUS_FRAMESIZE_2_5_MS:
|
||||
return sampling_rate / 400;
|
||||
case OPUS_FRAMESIZE_5_MS:
|
||||
return sampling_rate / 200;
|
||||
case OPUS_FRAMESIZE_10_MS:
|
||||
return sampling_rate / 100;
|
||||
case OPUS_FRAMESIZE_20_MS:
|
||||
return sampling_rate / 50;
|
||||
case OPUS_FRAMESIZE_40_MS:
|
||||
return sampling_rate / 25;
|
||||
case OPUS_FRAMESIZE_60_MS:
|
||||
return 3 * sampling_rate / 50;
|
||||
case OPUS_FRAMESIZE_80_MS:
|
||||
return 4 * sampling_rate / 50;
|
||||
case OPUS_FRAMESIZE_100_MS:
|
||||
return 5 * sampling_rate / 50;
|
||||
case OPUS_FRAMESIZE_120_MS:
|
||||
return 6 * sampling_rate / 50;
|
||||
}
|
||||
return sampling_rate / 100;
|
||||
}
|
||||
|
||||
bool settings() {
|
||||
bool ok = true;
|
||||
if (cfg.bitrate >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_BITRATE(cfg.bitrate)) != OPUS_OK) {
|
||||
LOGE("invalid bitrate: %d", cfg.bitrate);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.force_channel >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(cfg.force_channel)) !=
|
||||
OPUS_OK) {
|
||||
LOGE("invalid force_channel: %d", cfg.force_channel);
|
||||
ok = false;
|
||||
};
|
||||
if (cfg.vbr >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_VBR(cfg.vbr)) != OPUS_OK) {
|
||||
LOGE("invalid vbr: %d", cfg.vbr);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.vbr_constraint >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_VBR_CONSTRAINT(cfg.vbr_constraint)) !=
|
||||
OPUS_OK) {
|
||||
LOGE("invalid vbr_constraint: %d", cfg.vbr_constraint);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.complexity >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_COMPLEXITY(cfg.complexity)) != OPUS_OK) {
|
||||
LOGE("invalid complexity: %d", cfg.complexity);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.max_bandwidth >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_MAX_BANDWIDTH(cfg.max_bandwidth)) !=
|
||||
OPUS_OK) {
|
||||
LOGE("invalid max_bandwidth: %d", cfg.max_bandwidth);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.signal >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_SIGNAL(cfg.signal)) != OPUS_OK) {
|
||||
LOGE("invalid signal: %d", cfg.signal);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.inband_fec >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(cfg.inband_fec)) != OPUS_OK) {
|
||||
LOGE("invalid inband_fec: %d", cfg.inband_fec);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.packet_loss_perc >= 0 &&
|
||||
opus_encoder_ctl(
|
||||
enc, OPUS_SET_PACKET_LOSS_PERC(cfg.packet_loss_perc)) != OPUS_OK) {
|
||||
LOGE("invalid pkt_loss: %d", cfg.packet_loss_perc);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.lsb_depth >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_LSB_DEPTH(cfg.lsb_depth)) != OPUS_OK) {
|
||||
LOGE("invalid lsb_depth: %d", cfg.lsb_depth);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.prediction_disabled >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_PREDICTION_DISABLED(
|
||||
cfg.prediction_disabled)) != OPUS_OK) {
|
||||
LOGE("invalid pred_disabled: %d", cfg.prediction_disabled);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.use_dtx >= 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_DTX(cfg.use_dtx)) != OPUS_OK) {
|
||||
LOGE("invalid use_dtx: %d", cfg.use_dtx);
|
||||
ok = false;
|
||||
}
|
||||
if (cfg.frame_sizes_ms_x2 > 0 &&
|
||||
opus_encoder_ctl(enc, OPUS_SET_EXPERT_FRAME_DURATION(
|
||||
cfg.frame_sizes_ms_x2)) != OPUS_OK) {
|
||||
LOGE("invalid frame_sizes_ms_x2: %d", cfg.frame_sizes_ms_x2);
|
||||
ok = false;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
178
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecOpusOgg.h
Normal file
178
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecOpusOgg.h
Normal file
@@ -0,0 +1,178 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/CodecOpus.h"
|
||||
#include "AudioTools/AudioCodecs/ContainerOgg.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// Opus identification header ("OpusHead") as defined in RFC 7845 section 5.1.
/// NOTE(review): the multi-byte fields are little-endian on the wire; this
/// struct matches only on little-endian hosts - confirm target endianness.
struct __attribute__((packed)) OpusOggHeader {
  char signature[8] = {'O', 'p', 'u', 's', 'H', 'e', 'a', 'd'};
  uint8_t version = 1;
  uint8_t channelCount = 0;
  uint16_t preSkip = 3840;  // samples (at 48 kHz) to skip at stream start
  uint32_t sampleRate = 0;  // original input sample rate (informational)
  int16_t outputGain = 0;   // Q7.8 dB gain to apply on playback
  uint8_t channelMappingFamily = 0;  // 0 = mono/stereo, no mapping table
};
|
||||
|
||||
/// Simplified "OpusTags" comment header (RFC 7845 section 5.2) without any
/// user comment entries
struct __attribute__((packed)) OpusOggCommentHeader {
  char signature[8] = {'O', 'p', 'u', 's', 'T', 'a', 'g', 's'};
  uint32_t vendorStringLength = 8;  // length incl. terminating 0 of vendor
  char vendor[8] = "Arduino";
  uint32_t userCommentListLength = 0;  // no user comments
};
|
||||
|
||||
/**
|
||||
* @brief Opus Decoder which uses the Ogg Container. See
|
||||
* https://datatracker.ietf.org/doc/html/rfc7845. The audio data is transmitted
|
||||
* in frames and the header information contains the sampler rate, channels and
|
||||
* other critical info.
|
||||
* Dependency: https://github.com/pschatzmann/arduino-libopus
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OpusOggDecoder : public OggContainerDecoder {
 public:
  OpusOggDecoder() {
    // decode the ogg packet payload with the opus decoder
    p_codec = &dec;  // OpusAudioDecoder
    out.setDecoder(p_codec);
  };

  /// Provides access to the Opus configuration
  OpusSettings &config() { return dec.config(); }

  /// Starts the processing with the provided Opus settings
  bool begin(OpusSettings settings) {
    OggContainerDecoder::begin();
    return dec.begin(settings);
  }

  bool begin() override {
    TRACED();
    OggContainerDecoder::begin();
    return dec.begin();
  }

  void end() override {
    TRACED();
    OggContainerDecoder::end();
    dec.end();
  }

 protected:
  OpusOggHeader header;   // parsed "OpusHead" identification header
  OpusAudioDecoder dec;   // actual opus frame decoder

  /// Called for the first packet of a logical stream: parses the OpusHead
  /// header and publishes the resulting audio info; OpusTags is ignored.
  virtual void beginOfSegment(ogg_packet *op) override {
    LOGD("bos");
    if (op->packet == nullptr) return;
    if (strncmp("OpusHead", (char *)op->packet, 8) == 0) {
      // NOTE(review): copies sizeof(header) bytes without checking op->bytes -
      // confirm OpusHead packets are always at least 19 bytes long
      memmove(&header, (char *)op->packet, sizeof(header));
      AudioInfo info = audioInfo();
      info.sample_rate = header.sampleRate;
      info.channels = header.channelCount;
      info.bits_per_sample = 16;
      info.logInfo();
      setAudioInfo(info);
    } else if (strncmp("OpusTags", (char *)op->packet, 8) == 0) {
      // not processed
    }
  }
};
|
||||
|
||||
/// Ogg writer that emits the Opus specific "OpusHead" and "OpusTags" header
/// packets at the beginning of the stream (see RFC 7845)
class OpusOggWriter : public OggContainerOutput {
 protected:
  OpusOggHeader header;          // "OpusHead" identification header
  OpusOggCommentHeader comment;  // "OpusTags" comment header
  ogg_packet oh1;                // separate packet for the comment header

  /// Writes the OpusHead and OpusTags packets as the first two packets.
  /// @return false if either packet could not be written
  bool writeHeader() override {
    LOGI("writeHeader");
    bool result = true;
    header.sampleRate = cfg.sample_rate;
    header.channelCount = cfg.channels;
    // write header
    oh.packet = (uint8_t *)&header;
    oh.bytes = sizeof(header);
    oh.granulepos = 0;
    oh.packetno = packetno++;
    oh.b_o_s = true;
    oh.e_o_s = false;
    if (!writePacket(oh)) {
      result = false;
      LOGE("writePacket-header");
    }

    // write comment header
    oh1.packet = (uint8_t *)&comment;
    oh1.bytes = sizeof(comment);
    oh1.granulepos = 0;
    oh1.packetno = packetno++;
    // NOTE(review): b_o_s is set on this second packet as well; per the Ogg
    // spec only the first packet should mark beginning-of-stream - confirm
    // downstream tooling accepts this.
    oh1.b_o_s = true;
    oh1.e_o_s = false;
    if (!writePacket(oh1, OGGZ_FLUSH_AFTER)) {
      result = false;
      LOGE("writePacket-header1");
    }
    TRACED();
    return result;
  }
};
|
||||
|
||||
/**
|
||||
* @brief Opus Encoder which uses the Ogg Container: see
|
||||
* https://datatracker.ietf.org/doc/html/rfc7845
|
||||
* Dependency: https://github.com/pschatzmann/arduino-libopus
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OpusOggEncoder : public OggContainerEncoder {
|
||||
public:
|
||||
OpusOggEncoder() {
|
||||
setOggOutput(&ogg_writer);
|
||||
setEncoder(&enc);
|
||||
}
|
||||
|
||||
/// Provides "audio/opus"
|
||||
const char *mime() override { return "audio/ogg;codecs=opus"; }
|
||||
|
||||
/// Provides access to the Opus config
|
||||
OpusEncoderSettings &config() { return enc.config(); }
|
||||
|
||||
/// provides the frame duration in us (e.g. for RTSP)
|
||||
uint32_t frameDurationUs() override {
|
||||
// Get frame duration from encoder settings
|
||||
int frameDurationMs = config().frame_sizes_ms_x2;
|
||||
uint32_t frameDurationUs = 20000;
|
||||
switch (frameDurationMs) {
|
||||
case OPUS_FRAMESIZE_2_5_MS:
|
||||
frameDurationUs = 2500;
|
||||
break;
|
||||
case OPUS_FRAMESIZE_5_MS:
|
||||
frameDurationUs = 5000;
|
||||
break;
|
||||
case OPUS_FRAMESIZE_10_MS:
|
||||
frameDurationUs = 10000;
|
||||
break;
|
||||
case OPUS_FRAMESIZE_20_MS:
|
||||
frameDurationUs = 20000;
|
||||
break;
|
||||
}
|
||||
return frameDurationUs;
|
||||
}
|
||||
|
||||
protected:
|
||||
// use custom writer
|
||||
OpusOggWriter ogg_writer;
|
||||
// use opus encoder
|
||||
OpusAudioEncoder enc;
|
||||
};
|
||||
|
||||
#include "AudioTools/Communication/RTSP/RTSPFormat.h"
|
||||
|
||||
} // namespace audio_tools
|
||||
400
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecSBC.h
Normal file
400
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecSBC.h
Normal file
@@ -0,0 +1,400 @@
|
||||
/**
|
||||
* @file CodecSBC.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief SBC Codec using https://github.com/pschatzmann/arduino-libsbc
|
||||
* @version 0.1
|
||||
* @date 2022-04-24
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "sbc.h"
|
||||
#include "sbc/formats.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for SBC. Depends on
|
||||
* https://github.com/pschatzmann/arduino-libsbc.
|
||||
* Inspired by sbcdec.c
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class SBCDecoder : public AudioDecoder {
|
||||
public:
|
||||
SBCDecoder(int bufferSize = 8192) {
|
||||
result_buffer = new uint8_t[bufferSize];
|
||||
result_buffer_size = bufferSize;
|
||||
}
|
||||
|
||||
~SBCDecoder() {
|
||||
if (result_buffer != nullptr)
|
||||
delete[] result_buffer;
|
||||
if (input_buffer != nullptr)
|
||||
delete[] input_buffer;
|
||||
}
|
||||
|
||||
virtual bool begin() {
|
||||
TRACEI();
|
||||
is_first = true;
|
||||
is_active = true;
|
||||
sbc_init(&sbc, 0L);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
sbc_finish(&sbc);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint8_t *start = (uint8_t *)data;
|
||||
int count = len;
|
||||
if (is_first) {
|
||||
framelen = firstWrite(data, len);
|
||||
LOGI("framelen: %d", framelen);
|
||||
// check if we have a valid frame length
|
||||
if (isValidFrameLen(framelen)) {
|
||||
start = start + framelen;
|
||||
count = len - framelen;
|
||||
is_first = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_first) {
|
||||
for (int j = 0; j < count; j++) {
|
||||
processByte(start[j]);
|
||||
}
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
// Provides the uncompressed length (of the PCM data) in bytes
|
||||
int bytesUncompressed() {
|
||||
return codeSize();
|
||||
}
|
||||
/// Provides the compressed length in bytes (after encoding)
|
||||
int bytesCompressed() {
|
||||
return frameLength();
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
sbc_t sbc;
|
||||
bool is_first = true;
|
||||
bool is_active = false;
|
||||
uint8_t *result_buffer = nullptr;
|
||||
int result_buffer_size;
|
||||
int framelen;
|
||||
uint8_t *input_buffer = nullptr;
|
||||
int input_pos = 0;
|
||||
|
||||
/// Provides the compressed length in bytes (after encoding)
|
||||
int frameLength() { return sbc_get_frame_length(&sbc); }
|
||||
|
||||
// Provides the uncompressed length (of the PCM data) in bytes
|
||||
int codeSize() { return sbc_get_codesize(&sbc); }
|
||||
|
||||
/// Process audio info
|
||||
void setupAudioInfo() {
|
||||
info.bits_per_sample = 16;
|
||||
info.channels = sbc.mode == SBC_MODE_MONO ? 1 : 2;
|
||||
LOGI("channels: %d", info.channels);
|
||||
switch (sbc.frequency) {
|
||||
case SBC_FREQ_16000:
|
||||
info.sample_rate = 16000;
|
||||
break;
|
||||
case SBC_FREQ_32000:
|
||||
info.sample_rate = 32000;
|
||||
break;
|
||||
case SBC_FREQ_44100:
|
||||
info.sample_rate = 44100;
|
||||
break;
|
||||
case SBC_FREQ_48000:
|
||||
info.sample_rate = 48000;
|
||||
break;
|
||||
default:
|
||||
LOGE("Unsupported sample rate");
|
||||
info.sample_rate = 0;
|
||||
break;
|
||||
}
|
||||
LOGI("sample_rate: %d", info.sample_rate);
|
||||
notifyAudioChange(info);
|
||||
}
|
||||
|
||||
bool isValidFrameLen(int len) { return len > 0 && len < 256; }
|
||||
|
||||
/// Determines the framelen
|
||||
int firstWrite(const void *data, size_t length) {
|
||||
size_t result_len = 0;
|
||||
int frame_len = sbc_parse(&sbc, data, length);
|
||||
if (isValidFrameLen(frame_len)) {
|
||||
|
||||
// setup audio info
|
||||
setupAudioInfo();
|
||||
|
||||
// setup input buffer for subsequent decoding stpes
|
||||
setupInputBuffer(frame_len);
|
||||
}
|
||||
|
||||
return frame_len;
|
||||
}
|
||||
|
||||
void setupInputBuffer(int len) {
|
||||
LOGI("input_buffer: %d", len);
|
||||
if (input_buffer != nullptr)
|
||||
delete[] input_buffer;
|
||||
input_buffer = new uint8_t[len];
|
||||
}
|
||||
|
||||
/// Build decoding buffer and decode when frame is full
|
||||
void processByte(uint8_t byte) {
|
||||
// add byte to buffer
|
||||
input_buffer[input_pos++] = byte;
|
||||
|
||||
// decode if buffer is full
|
||||
if (input_pos >= framelen) {
|
||||
size_t result_len = 0;
|
||||
sbc_decode(&sbc, input_buffer, framelen, result_buffer,
|
||||
result_buffer_size, &result_len);
|
||||
if (result_len > 0) {
|
||||
p_print->write(result_buffer, result_len);
|
||||
}
|
||||
input_pos = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for SBC - Depends on
|
||||
* https://github.com/pschatzmann/arduino-libsbc.
|
||||
* Inspired by sbcenc.c
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class SBCEncoder : public AudioEncoder {
|
||||
public:
|
||||
SBCEncoder(int subbands = 8, int blocks = 16, int bitpool = 32,
|
||||
int allocation_method = SBC_AM_LOUDNESS) {
|
||||
setSubbands(subbands);
|
||||
setBlocks(blocks);
|
||||
setBitpool(bitpool);
|
||||
setAllocationMethod(allocation_method);
|
||||
}
|
||||
|
||||
/// Defines the subbands: Use 4 or 8
|
||||
void setSubbands(int subbands) {
|
||||
if (subbands == 8 || subbands == 4) {
|
||||
this->subbands = subbands;
|
||||
} else {
|
||||
LOGE("Invalid subbands: %d - using 8", subbands);
|
||||
this->subbands = 8;
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines the number of blocks: valid values (4,8,12,16)
|
||||
void setBlocks(int blocks) {
|
||||
if (blocks == 16 || blocks == 12 || blocks == 8 || blocks == 4) {
|
||||
this->blocks = blocks;
|
||||
} else {
|
||||
LOGE("Invalid blocks: %d - using 16", blocks);
|
||||
this->blocks = 16;
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines the bitpool (2-86?)
|
||||
void setBitpool(int bitpool) { this->bitpool = bitpool; }
|
||||
|
||||
/// Defines the allocation method: Use SBC_AM_LOUDNESS, SBC_AM_SNR
|
||||
void setAllocationMethod(int allocation_method) {
|
||||
if (allocation_method == SBC_AM_LOUDNESS || allocation_method == SBC_AM_SNR) {
|
||||
this->allocation_method = allocation_method;
|
||||
} else {
|
||||
LOGE("Invalid allocation Method: %d - using SBC_AM_LOUDNESS", allocation_method);
|
||||
this->allocation_method = SBC_AM_LOUDNESS;
|
||||
}
|
||||
}
|
||||
|
||||
/// Restarts the processing
|
||||
bool begin() {
|
||||
TRACEI();
|
||||
is_first = true;
|
||||
is_active = setup();
|
||||
current_codesize = codeSize();
|
||||
buffer.resize(current_codesize);
|
||||
result_buffer.resize(frameLength());
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Ends the processing
|
||||
virtual void end() {
|
||||
TRACEI();
|
||||
sbc_finish(&sbc);
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
virtual const char *mime() { return "audio/sbc"; }
|
||||
|
||||
virtual void setOutput(Print &out_stream) { p_print = &out_stream; }
|
||||
|
||||
operator bool() { return is_active; }
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("write: %d", len);
|
||||
if (!is_active) {
|
||||
LOGE("inactive");
|
||||
return 0;
|
||||
}
|
||||
if (p_print==nullptr){
|
||||
LOGE("output not defined");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// encode bytes
|
||||
for (int j = 0; j < len; j++) {
|
||||
processByte(data[j]);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
|
||||
int bytesUncompressed() {
|
||||
return codeSize();
|
||||
}
|
||||
int bytesCompressed() {
|
||||
return frameLength();
|
||||
}
|
||||
|
||||
protected:
|
||||
Print *p_print = nullptr;
|
||||
sbc_t sbc;
|
||||
bool is_first = true;
|
||||
bool is_active = false;
|
||||
int current_codesize = 0;
|
||||
int buffer_pos = 0;
|
||||
Vector<uint8_t> buffer{0};
|
||||
Vector<uint8_t> result_buffer{0};
|
||||
int subbands = 4;
|
||||
int blocks = 4;
|
||||
int bitpool = 32;
|
||||
int allocation_method;
|
||||
|
||||
/// Provides the compressed length in bytes (after encoding)
|
||||
int frameLength() { return sbc_get_frame_length(&sbc); }
|
||||
|
||||
/// Provides the uncompressed length (of the PCM data) in bytes
|
||||
int codeSize() { return sbc_get_codesize(&sbc); }
|
||||
|
||||
/// Determines audio information and calls sbc_init;
|
||||
bool setup() {
|
||||
sbc_init(&sbc, 0L);
|
||||
|
||||
if (info.bits_per_sample!=16){
|
||||
LOGE("Invalid bits_per_sample: %d", info.bits_per_sample);
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (info.sample_rate) {
|
||||
case 16000:
|
||||
sbc.frequency = SBC_FREQ_16000;
|
||||
break;
|
||||
case 32000:
|
||||
sbc.frequency = SBC_FREQ_32000;
|
||||
break;
|
||||
case 44100:
|
||||
sbc.frequency = SBC_FREQ_44100;
|
||||
break;
|
||||
case 48000:
|
||||
sbc.frequency = SBC_FREQ_48000;
|
||||
break;
|
||||
default:
|
||||
LOGE("Invalid sample_rate: %d", info.sample_rate);
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (info.channels) {
|
||||
case 1:
|
||||
sbc.mode = SBC_MODE_MONO;
|
||||
break;
|
||||
case 2:
|
||||
sbc.mode = SBC_MODE_STEREO;
|
||||
break;
|
||||
default:
|
||||
LOGE("Invalid channels: %d", info.channels);
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (subbands) {
|
||||
case 4:
|
||||
sbc.subbands = SBC_SB_4;
|
||||
break;
|
||||
case 8:
|
||||
sbc.subbands = SBC_SB_8;
|
||||
break;
|
||||
default:
|
||||
LOGE("Invalid subbands: %d", subbands);
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (blocks) {
|
||||
case 4:
|
||||
sbc.blocks = SBC_BLK_4;
|
||||
break;
|
||||
case 8:
|
||||
sbc.blocks = SBC_BLK_8;
|
||||
break;
|
||||
case 12:
|
||||
sbc.blocks = SBC_BLK_12;
|
||||
break;
|
||||
case 16:
|
||||
sbc.blocks = SBC_BLK_16;
|
||||
break;
|
||||
default:
|
||||
LOGE("Invalid blocks: %d", blocks);
|
||||
return false;
|
||||
}
|
||||
|
||||
sbc.bitpool = bitpool;
|
||||
sbc.allocation = allocation_method;
|
||||
return true;
|
||||
}
|
||||
|
||||
// add byte to decoding buffer and decode if buffer is full
|
||||
void processByte(uint8_t byte) {
|
||||
buffer[buffer_pos++] = byte;
|
||||
if (buffer_pos >= current_codesize) {
|
||||
ssize_t written;
|
||||
// Encodes ONE input block into ONE output block */
|
||||
// ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len,
|
||||
// void *output, size_t output_len, ssize_t *written);
|
||||
sbc_encode(&sbc, &buffer[0], current_codesize, &result_buffer[0],
|
||||
result_buffer.size(), &written);
|
||||
LOGD("sbc_encode: %d -> %d (buffer: %d))", current_codesize, written,
|
||||
result_buffer.size());
|
||||
p_print->write(&result_buffer[0], written);
|
||||
buffer_pos = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
1022
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecTSDemux.h
Normal file
1022
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecTSDemux.h
Normal file
File diff suppressed because it is too large
Load Diff
297
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecVorbis.h
Normal file
297
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecVorbis.h
Normal file
@@ -0,0 +1,297 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "ogg.h"
|
||||
#include "vorbis-tremor.h"
|
||||
|
||||
// #include "AudioTools/AudioCodecs/ContainerOgg.h"
|
||||
// #include "ivorbiscodec.h"
|
||||
// #include "ivorbisfile.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
#ifndef VARBIS_MAX_READ_SIZE
|
||||
#define VARBIS_MAX_READ_SIZE 1024
|
||||
#endif
|
||||
|
||||
#define VORBIS_HEADER_OPEN_LIMIT 1024
|
||||
|
||||
/**
|
||||
* @brief Vorbis Streaming Decoder using
|
||||
* https://github.com/pschatzmann/arduino-libvorbis-tremor
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class VorbisDecoder : public StreamingDecoder {
|
||||
public:
|
||||
VorbisDecoder() = default;
|
||||
|
||||
/// Destroy the VorbisDecoder object
|
||||
~VorbisDecoder() {
|
||||
if (active) {
|
||||
end();
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts the processing
|
||||
bool begin() override {
|
||||
LOGI("begin");
|
||||
|
||||
// Ensure we start with clean state
|
||||
if (active) {
|
||||
LOGW("Decoder already active, calling end() first");
|
||||
end();
|
||||
}
|
||||
|
||||
callbacks.read_func = read_func;
|
||||
callbacks.seek_func = seek_func;
|
||||
callbacks.close_func = nullptr;
|
||||
callbacks.tell_func = tell_func;
|
||||
|
||||
assert(p_input != nullptr);
|
||||
if (p_input->available() < VORBIS_HEADER_OPEN_LIMIT) {
|
||||
delay(delay_wait_for_data_ms);
|
||||
}
|
||||
LOGI("available: %d", p_input->available());
|
||||
|
||||
is_ov_open = ovOpen();
|
||||
LOGI("ovOpen result: %d", is_ov_open);
|
||||
|
||||
active = is_ov_open;
|
||||
return is_ov_open;
|
||||
}
|
||||
|
||||
/// Releases the reserved memory
|
||||
void end() override {
|
||||
LOGI("end");
|
||||
if (is_ov_open && active) {
|
||||
ov_clear(&file);
|
||||
LOGI("ov_clear completed");
|
||||
}
|
||||
is_ov_open = false;
|
||||
is_first = true;
|
||||
active = false;
|
||||
pcm.clear(); // Free the PCM buffer
|
||||
}
|
||||
|
||||
/// Provides the last available MP3FrameInfo
|
||||
AudioInfo audioInfo() override { return cfg; }
|
||||
|
||||
/// checks if the class is active
|
||||
virtual operator bool() override { return active; }
|
||||
|
||||
virtual bool copy() override {
|
||||
TRACED();
|
||||
|
||||
// open if not already done
|
||||
if (!is_ov_open) {
|
||||
if (!ovOpen()) {
|
||||
LOGE("Failed to open Vorbis stream");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Defensive checks before calling Vorbis functions
|
||||
if (pcm.data() == nullptr) {
|
||||
LOGE("PCM buffer is null - memory allocation failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (pcm.size() == 0) {
|
||||
LOGE("PCM buffer size is 0");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Additional sanity check for the file structure
|
||||
if (!active) {
|
||||
LOGE("Decoder is not active");
|
||||
return false;
|
||||
}
|
||||
|
||||
LOGD("ov_read: buffer size %d", pcm.size());
|
||||
bitstream = 0;
|
||||
|
||||
// Call ov_read with additional error checking
|
||||
long result = ov_read(&file, (char *)pcm.data(), pcm.size(), &bitstream);
|
||||
LOGI("copy result: %d", (int)result);
|
||||
|
||||
if (result > 0) {
|
||||
AudioInfo current = currentInfo();
|
||||
if (current != cfg) {
|
||||
cfg = current;
|
||||
cfg.logInfo();
|
||||
notifyAudioChange(cfg);
|
||||
}
|
||||
|
||||
if (p_print != nullptr) {
|
||||
p_print->write(pcm.data(), result);
|
||||
} else {
|
||||
LOGE("Output stream is null");
|
||||
return false;
|
||||
}
|
||||
delay(1);
|
||||
return true;
|
||||
} else {
|
||||
if (result == 0 || result == -3) {
|
||||
// data interruption
|
||||
LOGD("copy: %d - %s", (int)result, readError(result));
|
||||
} else {
|
||||
LOGE("copy: %d - %s", (int)result, readError(result));
|
||||
}
|
||||
delay(delay_on_no_data_ms);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides "audio/ogg"
|
||||
const char *mime() override { return "audio/vorbis+ogg"; }
|
||||
|
||||
/// Defines the delay when there is no data
|
||||
void setDelayOnNoData(size_t delay) { delay_on_no_data_ms = delay; }
|
||||
|
||||
/// Defines the delay to wait if there is not enough data to open the decoder
|
||||
void setWaitForData(size_t wait) { delay_wait_for_data_ms = wait; }
|
||||
|
||||
/// Defines the default read size
|
||||
void setReadSize(size_t size) {
|
||||
max_read_size = size;
|
||||
// Ensure we don't set an unreasonably large size
|
||||
if (max_read_size > 8192) {
|
||||
LOGW("Read size %zu is very large, consider smaller buffer",
|
||||
max_read_size);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
AudioInfo cfg;
|
||||
Vector<uint8_t> pcm{0};
|
||||
OggVorbis_File file;
|
||||
ov_callbacks callbacks;
|
||||
int bitstream = 0;
|
||||
size_t delay_on_no_data_ms = 100;
|
||||
size_t delay_wait_for_data_ms = 500;
|
||||
size_t max_read_size = VARBIS_MAX_READ_SIZE;
|
||||
bool active = false;
|
||||
bool is_first = true;
|
||||
bool is_ov_open = false;
|
||||
|
||||
bool ovOpen() {
|
||||
pcm.resize(max_read_size);
|
||||
checkMemory(true);
|
||||
int rc = ov_open_callbacks(this, &file, nullptr, 0, callbacks);
|
||||
if (rc < 0) {
|
||||
LOGE("ov_open_callbacks failed with error %d: %s", rc, getOpenError(rc));
|
||||
} else {
|
||||
LOGI("ov_open_callbacks succeeded");
|
||||
is_ov_open = true;
|
||||
}
|
||||
checkMemory(true);
|
||||
return is_ov_open;
|
||||
}
|
||||
|
||||
AudioInfo currentInfo() {
|
||||
AudioInfo result;
|
||||
if (!is_ov_open) {
|
||||
LOGE("Cannot get audio info - stream not open");
|
||||
return result;
|
||||
}
|
||||
|
||||
vorbis_info *info = ov_info(&file, -1);
|
||||
if (info == nullptr) {
|
||||
LOGE("ov_info returned null pointer");
|
||||
return result;
|
||||
}
|
||||
|
||||
result.sample_rate = info->rate;
|
||||
result.channels = info->channels;
|
||||
result.bits_per_sample = 16;
|
||||
|
||||
LOGD("Audio info - rate: %d, channels: %d", info->rate, info->channels);
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual size_t readBytes(uint8_t *data, size_t len) override {
|
||||
size_t read_size = min(len, (size_t)max_read_size);
|
||||
size_t result = p_input->readBytes((uint8_t *)data, read_size);
|
||||
LOGD("readBytes: %zu", result);
|
||||
return result;
|
||||
}
|
||||
|
||||
static size_t read_func(void *ptr, size_t size, size_t nmemb,
|
||||
void *datasource) {
|
||||
VorbisDecoder *self = (VorbisDecoder *)datasource;
|
||||
assert(datasource != nullptr);
|
||||
size_t result = self->readBytes((uint8_t *)ptr, size * nmemb);
|
||||
LOGD("read_func: %d -> %d", size * nmemb, (int)result);
|
||||
return result;
|
||||
}
|
||||
|
||||
static int seek_func(void *datasource, ogg_int64_t offset, int whence) {
|
||||
VorbisDecoder *self = (VorbisDecoder *)datasource;
|
||||
return -1;
|
||||
}
|
||||
|
||||
static long tell_func(void *datasource) {
|
||||
VorbisDecoder *self = (VorbisDecoder *)datasource;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// static int close_func(void *datasource) {
|
||||
// VorbisDecoder *self = (VorbisDecoder *)datasource;
|
||||
// self->end();
|
||||
// return 0;
|
||||
// }
|
||||
|
||||
const char *readError(long error) {
|
||||
if (error >= 0) {
|
||||
return "OK";
|
||||
}
|
||||
switch (error) {
|
||||
case OV_HOLE:
|
||||
return "Interruption in the data";
|
||||
case OV_EBADLINK:
|
||||
return "Invalid stream section";
|
||||
case OV_EREAD:
|
||||
return "Read error";
|
||||
case OV_EFAULT:
|
||||
return "Internal fault";
|
||||
case OV_EIMPL:
|
||||
return "Unimplemented feature";
|
||||
case OV_EINVAL:
|
||||
return "Invalid argument";
|
||||
case OV_ENOTVORBIS:
|
||||
return "Not a Vorbis file";
|
||||
case OV_EBADHEADER:
|
||||
return "Invalid Vorbis header";
|
||||
case OV_EVERSION:
|
||||
return "Vorbis version mismatch";
|
||||
case OV_ENOSEEK:
|
||||
return "Stream not seekable";
|
||||
default:
|
||||
return "Unknown error";
|
||||
}
|
||||
}
|
||||
|
||||
const char *getOpenError(int error) {
|
||||
switch (error) {
|
||||
case 0:
|
||||
return "Success";
|
||||
case OV_EREAD:
|
||||
return "Read from media error";
|
||||
case OV_ENOTVORBIS:
|
||||
return "Not Vorbis data";
|
||||
case OV_EVERSION:
|
||||
return "Vorbis version mismatch";
|
||||
case OV_EBADHEADER:
|
||||
return "Invalid Vorbis bitstream header";
|
||||
case OV_EFAULT:
|
||||
return "Internal logic fault";
|
||||
default:
|
||||
return "Unknown open error";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
687
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecWAV.h
Normal file
687
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecWAV.h
Normal file
@@ -0,0 +1,687 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/AudioEncoded.h"
|
||||
#include "AudioTools/AudioCodecs/AudioFormat.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
|
||||
#define READ_BUFFER_SIZE 512
|
||||
#define MAX_WAV_HEADER_LEN 200
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Sound information which is available in the WAV header
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*
|
||||
*/
|
||||
struct WAVAudioInfo : AudioInfo {
|
||||
WAVAudioInfo() = default;
|
||||
WAVAudioInfo(const AudioInfo &from) {
|
||||
sample_rate = from.sample_rate;
|
||||
channels = from.channels;
|
||||
bits_per_sample = from.bits_per_sample;
|
||||
}
|
||||
|
||||
AudioFormat format = AudioFormat::PCM;
|
||||
int byte_rate = 0;
|
||||
int block_align = 0;
|
||||
bool is_streamed = true;
|
||||
bool is_valid = false;
|
||||
uint32_t data_length = 0;
|
||||
uint32_t file_size = 0;
|
||||
int offset = 0;
|
||||
};
|
||||
|
||||
static const char *wav_mime = "audio/wav";
|
||||
|
||||
/**
|
||||
* @brief Parser for Wav header data
|
||||
* for details see https://de.wikipedia.org/wiki/RIFF_WAVE
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*
|
||||
*/
|
||||
class WAVHeader {
|
||||
public:
|
||||
WAVHeader() = default;
|
||||
|
||||
/// Adds data to the 44 byte wav header data buffer and make it available for
|
||||
/// parsing
|
||||
int write(uint8_t *data, size_t data_len) {
|
||||
return buffer.writeArray(data, data_len);
|
||||
}
|
||||
|
||||
/// Call begin when header data is complete to parse the data
|
||||
bool parse() {
|
||||
LOGI("WAVHeader::begin: %u", (unsigned)buffer.available());
|
||||
this->data_pos = 0l;
|
||||
memset((void *)&headerInfo, 0, sizeof(WAVAudioInfo));
|
||||
|
||||
if (!setPos("RIFF")) return false;
|
||||
headerInfo.file_size = read_int32();
|
||||
if (!setPos("WAVE")) return false;
|
||||
if (!setPos("fmt ")) return false;
|
||||
int fmt_length = read_int32();
|
||||
headerInfo.format = (AudioFormat)read_int16();
|
||||
headerInfo.channels = read_int16();
|
||||
headerInfo.sample_rate = read_int32();
|
||||
headerInfo.byte_rate = read_int32();
|
||||
headerInfo.block_align = read_int16();
|
||||
headerInfo.bits_per_sample = read_int16();
|
||||
if (!setPos("data")) return false;
|
||||
headerInfo.data_length = read_int32();
|
||||
if (headerInfo.data_length == 0 || headerInfo.data_length >= 0x7fff0000) {
|
||||
headerInfo.is_streamed = true;
|
||||
headerInfo.data_length = ~0;
|
||||
}
|
||||
|
||||
logInfo();
|
||||
buffer.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns true if the header is complete (containd data tag)
|
||||
bool isDataComplete() {
|
||||
int pos = getDataPos();
|
||||
return pos > 0 && buffer.available() >= pos;
|
||||
}
|
||||
|
||||
/// number of bytes available in the header buffer
|
||||
size_t available() { return buffer.available(); }
|
||||
|
||||
/// Determines the data start position using the data tag
|
||||
int getDataPos() {
|
||||
int pos =
|
||||
StrView((char *)buffer.data(), MAX_WAV_HEADER_LEN, buffer.available())
|
||||
.indexOf("data");
|
||||
return pos > 0 ? pos + 8 : 0;
|
||||
}
|
||||
|
||||
/// provides the info from the header
|
||||
WAVAudioInfo &audioInfo() { return headerInfo; }
|
||||
|
||||
/// Sets the info in the header
|
||||
void setAudioInfo(WAVAudioInfo info) { headerInfo = info; }
|
||||
|
||||
/// Just write a wav header to the indicated outputbu
|
||||
int writeHeader(Print *out) {
|
||||
writeRiffHeader(buffer);
|
||||
writeFMT(buffer);
|
||||
writeDataHeader(buffer);
|
||||
int len = buffer.available();
|
||||
out->write(buffer.data(), buffer.available());
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Reset internal stored header information and buffer
|
||||
void clear() {
|
||||
data_pos = 0;
|
||||
WAVAudioInfo empty;
|
||||
empty.sample_rate = 0;
|
||||
empty.channels = 0;
|
||||
empty.bits_per_sample = 0;
|
||||
headerInfo = empty;
|
||||
buffer.setClearWithZero(true);
|
||||
buffer.reset();
|
||||
}
|
||||
|
||||
/// Debug helper: dumps header bytes as printable characters
|
||||
void dumpHeader() {
|
||||
char msg[buffer.available() + 1];
|
||||
memset(msg, 0, buffer.available() + 1);
|
||||
for (int j = 0; j < buffer.available(); j++) {
|
||||
char c = (char)buffer.data()[j];
|
||||
if (!isalpha(c)) {
|
||||
c = '.';
|
||||
}
|
||||
msg[j] = c;
|
||||
}
|
||||
LOGI("Header: %s", msg);
|
||||
}
|
||||
|
||||
protected:
|
||||
struct WAVAudioInfo headerInfo;
|
||||
SingleBuffer<uint8_t> buffer{MAX_WAV_HEADER_LEN};
|
||||
size_t data_pos = 0;
|
||||
|
||||
bool setPos(const char *id) {
|
||||
int id_len = strlen(id);
|
||||
int pos = indexOf(id);
|
||||
if (pos < 0) return false;
|
||||
data_pos = pos + id_len;
|
||||
return true;
|
||||
}
|
||||
|
||||
int indexOf(const char *str) {
|
||||
return StrView((char *)buffer.data(), MAX_WAV_HEADER_LEN,
|
||||
buffer.available())
|
||||
.indexOf(str);
|
||||
}
|
||||
|
||||
uint32_t read_tag() {
|
||||
uint32_t tag = 0;
|
||||
tag = (tag << 8) | getChar();
|
||||
tag = (tag << 8) | getChar();
|
||||
tag = (tag << 8) | getChar();
|
||||
tag = (tag << 8) | getChar();
|
||||
return tag;
|
||||
}
|
||||
|
||||
uint32_t getChar32() { return getChar(); }
|
||||
|
||||
uint32_t read_int32() {
|
||||
uint32_t value = 0;
|
||||
value |= getChar32() << 0;
|
||||
value |= getChar32() << 8;
|
||||
value |= getChar32() << 16;
|
||||
value |= getChar32() << 24;
|
||||
return value;
|
||||
}
|
||||
|
||||
uint16_t read_int16() {
|
||||
uint16_t value = 0;
|
||||
value |= getChar() << 0;
|
||||
value |= getChar() << 8;
|
||||
return value;
|
||||
}
|
||||
|
||||
void skip(int n) {
|
||||
int i;
|
||||
for (i = 0; i < n; i++) getChar();
|
||||
}
|
||||
|
||||
int getChar() {
|
||||
if (data_pos < buffer.size())
|
||||
return buffer.data()[data_pos++];
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
void seek(long int offset, int origin) {
|
||||
if (origin == SEEK_SET) {
|
||||
data_pos = offset;
|
||||
} else if (origin == SEEK_CUR) {
|
||||
data_pos += offset;
|
||||
}
|
||||
}
|
||||
|
||||
size_t tell() { return data_pos; }
|
||||
|
||||
bool eof() { return data_pos >= buffer.size() - 1; }
|
||||
|
||||
void logInfo() {
|
||||
LOGI("WAVHeader sound_pos: %d", getDataPos());
|
||||
LOGI("WAVHeader channels: %d ", headerInfo.channels);
|
||||
LOGI("WAVHeader bits_per_sample: %d", headerInfo.bits_per_sample);
|
||||
LOGI("WAVHeader sample_rate: %d ", (int)headerInfo.sample_rate);
|
||||
LOGI("WAVHeader format: %d", (int)headerInfo.format);
|
||||
}
|
||||
|
||||
void writeRiffHeader(BaseBuffer<uint8_t> &buffer) {
|
||||
buffer.writeArray((uint8_t *)"RIFF", 4);
|
||||
write32(buffer, headerInfo.file_size - 8);
|
||||
buffer.writeArray((uint8_t *)"WAVE", 4);
|
||||
}
|
||||
|
||||
void writeFMT(BaseBuffer<uint8_t> &buffer) {
|
||||
uint16_t fmt_len = 16;
|
||||
buffer.writeArray((uint8_t *)"fmt ", 4);
|
||||
write32(buffer, fmt_len);
|
||||
write16(buffer, (uint16_t)headerInfo.format); // PCM
|
||||
write16(buffer, headerInfo.channels);
|
||||
write32(buffer, headerInfo.sample_rate);
|
||||
write32(buffer, headerInfo.byte_rate);
|
||||
write16(buffer, headerInfo.block_align); // frame size
|
||||
write16(buffer, headerInfo.bits_per_sample);
|
||||
}
|
||||
|
||||
void write32(BaseBuffer<uint8_t> &buffer, uint64_t value) {
|
||||
buffer.writeArray((uint8_t *)&value, 4);
|
||||
}
|
||||
|
||||
void write16(BaseBuffer<uint8_t> &buffer, uint16_t value) {
|
||||
buffer.writeArray((uint8_t *)&value, 2);
|
||||
}
|
||||
|
||||
void writeDataHeader(BaseBuffer<uint8_t> &buffer) {
|
||||
buffer.writeArray((uint8_t *)"data", 4);
|
||||
write32(buffer, headerInfo.file_size);
|
||||
int offset = headerInfo.offset;
|
||||
if (offset > 0) {
|
||||
uint8_t empty[offset];
|
||||
memset(empty, 0, offset);
|
||||
buffer.writeArray(empty, offset); // resolve issue with wrong aligment
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A simple WAVDecoder: We parse the header data on the first record to
|
||||
* determine the format. If no AudioDecoderExt is specified we just write the
|
||||
* PCM data to the output that is defined by calling setOutput(). You can define
|
||||
* a ADPCM decoder to decode WAV files that contain ADPCM data.
|
||||
*
|
||||
* Optionally, if the input WAV file contains 8-bit PCM data, you can enable automatic
|
||||
* conversion to 16-bit PCM output by calling setConvert8to16(true). This will convert
|
||||
* unsigned 8-bit samples to signed 16-bit samples before writing to the output stream,
|
||||
* and the reported bits_per_sample in audioInfo() will be 16 when conversion is active.
|
||||
* The same is valid for the 24 bit conversion which converts 24 bit (3 byte) to 32 bit
|
||||
* (4 byte).
|
||||
*
|
||||
* Please note that you need to call begin() everytime you process a new file to let the decoder
|
||||
* know that we start with a new header.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class WAVDecoder : public AudioDecoder {
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new WAVDecoder object for PCM data
|
||||
*/
|
||||
WAVDecoder() = default;
|
||||
|
||||
/**
|
||||
* @brief Construct a new WAVDecoder object for ADPCM data
|
||||
*
|
||||
*/
|
||||
WAVDecoder(AudioDecoderExt &dec, AudioFormat fmt) { setDecoder(dec, fmt); }
|
||||
|
||||
/// Defines an optional decoder if the format is not PCM
|
||||
void setDecoder(AudioDecoderExt &dec, AudioFormat fmt) {
|
||||
TRACED();
|
||||
decoder_format = fmt;
|
||||
p_decoder = &dec;
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) override { this->p_print = &out_stream; }
|
||||
|
||||
/// Prepare decoder for a new WAV stream
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
header.clear();
|
||||
setupEncodedAudio();
|
||||
byte_buffer.reset();
|
||||
buffer24.reset();
|
||||
isFirst = true;
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Finish decoding and release temporary buffers
|
||||
void end() override {
|
||||
TRACED();
|
||||
byte_buffer.reset();
|
||||
buffer24.reset();
|
||||
active = false;
|
||||
}
|
||||
|
||||
/// Provides MIME type "audio/wav"
|
||||
const char *mime() { return wav_mime; }
|
||||
|
||||
/// Extended WAV specific info (original header values)
|
||||
WAVAudioInfo &audioInfoEx() { return header.audioInfo(); }
|
||||
|
||||
/// Exposed AudioInfo (may reflect conversion flags)
|
||||
AudioInfo audioInfo() override {
|
||||
WAVAudioInfo info = header.audioInfo();
|
||||
if (convert8to16 && info.format == AudioFormat::PCM &&
|
||||
info.bits_per_sample == 8) {
|
||||
info.bits_per_sample = 16;
|
||||
}
|
||||
// 32 bits gives better result
|
||||
if (convert24 && info.format == AudioFormat::PCM &&
|
||||
info.bits_per_sample == 24) {
|
||||
info.bits_per_sample = 32;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
/// Write incoming WAV data (header + PCM) into output
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
TRACED();
|
||||
size_t result = 0;
|
||||
if (active) {
|
||||
if (isFirst) {
|
||||
int data_start = decodeHeader((uint8_t *)data, len);
|
||||
// we do not have the complete header yet: need more data
|
||||
if (data_start == 0) return len;
|
||||
// process the outstanding data
|
||||
result = data_start +
|
||||
write_out((uint8_t *)data + data_start, len - data_start);
|
||||
|
||||
} else if (isValid) {
|
||||
result = write_out((uint8_t *)data, len);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Check if the decoder is active
|
||||
virtual operator bool() override { return active; }
|
||||
|
||||
/// Convert 8 bit to 16 bit PCM data (default: enabled)
|
||||
void setConvert8Bit(bool enable) {
|
||||
convert8to16 = enable;
|
||||
}
|
||||
|
||||
/// Convert 24 bit (3 byte) to 32 bit (4 byte) PCM data (default: enabled)
|
||||
void setConvert24Bit(bool enable) {
|
||||
convert24 = enable;
|
||||
}
|
||||
|
||||
protected:
|
||||
WAVHeader header;
|
||||
bool isFirst = true;
|
||||
bool isValid = true;
|
||||
bool active = false;
|
||||
AudioFormat decoder_format = AudioFormat::PCM;
|
||||
AudioDecoderExt *p_decoder = nullptr;
|
||||
EncodedAudioOutput dec_out;
|
||||
SingleBuffer<uint8_t> byte_buffer{0};
|
||||
SingleBuffer<int32_t> buffer24{0};
|
||||
bool convert8to16 = true; // Optional conversion flag
|
||||
bool convert24 = true; // Optional conversion flag
|
||||
const size_t batch_size = 256;
|
||||
|
||||
Print &out() { return p_decoder == nullptr ? *p_print : dec_out; }
|
||||
|
||||
virtual size_t write_out(const uint8_t *in_ptr, size_t in_size) {
|
||||
// check if we need to convert int24 data from 3 bytes to 4 bytes
|
||||
size_t result = 0;
|
||||
if (convert24 && header.audioInfo().format == AudioFormat::PCM &&
|
||||
header.audioInfo().bits_per_sample == 24 && sizeof(int24_t) == 4) {
|
||||
write_out_24(in_ptr, in_size);
|
||||
result = in_size;
|
||||
} else if (convert8to16 && header.audioInfo().format == AudioFormat::PCM &&
|
||||
header.audioInfo().bits_per_sample == 8) {
|
||||
result = write_out_8to16(in_ptr, in_size);
|
||||
} else {
|
||||
result = out().write(in_ptr, in_size);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Convert 8-bit PCM to 16-bit PCM and write out
|
||||
size_t write_out_8to16(const uint8_t *in_ptr, size_t in_size) {
|
||||
size_t total_written = 0;
|
||||
size_t samples_remaining = in_size;
|
||||
size_t offset = 0;
|
||||
int16_t out_buf[batch_size];
|
||||
while (samples_remaining > 0) {
|
||||
size_t current_batch =
|
||||
samples_remaining > batch_size ? batch_size : samples_remaining;
|
||||
for (size_t i = 0; i < current_batch; ++i) {
|
||||
out_buf[i] = ((int16_t)in_ptr[offset + i] - 128) << 8;
|
||||
}
|
||||
writeDataT<int16_t>(&out(), out_buf, current_batch);
|
||||
offset += current_batch;
|
||||
samples_remaining -= current_batch;
|
||||
}
|
||||
return in_size;
|
||||
}
|
||||
|
||||
  /// convert 3 byte int24 to 4 byte int32
  size_t write_out_24(const uint8_t *in_ptr, size_t in_size) {
    // byte_buffer collects the 3 bytes of one sample; buffer24 batches the
    // converted int32 samples (resize is a no-op once the size matches)
    buffer24.resize(batch_size);
    byte_buffer.resize(3);

    for (size_t i = 0; i < in_size; i++) {
      // Add byte to buffer
      byte_buffer.write(in_ptr[i]);

      // Process complete sample when buffer is full
      if (byte_buffer.isFull()) {
        int24_3bytes_t sample24{byte_buffer.data()};
        // scale32() expands the 24 bit value to the full int32 range
        int32_t converted_sample = sample24.scale32();
        buffer24.write(converted_sample);
        if (buffer24.isFull()) {
          writeDataT<int32_t>(&out(), buffer24.data(), buffer24.available());
          buffer24.reset();
        }
        byte_buffer.reset();
      }
    }

    // NOTE(review): samples still pending in buffer24 are only flushed when
    // the buffer fills on a later call — a partial batch at end of stream
    // stays buffered; confirm this is acceptable for the stream lifecycle.
    return in_size;
  }
|
||||
|
||||
|
||||
  /// Decodes the header data: Returns the start pos of the data
  int decodeHeader(uint8_t *in_ptr, size_t in_size) {
    // NOTE(review): result and written are computed but never used below
    int result = in_size;
    // we expect at least the full header
    int written = header.write(in_ptr, in_size);
    if (!header.isDataComplete()) {
      // header not complete yet: caller should feed more data
      LOGW("WAV header misses 'data' section in len: %d",
           (int)header.available());
      header.dumpHeader();
      return 0;
    }
    // parse header
    if (!header.parse()) {
      LOGE("WAV header parsing failed");
      return 0;
    }

    isFirst = false;
    isValid = header.audioInfo().is_valid;

    LOGI("WAV sample_rate: %d", (int)header.audioInfo().sample_rate);
    LOGI("WAV data_length: %u", (unsigned)header.audioInfo().data_length);
    LOGI("WAV is_streamed: %d", header.audioInfo().is_streamed);
    LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");

    // check format: only the format this decoder was set up for is accepted
    AudioFormat format = header.audioInfo().format;
    isValid = format == decoder_format;
    if (isValid) {
      // update blocksize
      if (p_decoder != nullptr) {
        int block_size = header.audioInfo().block_align;
        p_decoder->setBlockSize(block_size);
      }

      // update sampling rate if the target supports it
      AudioInfo bi = audioInfo();
      notifyAudioChange(bi);
    } else {
      LOGE("WAV format not supported: %d", (int)format);
    }
    // position of the first payload byte within the submitted data
    return header.getDataPos();
  }
|
||||
|
||||
void setupEncodedAudio() {
|
||||
if (p_decoder != nullptr) {
|
||||
assert(p_print != nullptr);
|
||||
dec_out.setOutput(p_print);
|
||||
dec_out.setDecoder(p_decoder);
|
||||
dec_out.begin(info);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A simple WAV file encoder. If no AudioEncoderExt is specified the WAV
|
||||
* file contains PCM data, otherwise it is encoded as ADPCM. The WAV header is
|
||||
* written with the first writing of audio data. Calling begin() is making sure
|
||||
* that the header is written again.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class WAVEncoder : public AudioEncoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new WAVEncoder object for PCM data
|
||||
*/
|
||||
WAVEncoder() = default;
|
||||
|
||||
/**
|
||||
* @brief Construct a new WAVEncoder object for ADPCM data
|
||||
*/
|
||||
WAVEncoder(AudioEncoderExt &enc, AudioFormat fmt) { setEncoder(enc, fmt); };
|
||||
|
||||
/// Associates an external encoder for non-PCM formats
|
||||
void setEncoder(AudioEncoderExt &enc, AudioFormat fmt) {
|
||||
TRACED();
|
||||
wav_info.format = fmt;
|
||||
p_encoder = &enc;
|
||||
}
|
||||
|
||||
/// Defines the otuput stream
|
||||
void setOutput(Print &out) override {
|
||||
TRACED();
|
||||
p_print = &out;
|
||||
}
|
||||
|
||||
/// Provides "audio/wav"
|
||||
const char *mime() override { return wav_mime; }
|
||||
|
||||
/// Provides the default configuration
|
||||
WAVAudioInfo defaultConfig() {
|
||||
WAVAudioInfo info;
|
||||
info.format = AudioFormat::PCM;
|
||||
info.sample_rate = DEFAULT_SAMPLE_RATE;
|
||||
info.bits_per_sample = DEFAULT_BITS_PER_SAMPLE;
|
||||
info.channels = DEFAULT_CHANNELS;
|
||||
info.is_streamed = true;
|
||||
info.is_valid = true;
|
||||
info.data_length = 0x7fff0000;
|
||||
info.file_size = info.data_length + 36;
|
||||
return info;
|
||||
}
|
||||
|
||||
/// Update actual WAVAudioInfo
|
||||
virtual void setAudioInfo(AudioInfo from) override {
|
||||
wav_info.sample_rate = from.sample_rate;
|
||||
wav_info.channels = from.channels;
|
||||
wav_info.bits_per_sample = from.bits_per_sample;
|
||||
// recalculate byte rate, block align...
|
||||
setAudioInfo(wav_info);
|
||||
}
|
||||
|
||||
/// Defines the WAVAudioInfo
|
||||
virtual void setAudioInfo(WAVAudioInfo ai) {
|
||||
AudioEncoder::setAudioInfo(ai);
|
||||
if (p_encoder) p_encoder->setAudioInfo(ai);
|
||||
wav_info = ai;
|
||||
LOGI("sample_rate: %d", (int)wav_info.sample_rate);
|
||||
LOGI("channels: %d", wav_info.channels);
|
||||
// bytes per second
|
||||
wav_info.byte_rate = wav_info.sample_rate * wav_info.channels *
|
||||
wav_info.bits_per_sample / 8;
|
||||
if (wav_info.format == AudioFormat::PCM) {
|
||||
wav_info.block_align =
|
||||
wav_info.bits_per_sample / 8 * wav_info.channels;
|
||||
}
|
||||
if (wav_info.is_streamed || wav_info.data_length == 0 ||
|
||||
wav_info.data_length >= 0x7fff0000) {
|
||||
LOGI("is_streamed! because length is %u",
|
||||
(unsigned)wav_info.data_length);
|
||||
wav_info.is_streamed = true;
|
||||
wav_info.data_length = ~0;
|
||||
} else {
|
||||
size_limit = wav_info.data_length;
|
||||
LOGI("size_limit is %d", (int)size_limit);
|
||||
}
|
||||
}
|
||||
|
||||
/// starts the processing
|
||||
bool begin(WAVAudioInfo ai) {
|
||||
header.clear();
|
||||
setAudioInfo(ai);
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// starts the processing using the actual WAVAudioInfo
|
||||
virtual bool begin() override {
|
||||
TRACED();
|
||||
setupEncodedAudio();
|
||||
header_written = false;
|
||||
is_open = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override { is_open = false; }
|
||||
|
||||
/// Writes PCM data to be encoded as WAV
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (!is_open) {
|
||||
LOGE("The WAVEncoder is not open - please call begin()");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (p_print == nullptr) {
|
||||
LOGE("No output stream was provided");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!header_written) {
|
||||
LOGI("Writing Header");
|
||||
header.setAudioInfo(wav_info);
|
||||
int len = header.writeHeader(p_print);
|
||||
wav_info.file_size -= len;
|
||||
header_written = true;
|
||||
}
|
||||
|
||||
int32_t result = 0;
|
||||
Print *p_out = p_encoder == nullptr ? p_print : &enc_out;
|
||||
;
|
||||
if (wav_info.is_streamed) {
|
||||
result = p_out->write((uint8_t *)data, len);
|
||||
} else if (size_limit > 0) {
|
||||
size_t write_size = min((size_t)len, (size_t)size_limit);
|
||||
result = p_out->write((uint8_t *)data, write_size);
|
||||
size_limit -= result;
|
||||
|
||||
if (size_limit <= 0) {
|
||||
LOGI("The defined size was written - so we close the WAVEncoder now");
|
||||
is_open = false;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Check if encoder is active and ready to write
|
||||
operator bool() override { return is_open; }
|
||||
|
||||
/// Check if encoder is open
|
||||
bool isOpen() { return is_open; }
|
||||
|
||||
/// Adds n empty bytes at the beginning of the data
|
||||
void setDataOffset(uint16_t offset) { wav_info.offset = offset; }
|
||||
|
||||
protected:
|
||||
WAVHeader header;
|
||||
Print *p_print = nullptr; // final output CopyEncoder copy; // used for PCM
|
||||
AudioEncoderExt *p_encoder = nullptr;
|
||||
EncodedAudioOutput enc_out;
|
||||
WAVAudioInfo wav_info = defaultConfig();
|
||||
int64_t size_limit = 0;
|
||||
bool header_written = false;
|
||||
volatile bool is_open = false;
|
||||
|
||||
void setupEncodedAudio() {
|
||||
if (p_encoder != nullptr) {
|
||||
assert(p_print != nullptr);
|
||||
enc_out.setOutput(p_print);
|
||||
enc_out.setEncoder(p_encoder);
|
||||
enc_out.setAudioInfo(wav_info);
|
||||
enc_out.begin();
|
||||
// block size only available after begin(): update block size
|
||||
wav_info.block_align = p_encoder->blockSize();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
502
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecWavIMA.h
Normal file
502
libraries/audio-tools/src/AudioTools/AudioCodecs/CodecWavIMA.h
Normal file
@@ -0,0 +1,502 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
|
||||
#define WAVE_FORMAT_IMA_ADPCM 0x0011
|
||||
#define TAG(a, b, c, d) ((static_cast<uint32_t>(a) << 24) | (static_cast<uint32_t>(b) << 16) | (static_cast<uint32_t>(c) << 8) | (d))
|
||||
#define READ_BUFFER_SIZE 512
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// IMA ADPCM step-index adjustment table: maps a 4 bit encoded sample to the
/// delta applied to the decoder's step index. constexpr makes it a
/// compile-time constant.
constexpr int16_t ima_index_table[16] {
    -1, -1, -1, -1, 2, 4, 6, 8,
    -1, -1, -1, -1, 2, 4, 6, 8
};
|
||||
|
||||
/// IMA ADPCM quantizer step size table (89 entries), indexed by the decoder's
/// step index. constexpr makes it a compile-time constant.
constexpr int32_t ima_step_table[89] {
    7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
    19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
    50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
    130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
    337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
    876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
    2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
    5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
};
|
||||
|
||||
/**
|
||||
* @brief Sound information which is available in the WAV header - adjusted for IMA ADPCM
|
||||
* @author Phil Schatzmann
|
||||
* @author Norman Ritz
|
||||
* @copyright GPLv3
|
||||
*
|
||||
*/
|
||||
struct WavIMAAudioInfo : AudioInfo {
  WavIMAAudioInfo() = default;
  // Copies the basic audio attributes from a generic AudioInfo
  WavIMAAudioInfo(const AudioInfo& from) {
    sample_rate = from.sample_rate;
    channels = from.channels;
    bits_per_sample = from.bits_per_sample;
  }

  int format = WAVE_FORMAT_IMA_ADPCM;  // wave format tag from the fmt chunk
  int byte_rate = 0;                   // average bytes per second
  int block_align = 0;                 // size of one encoded block in bytes
  int frames_per_block = 0;            // sample frames per encoded block
  int num_samples = 0;       // total samples from the fact chunk (may be unreliable)
  bool is_valid = false;     // true once a supported fmt chunk was parsed
  uint32_t data_length = 0;  // size of the data chunk in bytes
  uint32_t file_size = 0;    // overall file size
};
|
||||
|
||||
// Per-channel IMA ADPCM decoder state
struct IMAState {
  int32_t predictor = 0;  // last predicted (decoded) sample value
  int step_index = 0;     // index into ima_step_table
};
|
||||
|
||||
/// Mime type reported by the decoder. constexpr makes the pointer itself
/// const with internal linkage, so including this header from several
/// translation units does not violate the one definition rule.
constexpr const char* wav_ima_mime = "audio/x-wav";
|
||||
|
||||
/**
|
||||
* @brief Parser for Wav header data adjusted for IMA ADPCM format - partially based on CodecWAV.h
|
||||
* for details see https://de.wikipedia.org/wiki/RIFF_WAVE
|
||||
* @author Phil Schatzmann
|
||||
* @author Norman Ritz
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
// Result of parsing a single header chunk. The enumerators rely on implicit
// values: IMA_ERR_INVALID_CHUNK = -2, IMA_ERR_INVALID_CONTAINER = -1,
// IMA_CHUNK_OK = 0, IMA_CHUNK_UNKNOWN = 1. The error values are also
// returned from WavIMAHeader::write(), so they must stay negative.
typedef enum {
  IMA_ERR_INVALID_CHUNK = -2,
  IMA_ERR_INVALID_CONTAINER,
  IMA_CHUNK_OK,
  IMA_CHUNK_UNKNOWN
} chunk_result;
|
||||
|
||||
class WavIMAHeader {
 public:
  WavIMAHeader() {
    clearHeader();
  };

  /// Resets the parser state so that a new file can be processed
  void clearHeader() {
    data_pos = 0;
    // NOTE(review): memset assumes WavIMAAudioInfo is trivially copyable
    // (no virtual members in AudioInfo) — confirm
    memset((void*)&headerInfo, 0, sizeof(WavIMAAudioInfo));
    headerInfo.is_valid = false;
    header_complete = false;
    chunk_len = 0;
    max_chunk_len = 8;
    skip_len = 0;
    isFirstChunk = true;
  }

  /// Parses one complete chunk that has been collected in chunk_buffer;
  /// fills headerInfo for the chunks we care about (fmt / fact / data)
  chunk_result parseChunk() {
    data_pos = 0;
    bool chunkUnknown = false;
    uint32_t tag = read_tag();
    uint32_t length = read_int32();
    if (length < 4) {
      return IMA_ERR_INVALID_CHUNK;
    }
    if (tag == TAG('R', 'I', 'F', 'F')) {
      // the RIFF container must announce a WAVE form type
      uint32_t container_type = read_tag();
      if (container_type != TAG('W', 'A', 'V', 'E')) {
        return IMA_ERR_INVALID_CONTAINER;
      }
    }
    else if (tag == TAG('f', 'm', 't', ' ')) {
      if (length < 20) {
        // Insufficient data for 'fmt '
        return IMA_ERR_INVALID_CHUNK;
      }
      headerInfo.format = read_int16();
      headerInfo.channels = read_int16();
      headerInfo.sample_rate = read_int32();
      headerInfo.byte_rate = read_int32();
      headerInfo.block_align = read_int16();
      headerInfo.bits_per_sample = read_int16();

      // Skip the size parameter for extra information as for IMA ADPCM the following data should always be 2 bytes.
      skip(2);
      headerInfo.frames_per_block = read_int16();
      if (headerInfo.format != WAVE_FORMAT_IMA_ADPCM || headerInfo.channels > 2) {
        // Insufficient or invalid data for waveformatex
        LOGE("Format not supported: %d, %d\n", headerInfo.format, headerInfo.channels);
        return IMA_ERR_INVALID_CHUNK;
      } else {
        headerInfo.is_valid = true; // At this point we know that the format information is valid
      }
    } else if (tag == TAG('f', 'a', 'c', 't')) {
      /* In the context of ADPCM the fact chunk should contain the total number of mono or stereo samples
      however we shouldn't rely on this as some programs (e.g. Audacity) write an incorrect value in some cases. This value is currently not used by the decoder.
      */
      headerInfo.num_samples = read_int32();
    } else if (tag == TAG('d', 'a', 't', 'a')) {
      // Size of the data chunk.
      headerInfo.data_length = length;
    } else {
      chunkUnknown = true;
    }
    // Skip any remaining data that exceeds the buffer
    if (tag != TAG('R', 'I', 'F', 'F') && length > 20) skip_len = length - 20;
    return chunkUnknown ? IMA_CHUNK_UNKNOWN : IMA_CHUNK_OK;
  }

  /* Adds data to the header data buffer.
  Because the header isn't necessarily uniform, we go through each chunk individually
  and only copy the ones we need. This could probably still be optimized.
  Returns the number of bytes consumed, or a negative chunk_result error. */
  int write(uint8_t* data, size_t data_len) {
    int write_len;
    int data_offset = 0;
    while (data_len > 0 && !header_complete) {
      if (skip_len > 0) {
        /* Used to skip any unknown chunks or chunks that are longer than expected.
        Some encoders like ffmpeg write meta information before the "data" chunk by default. */
        write_len = min(skip_len, data_len);
        skip_len -= write_len;
        data_offset += write_len;
        data_len -= write_len;
      }
      else {
        // Search / Wait for the individual chunks and write them to the temporary buffer.
        write_len = min(data_len, max_chunk_len - chunk_len);
        memmove(chunk_buffer + chunk_len, data + data_offset, write_len);
        chunk_len += write_len;
        data_offset += write_len;
        data_len -= write_len;

        if (chunk_len == max_chunk_len) {
          data_pos = 0;
          if (max_chunk_len == 8) {
            // only the 8 byte tag+size prefix is buffered so far
            uint32_t chunk_tag = read_tag();
            uint32_t chunk_size = read_int32();
            if (isFirstChunk && chunk_tag != TAG('R', 'I', 'F', 'F')) {
              headerInfo.is_valid = false;
              return IMA_ERR_INVALID_CONTAINER;
            }
            isFirstChunk = false;
            if (chunk_tag == TAG('R', 'I', 'F', 'F')) chunk_size = 4;
            else if (chunk_tag == TAG('d', 'a', 't', 'a')) {
              parseChunk();
              header_complete = true;
              logInfo();
              break;
            }

            /* Wait for the rest of the data before processing the chunk.
            The largest chunk we expect is the "fmt " chunk which is 20 bytes long in this case. */
            write_len = min((size_t)chunk_size, (size_t)20);
            max_chunk_len += write_len;
            continue;
          }
          else {
            chunk_result result = parseChunk();
            switch (result) {
              // Abort processing the header if the RIFF container or a required chunk is not valid
              case IMA_ERR_INVALID_CONTAINER:
              case IMA_ERR_INVALID_CHUNK:
                headerInfo.is_valid = false;
                return result;
                break;
            }
            // chunk processed: start collecting the next tag+size prefix
            chunk_len = 0;
            max_chunk_len = 8;
          }
        }
      }
    }
    return data_offset;
  }

  /// Returns true if the header is complete (data chunk has been found)
  bool isDataComplete() {
    return header_complete;
  }

  // provides the AudioInfo
  WavIMAAudioInfo &audioInfo() {
    return headerInfo;
  }

 protected:
  struct WavIMAAudioInfo headerInfo;  // parsed result
  uint8_t chunk_buffer[28];           // holds tag+size prefix plus fmt payload
  size_t chunk_len = 0;               // bytes currently buffered
  size_t max_chunk_len = 8;           // bytes needed before parsing can proceed
  size_t skip_len = 0;                // bytes of the current chunk to discard
  size_t data_pos = 0;                // read cursor within chunk_buffer
  bool header_complete = false;
  bool isFirstChunk = true;           // the first chunk must be RIFF

  // Reads a big-endian four-character code from the buffer
  uint32_t read_tag() {
    uint32_t tag = getChar();
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    tag = (tag << 8) | getChar();
    return tag;
  }

  // Reads a little-endian 32 bit value from the buffer
  uint32_t read_int32() {
    uint32_t value = (uint32_t)getChar();
    value |= (uint32_t)getChar() << 8;
    value |= (uint32_t)getChar() << 16;
    value |= (uint32_t)getChar() << 24;
    return value;
  }

  // Reads a little-endian 16 bit value from the buffer
  uint16_t read_int16() {
    uint16_t value = getChar();
    value |= getChar() << 8;
    return value;
  }

  // Advances the read cursor by up to n bytes (bounded by the buffered data)
  void skip(int n) {
    n = min((size_t)n, chunk_len - data_pos);
    for (int i=0; i<n; i++) if (data_pos < chunk_len) data_pos++;
    return;
  }

  // Next buffered byte, or -1 when the buffer is exhausted
  int getChar() {
    if (data_pos < chunk_len) return chunk_buffer[data_pos++];
    else return -1;
  }

  void logInfo() {
    LOGI("WavIMAHeader format: %d", headerInfo.format);
    LOGI("WavIMAHeader channels: %d", headerInfo.channels);
    LOGI("WavIMAHeader sample_rate: %d", headerInfo.sample_rate);
    LOGI("WavIMAHeader block align: %d", headerInfo.block_align);
    LOGI("WavIMAHeader bits_per_sample: %d", headerInfo.bits_per_sample);
  }
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Obsolete: WavIMADecoder - based on WAVDecoder - We parse the header data as we receive it
|
||||
* and send the sound data to the stream which was indicated in the constructor.
|
||||
* Only WAV files with WAVE_FORMAT_IMA_ADPCM are supported by this codec!
|
||||
*
|
||||
* We recommend using the WAVDecoder with a corresponding ADPCMDecoder instead.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @author Norman Ritz
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class WavIMADecoder : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new WavIMADecoder object
|
||||
*/
|
||||
|
||||
WavIMADecoder() {
|
||||
TRACED();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new WavIMADecoder object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
*/
|
||||
WavIMADecoder(Print &out_stream, bool active=true) {
|
||||
TRACED();
|
||||
this->out = &out_stream;
|
||||
this->active = active;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Construct a new WavIMADecoder object
|
||||
*
|
||||
* @param out_stream Output Stream to which we write the decoded result
|
||||
* @param bi Object that will be notified about the Audio Formt (Changes)
|
||||
*/
|
||||
|
||||
WavIMADecoder(Print &out_stream, AudioInfoSupport &bi) {
|
||||
TRACED();
|
||||
this->out = &out_stream;
|
||||
addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
~WavIMADecoder() {
|
||||
if (input_buffer != nullptr) delete[] input_buffer;
|
||||
if (output_buffer != nullptr) delete[] output_buffer;
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &out_stream) {
|
||||
this->out = &out_stream;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACED();
|
||||
ima_states[0].predictor = 0;
|
||||
ima_states[0].step_index = 0;
|
||||
ima_states[1].predictor = 0;
|
||||
ima_states[1].step_index = 0;
|
||||
isFirst = true;
|
||||
active = true;
|
||||
header.clearHeader();
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() {
|
||||
TRACED();
|
||||
active = false;
|
||||
}
|
||||
|
||||
const char* mime() {
|
||||
return wav_ima_mime;
|
||||
}
|
||||
|
||||
WavIMAAudioInfo &audioInfoEx() {
|
||||
return header.audioInfo();
|
||||
}
|
||||
|
||||
AudioInfo audioInfo() override {
|
||||
return header.audioInfo();
|
||||
}
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
TRACED();
|
||||
if (active) {
|
||||
if (isFirst) {
|
||||
// we expect at least the full header
|
||||
int written = header.write((uint8_t*)data, len);
|
||||
if (written == IMA_ERR_INVALID_CONTAINER || written == IMA_ERR_INVALID_CHUNK) {
|
||||
isValid = false;
|
||||
isFirst = false;
|
||||
LOGE("File is not valid");
|
||||
return len;
|
||||
}
|
||||
|
||||
if (!header.isDataComplete()) {
|
||||
return len;
|
||||
}
|
||||
|
||||
size_t len_open = len - written;
|
||||
uint8_t *sound_ptr = (uint8_t *) data + written;
|
||||
isFirst = false;
|
||||
isValid = header.audioInfo().is_valid;
|
||||
|
||||
LOGI("WAV sample_rate: %d", header.audioInfo().sample_rate);
|
||||
LOGI("WAV data_length: %u", (unsigned) header.audioInfo().data_length);
|
||||
LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");
|
||||
|
||||
isValid = header.audioInfo().is_valid;
|
||||
if (isValid) {
|
||||
if (input_buffer != nullptr) delete[] input_buffer;
|
||||
if (output_buffer != nullptr) delete[] output_buffer;
|
||||
bytes_per_encoded_block = header.audioInfo().block_align;
|
||||
bytes_per_decoded_block = header.audioInfo().frames_per_block * header.audioInfo().channels * 2;
|
||||
samples_per_decoded_block = bytes_per_decoded_block >> 1;
|
||||
input_buffer = new uint8_t[bytes_per_encoded_block];
|
||||
output_buffer = new int16_t[samples_per_decoded_block];
|
||||
// update sampling rate if the target supports it
|
||||
AudioInfo bi;
|
||||
bi.sample_rate = header.audioInfo().sample_rate;
|
||||
bi.channels = header.audioInfo().channels;
|
||||
bi.bits_per_sample = 16;
|
||||
remaining_bytes = header.audioInfo().data_length;
|
||||
notifyAudioChange(bi);
|
||||
// write prm data from first record
|
||||
LOGI("WavIMADecoder writing first sound data");
|
||||
processInput(sound_ptr, len_open);
|
||||
}
|
||||
} else if (isValid) {
|
||||
processInput((uint8_t*)data, len);
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Alternative API which provides the data from an input stream
|
||||
int readStream(Stream &in) {
|
||||
TRACED();
|
||||
uint8_t buffer[READ_BUFFER_SIZE];
|
||||
int len = in.readBytes(buffer, READ_BUFFER_SIZE);
|
||||
return write(buffer, len);
|
||||
}
|
||||
|
||||
virtual operator bool() {
|
||||
return active;
|
||||
}
|
||||
|
||||
protected:
|
||||
WavIMAHeader header;
|
||||
Print *out;
|
||||
bool isFirst = true;
|
||||
bool isValid = true;
|
||||
bool active;
|
||||
uint8_t *input_buffer = nullptr;
|
||||
int32_t input_pos = 0;
|
||||
size_t remaining_bytes = 0;
|
||||
size_t bytes_per_encoded_block = 0;
|
||||
int16_t *output_buffer = nullptr;
|
||||
size_t bytes_per_decoded_block = 0;
|
||||
size_t samples_per_decoded_block = 0;
|
||||
IMAState ima_states[2];
|
||||
|
||||
int16_t decodeSample(uint8_t sample, int channel = 0) {
|
||||
int step_index = ima_states[channel].step_index;
|
||||
int32_t step = ima_step_table[step_index];
|
||||
step_index += ima_index_table[sample];
|
||||
if (step_index < 0) step_index = 0;
|
||||
else if (step_index > 88) step_index = 88;
|
||||
ima_states[channel].step_index = step_index;
|
||||
int32_t predictor = ima_states[channel].predictor;
|
||||
uint8_t sign = sample & 8;
|
||||
uint8_t delta = sample & 7;
|
||||
int32_t diff = step >> 3;
|
||||
if (delta & 4) diff += step;
|
||||
if (delta & 2) diff += (step >> 1);
|
||||
if (delta & 1) diff += (step >> 2);
|
||||
if (sign) predictor -= diff;
|
||||
else predictor += diff;
|
||||
if (predictor < -32768) predictor = -32768;
|
||||
else if (predictor > 32767) predictor = 32767;
|
||||
ima_states[channel].predictor = predictor;
|
||||
return (int16_t)predictor;
|
||||
}
|
||||
|
||||
void decodeBlock(int channels) {
|
||||
if (channels == 0 || channels > 2) return;
|
||||
input_pos = 4;
|
||||
int output_pos = 1;
|
||||
ima_states[0].predictor = (int16_t)((input_buffer[1] << 8) + input_buffer[0]);
|
||||
ima_states[0].step_index = input_buffer[2];
|
||||
output_buffer[0] = ima_states[0].predictor;
|
||||
if (channels == 2) {
|
||||
ima_states[1].predictor = (int16_t)(input_buffer[5] << 8) + input_buffer[4];
|
||||
ima_states[1].step_index = input_buffer[6];
|
||||
output_buffer[1] = ima_states[1].predictor;
|
||||
input_pos = 8;
|
||||
output_pos = 2;
|
||||
}
|
||||
for (int i=0; i<samples_per_decoded_block-channels; i++) {
|
||||
uint8_t sample = (i & 1) ? input_buffer[input_pos++] >> 4 : input_buffer[input_pos] & 15;
|
||||
if (channels == 1) output_buffer[output_pos++] = decodeSample(sample);
|
||||
else {
|
||||
output_buffer[output_pos] = decodeSample(sample, (i >> 3) & 1);
|
||||
output_pos += 2;
|
||||
if ((i & 15) == 7) output_pos -= 15;
|
||||
else if ((i & 15) == 15) output_pos--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void processInput(const uint8_t* data, size_t size) {
|
||||
int max_size = min(size, remaining_bytes);
|
||||
for (int i=0; i<max_size; i++) {
|
||||
input_buffer[input_pos++] = data[i];
|
||||
if (input_pos == bytes_per_encoded_block) {
|
||||
decodeBlock(header.audioInfo().channels);
|
||||
input_pos = 0;
|
||||
out->write((uint8_t*)output_buffer, bytes_per_decoded_block);
|
||||
}
|
||||
}
|
||||
remaining_bytes -= max_size;
|
||||
if (remaining_bytes == 0) active = false;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
720
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerAVI.h
Normal file
720
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerAVI.h
Normal file
@@ -0,0 +1,720 @@
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Str.h"
|
||||
#include "AudioTools/AudioCodecs/AudioFormat.h"
|
||||
#include "AudioTools/Video/Video.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
|
||||
#define LIST_HEADER_SIZE 12
|
||||
#define CHUNK_HEADER_SIZE 8
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief We try to keep the necessary buffer for parsing as small as possible,
|
||||
* The data() method provides the start of the actual data and with consume
|
||||
* we remove the processed data from the buffer to make space again.
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ParseBuffer {
 public:
  // Appends up to len bytes; returns the number of bytes actually stored
  size_t writeArray(uint8_t *data, size_t len) {
    int to_write = min(availableToWrite(), (size_t)len);
    memmove(vector.data() + available_byte_count, data, to_write);
    available_byte_count += to_write;
    return to_write;
  }
  // Removes size processed bytes from the front of the buffer
  // NOTE(review): assumes size <= available(); a larger value would underflow
  void consume(int size) {
    memmove(vector.data(), &vector[size], available_byte_count - size);
    available_byte_count -= size;
  }
  // Reserves capacity (plus 4 spare bytes)
  void resize(int size) { vector.resize(size + 4); }

  // Start of the buffered (unconsumed) data
  uint8_t *data() { return vector.data(); }

  // Remaining capacity in bytes
  size_t availableToWrite() { return size() - available_byte_count; }

  // Number of buffered bytes
  size_t available() { return available_byte_count; }

  void clear() {
    available_byte_count = 0;
    memset(vector.data(), 0, vector.size());
  }

  bool isEmpty() { return available_byte_count == 0; }

  // Total capacity in bytes
  size_t size() { return vector.size(); }

  // Offset of str within the buffered data, or -1 if not found
  // (memmem is a POSIX/GNU extension, not standard C++)
  long indexOf(const char *str) {
    uint8_t *ptr = (uint8_t *)memmem(vector.data(), available_byte_count, str,
                                     strlen(str));
    return ptr == nullptr ? -1l : ptr - vector.data();
  }

 protected:
  Vector<uint8_t> vector{0};        // backing storage
  size_t available_byte_count = 0;  // bytes currently buffered
};
|
||||
|
||||
/// @brief Four-character code identifier for AVI format
/// @ingroup codecs
using FOURCC = char[4];

// Main AVI header ('avih' chunk), modeled after the Microsoft AVIMAINHEADER.
// The fcc/cb prefix is consumed by the chunk parser, hence commented out.
struct AVIMainHeader {
  // FOURCC fcc;
  // uint32_t cb;
  uint32_t dwMicroSecPerFrame;     // frame display period in microseconds
  uint32_t dwMaxBytesPerSec;       // approximate maximum data rate
  uint32_t dwPaddingGranularity;   // data alignment in bytes
  uint32_t dwFlags;                // AVIF_* flags
  uint32_t dwTotalFrames;          // total number of frames
  uint32_t dwInitialFrames;        // initial frames (interleaved files)
  uint32_t dwStreams;              // number of streams in the file
  uint32_t dwSuggestedBufferSize;  // suggested read buffer size
  uint32_t dwWidth;                // video width in pixels
  uint32_t dwHeight;               // video height in pixels
  uint32_t dwReserved[4];
};
|
||||
|
||||
// Frame rectangle given as width/height (used for AVIStreamHeader::rcFrame)
struct RECT {
  uint32_t dwWidth;
  uint32_t dwHeight;
};
|
||||
|
||||
// Stream header ('strh' chunk), modeled after the Microsoft AVISTREAMHEADER
struct AVIStreamHeader {
  FOURCC fccType;     // stream type: 'auds', 'vids', ...
  FOURCC fccHandler;  // codec handler
  uint32_t dwFlags;
  uint16_t wPriority;
  uint16_t wLanguage;
  uint32_t dwInitialFrames;
  uint32_t dwScale;   // together with dwRate defines the time scale
  uint32_t dwRate;    // dwRate / dwScale = samples (or frames) per second
  uint32_t dwStart;
  uint32_t dwLength;  // stream length in units of dwRate/dwScale
  uint32_t dwSuggestedBufferSize;
  uint32_t dwQuality;
  uint32_t dwSampleSize;
  RECT rcFrame;       // destination rectangle (video streams)
};
|
||||
|
||||
// Video format ('strf' chunk of a video stream), modeled after the Windows
// BITMAPINFOHEADER.
// NOTE(review): in the on-disk BITMAPINFOHEADER, biWidth/biHeight and the
// PelsPerMeter fields are 32-bit values; here they are declared uint64_t, so
// a direct byte-wise mapping of the chunk would not line up — confirm how
// the parser fills this struct.
struct BitmapInfoHeader {
  uint32_t biSize;
  uint64_t biWidth;
  uint64_t biHeight;
  uint16_t biPlanes;
  uint16_t biBitCount;
  uint32_t biCompression;
  uint32_t biSizeImage;
  uint64_t biXPelsPerMeter;
  uint64_t biYPelsPerMeter;
  uint32_t biClrUsed;
  uint32_t biClrImportant;
};
|
||||
|
||||
// Audio format ('strf' chunk of an audio stream), modeled after the Windows
// WAVEFORMATEX structure.
// NOTE(review): on disk wFormatTag is a uint16_t — this assumes AudioFormat
// has a 16-bit underlying type; confirm.
struct WAVFormatX {
  AudioFormat wFormatTag;   // format tag (PCM, ADPCM, ...)
  uint16_t nChannels;       // number of channels
  uint32_t nSamplesPerSec;  // sample rate in Hz
  uint32_t nAvgBytesPerSec; // average bytes per second
  uint16_t nBlockAlign;     // block alignment in bytes
  uint16_t wBitsPerSample;  // bits per sample
  uint16_t cbSize;          // size of extra format information
};
|
||||
|
||||
// struct WAVFormat {
|
||||
// uint16_t wFormatTag;
|
||||
// uint16_t nChannels;
|
||||
// uint32_t nSamplesPerSec;
|
||||
// uint32_t nAvgBytesPerSec;
|
||||
// uint16_t nBlockAlign;
|
||||
// };
|
||||
|
||||
// Content type of an AVI stream
enum StreamContentType { Audio, Video };

// What the parser currently handles: a LIST, a plain chunk, or stream payload
enum ParseObjectType { AVIList, AVIChunk, AVIStreamData };

// States of the AVI parsing state machine
enum ParseState {
  ParseHeader,       // expecting the RIFF/AVI file header
  ParseHdrl,         // expecting the 'hdrl' LIST
  ParseAvih,         // expecting the 'avih' main header chunk
  ParseStrl,         // expecting a 'strl' stream LIST
  SubChunkContinue,  // continue processing the current sub chunk
  SubChunk,          // processing a sub chunk
  ParseRec,          // expecting a 'rec ' LIST
  ParseStrf,         // expecting a 'strf' stream format chunk
  AfterStrf,         // stream format processed
  ParseMovi,         // expecting the 'movi' LIST with the payload
  ParseIgnore,       // skipping unhandled data
};
|
||||
|
||||
/**
|
||||
* @brief Represents a LIST or a CHUNK: The ParseObject represents the
|
||||
* current parsing result. We just keep position information and ids
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ParseObject {
 public:
  void set(size_t currentPos, StrView id, size_t size, ParseObjectType type) {
    set(currentPos, id.c_str(), size, type);
  }

  // Records the FOURCC, position and size of the current LIST/chunk
  void set(size_t currentPos, const char *id, size_t size,
           ParseObjectType type) {
    object_type = type;
    data_size = size;
    start_pos = currentPos;
    // align on word boundary
    if (size % 2 != 0) {
      data_size++;
    }
    end_pos = currentPos + data_size + 4;
    // save FOURCC
    if (id != nullptr) {
      memcpy(chunk_id, id, 4);
      chunk_id[4] = 0;
    }
    open = data_size;
  }
  // FOURCC of this object as a NUL-terminated string
  const char *id() { return chunk_id; }
  // word-aligned payload size
  size_t size() { return data_size; }

  ParseObjectType type() { return object_type; }
  bool isValid() {
    switch (object_type) {
      case AVIStreamData:
        return isAudio() || isVideo();
      case AVIChunk:
        return open > 0;
      case AVIList:
        return true;
    }
    return false;
  }

  // for Chunk: reinterpret the chunk payload as the corresponding structure
  AVIMainHeader *asAVIMainHeader(void *ptr) { return (AVIMainHeader *)ptr; }
  AVIStreamHeader *asAVIStreamHeader(void *ptr) {
    return (AVIStreamHeader *)ptr;
  }
  WAVFormatX *asAVIAudioFormat(void *ptr) { return (WAVFormatX *)ptr; }
  BitmapInfoHeader *asAVIVideoFormat(void *ptr) {
    return (BitmapInfoHeader *)ptr;
  }

  size_t open;       // bytes of this object not yet processed
  size_t end_pos;    // absolute position just past this object
  size_t start_pos;  // absolute position where this object starts
  size_t data_size;  // word-aligned payload size

  // for AVIStreamData
  // stream number from the first two FOURCC bytes
  // NOTE(review): AVI stream ids are the two ASCII digits '00'..'99'; this
  // combines their raw byte values — confirm the intended interpretation
  int streamNumber() {
    return object_type == AVIStreamData ? (chunk_id[1] << 8) | chunk_id[0] : 0;
  }
  // '..wb' marks an audio data chunk
  bool isAudio() {
    return object_type == AVIStreamData
               ? chunk_id[2] == 'w' && chunk_id[3] == 'b'
               : false;
  }
  // '..db' marks an uncompressed video frame
  bool isVideoUncompressed() {
    return object_type == AVIStreamData
               ? chunk_id[2] == 'd' && chunk_id[3] == 'b'
               : false;
  }
  // '..dc' marks a compressed video frame
  bool isVideoCompressed() {
    return object_type == AVIStreamData
               ? chunk_id[2] == 'd' && chunk_id[3] == 'c'
               : false;
  }
  bool isVideo() { return isVideoCompressed() || isVideoUncompressed(); }

 protected:
  // ParseBuffer data_buffer;
  char chunk_id[5] = {};  // FOURCC plus NUL terminator
  ParseObjectType object_type;
};
|
||||
|
||||
/**
|
||||
* @brief AVI Container Decoder which can be fed with small chunks of data. The
|
||||
* minimum length must be bigger then the header size! The file structure is
|
||||
* documented at
|
||||
* https://learn.microsoft.com/en-us/windows/win32/directshow/avi-riff-file-reference
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @ingroup video
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class AVIDecoder : public ContainerDecoder {
|
||||
public:
|
||||
AVIDecoder(int bufferSize = 1024) {
|
||||
parse_buffer.resize(bufferSize);
|
||||
p_decoder = ©_decoder;
|
||||
p_output_audio = new EncodedAudioOutput(©_decoder);
|
||||
}
|
||||
|
||||
AVIDecoder(AudioDecoder *audioDecoder, VideoOutput *videoOut = nullptr,
|
||||
int bufferSize = 1024) {
|
||||
parse_buffer.resize(bufferSize);
|
||||
p_decoder = audioDecoder;
|
||||
p_output_audio = new EncodedAudioOutput(audioDecoder);
|
||||
if (videoOut != nullptr) {
|
||||
setOutputVideoStream(*videoOut);
|
||||
}
|
||||
}
|
||||
|
||||
~AVIDecoder() {
|
||||
if (p_output_audio != nullptr)
|
||||
delete p_output_audio;
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
parse_state = ParseHeader;
|
||||
header_is_avi = false;
|
||||
is_parsing_active = true;
|
||||
current_pos = 0;
|
||||
header_is_avi = false;
|
||||
stream_header_idx = -1;
|
||||
is_metadata_ready = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Defines the audio output stream - usually called by EncodedAudioStream
|
||||
virtual void setOutput(Print &out_stream) override {
|
||||
// p_output_audio = &out_stream;
|
||||
p_output_audio->setOutput(&out_stream);
|
||||
}
|
||||
|
||||
///
|
||||
void setMute(bool mute) { is_mute = mute; }
|
||||
|
||||
virtual void setOutputVideoStream(VideoOutput &out_stream) {
|
||||
p_output_video = &out_stream;
|
||||
}
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", (int)len);
|
||||
int result = parse_buffer.writeArray((uint8_t *)data, len);
|
||||
if (is_parsing_active) {
|
||||
// we expect the first parse to succeed
|
||||
if (parse()) {
|
||||
// if so we process the parse_buffer
|
||||
while (parse_buffer.available() > 4) {
|
||||
if (!parse())
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
LOGD("Parse Error");
|
||||
parse_buffer.clear();
|
||||
result = len;
|
||||
is_parsing_active = false;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
operator bool() override { return is_parsing_active; }
|
||||
|
||||
void end() override { is_parsing_active = false; };
|
||||
|
||||
/// Provides the information from the main header chunk
|
||||
AVIMainHeader mainHeader() { return main_header; }
|
||||
|
||||
/// Provides the information from the stream header chunks
|
||||
AVIStreamHeader streamHeader(int idx) { return stream_header[idx]; }
|
||||
|
||||
/// Provides the video information
|
||||
BitmapInfoHeader aviVideoInfo() { return video_info; };
|
||||
|
||||
const char *videoFormat() { return video_format; }
|
||||
|
||||
/// Provides the audio information
|
||||
WAVFormatX aviAudioInfo() { return audio_info; }
|
||||
|
||||
/// Provides the audio_info.wFormatTag
|
||||
AudioFormat audioFormat() { return audio_info.wFormatTag; }
|
||||
|
||||
/// Returns true if all metadata has been parsed and is available
|
||||
bool isMetadataReady() { return is_metadata_ready; }
|
||||
/// Register a validation callback which is called after parsing just before
|
||||
/// playing the audio
|
||||
void setValidationCallback(bool (*cb)(AVIDecoder &avi)) {
|
||||
validation_cb = cb;
|
||||
}
|
||||
|
||||
/// Provide the length of the video in seconds
|
||||
int videoSeconds() { return video_seconds; }
|
||||
|
||||
/// Replace the synchronization logic with your implementation
|
||||
void setVideoAudioSync(VideoAudioSync *yourSync) { p_synch = yourSync; }
|
||||
|
||||
protected:
|
||||
bool header_is_avi = false;
|
||||
bool is_parsing_active = true;
|
||||
ParseState parse_state = ParseHeader;
|
||||
ParseBuffer parse_buffer;
|
||||
AVIMainHeader main_header;
|
||||
int stream_header_idx = -1;
|
||||
Vector<AVIStreamHeader> stream_header;
|
||||
BitmapInfoHeader video_info;
|
||||
WAVFormatX audio_info;
|
||||
Vector<StreamContentType> content_types;
|
||||
Stack<ParseObject> object_stack;
|
||||
ParseObject current_stream_data;
|
||||
EncodedAudioOutput *p_output_audio = nullptr;
|
||||
VideoOutput *p_output_video = nullptr;
|
||||
long open_subchunk_len = 0;
|
||||
long current_pos = 0;
|
||||
long movi_end_pos = 0;
|
||||
Str spaces;
|
||||
Str str;
|
||||
char video_format[5] = {0};
|
||||
bool is_metadata_ready = false;
|
||||
bool (*validation_cb)(AVIDecoder &avi) = nullptr;
|
||||
bool is_mute = false;
|
||||
CopyDecoder copy_decoder;
|
||||
AudioDecoder *p_decoder = nullptr;
|
||||
int video_seconds = 0;
|
||||
VideoAudioSync defaultSynch;
|
||||
VideoAudioSync *p_synch = &defaultSynch;
|
||||
|
||||
bool isCurrentStreamAudio() {
|
||||
return strncmp(stream_header[stream_header_idx].fccType, "auds", 4) == 0;
|
||||
}
|
||||
|
||||
bool isCurrentStreamVideo() {
|
||||
return strncmp(stream_header[stream_header_idx].fccType, "vids", 4) == 0;
|
||||
}
|
||||
|
||||
// we return true if at least one parse step was successful
|
||||
bool parse() {
|
||||
bool result = true;
|
||||
switch (parse_state) {
|
||||
case ParseHeader: {
|
||||
result = parseHeader();
|
||||
if (result)
|
||||
parse_state = ParseHdrl;
|
||||
} break;
|
||||
|
||||
case ParseHdrl: {
|
||||
ParseObject hdrl = parseList("hdrl");
|
||||
result = hdrl.isValid();
|
||||
if (result) {
|
||||
parse_state = ParseAvih;
|
||||
}
|
||||
} break;
|
||||
|
||||
case ParseAvih: {
|
||||
ParseObject avih = parseChunk("avih");
|
||||
result = avih.isValid();
|
||||
if (result) {
|
||||
main_header = *(avih.asAVIMainHeader(parse_buffer.data()));
|
||||
stream_header.resize(main_header.dwStreams);
|
||||
consume(avih.size());
|
||||
parse_state = ParseStrl;
|
||||
}
|
||||
} break;
|
||||
|
||||
case ParseStrl: {
|
||||
ParseObject strl = parseList("strl");
|
||||
ParseObject strh = parseChunk("strh");
|
||||
stream_header[++stream_header_idx] =
|
||||
*(strh.asAVIStreamHeader(parse_buffer.data()));
|
||||
consume(strh.size());
|
||||
parse_state = ParseStrf;
|
||||
} break;
|
||||
|
||||
case ParseStrf: {
|
||||
ParseObject strf = parseChunk("strf");
|
||||
if (isCurrentStreamAudio()) {
|
||||
audio_info = *(strf.asAVIAudioFormat(parse_buffer.data()));
|
||||
setupAudioInfo();
|
||||
LOGI("audioFormat: %d (%x)", (int)audioFormat(),(int)audioFormat());
|
||||
content_types.push_back(Audio);
|
||||
consume(strf.size());
|
||||
} else if (isCurrentStreamVideo()) {
|
||||
video_info = *(strf.asAVIVideoFormat(parse_buffer.data()));
|
||||
setupVideoInfo();
|
||||
LOGI("videoFormat: %s", videoFormat());
|
||||
content_types.push_back(Video);
|
||||
video_format[4] = 0;
|
||||
consume(strf.size());
|
||||
} else {
|
||||
result = false;
|
||||
}
|
||||
parse_state = AfterStrf;
|
||||
} break;
|
||||
|
||||
case AfterStrf: {
|
||||
// ignore all data until we find a new List
|
||||
int pos = parse_buffer.indexOf("LIST");
|
||||
if (pos >= 0) {
|
||||
consume(pos);
|
||||
ParseObject tmp = tryParseList();
|
||||
if (StrView(tmp.id()).equals("strl")) {
|
||||
parse_state = ParseStrl;
|
||||
} else if (StrView(tmp.id()).equals("movi")) {
|
||||
parse_state = ParseMovi;
|
||||
} else {
|
||||
// e.g. ignore info
|
||||
consume(tmp.size() + LIST_HEADER_SIZE);
|
||||
}
|
||||
} else {
|
||||
// no valid data, so throw it away, we keep the last 4 digits in case
|
||||
// if it contains the beginning of a LIST
|
||||
cleanupStack();
|
||||
consume(parse_buffer.available() - 4);
|
||||
}
|
||||
} break;
|
||||
|
||||
case ParseMovi: {
|
||||
ParseObject movi = tryParseList();
|
||||
if (StrView(movi.id()).equals("movi")) {
|
||||
consume(LIST_HEADER_SIZE);
|
||||
is_metadata_ready = true;
|
||||
if (validation_cb)
|
||||
is_parsing_active = (validation_cb(*this));
|
||||
processStack(movi);
|
||||
movi_end_pos = movi.end_pos;
|
||||
parse_state = SubChunk;
|
||||
// trigger new write
|
||||
result = false;
|
||||
}
|
||||
} break;
|
||||
|
||||
case SubChunk: {
|
||||
// rec is optinal
|
||||
ParseObject hdrl = tryParseList();
|
||||
if (StrView(hdrl.id()).equals("rec")) {
|
||||
consume(CHUNK_HEADER_SIZE);
|
||||
processStack(hdrl);
|
||||
}
|
||||
|
||||
current_stream_data = parseAVIStreamData();
|
||||
parse_state = SubChunkContinue;
|
||||
open_subchunk_len = current_stream_data.open;
|
||||
if (current_stream_data.isVideo()) {
|
||||
LOGI("video:[%d]->[%d]", (int)current_stream_data.start_pos,
|
||||
(int)current_stream_data.end_pos);
|
||||
if (p_output_video != nullptr)
|
||||
p_output_video->beginFrame(current_stream_data.open);
|
||||
} else if (current_stream_data.isAudio()) {
|
||||
LOGI("audio:[%d]->[%d]", (int)current_stream_data.start_pos,
|
||||
(int)current_stream_data.end_pos);
|
||||
} else {
|
||||
LOGW("unknown subchunk at %d", (int)current_pos);
|
||||
}
|
||||
|
||||
} break;
|
||||
|
||||
case SubChunkContinue: {
|
||||
writeData();
|
||||
if (open_subchunk_len == 0) {
|
||||
if (current_stream_data.isVideo() && p_output_video != nullptr) {
|
||||
uint32_t time_used_ms = p_output_video->endFrame();
|
||||
p_synch->delayVideoFrame(main_header.dwMicroSecPerFrame, time_used_ms);
|
||||
}
|
||||
if (tryParseChunk("idx").isValid()) {
|
||||
parse_state = ParseIgnore;
|
||||
} else if (tryParseList("rec").isValid()) {
|
||||
parse_state = ParseRec;
|
||||
} else {
|
||||
if (current_pos >= movi_end_pos) {
|
||||
parse_state = ParseIgnore;
|
||||
} else {
|
||||
parse_state = SubChunk;
|
||||
}
|
||||
}
|
||||
}
|
||||
} break;
|
||||
|
||||
case ParseIgnore: {
|
||||
LOGD("ParseIgnore");
|
||||
parse_buffer.clear();
|
||||
} break;
|
||||
|
||||
default:
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void setupAudioInfo() {
|
||||
info.channels = audio_info.nChannels;
|
||||
info.bits_per_sample = audio_info.wBitsPerSample;
|
||||
info.sample_rate = audio_info.nSamplesPerSec;
|
||||
info.logInfo();
|
||||
// adjust the audio info if necessary
|
||||
if (p_decoder != nullptr) {
|
||||
p_decoder->setAudioInfo(info);
|
||||
info = p_decoder->audioInfo();
|
||||
}
|
||||
notifyAudioChange(info);
|
||||
}
|
||||
|
||||
void setupVideoInfo() {
|
||||
memcpy(video_format, stream_header[stream_header_idx].fccHandler, 4);
|
||||
AVIStreamHeader *vh = &stream_header[stream_header_idx];
|
||||
if (vh->dwScale <= 0) {
|
||||
vh->dwScale = 1;
|
||||
}
|
||||
int rate = vh->dwRate / vh->dwScale;
|
||||
video_seconds = rate <= 0 ? 0 : vh->dwLength / rate;
|
||||
LOGI("videoSeconds: %d seconds", video_seconds);
|
||||
}
|
||||
|
||||
void writeData() {
|
||||
long to_write = min((long)parse_buffer.available(), open_subchunk_len);
|
||||
if (current_stream_data.isAudio()) {
|
||||
LOGD("audio %d", (int)to_write);
|
||||
if (!is_mute){
|
||||
p_synch->writeAudio(p_output_audio, parse_buffer.data(), to_write);
|
||||
}
|
||||
open_subchunk_len -= to_write;
|
||||
cleanupStack();
|
||||
consume(to_write);
|
||||
} else if (current_stream_data.isVideo()) {
|
||||
LOGD("video %d", (int)to_write);
|
||||
if (p_output_video != nullptr)
|
||||
p_output_video->write(parse_buffer.data(), to_write);
|
||||
open_subchunk_len -= to_write;
|
||||
cleanupStack();
|
||||
consume(to_write);
|
||||
}
|
||||
}
|
||||
|
||||
// 'RIFF' fileSize fileType (data)
|
||||
bool parseHeader() {
|
||||
bool header_is_avi = false;
|
||||
int headerSize = 12;
|
||||
if (getStr(0, 4).equals("RIFF")) {
|
||||
ParseObject result;
|
||||
uint32_t header_file_size = getInt(4);
|
||||
header_is_avi = getStr(8, 4).equals("AVI ");
|
||||
result.set(current_pos, "AVI ", header_file_size, AVIChunk);
|
||||
processStack(result);
|
||||
consume(headerSize);
|
||||
|
||||
} else {
|
||||
LOGE("parseHeader");
|
||||
}
|
||||
return header_is_avi;
|
||||
}
|
||||
|
||||
/// We parse a chunk and provide the FOURCC id and size: No content data is
|
||||
/// stored
|
||||
ParseObject tryParseChunk() {
|
||||
ParseObject result;
|
||||
result.set(current_pos, getStr(0, 4), 0, AVIChunk);
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We try to parse the indicated chunk and determine the size: No content
|
||||
/// data is stored
|
||||
ParseObject tryParseChunk(const char *id) {
|
||||
ParseObject result;
|
||||
if (getStr(0, 4).equals(id)) {
|
||||
result.set(current_pos, id, 0, AVIChunk);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseObject tryParseList(const char *id) {
|
||||
ParseObject result;
|
||||
StrView &list_id = getStr(8, 4);
|
||||
if (list_id.equals(id) && getStr(0, 3).equals("LIST")) {
|
||||
result.set(current_pos, getStr(8, 4), getInt(4), AVIList);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We try to parse the actual state for any list
|
||||
ParseObject tryParseList() {
|
||||
ParseObject result;
|
||||
if (getStr(0, 4).equals("LIST")) {
|
||||
result.set(current_pos, getStr(8, 4), getInt(4), AVIList);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We load the indicated chunk from the current data
|
||||
ParseObject parseChunk(const char *id) {
|
||||
ParseObject result;
|
||||
int chunk_size = getInt(4);
|
||||
if (getStr(0, 4).equals(id) && parse_buffer.size() >= chunk_size) {
|
||||
result.set(current_pos, id, chunk_size, AVIChunk);
|
||||
processStack(result);
|
||||
consume(CHUNK_HEADER_SIZE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// We load the indicated list from the current data
|
||||
ParseObject parseList(const char *id) {
|
||||
ParseObject result;
|
||||
if (getStr(0, 4).equals("LIST") && getStr(8, 4).equals(id)) {
|
||||
int size = getInt(4);
|
||||
result.set(current_pos, id, size, AVIList);
|
||||
processStack(result);
|
||||
consume(LIST_HEADER_SIZE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseObject parseAVIStreamData() {
|
||||
ParseObject result;
|
||||
int size = getInt(4);
|
||||
result.set(current_pos, getStr(0, 4), size, AVIStreamData);
|
||||
if (result.isValid()) {
|
||||
processStack(result);
|
||||
consume(8);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void processStack(ParseObject &result) {
|
||||
cleanupStack();
|
||||
object_stack.push(result);
|
||||
spaces.setChars(' ', object_stack.size());
|
||||
LOGD("%s - %s (%d-%d) size:%d", spaces.c_str(), result.id(),
|
||||
(int)result.start_pos, (int)result.end_pos, (int)result.data_size);
|
||||
}
|
||||
|
||||
void cleanupStack() {
|
||||
ParseObject current;
|
||||
// make sure that we remove the object from the stack of we past the end
|
||||
object_stack.peek(current);
|
||||
while (current.end_pos <= current_pos) {
|
||||
object_stack.pop(current);
|
||||
object_stack.peek(current);
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides the string at the indicated byte offset with the indicated length
|
||||
StrView &getStr(int offset, int len) {
|
||||
str.setCapacity(len + 1);
|
||||
const char *data = (const char *)parse_buffer.data();
|
||||
str.copyFrom((data + offset), len, 5);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
/// Provides the int32 at the indicated byte offset
|
||||
uint32_t getInt(int offset) {
|
||||
uint32_t *result = (uint32_t *)(parse_buffer.data() + offset);
|
||||
return *result;
|
||||
}
|
||||
|
||||
/// We remove the processed bytes from the beginning of the buffer
|
||||
void consume(int len) {
|
||||
current_pos += len;
|
||||
parse_buffer.consume(len);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,407 @@
|
||||
/**
|
||||
* @file ContainerBinary.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief A lean and efficient container format which provides Header records
|
||||
* with audio info, Audio records with the audio and Meta which
|
||||
* can contain any additional information. This can be used together with a
|
||||
* codec which does not transmit the audio information or has variable frame
|
||||
* lengths. We expect that a single write() is providing full frames.
|
||||
*
|
||||
* @version 0.1
|
||||
* @date 2022-05-04
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// Record types used by the binary container format
enum class ContainerType : uint8_t {
  Header = 1,     // audio configuration (AudioInfo)
  Audio = 2,      // encoded audio payload
  Meta = 3,       // arbitrary application metadata
  Undefined = 0
};
|
||||
|
||||
/**
 * @brief Common record header which precedes each segment: a "\r\n" sync
 * marker, the record type, the record length and a checksum.
 * NOTE(review): the struct is copied to/from the byte stream with memcpy,
 * so sender and receiver must agree on padding and endianness - confirm
 * before using across different platforms.
 */
struct CommonHeader {
  CommonHeader() = default;
  CommonHeader(ContainerType type, uint16_t len) {
    this->type = type;
    this->len = len;
  }
  char header[2] = {'\r','\n'};  // sync marker used to locate record starts
  ContainerType type;
  // record length: for Audio/Meta records this includes the header; the
  // Header record uses sizeof(AudioInfo) only (see SimpleContainerConfig)
  uint16_t len;
  // XOR checksum over the encoded audio payload; stays 0 for Header/Meta
  uint8_t checksum = 0;
};
|
||||
|
||||
/// Header record: common header followed by the AudioInfo of the stream.
/// Note that common.len is sizeof(AudioInfo) here (payload only).
struct SimpleContainerConfig {
  SimpleContainerConfig() = default;
  CommonHeader common{ContainerType::Header, sizeof(AudioInfo)};
  AudioInfo info;
};
|
||||
|
||||
/// Audio record header; len and checksum are filled in before each write
struct SimpleContainerDataHeader {
  CommonHeader common{ContainerType::Audio, 0};
};
|
||||
|
||||
/// Meta record header; len is filled in before each writeMeta()
struct SimpleContainerMetaDataHeader {
  CommonHeader common{ContainerType::Meta, 0};
};
|
||||
|
||||
// struct ProcessedResult {
|
||||
// ContainerType type = ContainerType::Undefined;
|
||||
// // total length incl header
|
||||
// int total_len = 0;
|
||||
// // processed bytes incl header of last step
|
||||
// int processed = 0;
|
||||
// // still (total) open
|
||||
// int open = 0;
|
||||
// };
|
||||
|
||||
/// @brief Calculates the checksum: XOR over all bytes (0 for an empty range)
/// @param data start of the byte range
/// @param len number of bytes to process
/// @return XOR of all bytes in [data, data + len)
static uint8_t checkSum(const uint8_t *data, size_t len) {
  uint8_t result = 0;
  // fix: use size_t to avoid a signed/unsigned comparison against len
  for (size_t j = 0; j < len; j++) {
    result ^= data[j];
  }
  return result;
}
|
||||
/// @brief Error types reported through the error handler callback of the
/// BinaryContainerDecoder
enum BinaryContainerEncoderError { InvalidHeader, InvalidChecksum, DataMissing};
|
||||
|
||||
/**
|
||||
* @brief Wraps the encoded data into Config, Data, and Meta segments so that we
|
||||
* can recover the audio configuration and orignial segments if this is
|
||||
* relevant. We assume that a full segment is written with each call of write();
|
||||
* The segments are separated with a new line character.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class BinaryContainerEncoder : public AudioEncoder {
|
||||
public:
|
||||
BinaryContainerEncoder() = default;
|
||||
BinaryContainerEncoder(AudioEncoder &encoder) { p_codec = &encoder; }
|
||||
BinaryContainerEncoder(AudioEncoder *encoder) { p_codec = encoder; }
|
||||
|
||||
void setEncoder(AudioEncoder *encoder) { p_codec = encoder; }
|
||||
|
||||
void setOutput(Print &outStream) {
|
||||
LOGD("BinaryContainerEncoder::setOutput");
|
||||
p_out = &outStream;
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
// target.begin();
|
||||
bool rc = p_codec->begin();
|
||||
p_codec->setAudioInfo(cfg.info);
|
||||
is_beginning = true;
|
||||
return rc;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo info) override {
|
||||
TRACED();
|
||||
if (info != audioInfo()) {
|
||||
cfg.info = info;
|
||||
}
|
||||
}
|
||||
|
||||
AudioInfo audioInfo() override { return cfg.info; }
|
||||
|
||||
/// Adds meta data segment
|
||||
size_t writeMeta(const uint8_t *data, size_t len) {
|
||||
LOGD("BinaryContainerEncoder::writeMeta: %d", (int)len);
|
||||
meta.common.len = len + sizeof(SimpleContainerMetaDataHeader);
|
||||
uint8_t tmp_array[meta.common.len];
|
||||
memcpy(tmp_array, &meta, sizeof(meta));
|
||||
memcpy(tmp_array + sizeof(meta), data, len);
|
||||
output(tmp_array, meta.common.len);
|
||||
return len;
|
||||
}
|
||||
|
||||
/// Add data segment. On first write we also add a AudioInfo header
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("BinaryContainerEncoder::write: %d", (int)len);
|
||||
if (is_beginning) {
|
||||
writeHeader();
|
||||
is_beginning = false;
|
||||
}
|
||||
writeAudio((uint8_t *)data, len);
|
||||
return len;
|
||||
}
|
||||
|
||||
void end() { p_codec->end(); }
|
||||
|
||||
operator bool() { return true; };
|
||||
|
||||
virtual const char *mime() { return "audio/binary"; };
|
||||
|
||||
protected:
|
||||
uint64_t packet_count = 0;
|
||||
bool is_beginning = true;
|
||||
int repeat_header;
|
||||
SimpleContainerConfig cfg;
|
||||
SimpleContainerDataHeader dh;
|
||||
SimpleContainerMetaDataHeader meta;
|
||||
AudioEncoder *p_codec = nullptr;
|
||||
Print *p_out = nullptr;
|
||||
|
||||
void writeAudio(const uint8_t *data, size_t len) {
|
||||
LOGD("writeAudio: %d", (int)len);
|
||||
// encode data
|
||||
SingleBuffer<uint8_t> tmp_buffer{(int)len};
|
||||
QueueStream<uint8_t> tmp{tmp_buffer};
|
||||
tmp.begin();
|
||||
p_codec->setOutput(tmp);
|
||||
p_codec->write(data, len);
|
||||
|
||||
// output of audio data header
|
||||
dh.common.len = tmp.available() + sizeof(CommonHeader);
|
||||
dh.common.checksum = checkSum(tmp_buffer.data(), tmp_buffer.available());
|
||||
output((uint8_t *)&dh, sizeof(dh));
|
||||
|
||||
// output of data
|
||||
output(tmp_buffer.data(), tmp_buffer.available());
|
||||
}
|
||||
|
||||
void writeHeader() {
|
||||
LOGD("writeHeader");
|
||||
output((uint8_t *)&cfg, sizeof(cfg));
|
||||
}
|
||||
|
||||
size_t output(const uint8_t *data, size_t len) {
|
||||
if (p_out != nullptr) {
|
||||
int written = p_out->write((uint8_t *)data, len);
|
||||
LOGD("output: %d -> %d", (int)len, written);
|
||||
} else
|
||||
LOGW("output not defined");
|
||||
return len;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * @brief Decodes the provided data from the DAT and CFG segments
 * @ingroup codecs
 * @ingroup decoder
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class BinaryContainerDecoder : public ContainerDecoder {
 public:
  BinaryContainerDecoder() = default;
  BinaryContainerDecoder(AudioDecoder &decoder) { p_codec = &decoder; }
  BinaryContainerDecoder(AudioDecoder *decoder) { p_codec = decoder; }

  /// Defines the codec which decodes the audio payload
  void setDecoder(AudioDecoder *decoder){
    p_codec = decoder;
  }

  // Defines the output: this method is called 2 times: first to define
  // the output defined in the EncodedAudioStream and then to define the
  // real output in the output chain.
  void setOutput(Print &outStream) {
    LOGD("BinaryContainerDecoder::setOutput")
    p_out = &outStream;
  }

  /// Registers a callback which receives the payload of Meta records
  void setMetaCallback(void (*callback)(uint8_t*, int, void*)) {
    meta_callback = callback;
  }

  bool begin() {
    TRACED();
    is_first = true;
    return true;
  }

  void end() { TRACED(); }

  /// Feeds raw container bytes; complete records are parsed and processed
  size_t write(const uint8_t *data, size_t len) {
    LOGD("write: %d", (int)len);
    uint8_t *data8 = (uint8_t *)data;
    // grow the internal buffer so that a record larger than a single
    // write() call can be accumulated
    if (buffer.size() < len) {
      buffer.resize(
          std::max(static_cast<int>(DEFAULT_BUFFER_SIZE + header_size),
                   static_cast<int>(len * 4 + header_size)));
    }

    size_t result = buffer.writeArray(data8, len);
    // process as many complete records as are available
    while (parseBuffer())
      ;
    return ignore_write_errors ? len : result;
  }

  operator bool() { return true; };

  /// Registers a callback which is notified about parse/checksum errors
  void addErrorHandler(void (*error_handler)(BinaryContainerEncoderError error, BinaryContainerDecoder* source, void* ref)){
    this->error_handler = error_handler;
  }

  /// If set to true we do not expect a retry to write the missing data but continue just with the next. (Default is true);
  void setIgnoreWriteErrors(bool flag){
    ignore_write_errors = flag;
  }

  /// Provide additional information for callback
  void setReference(void* ref){
    reference = ref;
  }

 protected:
  bool is_first = true;
  CommonHeader header;                     // header of the record being parsed
  const size_t header_size = sizeof(header);
  AudioDecoder *p_codec = nullptr;
  SingleBuffer<uint8_t> buffer{0};         // accumulates incoming bytes
  Print *p_out = nullptr;
  void (*meta_callback)(uint8_t* data, int len, void* ref) = nullptr;
  void (*error_handler)(BinaryContainerEncoderError error, BinaryContainerDecoder* source, void* ref) = nullptr;
  bool ignore_write_errors = true;
  void * reference = nullptr;              // opaque context passed to callbacks


  /// Locates the next "\r\n" sync marker and processes one record if it is
  /// completely available; returns true if a record was processed
  bool parseBuffer() {
    LOGD("parseBuffer");
    bool result = false;

    // NOTE(review): indexOf on the raw buffer assumes the data is
    // 0-terminated or the marker occurs before any stale bytes - confirm
    // against the SingleBuffer implementation
    StrView str{(const char *)buffer.data()};
    int start = str.indexOf("\r\n");
    LOGD("start: %d", start);
    if (start < 0) {
      return false;
    }
    // get next record
    if (buffer.available() - start > sizeof(header)) {
      // determine header
      memmove((uint8_t *)&header, buffer.data() + start, sizeof(header));

      // check header
      if (!isValidHeader()) {
        LOGW("invalid header: %d", header.type);
        if (error_handler) error_handler(InvalidHeader, this, reference);
        nextRecord();
        return false;
      };

      if (buffer.available() - start >= header.len) {
        // move to start of frame
        buffer.clearArray(start);
        // process frame
        result = processData();
      } else {
        LOGD("not enough data - available %d / req: %d", buffer.available(),
             header.len);
        if (error_handler) error_handler(DataMissing, this, reference);

      }
    } else {
      LOGD("not enough data for header: %d", buffer.available());
      if (error_handler) error_handler(DataMissing, this, reference);
    }
    return result;
  }

  // processes the completed data from the buffer: e.g. writes it
  bool processData() {
    LOGD("processData");
    bool rc = false;
    switch (header.type) {
      case ContainerType::Header: {
        LOGD("Header");
        // Config record: read the AudioInfo and (re)start the codec
        SimpleContainerConfig config;
        buffer.readArray((uint8_t *)&config, sizeof(config));
        info = config.info;
        notifyAudioChange(info);
        info.logInfo();
        p_codec->setAudioInfo(info);
        p_codec->begin();
        rc = true;
      } break;

      case ContainerType::Audio: {
        LOGD("Audio");
        // skip the record header; header.len includes it
        buffer.clearArray(sizeof(header));
        int data_len = header.len - header_size;
        uint8_t crc = checkSum(buffer.data(), data_len);
        if (header.checksum == crc) {
          // decode into a temporary buffer sized for expansion
          SingleBuffer<uint8_t> tmp_buffer{data_len * 5};
          QueueStream<uint8_t> tmp{tmp_buffer};
          tmp.begin();
          p_codec->setOutput(tmp);
          p_codec->write(buffer.data(), data_len);

          // output decoded data
          output(tmp_buffer.data(), tmp_buffer.available());
          buffer.clearArray(data_len);
        } else {
          LOGW("invalid checksum");
          if (error_handler) error_handler(InvalidChecksum, this, reference);
          // move to next record
          nextRecord();
          return false;
        }
        rc = true;
      } break;

      case ContainerType::Meta: {
        LOGD("Meta");
        // skip the record header and hand the payload to the callback
        buffer.clearArray(sizeof(header));
        int data_len = header.len - header_size;
        if (meta_callback != nullptr) {
          meta_callback(buffer.data(), data_len, reference);
        }
        buffer.clearArray(data_len);
        rc = true;
      } break;
    }
    return rc;
  }

  /// Sanity check on the parsed header: Header/Meta records always carry a
  /// 0 checksum (see CommonHeader)
  bool isValidHeader() {
    switch (header.type) {
      case ContainerType::Header:
        return header.checksum == 0;
      case ContainerType::Audio:
        return true;
      case ContainerType::Meta:
        return header.checksum == 0;
    }
    return false;
  }

  /// Returns the next byte in the buffer without consuming it (0 if empty)
  uint8_t peekBufferValue(){
    uint8_t byte_value=0;
    buffer.peek(byte_value);
    return byte_value;
  }

  /// Skips ahead to resynchronize after an error: consumes bytes up to
  /// (but not including) the next '\n'
  void nextRecord() {
    TRACED();
    uint8_t byte_value;
    while (buffer.available() && peekBufferValue() != '\n')
      buffer.read(byte_value);
  }

  // writes the data to the decoder which forwards it to the output; if there
  // is no codec we write to the output instead
  size_t output(uint8_t *data, size_t len) {
    LOGD("output: %d", (int)len);
    if (p_out != nullptr)
      p_out->write((uint8_t *)data, len);
    else
      LOGW("output not defined");

    return len;
  }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
174
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerM4A.h
Normal file
174
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerM4A.h
Normal file
@@ -0,0 +1,174 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/M4AAudioDemuxer.h"
|
||||
#include "AudioTools/AudioCodecs/MultiDecoder.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief M4A Demuxer that extracts audio from M4A/MP4 containers.
|
||||
* The audio is decoded into pcm with the help of the provided decoder.
|
||||
* format.
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class ContainerM4A : public ContainerDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Default constructor: If no decoder is provided, the
|
||||
* raw audio data is provided to the defined output.
|
||||
*/
|
||||
ContainerM4A() {
|
||||
demux.setReference(this);
|
||||
demux.setCallback(decodeAudio);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Constructor with decoder. Sets up the demuxer and decoder
|
||||
* notification.
|
||||
* @param decoder Reference to a MultiDecoder for PCM output.
|
||||
*/
|
||||
ContainerM4A(MultiDecoder& decoder) : ContainerM4A() { setDecoder(decoder); }
|
||||
|
||||
/**
|
||||
* @brief Set the output stream for decoded or raw audio.
|
||||
* @param out_stream Output AudioStream.
|
||||
*/
|
||||
void setOutput(Print& out_stream) override {
|
||||
if (p_decoder != nullptr && p_decoder->getOutput()!=&out_stream) {
|
||||
p_decoder->setOutput(out_stream);
|
||||
}
|
||||
ContainerDecoder::setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns true if the result is PCM (decoder is present).
|
||||
* @return true if PCM output, false otherwise.
|
||||
*/
|
||||
bool isResultPCM() override { return p_decoder != nullptr ? true : false; }
|
||||
|
||||
/**
|
||||
* @brief Initialize the demuxer and decoder.
|
||||
* @return true on success.
|
||||
*/
|
||||
bool begin() override {
|
||||
demux.begin();
|
||||
if (p_decoder) p_decoder->begin();
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief End the demuxer and decoder, releasing resources.
|
||||
*/
|
||||
void end() override {
|
||||
TRACED();
|
||||
is_active = false;
|
||||
is_magic_cookie_processed = false;
|
||||
if (p_decoder) p_decoder->end();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Feed data to the demuxer for parsing.
|
||||
* @param data Pointer to input data.
|
||||
* @param len Length of input data.
|
||||
* @return Number of bytes processed (always len).
|
||||
*/
|
||||
size_t write(const uint8_t* data, size_t len) override {
|
||||
if (is_active == false) return len;
|
||||
demux.write(data, len);
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns true if the demuxer is active.
|
||||
* @return true if active, false otherwise.
|
||||
*/
|
||||
operator bool() override { return is_active; }
|
||||
/**
|
||||
* @brief Sets the buffer to use for sample sizes.
|
||||
* You can use this to provide a custom buffer that
|
||||
* does not rely on RAM (e.g a file based buffer or
|
||||
* one using Redis)
|
||||
* @param buffer Reference to the buffer to use.
|
||||
*/
|
||||
virtual void setSampleSizesBuffer(BaseBuffer<stsz_sample_size_t>& buffer) {
|
||||
demux.setSampleSizesBuffer(buffer);
|
||||
}
|
||||
/**
|
||||
* @brief Sets the buffer to use for sample sizes. This is currently
|
||||
* not used!
|
||||
* @param buffer Reference to the buffer to use.
|
||||
*/
|
||||
virtual void setChunkOffsetsBuffer(BaseBuffer<uint32_t>& buffer) {
|
||||
demux.setChunkOffsetsBuffer(buffer);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets the decoder to use for audio frames.
|
||||
* @param decoder Reference to a MultiDecoder for PCM output.
|
||||
* @return true if set successfully, false otherwise.
|
||||
*/
|
||||
bool setDecoder(MultiDecoder& decoder) {
|
||||
p_decoder = &decoder;
|
||||
p_decoder->addNotifyAudioChange(*this);
|
||||
return true;
|
||||
}
|
||||
|
||||
M4AAudioDemuxer& getDemuxer() {
|
||||
return demux;
|
||||
}
|
||||
|
||||
protected:
|
||||
bool is_active = false; ///< True if demuxer is active.
|
||||
bool is_magic_cookie_processed =
|
||||
false; ///< True if ALAC magic cookie has been processed.
|
||||
MultiDecoder* p_decoder = nullptr; ///< Pointer to the MultiDecoder.
|
||||
M4AAudioDemuxer demux; ///< Internal demuxer instance.
|
||||
|
||||
/**
|
||||
* @brief Static callback for demuxed audio frames.
|
||||
* Handles decoder selection and magic cookie for ALAC.
|
||||
* @param frame The demuxed audio frame.
|
||||
* @param ref Reference to the ContainerM4A instance.
|
||||
*/
|
||||
static void decodeAudio(const M4AAudioDemuxer::Frame& frame, void* ref) {
|
||||
ContainerM4A* self = static_cast<ContainerM4A*>(ref);
|
||||
if (self->p_decoder == nullptr) {
|
||||
self->p_print->write(frame.data, frame.size);
|
||||
return;
|
||||
}
|
||||
MultiDecoder& dec = *(self->p_decoder);
|
||||
const char* old_mime = dec.selectedMime();
|
||||
|
||||
// select decoder based on mime type
|
||||
if (!dec.selectDecoder(frame.mime)) {
|
||||
const char* mime = frame.mime ? frame.mime : "(nullptr)";
|
||||
LOGE("No decoder found for mime type: %s", mime);
|
||||
return;
|
||||
}
|
||||
|
||||
// for ALAC only: process magic cookie if not done yet
|
||||
if (StrView(frame.mime) == "audio/alac" &&
|
||||
!self->is_magic_cookie_processed) {
|
||||
auto& magic_cookie = self->demux.getALACMagicCookie();
|
||||
if (magic_cookie.size() > 0) {
|
||||
if (!dec.setCodecConfig(magic_cookie.data(), magic_cookie.size())) {
|
||||
LOGE("Failed to set ALAC magic cookie for decoder: %s",
|
||||
dec.selectedMime());
|
||||
}
|
||||
}
|
||||
self->is_magic_cookie_processed = true;
|
||||
}
|
||||
// write encoded data to decoder
|
||||
dec.write(frame.data, frame.size);
|
||||
|
||||
// restore previous decoder
|
||||
dec.selectDecoder(old_mime);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
331
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerOSC.h
Normal file
331
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerOSC.h
Normal file
@@ -0,0 +1,331 @@
|
||||
/**
|
||||
* @file ContainerOSC.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief A simple container format which uses OSC messages to
|
||||
* tramsmit Header records with audio info and Audio records with the audio
|
||||
* data.
|
||||
*
|
||||
* @version 0.1
|
||||
* @date 2025-05-20
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
#include <string.h>
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/MultiDecoder.h"
|
||||
#include "AudioTools/Communication/OSCData.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Wraps the encoded data into OSC info and data segments so that the
|
||||
* receiver can recover the audio configuration and orignial segments.
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OSCContainerEncoder : public AudioEncoder {
|
||||
public:
|
||||
OSCContainerEncoder() = default;
|
||||
OSCContainerEncoder(AudioEncoder &encoder) { p_codec = &encoder; }
|
||||
|
||||
void setEncoder(AudioEncoder *encoder) { p_codec = encoder; }
|
||||
|
||||
void setOutput(Print &outStream) { p_out = &outStream; }
|
||||
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
if (p_codec == nullptr) return false;
|
||||
osc_out.setOutput(*p_out);
|
||||
osc_out.begin();
|
||||
p_codec->setOutput(osc_out);
|
||||
p_codec->setAudioInfo(audioInfo());
|
||||
is_active = p_codec->begin();
|
||||
writeAudioInfo(audioInfo(), p_codec->mime());
|
||||
return is_active;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo info) override {
|
||||
TRACED();
|
||||
if (is_active) writeAudioInfo(audioInfo(), p_codec->mime());
|
||||
AudioWriter::setAudioInfo(info);
|
||||
}
|
||||
|
||||
/// Add data segment. On first write we also add a AudioInfo header
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("OSCContainerEncoder::write: %d", (int)len);
|
||||
if ((repeat_info > 0) && (packet_count % repeat_info == 0)) {
|
||||
writeAudioInfo(audioInfo(), p_codec->mime());
|
||||
}
|
||||
p_codec->write(data, len);
|
||||
packet_count++;
|
||||
return len;
|
||||
}
|
||||
|
||||
void end() {
|
||||
p_codec->end();
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
operator bool() { return is_active; };
|
||||
|
||||
virtual const char *mime() { return "audio/OSC"; };
|
||||
|
||||
/// Activate/deactivate the sending of the audio info
|
||||
void setInfoActive(bool flag) { is_send_info_active = flag; }
|
||||
/// Automatically resend audio info ever nth write.
|
||||
void setRepeatInfoEvery(int packet_count) {
|
||||
this->repeat_info = packet_count;
|
||||
}
|
||||
|
||||
/// Returns the sequence number of the next packet
|
||||
uint64_t getSequenceNumber() { return osc_out.getSequenceNumber(); }
|
||||
|
||||
/// Define a reference object to be provided by the callback
|
||||
void setReference(void *ref) { osc_out.setReference(ref); }
|
||||
|
||||
/// Get informed about the encoded packages
|
||||
void setEncodedWriteCallback(void (*write_callback)(uint8_t *data, size_t len,
|
||||
uint64_t seq,
|
||||
void *ref)) {
|
||||
osc_out.setEncodedWriteCallback(write_callback);
|
||||
}
|
||||
|
||||
/// Resend the encoded data
|
||||
size_t resendEncodedData(uint8_t *data, size_t len, uint64_t seq) {
|
||||
return osc_out.write(data, len, seq);
|
||||
}
|
||||
|
||||
protected:
|
||||
uint64_t packet_count = 0;
|
||||
int repeat_info = 0;
|
||||
bool is_active = false;
|
||||
bool is_send_info_active = true;
|
||||
AudioEncoder *p_codec = nullptr;
|
||||
Print *p_out = nullptr;
|
||||
|
||||
/// Output Encoded Audio via OSC
|
||||
class OSCOutput : public AudioOutput {
|
||||
public:
|
||||
void setReference(void *ref) { this->ref = ref; }
|
||||
void setOutput(Print &outStream) { p_out = &outStream; }
|
||||
void setEncodedWriteCallback(void (*write_callback)(
|
||||
uint8_t *data, size_t len, uint64_t seq, void *ref)) {
|
||||
this->encoded_write_callback = write_callback;
|
||||
}
|
||||
uint64_t getSequenceNumber() { return sequence_number; }
|
||||
bool begin() {
|
||||
sequence_number = 0;
|
||||
return true;
|
||||
}
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
size_t result = write(data, len);
|
||||
sequence_number++;
|
||||
return result;
|
||||
}
|
||||
size_t write(const uint8_t *data, size_t len, uint64_t seq) {
|
||||
LOGD("writeAudio: %d", (int)len);
|
||||
if (encoded_write_callback != nullptr) {
|
||||
encoded_write_callback((uint8_t *)data, len, sequence_number, ref);
|
||||
}
|
||||
uint8_t osc_data[len + 20]; // 20 is guess to cover address & fmt
|
||||
OSCData osc{osc_data, sizeof(osc_data)};
|
||||
osc.setAddress("/audio/data");
|
||||
osc.setFormat("ttb");
|
||||
osc.write((uint64_t)millis());
|
||||
// we use a uint64_t for a sequence number
|
||||
osc.write(sequence_number);
|
||||
osc.write(data, len);
|
||||
p_out->write(osc_data, osc.size());
|
||||
return len;
|
||||
}
|
||||
|
||||
protected:
|
||||
void (*encoded_write_callback)(uint8_t *data, size_t len, uint64_t seq,
|
||||
void *ref) = nullptr;
|
||||
Print *p_out = nullptr;
|
||||
uint64_t sequence_number = 0;
|
||||
void *ref = nullptr;
|
||||
} osc_out;
|
||||
|
||||
/// OUtput AudioInfo via OSC
|
||||
void writeAudioInfo(AudioInfo info, const char *mime) {
|
||||
if (is_send_info_active) {
|
||||
LOGD("writeAudioInfo");
|
||||
uint8_t osc_data[100];
|
||||
OSCData osc{osc_data, sizeof(osc_data)};
|
||||
osc.setAddress("/audio/info");
|
||||
osc.setFormat("iiis");
|
||||
osc.write((int32_t)info.sample_rate);
|
||||
osc.write((int32_t)info.channels);
|
||||
osc.write((int32_t)info.bits_per_sample);
|
||||
osc.write(mime);
|
||||
p_out->write(osc_data, osc.size());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Decodes the provided data from the OSC segments. I recommend to
|
||||
* assign a MultiDecoder so that we can support muiltiple audio types.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OSCContainerDecoder : public ContainerDecoder {
|
||||
public:
|
||||
OSCContainerDecoder() = default;
|
||||
OSCContainerDecoder(AudioDecoder &decoder) {
|
||||
setDecoder(decoder);
|
||||
}
|
||||
OSCContainerDecoder(MultiDecoder &decoder) {
|
||||
setDecoder(decoder);
|
||||
}
|
||||
|
||||
/// Defines the decoder to be used
|
||||
void setDecoder(AudioDecoder &decoder) { p_codec = &decoder; }
|
||||
|
||||
/// Defines the decoder to be used: special logic for multidecoder
|
||||
void setDecoder(MultiDecoder &decoder) {
|
||||
p_codec = &decoder;
|
||||
is_multi_decoder = true;
|
||||
}
|
||||
|
||||
/// Optionally define you own OSCData object
|
||||
void setOSCData(OSCData &osc) { p_osc = &osc; }
|
||||
|
||||
void setOutput(Print &outStream) {
|
||||
LOGD("OSCContainerDecoder::setOutput")
|
||||
p_out = &outStream;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACED();
|
||||
if (p_codec == nullptr || p_osc == nullptr) return false;
|
||||
p_osc->setReference(this);
|
||||
p_osc->addCallback("/audio/info", parseInfo, OSCCompare::StartsWith);
|
||||
p_osc->addCallback("/audio/data", parseData, OSCCompare::StartsWith);
|
||||
is_active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void end() { is_active = false; }
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
if (!is_active) return 0;
|
||||
LOGD("write: %d", (int)len);
|
||||
if (!p_osc->parse((uint8_t *)data, len)) {
|
||||
return 0;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
operator bool() { return is_active; };
|
||||
|
||||
/// Provides the mime type from the encoder
|
||||
const char *mime() { return mime_str.c_str(); };
|
||||
|
||||
/// Provides the sequence number of the last packet
|
||||
uint64_t getSequenceNumber() { return seq_no; }
|
||||
|
||||
/// Adds an new parser callback for a specific address matching string
|
||||
bool addParserCallback(const char *address,
|
||||
bool (*callback)(OSCData &data, void *ref),
|
||||
OSCCompare compare = OSCCompare::Matches) {
|
||||
if (p_osc == nullptr) return false;
|
||||
p_osc->addCallback(address, callback, compare);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Replace the write to the decoder with a callback:
|
||||
void setWriteCallback(bool (*write_callback)(uint64_t time, uint64_t seq,
|
||||
uint8_t *data, size_t len,
|
||||
void *ref)) {
|
||||
this->write_callback = write_callback;
|
||||
}
|
||||
|
||||
/// Callback to be called when data is missing
|
||||
void setMissingDataCallback(void (*missing_data_callback)(uint64_t from_seq,
|
||||
uint64_t to_seq,
|
||||
void *ref)) {
|
||||
this->missing_data_callback = missing_data_callback;
|
||||
}
|
||||
|
||||
/// Provide a reference object to the callback
|
||||
void setReference(void *ref) { this->ref = ref; }
|
||||
|
||||
protected:
|
||||
bool is_active = false;
|
||||
bool is_multi_decoder = false;
|
||||
AudioDecoder *p_codec = nullptr;
|
||||
SingleBuffer<uint8_t> buffer{0};
|
||||
Print *p_out = nullptr;
|
||||
OSCData osc_default;
|
||||
OSCData *p_osc = &osc_default;
|
||||
Str mime_str;
|
||||
uint64_t seq_no = 0;
|
||||
/// Return false to complete the processing w/o writing to the decoder
|
||||
bool (*write_callback)(uint64_t time, uint64_t seq, uint8_t *data, size_t len,
|
||||
void *ref) = nullptr;
|
||||
void (*missing_data_callback)(uint64_t from_seq, uint64_t to_seq,
|
||||
void *ref) = missingDataCallback;
|
||||
void *ref = nullptr;
|
||||
|
||||
/// Default callback for missing data: just log the missing range
|
||||
static void missingDataCallback(uint64_t from_seq, uint64_t to_seq,
|
||||
void *ref) {
|
||||
LOGW("Missing sequence numbers %d - %d", from_seq, to_seq);
|
||||
}
|
||||
|
||||
static bool parseData(OSCData &osc, void *ref) {
|
||||
uint64_t time = osc.readTime();
|
||||
uint64_t seq = osc.readTime();
|
||||
OSCBinaryData data = osc.readData();
|
||||
OSCContainerDecoder *self = static_cast<OSCContainerDecoder *>(ref);
|
||||
// Check for missing sequence numbers
|
||||
if (self->seq_no + 1 != seq) {
|
||||
self->missing_data_callback(self->seq_no + 1, seq - 1, self->ref);
|
||||
}
|
||||
// store the actual sequence number
|
||||
self->seq_no = seq;
|
||||
// call write callbak if defined
|
||||
if (self->write_callback != nullptr) {
|
||||
bool ok = self->write_callback(time, seq, data.data, data.len, ref);
|
||||
if (!ok) return true;
|
||||
}
|
||||
// output to decoder
|
||||
if (self->p_codec != nullptr) {
|
||||
self->p_codec->write(data.data, data.len);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool parseInfo(OSCData &osc, void *ref) {
|
||||
AudioInfo info;
|
||||
info.sample_rate = osc.readInt32();
|
||||
info.channels = osc.readInt32();
|
||||
info.bits_per_sample = osc.readInt32();
|
||||
const char *mime = osc.readString();
|
||||
|
||||
OSCContainerDecoder *self = static_cast<OSCContainerDecoder *>(ref);
|
||||
if (self != nullptr) {
|
||||
self->setAudioInfo(info);
|
||||
self->mime_str = mime;
|
||||
LOGI("mime: %s", mime);
|
||||
// select the right decoder based on the mime type
|
||||
if (self->is_multi_decoder)
|
||||
static_cast<MultiDecoder*>(self->p_codec)->selectDecoder(mime);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
421
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerOgg.h
Normal file
421
libraries/audio-tools/src/AudioTools/AudioCodecs/ContainerOgg.h
Normal file
@@ -0,0 +1,421 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/CodecOpus.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
#include "oggz.h"
|
||||
|
||||
#define OGG_READ_SIZE (1024)
|
||||
#define OGG_DEFAULT_BUFFER_SIZE (OGG_READ_SIZE)
|
||||
// #define OGG_DEFAULT_BUFFER_SIZE (246)
|
||||
// #define OGG_READ_SIZE (512)
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Decoder for Ogg Container. Decodes a packet from an Ogg
|
||||
* container. The Ogg begin segment contains the AudioInfo structure. You can
|
||||
* subclass and overwrite the beginOfSegment() method to implement your own
|
||||
* headers
|
||||
* Dependency: https://github.com/pschatzmann/arduino-libopus
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OggContainerDecoder : public ContainerDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Construct a new OggContainerDecoder object
|
||||
*/
|
||||
|
||||
OggContainerDecoder() {
|
||||
p_codec = &dec_copy;
|
||||
out.setDecoder(p_codec);
|
||||
}
|
||||
|
||||
OggContainerDecoder(AudioDecoder *decoder) { setDecoder(decoder); }
|
||||
|
||||
OggContainerDecoder(AudioDecoder &decoder) { setDecoder(&decoder); }
|
||||
|
||||
void setDecoder(AudioDecoder *decoder) {
|
||||
p_codec = decoder;
|
||||
out.setDecoder(p_codec);
|
||||
}
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &print) override { out.setOutput(&print); }
|
||||
|
||||
void addNotifyAudioChange(AudioInfoSupport &bi) override {
|
||||
out.addNotifyAudioChange(bi);
|
||||
ContainerDecoder::addNotifyAudioChange(bi);
|
||||
}
|
||||
|
||||
AudioInfo audioInfo() override { return out.audioInfo(); }
|
||||
|
||||
bool begin(AudioInfo info) override {
|
||||
TRACED();
|
||||
this->info = info;
|
||||
return begin();
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
out.setAudioInfo(info);
|
||||
out.begin();
|
||||
if (p_oggz == nullptr) {
|
||||
p_oggz = oggz_new(OGGZ_READ | OGGZ_AUTO); // OGGZ_NONSTRICT
|
||||
is_open = true;
|
||||
// Callback to Replace standard IO
|
||||
if (oggz_io_set_read(p_oggz, ogg_io_read, this) != 0) {
|
||||
LOGE("oggz_io_set_read");
|
||||
is_open = false;
|
||||
}
|
||||
// Callback
|
||||
if (oggz_set_read_callback(p_oggz, -1, read_packet, this) != 0) {
|
||||
LOGE("oggz_set_read_callback");
|
||||
is_open = false;
|
||||
}
|
||||
|
||||
if (oggz_set_read_page(p_oggz, -1, read_page, this) != 0) {
|
||||
LOGE("oggz_set_read_page");
|
||||
is_open = false;
|
||||
}
|
||||
}
|
||||
return is_open;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
TRACED();
|
||||
flush();
|
||||
out.end();
|
||||
is_open = false;
|
||||
oggz_close(p_oggz);
|
||||
p_oggz = nullptr;
|
||||
}
|
||||
|
||||
void flush() {
|
||||
LOGD("oggz_read...");
|
||||
while ((oggz_read(p_oggz, OGG_READ_SIZE)) > 0)
|
||||
;
|
||||
}
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("write: %d", (int)len);
|
||||
|
||||
// fill buffer
|
||||
size_t size_consumed = buffer.writeArray((uint8_t *)data, len);
|
||||
if (buffer.availableForWrite() == 0) {
|
||||
// Read all bytes into oggz, calling any read callbacks on the fly.
|
||||
flush();
|
||||
}
|
||||
// write remaining bytes
|
||||
if (size_consumed < len) {
|
||||
size_consumed += buffer.writeArray((uint8_t *)data + size_consumed,
|
||||
len - size_consumed);
|
||||
flush();
|
||||
}
|
||||
return size_consumed;
|
||||
}
|
||||
|
||||
virtual operator bool() override { return is_open; }
|
||||
|
||||
protected:
|
||||
EncodedAudioOutput out;
|
||||
CopyDecoder dec_copy;
|
||||
AudioDecoder *p_codec = nullptr;
|
||||
RingBuffer<uint8_t> buffer{OGG_DEFAULT_BUFFER_SIZE};
|
||||
OGGZ *p_oggz = nullptr;
|
||||
bool is_open = false;
|
||||
long pos = 0;
|
||||
|
||||
// Final Stream Callback -> provide data to ogg
|
||||
static size_t ogg_io_read(void *user_handle, void *buf, size_t n) {
|
||||
LOGD("ogg_io_read: %d", (int)n);
|
||||
size_t result = 0;
|
||||
OggContainerDecoder *self = (OggContainerDecoder *)user_handle;
|
||||
if (self->buffer.available() >= n) {
|
||||
OggContainerDecoder *self = (OggContainerDecoder *)user_handle;
|
||||
result = self->buffer.readArray((uint8_t *)buf, n);
|
||||
self->pos += result;
|
||||
|
||||
} else {
|
||||
result = 0;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Process full packet
|
||||
static int read_packet(OGGZ *oggz, oggz_packet *zp, long serialno,
|
||||
void *user_data) {
|
||||
LOGD("read_packet: %d", (int)zp->op.bytes);
|
||||
OggContainerDecoder *self = (OggContainerDecoder *)user_data;
|
||||
ogg_packet *op = &zp->op;
|
||||
int result = op->bytes;
|
||||
if (op->b_o_s) {
|
||||
self->beginOfSegment(op);
|
||||
} else if (op->e_o_s) {
|
||||
self->endOfSegment(op);
|
||||
} else {
|
||||
if (memcmp(op->packet, "OpusTags", 8) == 0) {
|
||||
self->beginOfSegment(op);
|
||||
} else {
|
||||
LOGD("process audio packet");
|
||||
int eff = self->out.write(op->packet, op->bytes);
|
||||
if (eff != result) {
|
||||
LOGE("Incomplere write");
|
||||
}
|
||||
}
|
||||
}
|
||||
// 0 = success
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int read_page(OGGZ *oggz, const ogg_page *og, long serialno,
|
||||
void *user_data) {
|
||||
LOGD("read_page: %d", (int)og->body_len);
|
||||
// 0 = success
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual void beginOfSegment(ogg_packet *op) {
|
||||
LOGD("bos");
|
||||
if (op->bytes == sizeof(AudioInfo)) {
|
||||
AudioInfo cfg(*(AudioInfo*)op->packet);
|
||||
cfg.logInfo();
|
||||
if (cfg.bits_per_sample == 16 || cfg.bits_per_sample == 24 ||
|
||||
cfg.bits_per_sample == 32) {
|
||||
setAudioInfo(cfg);
|
||||
} else {
|
||||
LOGE("Invalid AudioInfo")
|
||||
}
|
||||
} else {
|
||||
LOGE("Invalid Header")
|
||||
}
|
||||
}
|
||||
|
||||
virtual void endOfSegment(ogg_packet *op) {
|
||||
// end segment not supported
|
||||
LOGW("e_o_s");
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Output class for the OggContainerEncoder. Each
|
||||
* write is ending up as container entry
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OggContainerOutput : public AudioOutput {
 public:
  // Empty Constructor - the output stream must be provided with begin()
  OggContainerOutput() = default;

  /// Defines the output Stream
  void setOutput(Print &print) { p_out = &print; }

  /// starts the processing using the actual AudioInfo; creates the oggz
  /// writer on first call and emits the header (b_o_s) packet
  virtual bool begin() override {
    TRACED();
    assert(cfg.channels != 0);
    assert(cfg.sample_rate != 0);
    is_open = true;
    if (p_oggz == nullptr) {
      p_oggz = oggz_new(OGGZ_WRITE | OGGZ_NONSTRICT | OGGZ_AUTO);
      serialno = oggz_serialno_new(p_oggz);
      oggz_io_set_write(p_oggz, ogg_io_write, this);
      packetno = 0;
      granulepos = 0;

      if (!writeHeader()) {
        is_open = false;
        LOGE("writeHeader");
      }
    }
    return is_open;
  }

  /// stops the processing: emits the footer (e_o_s) packet and closes oggz
  void end() override {
    TRACED();

    writeFooter();

    is_open = false;
    oggz_close(p_oggz);
    p_oggz = nullptr;
  }

  /// Writes raw data to be encoded and packaged as one ogg packet
  /// @return len on success, 0 when the packet could not be fed to oggz
  virtual size_t write(const uint8_t *data, size_t len) override {
    if (data == nullptr) return 0;
    LOGD("OggContainerOutput::write: %d", (int)len);
    assert(cfg.channels != 0);

    // encode the data
    op.packet = (uint8_t *)data;
    op.bytes = len;
    if (op.bytes > 0) {
      // NOTE(review): divides by cfg.bits_per_sample / 8 — a bits_per_sample
      // of 0 (not asserted above) would divide by zero; confirm callers
      // always provide a valid AudioInfo
      int bytes_per_sample = cfg.bits_per_sample / 8;
      granulepos += op.bytes / bytes_per_sample;  // sample
      op.granulepos = granulepos;
      op.b_o_s = false;
      op.e_o_s = false;
      op.packetno = packetno++;
      is_audio = true;
      if (!writePacket(op, OGGZ_FLUSH_AFTER)) {
        return 0;
      }
    }
    // trigger pysical write (drains oggz into ogg_io_write)
    while ((oggz_write(p_oggz, len)) > 0)
      ;

    return len;
  }
  bool isOpen() { return is_open; }

 protected:
  Print *p_out = nullptr;      // final output stream, set via setOutput()
  bool is_open = false;        // true between successful begin() and end()
  OGGZ *p_oggz = nullptr;      // oggz writer handle
  ogg_packet op;               // reused packet struct for audio/footer
  ogg_packet oh;               // reused packet struct for the header
  size_t granulepos = 0;       // running granule position (in samples)
  size_t packetno = 0;         // running packet number
  long serialno = -1;          // ogg stream serial number
  bool is_audio = false;       // true while an audio packet is in flight

  /// Feeds one packet to oggz; OGGZ_ERR_OUT_OF_MEMORY is tolerated because
  /// oggz buffers are drained by the oggz_write loop afterwards
  virtual bool writePacket(ogg_packet &op, int flag = 0) {
    LOGD("writePacket: %d", (int)op.bytes);
    long result = oggz_write_feed(p_oggz, &op, serialno, flag, NULL);
    if (result < 0 && result != OGGZ_ERR_OUT_OF_MEMORY) {
      LOGE("oggz_write_feed: %d", (int)result);
      return false;
    }
    return true;
  }

  /// Writes the begin-of-stream packet carrying the serialized AudioInfo
  virtual bool writeHeader() {
    TRACED();
    oh.packet = (uint8_t *)&cfg;
    oh.bytes = sizeof(AudioInfo);
    oh.granulepos = 0;
    oh.packetno = packetno++;
    oh.b_o_s = true;
    oh.e_o_s = false;
    is_audio = false;
    return writePacket(oh);
  }

  /// Writes the empty end-of-stream packet
  virtual bool writeFooter() {
    TRACED();
    op.packet = (uint8_t *)nullptr;
    op.bytes = 0;
    op.granulepos = granulepos;
    op.packetno = packetno++;
    op.b_o_s = false;
    op.e_o_s = true;
    is_audio = false;
    return writePacket(op, OGGZ_FLUSH_AFTER);
  }

  // Final Stream Callback: oggz hands us serialized pages which we forward
  // to the configured output stream
  static size_t ogg_io_write(void *user_handle, void *buf, size_t n) {
    LOGD("ogg_io_write: %d", (int)n);
    OggContainerOutput *self = (OggContainerOutput *)user_handle;
    if (self == nullptr) {
      LOGE("self is null");
      return 0;
    }
    // self->out.write((uint8_t *)buf, n);
    // NOTE(review): p_out is not null-checked here — presumably writeData
    // handles a null target; verify before relying on it
    writeData<uint8_t>(self->p_out, (uint8_t *)buf, n);
    // 0 = continue
    return 0;
  }
};
|
||||
|
||||
/**
|
||||
* @brief Encoder for Ogg Container. Encodes a packet for an Ogg
|
||||
* container. The Ogg begin segment contains the AudioInfo structure. You can
|
||||
* subclass ond overwrite the writeHeader() method to implement your own header
|
||||
* logic. When an optional encoder is specified in the constructor we package
|
||||
* the encoded data.
|
||||
* Dependency: https://github.com/pschatzmann/arduino-libopus
|
||||
* @ingroup codecs
|
||||
* @ingroup encoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OggContainerEncoder : public AudioEncoder {
|
||||
public:
|
||||
// Empty Constructor - the output stream must be provided with begin()
|
||||
OggContainerEncoder() = default;
|
||||
|
||||
OggContainerEncoder(AudioEncoder *encoder) { setEncoder(encoder); }
|
||||
|
||||
OggContainerEncoder(AudioEncoder &encoder) { setEncoder(&encoder); }
|
||||
|
||||
/// Defines the output Stream
|
||||
void setOutput(Print &print) override { p_ogg->setOutput(print); }
|
||||
|
||||
/// Provides "audio/pcm"
|
||||
const char *mime() override { return mime_pcm; }
|
||||
|
||||
/// We actually do nothing with this
|
||||
virtual void setAudioInfo(AudioInfo info) override {
|
||||
AudioEncoder::setAudioInfo(info);
|
||||
p_ogg->setAudioInfo(info);
|
||||
if (p_codec != nullptr) p_codec->setAudioInfo(info);
|
||||
}
|
||||
|
||||
virtual bool begin(AudioInfo from) override {
|
||||
setAudioInfo(from);
|
||||
return begin();
|
||||
}
|
||||
|
||||
/// starts the processing using the actual AudioInfo
|
||||
virtual bool begin() override {
|
||||
TRACED();
|
||||
p_ogg->begin();
|
||||
if (p_codec==nullptr) return false;
|
||||
p_codec->setOutput(*p_ogg);
|
||||
return p_codec->begin(p_ogg->audioInfo());
|
||||
}
|
||||
|
||||
/// stops the processing
|
||||
void end() override {
|
||||
TRACED();
|
||||
if (p_codec != nullptr) p_codec->end();
|
||||
p_ogg->end();
|
||||
}
|
||||
|
||||
/// Writes raw data to be encoded and packaged
|
||||
virtual size_t write(const uint8_t *data, size_t len) override {
|
||||
if (!p_ogg->isOpen() || data == nullptr) return 0;
|
||||
LOGD("OggContainerEncoder::write: %d", (int)len);
|
||||
size_t result = 0;
|
||||
if (p_codec == nullptr) {
|
||||
result = p_ogg->write((const uint8_t *)data, len);
|
||||
} else {
|
||||
result = p_codec->write(data, len);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
operator bool() override { return p_ogg->isOpen(); }
|
||||
|
||||
bool isOpen() { return p_ogg->isOpen(); }
|
||||
|
||||
protected:
|
||||
AudioEncoder *p_codec = nullptr;
|
||||
OggContainerOutput ogg;
|
||||
OggContainerOutput *p_ogg = &ogg;
|
||||
|
||||
void setEncoder(AudioEncoder *enc) { p_codec = enc; }
|
||||
|
||||
/// Replace the ogg output class
|
||||
void setOggOutput(OggContainerOutput *out) { p_ogg = out; }
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/CodecADTS.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief AAC header parser to check if the data is a valid ADTS aac which
|
||||
* can extract some relevant audio information.
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class HeaderParserAAC {
|
||||
public:
|
||||
/// parses the header string and returns true if this is a valid aac adts
|
||||
/// stream
|
||||
bool isValid(const uint8_t* data, int len) {
|
||||
if (len < 7) return false;
|
||||
parser.begin();
|
||||
// regular validation
|
||||
if (!parser.parse((uint8_t*)data)) return false;
|
||||
// check if we have a valid 2nd frame
|
||||
if (len > getFrameLength()) {
|
||||
int pos = findSyncWord(data, len, getFrameLength());
|
||||
if (pos == -1) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int getSampleRate() { return parser.getSampleRate(); }
|
||||
|
||||
uint8_t getChannels() { return parser.data().channel_cfg; }
|
||||
|
||||
/// Determines the frame length
|
||||
int getFrameLength() { return parser.getFrameLength(); }
|
||||
|
||||
/// Finds the mp3/aac sync word
|
||||
int findSyncWord(const uint8_t* buf, int nBytes, int start = 0) {
|
||||
return parser.findSyncWord(buf, nBytes, start);
|
||||
}
|
||||
|
||||
ADTSParser::ADTSHeader getHeader() { return parser.data(); }
|
||||
|
||||
protected:
|
||||
ADTSParser parser;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,722 @@
|
||||
#pragma once
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief MP3 header parser that processes MP3 data incrementally and
|
||||
* extracts complete MP3 frames. Can validate MP3 data and extract audio
|
||||
* information. When used with a Print output, it splits incoming data into
|
||||
* complete MP3 frames and writes them to the output stream.
|
||||
*
|
||||
* Features:
|
||||
* - Incremental processing of MP3 data in small chunks
|
||||
* - Frame synchronization and validation
|
||||
* - Extraction of audio information (sample rate, bit rate, etc.)
|
||||
* - Output of complete MP3 frames only
|
||||
* - Support for all MPEG versions (1, 2, 2.5) and layers
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class HeaderParserMP3 {
|
||||
/// @brief MPEG audio frame header fields parsed from 4 serialized bytes
|
||||
struct FrameHeader {
|
||||
static const unsigned int SERIALIZED_SIZE = 4;
|
||||
|
||||
enum class MPEGVersionID : unsigned {
|
||||
MPEG_2_5 = 0b00,
|
||||
INVALID = 0b01, // reserved
|
||||
MPEG_2 = 0b10,
|
||||
MPEG_1 = 0b11,
|
||||
};
|
||||
|
||||
enum class LayerID : unsigned {
|
||||
INVALID = 0b00, // reserved
|
||||
LAYER_3 = 0b01,
|
||||
LAYER_2 = 0b10,
|
||||
LAYER_1 = 0b11,
|
||||
};
|
||||
|
||||
enum class ChannelModeID : unsigned {
|
||||
STEREO = 0b00,
|
||||
JOINT = 0b01, // joint stereo
|
||||
DUAL = 0b10, // dual channel (2 mono channels)
|
||||
SINGLE = 0b11, // single channel (mono)
|
||||
};
|
||||
|
||||
enum class EmphasisID : unsigned {
|
||||
NONE = 0b00,
|
||||
MS_50_15 = 0b01,
|
||||
INVALID = 0b10,
|
||||
CCIT_J17 = 0b11,
|
||||
};
|
||||
|
||||
enum SpecialBitrate { INVALID_BITRATE = -8000, ANY = 0 };
|
||||
enum SpecialSampleRate { RESERVED = 0 };
|
||||
|
||||
// Parsed fields
|
||||
MPEGVersionID audioVersion = MPEGVersionID::INVALID;
|
||||
LayerID layer = LayerID::INVALID;
|
||||
bool protection = false;
|
||||
uint8_t bitrateIndex = 0; // 0..15
|
||||
uint8_t sampleRateIndex = 0; // 0..3
|
||||
bool padding = false;
|
||||
bool isPrivate = false;
|
||||
ChannelModeID channelMode = ChannelModeID::STEREO;
|
||||
uint8_t extensionMode = 0; // 0..3
|
||||
bool copyright = false;
|
||||
bool original = false;
|
||||
EmphasisID emphasis = EmphasisID::NONE;
|
||||
|
||||
// Decode 4 bytes into the fields above. Returns false if sync invalid.
|
||||
static bool decode(const uint8_t* b, FrameHeader& out) {
|
||||
if (b == nullptr) return false;
|
||||
if (!(b[0] == 0xFF && (b[1] & 0xE0) == 0xE0))
|
||||
return false; // 11-bit sync
|
||||
|
||||
uint8_t b1 = b[1];
|
||||
uint8_t b2 = b[2];
|
||||
uint8_t b3 = b[3];
|
||||
|
||||
out.audioVersion = static_cast<MPEGVersionID>((b1 >> 3) & 0x03);
|
||||
out.layer = static_cast<LayerID>((b1 >> 1) & 0x03);
|
||||
out.protection = !(b1 & 0x01); // 0 means protected (CRC present)
|
||||
|
||||
out.bitrateIndex = (b2 >> 4) & 0x0F;
|
||||
out.sampleRateIndex = (b2 >> 2) & 0x03;
|
||||
out.padding = (b2 >> 1) & 0x01;
|
||||
out.isPrivate = (b2 & 0x01) != 0;
|
||||
|
||||
out.channelMode = static_cast<ChannelModeID>((b3 >> 6) & 0x03);
|
||||
out.extensionMode = (b3 >> 4) & 0x03;
|
||||
out.copyright = (b3 >> 3) & 0x01;
|
||||
out.original = (b3 >> 2) & 0x01;
|
||||
out.emphasis = static_cast<EmphasisID>(b3 & 0x03);
|
||||
return true;
|
||||
}
|
||||
|
||||
signed int getBitRate() const {
|
||||
// version, layer, bit index
|
||||
static const signed char rateTable[4][4][16] = {
|
||||
// version[00] = MPEG_2_5
|
||||
{
|
||||
// layer[00] = INVALID
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
// layer[01] = LAYER_3
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
|
||||
// layer[10] = LAYER_2
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
|
||||
// layer[11] = LAYER_1
|
||||
{0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
|
||||
},
|
||||
|
||||
// version[01] = INVALID
|
||||
{
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
},
|
||||
|
||||
// version[10] = MPEG_2
|
||||
{
|
||||
// layer[00] = INVALID
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
// layer[01] = LAYER_3
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
|
||||
// layer[10] = LAYER_2
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 18, 20, -1},
|
||||
// layer[11] = LAYER_1
|
||||
{0, 4, 6, 7, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, -1},
|
||||
},
|
||||
|
||||
// version[11] = MPEG_1
|
||||
{
|
||||
// layer[00] = INVALID
|
||||
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
|
||||
// layer[01] = LAYER_3
|
||||
{0, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, -1},
|
||||
// layer[10] = LAYER_2
|
||||
{0, 4, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, -1},
|
||||
// layer[11] = LAYER_1
|
||||
{0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, -1},
|
||||
},
|
||||
};
|
||||
signed char rate_byte =
|
||||
rateTable[(int)audioVersion][(int)layer][(int)bitrateIndex];
|
||||
if (rate_byte == -1) {
|
||||
LOGE("Unsupported bitrate");
|
||||
return 0;
|
||||
}
|
||||
return rate_byte * 8000;
|
||||
}
|
||||
|
||||
unsigned short getSampleRate() const {
|
||||
// version, sample rate index
|
||||
static const unsigned short rateTable[4][4] = {
|
||||
// version[00] = MPEG_2_5
|
||||
{11025, 12000, 8000, 0},
|
||||
// version[01] = INVALID
|
||||
{0, 0, 0, 0},
|
||||
// version[10] = MPEG_2
|
||||
{22050, 24000, 16000, 0},
|
||||
// version[11] = MPEG_1
|
||||
{44100, 48000, 32000, 0},
|
||||
};
|
||||
|
||||
return rateTable[(int)audioVersion][(int)sampleRateIndex];
|
||||
}
|
||||
|
||||
int getFrameLength() const {
|
||||
int sample_rate = getSampleRate();
|
||||
if (sample_rate == 0) return 0;
|
||||
int value =
|
||||
(audioVersion == FrameHeader::MPEGVersionID::MPEG_1) ? 144 : 72;
|
||||
return int((value * getBitRate() / sample_rate) + (padding ? 1 : 0));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
/// Default constructor
|
||||
HeaderParserMP3() = default;
|
||||
|
||||
/// Constructor for write support
|
||||
HeaderParserMP3(Print& output, int bufferSize = 2048)
|
||||
: p_output(&output), buffer_size(bufferSize) {}
|
||||
|
||||
void setOutput(Print& output) { p_output = &output; }
|
||||
|
||||
void resize(int size) { buffer_size = size; }
|
||||
|
||||
/// split up the data into mp3 segements and write to output
|
||||
size_t write(const uint8_t* data, size_t len) {
|
||||
if (buffer.size() < buffer_size) buffer.resize(buffer_size);
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
buffer.write(data[i]);
|
||||
if (buffer.isFull()) {
|
||||
while (processBuffer());
|
||||
}
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
void flush() {
|
||||
if (p_output == nullptr) return;
|
||||
while (processBuffer());
|
||||
}
|
||||
|
||||
/// Returns true if a valid frame has been detected
|
||||
bool isValid() { return last_frame_size > 0; }
|
||||
|
||||
/// parses the header string and returns true if this is a valid mp3 file
|
||||
bool isValid(const uint8_t* data, int len) {
|
||||
if (data == nullptr || len < 10) {
|
||||
LOGE("Invalid input data or too small");
|
||||
return false;
|
||||
}
|
||||
|
||||
header = FrameHeader{};
|
||||
int valid_frames_found = 0;
|
||||
int consecutive_frames = 0;
|
||||
const int MIN_FRAMES_TO_VALIDATE =
|
||||
3; // Require at least 3 consecutive valid frames
|
||||
const int MAX_SEARCH_DISTANCE =
|
||||
8192; // Limit search to prevent endless loops
|
||||
|
||||
// Check for ID3v2 tag at beginning
|
||||
if (len >= 10 && memcmp(data, "ID3", 3) == 0) {
|
||||
LOGI("ID3v2 tag found");
|
||||
// Skip ID3v2 tag to find actual audio data
|
||||
int id3_size = ((data[6] & 0x7F) << 21) | ((data[7] & 0x7F) << 14) |
|
||||
((data[8] & 0x7F) << 7) | (data[9] & 0x7F);
|
||||
int audio_start = 10 + id3_size;
|
||||
if (audio_start < len) {
|
||||
return isValid(data + audio_start, len - audio_start);
|
||||
}
|
||||
return true; // Valid ID3 tag, assume MP3
|
||||
}
|
||||
|
||||
// Look for first frame sync
|
||||
int sync_pos = seekFrameSync(data, min(len, MAX_SEARCH_DISTANCE));
|
||||
if (sync_pos == -1) {
|
||||
LOGE("No frame sync found in first %d bytes", MAX_SEARCH_DISTANCE);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Quick check for VBR headers (Xing/Info/VBRI)
|
||||
if (contains(data + sync_pos, "Xing", len - sync_pos) ||
|
||||
contains(data + sync_pos, "Info", len - sync_pos) ||
|
||||
contains(data + sync_pos, "VBRI", len - sync_pos)) {
|
||||
LOGI("VBR header found (Xing/Info/VBRI)");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Validate multiple consecutive frames for higher confidence
|
||||
int current_pos = sync_pos;
|
||||
FrameHeader first_header;
|
||||
bool first_header_set = false;
|
||||
|
||||
while (current_pos < len &&
|
||||
(current_pos - sync_pos) < MAX_SEARCH_DISTANCE) {
|
||||
int len_available = len - current_pos;
|
||||
|
||||
// Need at least header size
|
||||
if (len_available < (int)FrameHeader::SERIALIZED_SIZE) {
|
||||
LOGD("Not enough data for header at position %d", current_pos);
|
||||
break;
|
||||
}
|
||||
|
||||
// Read and validate frame header
|
||||
FrameHeader temp_header;
|
||||
if (!FrameHeader::decode(data + current_pos, temp_header) ||
|
||||
validateFrameHeader(temp_header) != FrameReason::VALID) {
|
||||
LOGD("Invalid frame header at position %d", current_pos);
|
||||
consecutive_frames = 0;
|
||||
// Look for next sync
|
||||
int next_sync_off =
|
||||
seekFrameSync(data + current_pos + 1, len - current_pos - 1);
|
||||
if (next_sync_off == -1) break;
|
||||
current_pos = current_pos + 1 + next_sync_off; // Adjust for offset
|
||||
continue;
|
||||
}
|
||||
|
||||
// Calculate frame length
|
||||
int frame_len = temp_header.getFrameLength();
|
||||
if (frame_len <= 0 || frame_len > 4096) {
|
||||
LOGD("Invalid frame length %d at position %d", frame_len, current_pos);
|
||||
consecutive_frames = 0;
|
||||
current_pos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// For first valid frame, store header for consistency checking
|
||||
if (!first_header_set) {
|
||||
first_header = temp_header;
|
||||
first_header_set = true;
|
||||
header = temp_header; // Store for external access
|
||||
|
||||
// For small buffers, do additional single-frame validation
|
||||
if (len < 1024) {
|
||||
// Verify this looks like a reasonable MP3 frame
|
||||
if (temp_header.getSampleRate() == 0 ||
|
||||
temp_header.getBitRate() <= 0) {
|
||||
LOGD("Invalid audio parameters in frame at position %d",
|
||||
current_pos);
|
||||
first_header_set = false;
|
||||
consecutive_frames = 0;
|
||||
current_pos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if frame length is reasonable for the given bitrate
|
||||
int expected_frame_size =
|
||||
(temp_header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1)
|
||||
? (144 * temp_header.getBitRate() /
|
||||
temp_header.getSampleRate())
|
||||
: (72 * temp_header.getBitRate() /
|
||||
temp_header.getSampleRate());
|
||||
if (abs(frame_len - expected_frame_size) >
|
||||
expected_frame_size * 0.1) { // Allow 10% variance
|
||||
LOGD("Frame length %d doesn't match expected %d for bitrate",
|
||||
frame_len, expected_frame_size);
|
||||
first_header_set = false;
|
||||
consecutive_frames = 0;
|
||||
current_pos++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Check consistency with first frame (sample rate, version, layer
|
||||
// should match in CBR)
|
||||
if (temp_header.audioVersion != first_header.audioVersion ||
|
||||
temp_header.layer != first_header.layer ||
|
||||
temp_header.getSampleRate() != first_header.getSampleRate()) {
|
||||
LOGD("Frame parameters inconsistent at position %d", current_pos);
|
||||
// This might be VBR, but continue validation
|
||||
}
|
||||
}
|
||||
|
||||
valid_frames_found++;
|
||||
consecutive_frames++;
|
||||
|
||||
// Check if we have enough data for the complete frame
|
||||
if (len_available < frame_len) {
|
||||
LOGD("Incomplete frame at position %d (need %d, have %d)", current_pos,
|
||||
frame_len, len_available);
|
||||
break;
|
||||
}
|
||||
|
||||
// Look for next frame sync at expected position
|
||||
int next_pos = current_pos + frame_len;
|
||||
if (next_pos + 1 < len) {
|
||||
if (seekFrameSync(data + next_pos, min(4, len - next_pos)) == 0) {
|
||||
// Found sync at expected position
|
||||
current_pos = next_pos;
|
||||
continue;
|
||||
} else {
|
||||
LOGD("No sync at expected position %d", next_pos);
|
||||
consecutive_frames = 0;
|
||||
}
|
||||
} else {
|
||||
// End of data reached
|
||||
break;
|
||||
}
|
||||
|
||||
// If we lost sync, search for next frame
|
||||
int next_sync =
|
||||
seekFrameSync(data + current_pos + 1, len - current_pos - 1);
|
||||
if (next_sync == -1) break;
|
||||
current_pos = current_pos + 1 + next_sync;
|
||||
}
|
||||
|
||||
// Adaptive validation criteria based on available data
|
||||
bool is_valid_mp3 = false;
|
||||
|
||||
if (len >= 2048) {
|
||||
// For larger buffers, require strict consecutive frame validation
|
||||
is_valid_mp3 = (consecutive_frames >= MIN_FRAMES_TO_VALIDATE);
|
||||
} else if (len >= 1024) {
|
||||
// For 1KB+ buffers, require at least 2 consecutive frames OR 3 total
|
||||
// valid frames
|
||||
is_valid_mp3 = (consecutive_frames >= 2) ||
|
||||
(valid_frames_found >= MIN_FRAMES_TO_VALIDATE);
|
||||
} else {
|
||||
// For smaller buffers, be more lenient - 1 good frame with proper
|
||||
// validation
|
||||
is_valid_mp3 = (valid_frames_found >= 1) && first_header_set;
|
||||
}
|
||||
|
||||
if (is_valid_mp3 && first_header_set) {
|
||||
LOGI("-------------------");
|
||||
LOGI("MP3 validation: VALID");
|
||||
LOGI("Data size: %d bytes", len);
|
||||
LOGI("Valid frames found: %d", valid_frames_found);
|
||||
LOGI("Consecutive frames: %d", consecutive_frames);
|
||||
if (len >= 2048) {
|
||||
LOGI("Validation mode: STRICT (large buffer)");
|
||||
} else if (len >= 1024) {
|
||||
LOGI("Validation mode: MODERATE (1KB+ buffer)");
|
||||
} else {
|
||||
LOGI("Validation mode: LENIENT (small buffer)");
|
||||
}
|
||||
LOGI("Frame size: %d", getFrameLength());
|
||||
LOGI("Sample rate: %u", getSampleRate());
|
||||
LOGI("Bit rate: %d", getBitRate());
|
||||
LOGI("Padding: %d", getFrameHeader().padding);
|
||||
LOGI("Layer: %s (0x%x)", getLayerStr(), (int)getFrameHeader().layer);
|
||||
LOGI("Version: %s (0x%x)", getVersionStr(),
|
||||
(int)getFrameHeader().audioVersion);
|
||||
LOGI("-------------------");
|
||||
} else {
|
||||
LOGI("MP3 validation: INVALID (frames: %d, consecutive: %d, size: %d)",
|
||||
valid_frames_found, consecutive_frames, len);
|
||||
}
|
||||
|
||||
return is_valid_mp3;
|
||||
}
|
||||
|
||||
/// Sample rate from mp3 header
|
||||
uint16_t getSampleRate() const {
|
||||
return frame_header_valid ? header.getSampleRate() : 0;
|
||||
}
|
||||
|
||||
/// Bit rate from mp3 header
|
||||
int getBitRate() const {
|
||||
return frame_header_valid ? header.getBitRate() : 0;
|
||||
}
|
||||
|
||||
/// Number of channels from mp3 header
|
||||
int getChannels() const {
|
||||
if (!frame_header_valid) return 0;
|
||||
// SINGLE = mono (1 channel), all others = stereo (2 channels)
|
||||
return (header.channelMode == FrameHeader::ChannelModeID::SINGLE) ? 1 : 2;
|
||||
}
|
||||
|
||||
/// Frame length from mp3 header
|
||||
int getFrameLength() {
|
||||
return frame_header_valid ? header.getFrameLength() : 0;
|
||||
}
|
||||
|
||||
/// Provides the estimated playing time in seconds based on the bitrate of the
|
||||
/// first segment
|
||||
size_t getPlayingTime(size_t fileSizeBytes) {
|
||||
int bitrate = getBitRate();
|
||||
if (bitrate == 0) return 0;
|
||||
return fileSizeBytes / bitrate;
|
||||
}
|
||||
|
||||
/// Provides a string representation of the MPEG version
|
||||
const char* getVersionStr() const {
|
||||
return header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1 ? "1"
|
||||
: header.audioVersion == FrameHeader::MPEGVersionID::MPEG_2 ? "2"
|
||||
: header.audioVersion == FrameHeader::MPEGVersionID::MPEG_2_5
|
||||
? "2.5"
|
||||
: "INVALID";
|
||||
}
|
||||
|
||||
/// Provides a string representation of the MPEG layer
|
||||
const char* getLayerStr() const {
|
||||
return header.layer == FrameHeader::LayerID::LAYER_1 ? "1"
|
||||
: header.layer == FrameHeader::LayerID::LAYER_2 ? "2"
|
||||
: header.layer == FrameHeader::LayerID::LAYER_3 ? "3"
|
||||
: "INVALID";
|
||||
}
|
||||
|
||||
/// number of samples per mp3 frame
|
||||
int getSamplesPerFrame() {
|
||||
if (header.layer != FrameHeader::LayerID::LAYER_3) return 0;
|
||||
// samples for layer 3 are fixed
|
||||
return header.audioVersion == FrameHeader::MPEGVersionID::MPEG_1 ? 1152
|
||||
: 576;
|
||||
}
|
||||
|
||||
/// playing time per frame in ms
|
||||
size_t getTimePerFrameMs() {
|
||||
int sample_rate = getSampleRate();
|
||||
if (sample_rate == 0) return 0;
|
||||
return (1000 * getSamplesPerFrame()) / sample_rate;
|
||||
}
|
||||
|
||||
/// frame rate in Hz (frames per second)
|
||||
size_t getFrameRateHz() {
|
||||
int time_per_frame = getTimePerFrameMs();
|
||||
if (time_per_frame == 0) return 0;
|
||||
return 1000 / time_per_frame;
|
||||
}
|
||||
|
||||
// provides the parsed MP3 frame header
|
||||
FrameHeader getFrameHeader() {
|
||||
return frame_header_valid ? header : FrameHeader{};
|
||||
}
|
||||
|
||||
/// Returns true if we have parsed at least one valid frame
|
||||
bool hasValidFrame() const { return frame_header_valid; }
|
||||
|
||||
/// Clears internal buffer and resets state
|
||||
void reset() {
|
||||
buffer.reset();
|
||||
frame_header_valid = false;
|
||||
header = FrameHeader{};
|
||||
}
|
||||
|
||||
/// Finds the mp3/aac sync word in a buffer.
/// @param buf    Buffer to scan.
/// @param nBytes Number of valid bytes in buf.
/// @param synch  Mask/value the first byte must fully match (default 0xFF).
/// @param syncl  Mask/value the second byte must fully match (default 0xF0).
/// @return Offset of the first sync word, or -1 if none was found.
int findSyncWord(const uint8_t* buf, size_t nBytes, uint8_t synch = 0xFF,
                 uint8_t syncl = 0xF0) {
  // Guard: nBytes is unsigned, so the old loop bound "nBytes - 1" wrapped
  // to a huge value for nBytes == 0 and read out of bounds.
  if (buf == nullptr || nBytes < 2) return -1;
  for (size_t i = 0; i + 1 < nBytes; i++) {
    if ((buf[i] & synch) == synch && (buf[i + 1] & syncl) == syncl)
      return static_cast<int>(i);
  }
  return -1;
}
|
||||
|
||||
protected:
|
||||
FrameHeader header;
|
||||
Print* p_output = nullptr;
|
||||
SingleBuffer<uint8_t> buffer{0}; // Max MP3 frame ~4KB + reserves
|
||||
bool frame_header_valid = false;
|
||||
size_t buffer_size = 0;
|
||||
size_t last_frame_size = 0;
|
||||
|
||||
/// Processes the internal buffer to extract complete mp3 frames
|
||||
bool processBuffer() {
|
||||
bool progress = false;
|
||||
size_t available = buffer.available();
|
||||
|
||||
while (available >=
|
||||
FrameHeader::SERIALIZED_SIZE) { // Need 4 bytes for header
|
||||
// Get direct access to buffer data
|
||||
uint8_t* temp_data = buffer.data();
|
||||
|
||||
// Find frame sync
|
||||
int sync_pos = seekFrameSync(temp_data, available);
|
||||
if (sync_pos == -1) {
|
||||
// No sync found, keep last few bytes in case sync spans buffer boundary
|
||||
size_t to_remove = (available > 3) ? available - 3 : 0;
|
||||
if (to_remove > 0) {
|
||||
buffer.clearArray(to_remove);
|
||||
}
|
||||
// Recompute available after mutation
|
||||
available = buffer.available();
|
||||
break;
|
||||
}
|
||||
|
||||
// Remove any data before sync
|
||||
if (sync_pos > 0) {
|
||||
buffer.clearArray(sync_pos);
|
||||
progress = true;
|
||||
// Recompute available after mutation
|
||||
available = buffer.available();
|
||||
continue; // Check again from new position
|
||||
}
|
||||
|
||||
// We have sync at position 0, try to read header
|
||||
if (available < FrameHeader::SERIALIZED_SIZE) {
|
||||
break; // Need more data for complete header
|
||||
}
|
||||
|
||||
// Read and validate frame header
|
||||
FrameHeader temp_header;
|
||||
if (!FrameHeader::decode(temp_data, temp_header) ||
|
||||
validateFrameHeader(temp_header) != FrameReason::VALID) {
|
||||
// Invalid header, skip this sync and look for next
|
||||
buffer.clearArray(1);
|
||||
progress = true;
|
||||
available = buffer.available();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Calculate frame length
|
||||
int frame_len = temp_header.getFrameLength();
|
||||
if (frame_len <= 0 ||
|
||||
frame_len > buffer_size) { // Sanity check on frame size
|
||||
// Invalid frame length, skip this sync
|
||||
buffer.clearArray(1);
|
||||
progress = true;
|
||||
available = buffer.available();
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if we have complete frame
|
||||
if (available < frame_len) {
|
||||
break; // Need more data for complete frame
|
||||
}
|
||||
|
||||
// Verify next frame sync if we have enough data
|
||||
if (available >= frame_len + 2) {
|
||||
if (seekFrameSync(temp_data + frame_len, 2) != 0) {
|
||||
// No sync at expected position, this might not be a valid frame
|
||||
buffer.clearArray(1);
|
||||
progress = true;
|
||||
available = buffer.available();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// We have a complete valid frame, write it to output
|
||||
if (p_output != nullptr) {
|
||||
size_t written = p_output->write(temp_data, frame_len);
|
||||
if (written != frame_len) {
|
||||
// Output error, we still need to remove the frame from buffer
|
||||
LOGE("Failed to write complete frame");
|
||||
}
|
||||
}
|
||||
|
||||
// Update header for external access
|
||||
last_frame_size = frame_len;
|
||||
header = temp_header;
|
||||
frame_header_valid = true;
|
||||
|
||||
// Remove processed frame from buffer
|
||||
buffer.clearArray(frame_len);
|
||||
available = buffer.available();
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool validate(const uint8_t* data, size_t len) {
|
||||
(void)data;
|
||||
(void)len;
|
||||
return FrameReason::VALID == validateFrameHeader(header);
|
||||
}
|
||||
|
||||
/// Checks whether the byte sequence 'toFind' occurs anywhere within the
/// first 'len' bytes of 'data'.
/// @param data   Buffer to search (may contain arbitrary binary data).
/// @param toFind Null-terminated pattern to look for.
/// @param len    Number of valid bytes in data.
/// @return true if the pattern was found.
bool contains(const uint8_t* data, const char* toFind, size_t len) {
  if (data == nullptr || toFind == nullptr || len == 0) return false;
  size_t find_str_len = strlen(toFind);
  // Guard against unsigned wrap-around: when len < find_str_len the old
  // expression "len - find_str_len" overflowed and scanned out of bounds.
  if (find_str_len == 0 || find_str_len > len) return false;
  // Use <= so a match ending exactly at the end of the buffer is found
  // (the old '<' bound missed it).
  for (size_t j = 0; j <= len - find_str_len; j++) {
    if (memcmp(data + j, toFind, find_str_len) == 0) return true;
  }
  return false;
}
|
||||
|
||||
/// Locates the MP3 frame sync pattern: a 0xFF byte followed by a byte whose
/// three most significant bits are set (11 consecutive sync bits in total).
/// @param str Buffer to scan.
/// @param len Number of valid bytes in str.
/// @return Index of the first byte of the sync pattern, or -1 if absent.
int seekFrameSync(const uint8_t* str, size_t len) {
  if (len < 2) return -1;
  const int last = static_cast<int>(len) - 1;
  for (int pos = 0; pos < last; ++pos) {
    const bool first_ok = (str[pos] == 0xFF);
    const bool second_ok = ((str[pos + 1] & 0xE0) == 0xE0);
    if (first_ok && second_ok) return pos;
  }
  return -1;
}
|
||||
|
||||
void readFrameHeader(const uint8_t* data) {
|
||||
if (!FrameHeader::decode(data, header)) return;
|
||||
LOGI("- sample rate: %u", getSampleRate());
|
||||
LOGI("- bit rate: %d", getBitRate());
|
||||
}
|
||||
|
||||
enum class FrameReason {
|
||||
VALID,
|
||||
INVALID_BITRATE_FOR_VERSION,
|
||||
INVALID_SAMPLERATE_FOR_VERSION,
|
||||
INVALID_MPEG_VERSION,
|
||||
INVALID_LAYER,
|
||||
INVALID_LAYER_II_BITRATE_AND_MODE,
|
||||
INVALID_EMPHASIS,
|
||||
INVALID_CRC,
|
||||
};
|
||||
|
||||
FrameReason validateFrameHeader(const FrameHeader& header) {
|
||||
if (header.audioVersion == FrameHeader::MPEGVersionID::INVALID) {
|
||||
LOGI("invalid mpeg version");
|
||||
return FrameReason::INVALID_MPEG_VERSION;
|
||||
}
|
||||
|
||||
if (header.layer == FrameHeader::LayerID::INVALID) {
|
||||
LOGI("invalid layer");
|
||||
return FrameReason::INVALID_LAYER;
|
||||
}
|
||||
|
||||
if (header.getBitRate() <= 0) {
|
||||
LOGI("invalid bitrate");
|
||||
return FrameReason::INVALID_BITRATE_FOR_VERSION;
|
||||
}
|
||||
|
||||
if (header.getSampleRate() ==
|
||||
(unsigned short)FrameHeader::SpecialSampleRate::RESERVED) {
|
||||
LOGI("invalid samplerate");
|
||||
return FrameReason::INVALID_SAMPLERATE_FOR_VERSION;
|
||||
}
|
||||
|
||||
// For Layer II there are some combinations of bitrate and mode which are
|
||||
// not allowed
|
||||
if (header.layer == FrameHeader::LayerID::LAYER_2) {
|
||||
if (header.channelMode == FrameHeader::ChannelModeID::SINGLE) {
|
||||
if (header.getBitRate() >= 224000) {
|
||||
LOGI("invalid bitrate >224000");
|
||||
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
|
||||
}
|
||||
} else {
|
||||
if (header.getBitRate() >= 32000 && header.getBitRate() <= 56000) {
|
||||
LOGI("invalid bitrate >32000");
|
||||
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
|
||||
}
|
||||
|
||||
if (header.getBitRate() == 80000) {
|
||||
LOGI("invalid bitrate >80000");
|
||||
return FrameReason::INVALID_LAYER_II_BITRATE_AND_MODE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (header.emphasis == FrameHeader::EmphasisID::INVALID) {
|
||||
LOGI("invalid Emphasis");
|
||||
return FrameReason::INVALID_EMPHASIS;
|
||||
}
|
||||
|
||||
return FrameReason::VALID;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,131 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/M4ACommonDemuxer.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief A simple M4A audio data demuxer which is providing
|
||||
* AAC, MP3 and ALAC frames.
|
||||
*/
|
||||
class M4AAudioDemuxer : public M4ACommonDemuxer {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor. Sets up parser callbacks.
|
||||
*/
|
||||
M4AAudioDemuxer() { setupParser(); }
|
||||
|
||||
/**
|
||||
* @brief Defines the callback that returns the audio frames.
|
||||
* @param cb Frame callback function.
|
||||
*/
|
||||
void setCallback(FrameCallback cb) override {
|
||||
sampleExtractor.setReference(ref);
|
||||
sampleExtractor.setCallback(cb);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initializes the demuxer and resets state.
|
||||
*/
|
||||
bool begin() {
|
||||
audio_config.codec = Codec::Unknown;
|
||||
audio_config.alacMagicCookie.clear();
|
||||
resize(default_size);
|
||||
|
||||
stsz_processed = false;
|
||||
stco_processed = false;
|
||||
|
||||
// When codec/sampleSizes/callback/ref change, update the extractor:
|
||||
parser.begin();
|
||||
sampleExtractor.begin();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Writes data to the demuxer for parsing.
|
||||
* @param data Pointer to input data.
|
||||
* @param len Length of input data.
|
||||
*/
|
||||
void write(const uint8_t* data, size_t len) { parser.write(data, len); }
|
||||
|
||||
/**
|
||||
* @brief Returns the available space for writing.
|
||||
* @return Number of bytes available for writing.
|
||||
*/
|
||||
int availableForWrite() { return parser.availableForWrite(); }
|
||||
|
||||
/**
|
||||
* @brief Returns the ALAC magic cookie (codec config).
|
||||
* @return Reference to the ALAC magic cookie vector.
|
||||
*/
|
||||
Vector<uint8_t>& getALACMagicCookie() { return audio_config.alacMagicCookie; }
|
||||
|
||||
/**
|
||||
* @brief Sets a reference pointer for callbacks.
|
||||
* @param ref Reference pointer.
|
||||
*/
|
||||
void setReference(void* ref) { this->ref = ref; }
|
||||
|
||||
void copyFrom(M4ACommonDemuxer& source) {
|
||||
audio_config = source.getM4AAudioConfig();
|
||||
}
|
||||
|
||||
protected:
|
||||
void* ref = nullptr; ///< Reference pointer for callbacks.
|
||||
|
||||
/**
|
||||
* @brief Setup all parser callbacks
|
||||
*/
|
||||
void setupParser() override {
|
||||
// global box data callback to get sizes
|
||||
parser.setReference(this);
|
||||
|
||||
// parsing for content of stsd (Sample Description Box)
|
||||
parser.setCallback("stsd", [](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioDemuxer*>(ref)->onStsd(box);
|
||||
});
|
||||
|
||||
// parsing for content of stsd (Sample Description Box)
|
||||
parser.setCallback("esds", [](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioDemuxer*>(ref)->onEsds(box);
|
||||
});
|
||||
parser.setCallback("mp4a", [](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioDemuxer*>(ref)->onMp4a(box);
|
||||
});
|
||||
parser.setCallback("alac", [](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioDemuxer*>(ref)->onAlac(box);
|
||||
});
|
||||
|
||||
// mdat
|
||||
parser.setCallback(
|
||||
"mdat",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
|
||||
// mdat must not be buffered
|
||||
LOGI("#%d Box: %s, size: %u of %u bytes", (unsigned) box.seq, box.type,(unsigned) box.available, (unsigned)box.size);
|
||||
if (box.seq == 0) self.sampleExtractor.setMaxSize(box.size);
|
||||
size_t written = self.sampleExtractor.write(box.data, box.available, box.is_complete);
|
||||
assert(written == box.available);
|
||||
},
|
||||
false); // 'false' prevents the generic callback from being executed
|
||||
|
||||
// stsz
|
||||
parser.setCallback(
|
||||
"stsz",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
|
||||
self.onStsz(box);
|
||||
},
|
||||
false); // 'false' prevents the generic callback from being executed
|
||||
|
||||
// parser.setCallback(
|
||||
// "stco",
|
||||
// [](MP4Parser::Box& box, void* ref) {
|
||||
// M4AAudioDemuxer& self = *static_cast<M4AAudioDemuxer*>(ref);
|
||||
// self.onStco(box);
|
||||
// },
|
||||
// false); // 'false' prevents the generic callback from being executed
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,340 @@
|
||||
#pragma once
|
||||
|
||||
#include <Arduino.h>
|
||||
#include <SD.h>
|
||||
|
||||
#include "M4AAudioDemuxer.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Demuxer for M4A/MP4 files to extract audio data using an Arduino File.
|
||||
* This class locates the mdat and stsz boxes using MP4Parser.
|
||||
*
|
||||
* It provides a copy() method to extract frames from the file by reading
|
||||
* sample sizes directly from the stsz box in the file. This class is quite
|
||||
* memory efficient because no table of sample sizes are kept in memory. It just
|
||||
* reads the sample sizes from the stsz box and uses the mdat offset to read the
|
||||
* sample data directly from the file.
|
||||
*
|
||||
* The result is written to the provided decoder or alternatively will be
|
||||
* provided via the frame_callback.
|
||||
*
|
||||
* @author Phil Schatzmann
|
||||
*/
|
||||
class M4AAudioFileDemuxer : public M4ACommonDemuxer {
|
||||
public:
|
||||
using M4ACommonDemuxer::Frame;
|
||||
using M4ACommonDemuxer::FrameCallback;
|
||||
|
||||
/**
|
||||
* @brief Default constructor. Sets up parser callbacks.
|
||||
*/
|
||||
M4AAudioFileDemuxer() { setupParser(); };
|
||||
|
||||
/**
|
||||
* @brief Constructor with decoder.
|
||||
* @param decoder Reference to MultiDecoder.
|
||||
*/
|
||||
M4AAudioFileDemuxer(MultiDecoder& decoder) : M4AAudioFileDemuxer() {
|
||||
setDecoder(decoder);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets the decoder to use for audio frames. Please note that
|
||||
* calls setCallback() to register the decoder callback.
|
||||
* @param decoder Reference to MultiDecoder.
|
||||
* @return true if set successfully.
|
||||
*/
|
||||
bool setDecoder(MultiDecoder& decoder) {
|
||||
this->p_decoder = &decoder;
|
||||
if (decoder.getOutput() == nullptr) {
|
||||
LOGE("No output defined for MultiDecoder");
|
||||
return false;
|
||||
}
|
||||
setCallback([&decoder](const Frame& frame, void* /*ref*/) {
|
||||
LOGI("Decoding frame: %s with %d bytes", frame.mime, (int)frame.size);
|
||||
if (!decoder.selectDecoder(frame.mime)) {
|
||||
LOGE("Failed to select decoder for %s", frame.mime);
|
||||
return;
|
||||
}
|
||||
decoder.write(frame.data, frame.size);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets the callback for extracted audio frames.
|
||||
* @param cb Frame callback function.
|
||||
*/
|
||||
void setCallback(FrameCallback cb) override { frame_callback = cb; }
|
||||
|
||||
/**
|
||||
* @brief Sets the size of the samples buffer (in bytes).
|
||||
* @param size Buffer size in bytes.
|
||||
*/
|
||||
void setSamplesBufferSize(int size) {
|
||||
stsz_bufsize = size / 4;
|
||||
stsz_buf.resize(stsz_bufsize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Open and parse the given file.
|
||||
* @param file Reference to an open Arduino File object.
|
||||
* @return true on success, false on failure.
|
||||
*/
|
||||
bool begin(File& file) {
|
||||
M4ACommonDemuxer::begin();
|
||||
this->p_file = &file;
|
||||
if (!file) return false;
|
||||
parser.begin();
|
||||
end();
|
||||
if (p_decoder) p_decoder->begin();
|
||||
if (!parseFile()) return false;
|
||||
if (!readStszHeader()) return false;
|
||||
if (!checkMdat()) return false;
|
||||
mdat_sample_pos = mdat_offset + mdat_pos;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief End demuxing and reset state.
|
||||
*/
|
||||
void end() {
|
||||
audio_config.codec = M4ACommonDemuxer::Codec::Unknown;
|
||||
audio_config.alacMagicCookie.clear();
|
||||
// resize(default_size);
|
||||
sample_index = 0;
|
||||
sample_count = 0;
|
||||
mdat_pos = 0;
|
||||
stsd_processed = false;
|
||||
mdat_offset = 0;
|
||||
mdat_size = 0;
|
||||
stsz_offset = 0;
|
||||
stsz_size = 0;
|
||||
mdat_pos = 0;
|
||||
fixed_sample_size = 0;
|
||||
}
|
||||
|
||||
/**
 * @brief Copies the next audio frame from the file using the sample size
 * table and mdat offset. Calls the frame callback if set.
 * @return true if a frame was copied and callback called, false if end of
 * samples or error.
 */
bool copy() {
  // nothing to do when no file is set or all samples are consumed
  if (!p_file || sample_index >= sample_count) return false;
  // NOTE: getNextSampleSize() also advances sample_index
  size_t currentSize = getNextSampleSize();
  if (currentSize == 0) return false;
  // position at the current sample inside the mdat payload
  if (!p_file->seek(mdat_sample_pos)) return false;
  if (buffer.size() < currentSize) buffer.resize(currentSize);
  size_t bytesRead = p_file->read(buffer.data(), currentSize);
  if (bytesRead != currentSize) return false;
  buffer.setWritePos(bytesRead);
  // hand the frame to the registered callback (adds ADTS header for AAC)
  executeCallback(currentSize, buffer);
  // advance the read position to the next sample
  mdat_sample_pos += currentSize;
  return true;
}
|
||||
|
||||
/// Returns true as long as there are samples to process.
|
||||
operator bool() { return sample_count > 0 && sample_index < sample_count; }
|
||||
|
||||
uint32_t sampleIndex() const { return sample_index; }
|
||||
|
||||
uint32_t size() const { return sample_count; }
|
||||
|
||||
uint32_t getMdatOffset() const { return mdat_offset; }
|
||||
|
||||
/**
|
||||
* @brief Provides the next sample size (= frame size) from the stsz box queue
|
||||
* @return stsz sample size in bytes.
|
||||
*/
|
||||
uint32_t getNextSampleSize() {
|
||||
assert(p_file != nullptr);
|
||||
if (sample_index >= sample_count) return 0;
|
||||
uint32_t currentSize = 0;
|
||||
if (fixed_sample_size) {
|
||||
currentSize = fixed_sample_size;
|
||||
} else {
|
||||
// if buffer is empty, fill it again
|
||||
if (stsz_buf.isEmpty()) {
|
||||
uint64_t pos = stsz_offset + 20 + sample_index * 4;
|
||||
if (!p_file->seek(pos)) return false;
|
||||
stsz_buf.clear();
|
||||
size_t read_bytes = p_file->read(
|
||||
reinterpret_cast<uint8_t*>(stsz_buf.data()), stsz_bufsize * 4);
|
||||
stsz_buf.setWritePos(read_bytes / 4);
|
||||
if (stsz_buf.isEmpty()) return 0;
|
||||
}
|
||||
// provide next size
|
||||
uint32_t val = 0;
|
||||
if (!stsz_buf.read(val)) return 0;
|
||||
currentSize = readU32(val);
|
||||
}
|
||||
sample_index++;
|
||||
return currentSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initializes the demuxer for reading sample sizes from the stsz box.
|
||||
*
|
||||
* This method sets the file pointer, resets the sample index, sets the total
|
||||
* sample count, and records the offset of the stsz box in the file. It is
|
||||
* typically called before reading sample sizes directly from the file,
|
||||
* ensuring the demuxer is properly positioned.
|
||||
*
|
||||
* @param filePtr Pointer to the open file.
|
||||
* @param sampleCount Total number of samples in the file.
|
||||
* @param stszOffset Offset of the stsz box in the file.
|
||||
*/
|
||||
|
||||
void beginSampleSizeAccess(File* filePtr, uint32_t sampleCount,
|
||||
uint32_t stszOffset) {
|
||||
p_file = filePtr;
|
||||
sample_index = 0;
|
||||
sample_count = sampleCount;
|
||||
stsz_offset = stszOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Parses the file and feeds data to the parser until we have
|
||||
* all the necessary data: 1) stsd box processed, 2) mdat offset found,
|
||||
* 3) stsz offset found.
|
||||
* Usually this method is not needed, but it comes in handy if you need
|
||||
* to process a file which is not in streaming format!
|
||||
* @param file Reference to the file to parse.
|
||||
*/
|
||||
bool parseFile() {
|
||||
uint8_t buffer[1024];
|
||||
p_file->seek(0);
|
||||
while (p_file->available()) {
|
||||
int to_read = min(sizeof(buffer), parser.availableForWrite());
|
||||
size_t len = p_file->read(buffer, to_read);
|
||||
parser.write(buffer, len);
|
||||
// stop if we have all the data
|
||||
if (stsd_processed && mdat_offset && stsz_offset) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
File* p_file = nullptr; ///< Pointer to the open file
|
||||
uint64_t mdat_offset = 0; ///< Offset of mdat box payload
|
||||
uint64_t mdat_size = 0; ///< Size of mdat box payload
|
||||
uint64_t stsz_offset = 0; ///< Offset of stsz box
|
||||
uint64_t stsz_size = 0; ///< Size of stsz box
|
||||
uint32_t sample_index = 0; ///< Current sample index
|
||||
uint64_t mdat_pos = 0; ///< Current position in mdat box
|
||||
SingleBuffer<uint8_t> buffer; ///< Buffer for sample data
|
||||
int stsz_bufsize = 256; ///< Number of sample sizes to buffer
|
||||
SingleBuffer<uint32_t> stsz_buf{
|
||||
stsz_bufsize}; ///< Buffer for stsz sample sizes
|
||||
uint32_t fixed_sample_size = 0; ///< Fixed sample size (if nonzero)
|
||||
MultiDecoder* p_decoder = nullptr; ///< Pointer to decoder
|
||||
uint64_t mdat_sample_pos = 0;
|
||||
/**
|
||||
* @brief Sets up the MP4 parser and registers box callbacks.
|
||||
*/
|
||||
void setupParser() override {
|
||||
parser.setReference(this);
|
||||
|
||||
// Callback for ESDS box (AAC config)
|
||||
parser.setCallback(
|
||||
"esds",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioFileDemuxer*>(ref)->onEsds(box);
|
||||
},
|
||||
false);
|
||||
|
||||
// Callback for MP4A box (AAC sample entry)
|
||||
parser.setCallback(
|
||||
"mp4a",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioFileDemuxer*>(ref)->onMp4a(box);
|
||||
},
|
||||
false);
|
||||
|
||||
// Callback for ALAC box (ALAC sample entry)
|
||||
parser.setCallback(
|
||||
"alac",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
static_cast<M4AAudioFileDemuxer*>(ref)->onAlac(box);
|
||||
},
|
||||
false);
|
||||
|
||||
// Callback for STSZ box (sample sizes)
|
||||
parser.setCallback(
|
||||
"stsz",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
auto* self = static_cast<M4AAudioFileDemuxer*>(ref);
|
||||
if (box.seq == 0) {
|
||||
self->stsz_offset = box.file_offset;
|
||||
self->stsz_size = box.size;
|
||||
}
|
||||
},
|
||||
false);
|
||||
|
||||
// Callback for MDAT box (media data)
|
||||
parser.setCallback(
|
||||
"mdat",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
auto* self = static_cast<M4AAudioFileDemuxer*>(ref);
|
||||
if (box.seq == 0) {
|
||||
self->mdat_offset = box.file_offset + 8; // skip box header
|
||||
self->mdat_size = box.size;
|
||||
}
|
||||
},
|
||||
false);
|
||||
|
||||
// Callback for STSD box (sample description)
|
||||
parser.setCallback(
|
||||
"stsd",
|
||||
[](MP4Parser::Box& box, void* ref) {
|
||||
auto* self = static_cast<M4AAudioFileDemuxer*>(ref);
|
||||
self->onStsd(box); // for aac and alac
|
||||
self->stsd_processed = true;
|
||||
},
|
||||
false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Executes the callback for a completed frame.
|
||||
* @param size Size of the frame.
|
||||
* @param buffer Buffer containing the frame data.
|
||||
*/
|
||||
void executeCallback(size_t size, SingleBuffer<uint8_t>& buffer) {
|
||||
Frame frame = sampleExtractor.getFrame(size, buffer);
|
||||
if (frame_callback)
|
||||
frame_callback(frame, nullptr);
|
||||
else
|
||||
LOGW("No frame callback defined");
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads the stsz header (sample count and fixed sample size) from
|
||||
* the file.
|
||||
* @return true if successful, false otherwise.
|
||||
*/
|
||||
bool readStszHeader() {
|
||||
if (!p_file || stsz_offset == 0) return false;
|
||||
uint8_t buffer[20];
|
||||
if (!p_file->seek(stsz_offset)) return false;
|
||||
if (p_file->read(buffer, 20) != 20) return false;
|
||||
if (!checkType(buffer, "stsz", 4)) return false;
|
||||
uint8_t* cont = buffer + 8;
|
||||
fixed_sample_size = readU32(cont + 4);
|
||||
sample_count = readU32(cont + 8);
|
||||
stsz_processed = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool checkMdat() {
|
||||
p_file->seek(mdat_offset - 8);
|
||||
uint8_t buffer[8];
|
||||
if (p_file->read(buffer, 8) != 8) return false;
|
||||
return checkType(buffer, "mdat", 4);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,695 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "AudioTools/AudioCodecs/MP4Parser.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
#include "MP4Parser.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// The stsz sample size type should usually be uint32_t. However, for audio
/// we expect that the sample size is usually around 1 - 2k, so uint16_t
/// should be more than sufficient! Microcontrollers only have a limited
/// amount of RAM, so this makes a big difference!
/// @brief Sample size type optimized for microcontrollers
/// @ingroup codecs
|
||||
using stsz_sample_size_t = uint16_t;
|
||||
|
||||
/**
|
||||
* @brief Abstract base class for M4A/MP4 demuxers.
|
||||
* Provides shared functionality for both file-based and stream-based demuxers.
|
||||
*/
|
||||
class M4ACommonDemuxer {
|
||||
public:
|
||||
enum class Codec { Unknown, AAC, ALAC, MP3 };
|
||||
|
||||
struct Frame {
|
||||
Codec codec;
|
||||
const char* mime = nullptr;
|
||||
const uint8_t* data;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
struct M4AAudioConfig {
|
||||
Codec codec = Codec::Unknown; ///< Current codec.
|
||||
// aac
|
||||
int aacProfile = 2, sampleRateIdx = 4, channelCfg = 2; ///< AAC config.
|
||||
// cookie
|
||||
Vector<uint8_t> alacMagicCookie; ///< ALAC codec config.
|
||||
};
|
||||
|
||||
/**
 * @brief A parser for the ESDS segment to extract the relevant aac
 * information.
 */
struct ESDSParser {
  uint8_t audioObjectType;
  uint8_t samplingRateIndex;
  uint8_t channelConfiguration;

  /// Parses esds content to extract audioObjectType, samplingRateIndex and
  /// channelConfiguration. Returns false on malformed/truncated input.
  bool parse(const uint8_t* data, size_t size) {
    const uint8_t* ptr = data;
    const uint8_t* end = data + size;

    if (end - ptr < 4) return false;
    ptr += 4;  // skip version + flags

    // ES_Descriptor (tag 0x03)
    if (ptr >= end || *ptr++ != 0x03) return false;
    size_t es_len = parse_descriptor_length(ptr, end);
    if (es_len > (size_t)(end - ptr)) return false;

    // bug fix: bounds-check before skipping ES_ID + flags (was unchecked
    // pointer arithmetic past the buffer end)
    if (end - ptr < 3) return false;
    ptr += 2;  // skip ES_ID
    ptr += 1;  // skip flags

    // DecoderConfigDescriptor (tag 0x04)
    if (ptr >= end || *ptr++ != 0x04) return false;
    size_t dec_len = parse_descriptor_length(ptr, end);
    if (dec_len > (size_t)(end - ptr)) return false;

    // bug fix: bounds-check the 13 fixed bytes (objectTypeIndication,
    // streamType, bufferSizeDB, maxBitrate, avgBitrate)
    if (end - ptr < 13) return false;
    ptr += 13;

    // DecoderSpecificInfo (tag 0x05) holds the AudioSpecificConfig
    if (ptr >= end || *ptr++ != 0x05) return false;
    size_t dsi_len = parse_descriptor_length(ptr, end);
    if (dsi_len > (size_t)(end - ptr) || dsi_len < 2) return false;

    uint8_t byte1 = ptr[0];
    uint8_t byte2 = ptr[1];

    audioObjectType = (byte1 >> 3) & 0x1F;
    samplingRateIndex = ((byte1 & 0x07) << 1) | ((byte2 >> 7) & 0x01);
    channelConfiguration = (byte2 >> 3) & 0x0F;
    return true;
  }

 protected:
  /// Helper to decode variable-length descriptor lengths (e.g. 0x80 80 80 05)
  inline size_t parse_descriptor_length(const uint8_t*& ptr,
                                        const uint8_t* end) {
    size_t len = 0;
    for (int i = 0; i < 4 && ptr < end; ++i) {
      uint8_t b = *ptr++;
      len = (len << 7) | (b & 0x7F);
      if ((b & 0x80) == 0) break;
    }
    return len;
  }
};
|
||||
|
||||
/**
|
||||
* @brief Extracts audio data based on the sample sizes defined in the stsz
|
||||
* box. It collects the data from the mdat box and calls the callback with the
|
||||
* extracted frames.
|
||||
*/
|
||||
class SampleExtractor {
|
||||
public:
|
||||
using Frame = M4ACommonDemuxer::Frame;
|
||||
using Codec = M4ACommonDemuxer::Codec;
|
||||
using M4AAudioConfig = M4ACommonDemuxer::M4AAudioConfig;
|
||||
using FrameCallback = std::function<void(const Frame&, void*)>;
|
||||
|
||||
/**
|
||||
* @brief Constructor. Initializes the extractor.
|
||||
*/
|
||||
SampleExtractor(M4AAudioConfig& cfg) : audio_config{cfg} { begin(); }
|
||||
|
||||
/**
|
||||
* @brief Resets the extractor state.
|
||||
*/
|
||||
void begin() {
|
||||
sampleIndex = 0;
|
||||
buffer.clear();
|
||||
p_chunk_offsets->clear();
|
||||
p_sample_sizes->clear();
|
||||
buffer.resize(1024);
|
||||
current_size = 0;
|
||||
box_pos = 0;
|
||||
box_size = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets the callback to be called for each extracted frame.
|
||||
* @param cb Callback function.
|
||||
*/
|
||||
void setCallback(FrameCallback cb) { callback = cb; }
|
||||
|
||||
/**
|
||||
* @brief Sets a reference pointer passed to the callback.
|
||||
* @param r Reference pointer.
|
||||
*/
|
||||
void setReference(void* r) { ref = r; }
|
||||
|
||||
/**
|
||||
* @brief Sets the maximum box size (e.g., for mdat). This is called before
|
||||
* the mdat data is posted. In order to be able to play a file multiple
|
||||
* times we just reset the sampleIndex!
|
||||
* @param size Maximum size in bytes.
|
||||
*/
|
||||
void setMaxSize(size_t size) {
|
||||
box_size = size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Writes data to the extractor, extracting frames as sample sizes
|
||||
* are met. Provides the data via the callback.
|
||||
* @param data Pointer to input data.
|
||||
* @param len Length of input data.
|
||||
* @param is_final True if this is the last chunk of the box.
|
||||
* @return Number of bytes processed.
|
||||
*/
|
||||
size_t write(const uint8_t* data, size_t len, bool is_final) {
|
||||
// Resize buffer to the current sample size
|
||||
size_t currentSize = currentSampleSize();
|
||||
if (currentSize == 0) {
|
||||
LOGE("No sample size defined: e.g. mdat before stsz!");
|
||||
return 0;
|
||||
}
|
||||
resize(currentSize);
|
||||
|
||||
/// fill buffer up to the current sample size
|
||||
for (int j = 0; j < len; j++) {
|
||||
assert(buffer.write(data[j]));
|
||||
if (buffer.available() >= currentSize) {
|
||||
LOGI("Sample# %zu: size %zu bytes", sampleIndex, currentSize);
|
||||
executeCallback(currentSize);
|
||||
buffer.clear();
|
||||
box_pos += currentSize;
|
||||
++sampleIndex;
|
||||
currentSize = currentSampleSize();
|
||||
if (box_pos >= box_size) {
|
||||
LOGI("Reached end of box: %s write",
|
||||
is_final ? "final" : "not final");
|
||||
return j;
|
||||
}
|
||||
if (currentSize == 0) {
|
||||
LOGE("No sample size defined, cannot write data");
|
||||
return j;
|
||||
}
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the buffer of sample sizes.
|
||||
* @return Reference to the buffer of sample sizes.
|
||||
*/
|
||||
BaseBuffer<stsz_sample_size_t>& getSampleSizesBuffer() {
|
||||
return *p_sample_sizes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets the buffer to use for sample sizes.
|
||||
* @param buffer Reference to the buffer to use.
|
||||
*/
|
||||
void setSampleSizesBuffer(BaseBuffer<stsz_sample_size_t>& buffer) {
|
||||
p_sample_sizes = &buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the buffer of chunk offsets.
|
||||
* @return Reference to the buffer of chunk offsets.
|
||||
*/
|
||||
BaseBuffer<uint32_t>& getChunkOffsetsBuffer() { return *p_chunk_offsets; }
|
||||
|
||||
/**
|
||||
* @brief Sets the buffer to use for chunk offsets.
|
||||
* @param buffer Reference to the buffer to use.
|
||||
*/
|
||||
void setChunkOffsetsBuffer(BaseBuffer<uint32_t>& buffer) {
|
||||
p_chunk_offsets = &buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets a fixed sample size/count instead of using the sampleSizes
|
||||
* table.
|
||||
* @param sampleSize Size of each sample.
|
||||
* @param sampleCount Number of samples.
|
||||
*/
|
||||
void setFixedSampleCount(uint32_t sampleSize, uint32_t sampleCount) {
|
||||
fixed_sample_size = sampleSize;
|
||||
fixed_sample_count = sampleCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Constructs a Frame object for the current codec.
|
||||
* @param size Size of the frame.
|
||||
* @param buffer SingleBuffer with data.
|
||||
* @return Frame object.
|
||||
*/
|
||||
Frame getFrame(size_t size, SingleBuffer<uint8_t>& buffer) {
|
||||
Frame frame;
|
||||
frame.codec = audio_config.codec;
|
||||
frame.data = buffer.data();
|
||||
frame.size = size;
|
||||
switch (audio_config.codec) {
|
||||
case Codec::AAC: {
|
||||
// Prepare ADTS header + AAC frame
|
||||
tmp.resize(size + 7);
|
||||
writeAdtsHeader(tmp.data(), audio_config.aacProfile,
|
||||
audio_config.sampleRateIdx, audio_config.channelCfg,
|
||||
size);
|
||||
memcpy(tmp.data() + 7, buffer.data(), size);
|
||||
frame.data = tmp.data();
|
||||
frame.size = size + 7;
|
||||
frame.mime = "audio/aac";
|
||||
break;
|
||||
}
|
||||
case Codec::ALAC:
|
||||
frame.mime = "audio/alac";
|
||||
break;
|
||||
case Codec::MP3:
|
||||
frame.mime = "audio/mpeg";
|
||||
break;
|
||||
default:
|
||||
frame.mime = nullptr;
|
||||
break;
|
||||
}
|
||||
return frame;
|
||||
}
|
||||
|
||||
protected:
|
||||
M4AAudioConfig& audio_config;
|
||||
SingleBuffer<stsz_sample_size_t>
|
||||
defaultSampleSizes; ///< Table of sample sizes.
|
||||
SingleBuffer<uint32_t> defaultChunkOffsets; ///< Table of chunk offsets.
|
||||
BaseBuffer<stsz_sample_size_t>* p_sample_sizes = &defaultSampleSizes;
|
||||
BaseBuffer<uint32_t>* p_chunk_offsets = &defaultChunkOffsets;
|
||||
Vector<uint8_t> tmp;
|
||||
FrameCallback callback = nullptr; ///< Frame callback.
|
||||
void* ref = nullptr; ///< Reference pointer for callback.
|
||||
size_t sampleIndex = 0; ///< Current sample index.
|
||||
SingleBuffer<uint8_t> buffer; ///< Buffer for accumulating sample data.
|
||||
uint32_t fixed_sample_size = 0; ///< Fixed sample size (if used).
|
||||
uint32_t fixed_sample_count = 0; ///< Fixed sample count (if used).
|
||||
size_t current_size = 0; ///< Current sample size.
|
||||
size_t box_size = 0; ///< Maximum size of the current sample.
|
||||
size_t box_pos = 0; ///< Current position in the box.
|
||||
|
||||
/**
|
||||
* @brief Executes the callback for a completed frame.
|
||||
* @param size Size of the frame.
|
||||
*/
|
||||
void executeCallback(size_t size) {
|
||||
Frame frame = getFrame(size, buffer);
|
||||
if (callback)
|
||||
callback(frame, ref);
|
||||
else
|
||||
LOGE("No callback defined for audio frame extraction");
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Resizes the internal buffer if needed.
|
||||
* @param newSize New buffer size.
|
||||
*/
|
||||
void resize(size_t newSize) {
|
||||
if (buffer.size() < newSize) {
|
||||
buffer.resize(newSize);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the current sample size.
|
||||
* @return Size of the current sample.
|
||||
*/
|
||||
size_t currentSampleSize() {
|
||||
static size_t last_index = -1;
|
||||
static size_t last_size = -1;
|
||||
|
||||
// Return cached size
|
||||
if (sampleIndex == last_index) {
|
||||
return last_size;
|
||||
}
|
||||
|
||||
// using fixed sizes w/o table
|
||||
if (fixed_sample_size > 0 && fixed_sample_count > 0 &&
|
||||
sampleIndex < fixed_sample_count) {
|
||||
return fixed_sample_size;
|
||||
}
|
||||
stsz_sample_size_t nextSize = 0;
|
||||
if (p_sample_sizes->read(nextSize)) {
|
||||
last_index = sampleIndex;
|
||||
last_size = nextSize;
|
||||
return nextSize;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * @brief Writes a 7 byte ADTS header (MPEG-4, no CRC) for an AAC frame.
 * @param adts Output buffer for the header (at least 7 bytes).
 * @param aacProfile AAC profile (1-based, e.g. 2 = AAC LC).
 * @param sampleRateIdx Sample rate index.
 * @param channelCfg Channel configuration.
 * @param frameLen Length of the raw AAC frame (without the header).
 */
static void writeAdtsHeader(uint8_t* adts, int aacProfile,
                            int sampleRateIdx, int channelCfg,
                            int frameLen) {
  // the ADTS frame length field includes the 7 header bytes
  const int total = frameLen + 7;
  adts[0] = 0xFF;  // syncword high byte
  adts[1] = 0xF1;  // syncword low nibble + MPEG-4 + no CRC
  adts[2] = (uint8_t)(((aacProfile - 1) << 6)        // profile
                      | (sampleRateIdx << 2)         // sampling freq index
                      | ((channelCfg >> 2) & 0x1));  // channel config msb
  adts[3] = (uint8_t)(((channelCfg & 0x3) << 6) | (total >> 11));
  adts[4] = (uint8_t)((total >> 3) & 0xFF);
  adts[5] = (uint8_t)(((total & 0x7) << 5) | 0x1F);
  adts[6] = 0xFC;  // buffer fullness low bits + one raw data block
}
|
||||
};
|
||||
|
||||
using FrameCallback = std::function<void(const Frame&, void* ref)>;
|
||||
|
||||
M4ACommonDemuxer() = default;
|
||||
virtual ~M4ACommonDemuxer() = default;
|
||||
|
||||
/**
|
||||
* @brief Sets the callback for extracted audio frames.
|
||||
* @param cb Frame callback function.
|
||||
*/
|
||||
virtual void setCallback(FrameCallback cb) { frame_callback = cb; }
|
||||
/**
|
||||
* @brief Sets the buffer to use for sample sizes.
|
||||
* @param buffer Reference to the buffer to use.
|
||||
*/
|
||||
void setSampleSizesBuffer(BaseBuffer<stsz_sample_size_t>& buffer) {
|
||||
sampleExtractor.setSampleSizesBuffer(buffer);
|
||||
}
|
||||
/**
|
||||
* @brief Sets the buffer to use for sample sizes.
|
||||
* @param buffer Reference to the buffer to use.
|
||||
*/
|
||||
void setChunkOffsetsBuffer(BaseBuffer<uint32_t>& buffer) {
|
||||
sampleExtractor.setChunkOffsetsBuffer(buffer);
|
||||
}
|
||||
|
||||
void begin() {
|
||||
stsz_processed = false;
|
||||
stco_processed = false;
|
||||
audio_config.alacMagicCookie.clear();
|
||||
audio_config.codec = Codec::Unknown;
|
||||
parser.begin();
|
||||
sampleExtractor.begin();
|
||||
chunk_offsets_count = 0;
|
||||
sample_count = 0;
|
||||
}
|
||||
/**
|
||||
* @brief Sets the AAC configuration for ADTS header generation.
|
||||
* @param profile AAC profile.
|
||||
* @param srIdx Sample rate index.
|
||||
* @param chCfg Channel configuration.
|
||||
*/
|
||||
void setAACConfig(int profile, int srIdx, int chCfg) {
|
||||
audio_config.aacProfile = profile;
|
||||
audio_config.sampleRateIdx = srIdx;
|
||||
audio_config.channelCfg = chCfg;
|
||||
}
|
||||
|
||||
void setM4AAudioConfig(M4AAudioConfig cfg) { audio_config = cfg; }
|
||||
|
||||
M4AAudioConfig getM4AAudioConfig() { return audio_config; }
|
||||
|
||||
void resize(int size) {
|
||||
default_size = size;
|
||||
if (buffer.size() < size) {
|
||||
buffer.resize(size);
|
||||
}
|
||||
}
|
||||
|
||||
/// File offset of stsz box
|
||||
uint32_t getStszFileOffset() const {
|
||||
return stsz_offset;
|
||||
}
|
||||
|
||||
/// samples in stsz
|
||||
uint32_t getSampleCount() const {
|
||||
return sample_count;
|
||||
}
|
||||
|
||||
virtual void setupParser() = 0;
|
||||
|
||||
protected:
|
||||
FrameCallback frame_callback = nullptr;
|
||||
SampleExtractor sampleExtractor{
|
||||
audio_config}; ///< Extractor for audio samples.
|
||||
MP4Parser parser; ///< Underlying MP4 parser.
|
||||
bool stsz_processed = false; ///< Marks the stsz table as processed
|
||||
bool stco_processed = false; ///< Marks the stco table as processed
|
||||
bool stsd_processed = false;
|
||||
M4AAudioConfig audio_config;
|
||||
SingleBuffer<uint8_t> buffer; ///< Buffer for incremental data.
|
||||
uint32_t sample_count = 0; ///< Number of samples in stsz
|
||||
uint32_t stsz_offset = 0;
|
||||
uint32_t chunk_offsets_count = 0;
|
||||
size_t default_size = 2 * 1024; ///< Default buffer size.
|
||||
|
||||
/**
 * @brief Reads a 32-bit big-endian unsigned integer from a buffer.
 * @param p Pointer to at least 4 readable bytes.
 * @return Host-order 32-bit unsigned integer.
 */
static uint32_t readU32(const uint8_t* p) {
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    result = (result << 8) | p[i];
  }
  return result;
}
|
||||
|
||||
/// Reinterprets the in-memory bytes of num as big-endian, i.e. byte-swaps
/// the value on little-endian hosts.
static uint32_t readU32(const uint32_t num) {
  const uint8_t* bytes = (uint8_t*)&num;
  uint32_t result = 0;
  for (int i = 0; i < 4; ++i) {
    result = (result << 8) | bytes[i];
  }
  return result;
}
|
||||
|
||||
uint32_t readU32Buffer() {
|
||||
uint32_t nextSize = 0;
|
||||
buffer.readArray((uint8_t*)&nextSize, 4);
|
||||
return readU32(nextSize);
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Checks if the buffer at the given offset matches the specified type.
 * @param buffer Pointer to the buffer.
 * @param type 4-character type string (e.g. "mp4a").
 * @param offset Offset in the buffer to check.
 * @return true if the type matches, false otherwise.
 */
bool checkType(uint8_t* buffer, const char* type, int offset) {
  if (buffer == nullptr || type == nullptr) return false;
  for (int i = 0; i < 4; ++i) {
    if (buffer[offset + i] != (uint8_t)type[i]) return false;
  }
  return true;
}
|
||||
|
||||
void onStsd(const MP4Parser::Box& box) {
|
||||
LOGI("Box: %s, size: %u bytes", box.type, (unsigned)box.available);
|
||||
if (box.seq == 0) {
|
||||
resize(box.size);
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
buffer.writeArray(box.data, box.data_size);
|
||||
|
||||
if (box.is_complete && buffer.available() >= 8) {
|
||||
// printHexDump(box);
|
||||
uint32_t entryCount = readU32(buffer.data() + 4);
|
||||
// One or more sample entry boxes (e.g. mp4a, .mp3, alac)
|
||||
parser.parseString(buffer.data() + 8, box.data_size - 8,
|
||||
box.file_offset + 8 + 8, box.level + 1);
|
||||
buffer.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Handles the mp4a box.
|
||||
* @param box MP4 box.
|
||||
*/
|
||||
void onMp4a(const MP4Parser::Box& box) {
|
||||
LOGI("onMp4a: %s, size: %zu bytes", box.type, box.data_size);
|
||||
|
||||
if (box.is_complete) {
|
||||
// printHexDump(box);
|
||||
|
||||
// use default configuration
|
||||
int aacProfile = 2; // Default: AAC LC
|
||||
int sampleRateIdx = 4; // Default: 44100 Hz
|
||||
int channelCfg = 2; // Default: Stereo
|
||||
setAACConfig(aacProfile, sampleRateIdx, channelCfg);
|
||||
audio_config.codec = Codec::AAC;
|
||||
|
||||
/// for mp4a we expect to contain a esds: child boxes start at 36
|
||||
int pos = 36 - 8;
|
||||
parser.parseString(box.data + pos, box.data_size - pos, box.level + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Handles the esds (Elementary Stream Descriptor) box.
|
||||
* @param box MP4 box.
|
||||
*/
|
||||
void onEsds(const MP4Parser::Box& box) {
|
||||
LOGI("onEsds: %s, size: %zu bytes", box.type, box.data_size);
|
||||
// printHexDump(box);
|
||||
ESDSParser esdsParser;
|
||||
if (!esdsParser.parse(box.data, box.data_size)) {
|
||||
LOGE("Failed to parse esds box");
|
||||
return;
|
||||
}
|
||||
LOGI(
|
||||
"-> esds: AAC objectType: %u, samplingRateIdx: %u, "
|
||||
"channelCfg: %u",
|
||||
esdsParser.audioObjectType, esdsParser.samplingRateIndex,
|
||||
esdsParser.channelConfiguration);
|
||||
setAACConfig(esdsParser.audioObjectType, esdsParser.samplingRateIndex,
|
||||
esdsParser.channelConfiguration);
|
||||
}
|
||||
|
||||
// void fixALACMagicCookie(uint8_t* cookie, size_t len) {
|
||||
// if (len < 28) {
|
||||
// return;
|
||||
// }
|
||||
|
||||
// // Helper to read/write big-endian
|
||||
// auto read32 = [](uint8_t* p) -> uint32_t {
|
||||
// return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
|
||||
// };
|
||||
// auto write32 = [](uint8_t* p, uint32_t val) {
|
||||
// p[0] = (val >> 24) & 0xFF;
|
||||
// p[1] = (val >> 16) & 0xFF;
|
||||
// p[2] = (val >> 8) & 0xFF;
|
||||
// p[3] = val & 0xFF;
|
||||
// };
|
||||
// auto read16 = [](uint8_t* p) -> uint16_t { return (p[0] << 8) | p[1]; };
|
||||
// auto write16 = [](uint8_t* p, uint16_t val) {
|
||||
// p[0] = (val >> 8) & 0xFF;
|
||||
// p[1] = val & 0xFF;
|
||||
// };
|
||||
|
||||
// // Fix values if zero or invalid
|
||||
// if (read32(cookie + 0) == 0) write32(cookie + 0, 4096); // frameLength
|
||||
// if (cookie[6] == 0) cookie[6] = 16; // bitDepth
|
||||
// if (cookie[7] == 0 || cookie[7] > 32) cookie[7] = 10; // pb
|
||||
// if (cookie[8] == 0 || cookie[8] > 32) cookie[8] = 14; // mb
|
||||
// if (cookie[9] == 0 || cookie[9] > 32) cookie[9] = 10; // kb
|
||||
// if (cookie[10] == 0 || cookie[10] > 8) cookie[10] = 2; // numChannels
|
||||
// if (read16(cookie + 11) == 0) write16(cookie + 11, 255); // maxRun
|
||||
// if (read32(cookie + 13) == 0) write32(cookie + 13, 8192); // maxFrameBytes
|
||||
// if (read32(cookie + 17) == 0) write32(cookie + 17, 512000); // avgBitRate
|
||||
// if (read32(cookie + 21) == 0) write32(cookie + 21, 44100); // sampleRate
|
||||
// }
|
||||
|
||||
/**
|
||||
* @brief Handles the alac box.
|
||||
* @param box MP4 box.
|
||||
*/
|
||||
void onAlac(const MP4Parser::Box& box) {
|
||||
LOGI("onAlac: %s, size: %zu bytes", box.type, box.data_size);
|
||||
audio_config.codec = Codec::ALAC;
|
||||
|
||||
// only alac box in alac contains magic cookie
|
||||
MP4Parser::Box alac;
|
||||
if (parser.findBox("alac", box.data, box.data_size, alac)) {
|
||||
// fixALACMagicCookie((uint8_t*)alac.data, alac.data_size);
|
||||
audio_config.alacMagicCookie.resize(alac.data_size - 4);
|
||||
std::memcpy(audio_config.alacMagicCookie.data(), alac.data + 4,
|
||||
alac.data_size - 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Handles the stsz (Sample Size) box.
|
||||
* @param box MP4 box.
|
||||
*/
|
||||
void onStsz(MP4Parser::Box& box) {
|
||||
MP4Parser::defaultCallback(box,0);
|
||||
LOGI("onStsz #%u: %s, size: %u of %u bytes", (unsigned) box.seq, box.type, (unsigned) box.available, (unsigned) box.data_size);
|
||||
if (stsz_processed) return;
|
||||
BaseBuffer<stsz_sample_size_t>& sampleSizes =
|
||||
sampleExtractor.getSampleSizesBuffer();
|
||||
|
||||
buffer.resize(box.available);
|
||||
size_t written = buffer.writeArray(box.data, box.available);
|
||||
assert(written = box.available);
|
||||
|
||||
// get sample count and size from the box
|
||||
if (sample_count == 0 && buffer.available() > 12) {
|
||||
readU32Buffer(); // skip version + flags
|
||||
uint32_t sampleSize = readU32Buffer();
|
||||
uint32_t sampleCount = readU32Buffer();
|
||||
sample_count = sampleCount;
|
||||
stsz_offset = box.file_offset;
|
||||
|
||||
sampleSizes.resize(sample_count);
|
||||
if (sampleSize != 0) {
|
||||
sampleExtractor.setFixedSampleCount(sampleSize, sampleCount);
|
||||
}
|
||||
}
|
||||
|
||||
// incrementally process sampleSize
|
||||
int count = 0;
|
||||
while (buffer.available() >= 4) {
|
||||
stsz_sample_size_t sampleSize = readU32Buffer();
|
||||
assert(sampleSizes.write(sampleSize));
|
||||
count += 4;
|
||||
}
|
||||
// Remove processed data
|
||||
buffer.trim();
|
||||
|
||||
if (box.is_complete) {
|
||||
stsz_processed = true;
|
||||
}
|
||||
}
|
||||
|
||||
// /**
|
||||
// * @brief Handles the stco (Chunk Offset) box.
|
||||
// * @param box MP4 box.
|
||||
// */
|
||||
// void onStco(MP4Parser::Box& box) {
|
||||
// LOGI("onStco: %s, size: %zu bytes", box.type, box.data_size);
|
||||
// if (stco_processed) return;
|
||||
// BaseBuffer<uint32_t>& chunkOffsets =
|
||||
// sampleExtractor.getChunkOffsetsBuffer();
|
||||
|
||||
// buffer.resize(box.available);
|
||||
// buffer.writeArray(box.data, box.available);
|
||||
|
||||
// // get chunk_offsets_count from the box
|
||||
// if (chunk_offsets_count == 0 && buffer.available() > 12) {
|
||||
// chunk_offsets_count = readU32(buffer.data());
|
||||
// buffer.clearArray(4); // clear version + flags
|
||||
// }
|
||||
|
||||
// // incrementally process sampleSize
|
||||
// int j = 0;
|
||||
// for (j = 0; j < buffer.available(); j += 4) {
|
||||
// uint32_t sampleSize = readU32(buffer.data() + j);
|
||||
// chunkOffsets.write(sampleSize);
|
||||
// }
|
||||
// buffer.clearArray(j);
|
||||
|
||||
// if (box.is_complete) {
|
||||
// stco_processed = true;
|
||||
// }
|
||||
// }
|
||||
|
||||
/// Logs a classic hex dump (16 bytes per row, hex + printable ASCII) of
/// the box payload.
void printHexDump(const MP4Parser::Box& box) {
  const uint8_t* data = box.data;
  size_t len = box.data_size;
  LOGI("===========================");
  for (size_t row = 0; row < len; row += 16) {
    char hex[49] = {0};
    char ascii[17] = {0};
    size_t cols = (len - row < 16) ? (len - row) : 16;
    for (size_t col = 0; col < cols; ++col) {
      uint8_t byte = data[row + col];
      sprintf(hex + col * 3, "%02X ", byte);
      ascii[col] = (byte >= 32 && byte < 127) ? (char)byte : '.';
    }
    ascii[16] = 0;
    LOGI("%04zx: %-48s |%s|", row, hex, ascii);
  }
  LOGI("===========================");
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,147 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/CoreAudio/AudioPlayer.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
#include "M4AAudioFileDemuxer.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief A buffer that reads sample sizes from an M4A file using the
|
||||
* M4AAudioFileDemuxer. No RAM is used to store the sample sizes as they are
|
||||
* read directly from the file.
|
||||
*
|
||||
* This buffer is designed to be used with an AudioPlayer instance for audio
|
||||
* sources which are file based only. It provides a read interface that fetches
|
||||
* the next sample size directly from the file via the demuxer, avoiding the
|
||||
* need to store the entire sample size table in RAM.
|
||||
*
|
||||
* @note This buffer is can not be used for streaming sources; it is intended for
|
||||
* the use with file-based playback.
|
||||
* @note This class registers a setOnStreamChangeCallback() with the player
|
||||
*/
|
||||
class M4AFileSampleSizeBuffer : public BaseBuffer<stsz_sample_size_t> {
 public:
  /**
   * @brief Constructor.
   * Registers this object as the player's reference and installs the
   * stream-change callback so the current file is always tracked.
   * @param player Reference to the AudioPlayer instance.
   * @param container M4A container whose demuxer provides the stsz offsets.
   * @param fileExt File extension to recognize as M4A (default ".m4a").
   */
  M4AFileSampleSizeBuffer(AudioPlayer& player, ContainerM4A& container,
                          const char* fileExt = ".m4a") {
    this->p_player = &player;
    this->p_container = &container;
    player.setReference(this);
    player.setOnStreamChangeCallback(onFileChange);
    addFileExtension(fileExt);
  }

  /**
   * @brief Get the next sample size from the demuxer.
   * On the first call for a file, the demuxer is positioned on the stsz
   * table using the offsets provided by the container's demuxer.
   * @param data Reference to store the sample size.
   * @return true if successful, false otherwise.
   */
  bool read(stsz_sample_size_t& data) override {
    // no file yet (onFileChange not called) -> nothing to read
    if (p_file == nullptr) return false;
    if (demuxer.getMdatOffset() == 0) {
      uint32_t offset = p_container->getDemuxer().getStszFileOffset();
      uint32_t s_count = p_container->getDemuxer().getSampleCount();
      demuxer.beginSampleSizeAccess(p_file, s_count, offset);
    }
    // preserve the playback position: the demuxer seeks inside the file
    size_t pos = p_file->position();
    data = demuxer.getNextSampleSize();
    p_file->seek(pos);  // reset position after reading
    // demuxer converts to bool to signal success
    return demuxer;
  }

  /**
   * @brief Defines how many samples are buffered with each file read.
   * @param size Number of bytes to buffer (will be divided by 4 for sample
   * count).
   */
  void setReadBufferSize(size_t size) { demuxer.setSamplesBufferSize(size); }

  /**
   * @brief Add a file extension to recognize as relevant for this buffer.
   * @param fileExt File extension string (e.g., ".m4a").
   */
  void addFileExtension(const char* fileExt) {
    fileExtensions.push_back(fileExt);
  }

  /// No-op: there is no internal state to reset; positions come from the file.
  void reset() {}

  /**
   * @brief Write is ignored; sample sizes are read directly from the file.
   * @param data Sample size value (ignored).
   * @return Always true. This buffer is read-only.
   */
  bool write(stsz_sample_size_t data) { return true; }

  /**
   * @brief Peek is not supported for this buffer.
   * @param result Reference to store the peeked value (unused).
   * @return Always false. Peeking is not supported.
   */
  bool peek(stsz_sample_size_t& result) { return false; }

  /**
   * @brief Returns the number of samples already read (i.e., the current
   * sample index).
   * @return Number of samples read so far.
   */
  int available() { return demuxer.sampleIndex(); }

  /**
   * @brief Returns the available space for writing.
   * @return Always 0, as this buffer does not support writing.
   */
  int availableForWrite() override { return 0; }  ///< No write buffer available

  /**
   * @brief Returns the total number of samples in the file.
   * @return Total sample count.
   */
  size_t size() override { return demuxer.size(); }

  /**
   * @brief Returns a pointer to the buffer's physical address.
   * @return Always nullptr, as this buffer does not have a physical address.
   */
  stsz_sample_size_t* address() override { return nullptr; }

 protected:
  AudioPlayer* p_player = nullptr;  ///< Pointer to the AudioPlayer instance
  File* p_file = nullptr;           ///< Pointer to the currently open file
  M4AAudioFileDemuxer demuxer;      ///< Demuxer used to extract sample sizes
  Vector<const char*> fileExtensions;  ///< List of recognized file extensions
  ContainerM4A* p_container = nullptr;  ///< Source of stsz offset/sample count

  /**
   * @brief Checks if the given file name matches any of the registered
   * extensions.
   * @param name File name to check.
   * @return true if the file is relevant, false otherwise.
   */
  bool isRelevantFile(const char* name) {
    for (const auto& ext : fileExtensions) {
      // BUGFIX: compare against the registered extension, not the name
      // itself (endsWith(name) was trivially true for every file)
      if (StrView(name).endsWith(ext)) return true;
    }
    return false;
  }

  /**
   * @brief Static callback for file change events.
   * Updates the file pointer and re-parses the file if relevant.
   * @param streamPtr Pointer to the new file stream.
   * @param reference Pointer to the M4AFileSampleSizeBuffer instance.
   */
  static void onFileChange(Stream* streamPtr, void* reference) {
    M4AFileSampleSizeBuffer& self =
        *static_cast<M4AFileSampleSizeBuffer*>(reference);
    self.p_file = (File*)streamPtr;
    LOGI("===> M4AFileSampleSizeBuffer onFileChange: %s",
         self.p_file ? self.p_file->name() : "nullptr");
  }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
64
libraries/audio-tools/src/AudioTools/AudioCodecs/MP3Parser.h
Normal file
64
libraries/audio-tools/src/AudioTools/AudioCodecs/MP3Parser.h
Normal file
@@ -0,0 +1,64 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/HeaderParserMP3.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Parses MP3 frames, extracts audio info, and outputs complete frames.
|
||||
* The frame duration is determined e.g. for RTSP streaming.
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class MP3ParserEncoder : public AudioEncoder {
 public:
  /// @param bufferSize Size of the internal frame buffer in bytes (default 2 KB)
  MP3ParserEncoder(int bufferSize = 1024 * 2) { buffer_size = bufferSize; }

  /// Allocates the frame buffer and resets the parser state.
  /// @return Always true.
  bool begin() override {
    TRACEI();
    mp3.resize(buffer_size);  // allocate frame buffer (buffer_size bytes)
    mp3.reset();
    return true;
  }

  /// Flushes any pending frame data, resets the parser and releases the buffer.
  void end() override {
    TRACEI();
    mp3.flush();
    mp3.reset();
    mp3.resize(0);
  }

  /// Feeds encoded MP3 data into the frame parser.
  /// @param data Pointer to MP3 data.
  /// @param len Number of bytes provided.
  /// @return Number of bytes accepted by the parser.
  size_t write(const uint8_t* data, size_t len) override {
    LOGI("write: %d", (int)len);
    return mp3.write(data, len);
  }

  /// Defines the output for complete frames (forwarded to the parser as well).
  void setOutput(Print& out_stream) override {
    TRACEI();
    AudioEncoder::setOutput(out_stream);
    mp3.setOutput(out_stream);
  }

  /// Returns the audio info determined from the parsed MP3 header.
  /// bits_per_sample is fixed at 16 (decoded PCM width).
  AudioInfo audioInfo() override {
    AudioInfo info;
    info.sample_rate = mp3.getSampleRate();
    info.channels = mp3.getChannels();
    info.bits_per_sample = 16;
    return info;
  }

  /// Frame duration in microseconds (parser reports milliseconds).
  uint32_t frameDurationUs() override { return mp3.getTimePerFrameMs() * 1000; }

  /// Number of PCM samples per MP3 frame.
  uint16_t samplesPerFrame() override { return mp3.getSamplesPerFrame(); }

  /// Always true: this encoder has no failure state.
  operator bool() override { return true; }

  /// MIME type of the produced stream.
  virtual const char* mime() override { return "audio/mpeg"; }

 protected:
  HeaderParserMP3 mp3;  ///< MP3 header/frame parser doing the actual work
  int buffer_size = 0;  ///< Frame buffer size in bytes (set in constructor)
};
|
||||
|
||||
} // namespace audio_tools
|
||||
603
libraries/audio-tools/src/AudioTools/AudioCodecs/MP4Parser.h
Normal file
603
libraries/audio-tools/src/AudioTools/AudioCodecs/MP4Parser.h
Normal file
@@ -0,0 +1,603 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <string>
|
||||
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief MP4Parser is a class that parses MP4 container files and extracts
|
||||
* boxes (atoms). It provides a callback mechanism to process each box as it is
|
||||
* parsed. You can define specific callbacks for individual box types or use a
|
||||
* generic callback for the undefined boxes: By default it just prints the box
|
||||
* information to Serial. If a container box contains data, it will be processed
|
||||
* recursively and if it contains data itself, it might be reported in a second
|
||||
* callback call.
|
||||
* @note This parser expect the mdat box to be the last box in the file. This
|
||||
* can be achieve with the following ffmpeg commands:
|
||||
* - ffmpeg -i ../sine.wav -c:a alac -movflags +faststart alac.m4a
|
||||
* - ffmpeg -i ../sine.wav -c:a aac -movflags +faststart aac.m4a
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
*/
|
||||
class MP4Parser {
|
||||
public:
|
||||
/**
|
||||
* @brief Represents an individual box in the MP4 file.
|
||||
*/
|
||||
struct Box {
|
||||
friend class MP4Parser; ///< Allow MP4Parser to access private members
|
||||
friend class MP4ParserExt; ///< Allow MP4ParserExt to access private
|
||||
///< members
|
||||
size_t id = 0; ///< Unique box ID
|
||||
size_t seq = 0; ///< Sequence number for the box per id
|
||||
char type[5]; ///< 4-character box type (null-terminated)
|
||||
const uint8_t* data =
|
||||
nullptr; ///< Pointer to box payload (not including header)
|
||||
size_t data_size = 0; ///< Size of payload (not including header)
|
||||
size_t size =
|
||||
0; ///< Size of payload including subboxes (not including header)
|
||||
int level = 0; ///< Nesting depth
|
||||
uint64_t file_offset = 0; ///< File offset where box starts
|
||||
int available = 0; ///< Number of bytes available as data
|
||||
bool is_complete = false; ///< True if the box data is complete
|
||||
bool is_incremental = false; ///< True if the box is being parsed incrementally
|
||||
bool is_container = false; ///< True if the box is a container
|
||||
};
|
||||
|
||||
using BoxCallback = std::function<void(Box&, void* ref)>;
|
||||
|
||||
/**
|
||||
* @brief Structure for type-specific callbacks.
|
||||
*/
|
||||
struct CallbackEntry {
|
||||
char type[5]; ///< 4-character box type
|
||||
BoxCallback cb; ///< Callback function
|
||||
bool callGeneric =
|
||||
true; ///< If true, also call the generic callback after this one
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Defines an optional reference. By default it is the parser itself.
|
||||
* @param ref Pointer to reference object.
|
||||
*/
|
||||
void setReference(void* ref) { this->ref = ref; }
|
||||
|
||||
/**
|
||||
* @brief Defines the generic callback for all boxes.
|
||||
* @param cb Callback function for all boxes.
|
||||
*/
|
||||
void setCallback(BoxCallback cb) { callback = cb; }
|
||||
|
||||
/**
|
||||
* @brief Defines a specific callback for a box type.
|
||||
* @param type 4-character box type (e.g. "moov", "mdat").
|
||||
* @param cb Callback function for this box type.
|
||||
* @param callGeneric If true, the generic callback will also be called after
|
||||
* the type-specific callback.
|
||||
*/
|
||||
void setCallback(const char* type, BoxCallback cb, bool callGeneric = true) {
|
||||
CallbackEntry entry;
|
||||
strncpy(entry.type, type, 4);
|
||||
entry.type[4] = '\0'; // Ensure null-termination
|
||||
entry.cb = cb;
|
||||
entry.callGeneric = callGeneric;
|
||||
callbacks.push_back(entry);
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Defines a specific buffer size.
|
||||
* @param size Buffer size in bytes.
|
||||
* @return true if the buffer was resized successfully.
|
||||
*/
|
||||
bool resize(size_t size) {
|
||||
buffer.resize(size);
|
||||
return buffer.size() == size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initializes the parser.
|
||||
* @return true on success.
|
||||
*/
|
||||
bool begin() {
|
||||
buffer.clear();
|
||||
if (buffer.size() == 0) buffer.resize(2 * 1024);
|
||||
parseOffset = 0;
|
||||
fileOffset = 0;
|
||||
levelStack.clear();
|
||||
box.is_complete = true; // Start with no open box
|
||||
box.data = nullptr;
|
||||
box.size = 0;
|
||||
box.level = 0;
|
||||
box.file_offset = 0;
|
||||
box.id = 0;
|
||||
box.is_incremental = false;
|
||||
box.is_complete = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provide the data to the parser (in chunks if needed).
|
||||
* @param data Pointer to input data.
|
||||
* @param len Length of input data.
|
||||
* @return Number of bytes written to the buffer.
|
||||
*/
|
||||
size_t write(const uint8_t* data, size_t len) {
|
||||
if (is_error) return len; // If an error occurred, skip writing
|
||||
size_t result = buffer.writeArray(data, len);
|
||||
parse();
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provide the data to the parser (in chunks if needed).
|
||||
* @param data Pointer to input data (char*).
|
||||
* @param len Length of input data.
|
||||
* @return Number of bytes written to the buffer.
|
||||
*/
|
||||
size_t write(const char* data, size_t len) {
|
||||
return write(reinterpret_cast<const uint8_t*>(data), len);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the available space for writing.
|
||||
* @return Number of bytes available for writing.
|
||||
*/
|
||||
int availableForWrite() { return buffer.availableForWrite(); }
|
||||
|
||||
/**
|
||||
* @brief Adds a box name that will be interpreted as a container.
|
||||
* @param name Name of the container box.
|
||||
* @param start Offset of child boxes (default 0).
|
||||
*/
|
||||
void addContainer(const char* name, int start = 0) {
|
||||
ContainerInfo info;
|
||||
info.name = name;
|
||||
info.start = start; // offset of child boxes
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Trigger separate parsing (and callbacks) on the indicated string.
|
||||
* @param str Pointer to the string data.
|
||||
* @param len Length of the string data.
|
||||
* @return Number of bytes parsed.
|
||||
*/
|
||||
int parseString(const uint8_t* str, int len, int fileOffset = 0,
|
||||
int level = 0) {
|
||||
char type[5];
|
||||
int idx = 0;
|
||||
Box box;
|
||||
while (true) {
|
||||
if (!isValidType((const char*)str + idx + 4)) {
|
||||
return idx;
|
||||
}
|
||||
size_t box_size = readU32(str + idx) - 8;
|
||||
box.data = str + 8 + idx;
|
||||
box.size = box_size;
|
||||
box.level = level;
|
||||
box.data_size = box.size;
|
||||
box.file_offset = fileOffset + idx;
|
||||
box.is_complete = true;
|
||||
box.is_incremental = false;
|
||||
strncpy(box.type, (char*)(str + idx + 4), 4);
|
||||
box.type[4] = '\0';
|
||||
idx += box.size;
|
||||
processCallback(box);
|
||||
if (idx >= len) break; // No more data to parse
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
/// find box in box
|
||||
bool findBox(const char* name, const uint8_t* data, size_t len, Box& result) {
|
||||
for (int j = 0; j < len - 4; j++) {
|
||||
if (!isValidType((const char*)data + j + 4)) {
|
||||
continue; // Skip invalid types
|
||||
}
|
||||
size_t box_size = readU32(data + j) - 8;
|
||||
if (box_size < 8) continue; // Invalid box size
|
||||
Box box;
|
||||
box.data = data + j + 8;
|
||||
box.size = box_size;
|
||||
box.data_size = box.size;
|
||||
strncpy(box.type, (char*)(data + j + 4), 4);
|
||||
box.type[4] = '\0';
|
||||
if (StrView(box.type) == name) {
|
||||
result = box;
|
||||
return true; // Found the box
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Default callback that prints box information to Serial.
|
||||
* @param box The box being processed.
|
||||
* @param ref Optional reference pointer.
|
||||
*/
|
||||
static void defaultCallback(const Box& box, void* ref) {
|
||||
char space[box.level * 2 + 1];
|
||||
char str_buffer[200];
|
||||
memset(space, ' ', box.level * 2);
|
||||
space[box.level * 2] = '\0'; // Null-terminate the string
|
||||
snprintf(str_buffer, sizeof(str_buffer),
|
||||
"%s- #%u %u) %s, Offset: %u, Size: %u, Data Size: %u, Available: %u", space,
|
||||
(unsigned)box.id, (unsigned) box.seq, box.type, (unsigned)box.file_offset,
|
||||
(unsigned)box.size, (unsigned) box.data_size, (unsigned) box.available);
|
||||
#ifdef ARDUINO
|
||||
Serial.println(str_buffer);
|
||||
#else
|
||||
printf("%s\n", str_buffer);
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
BoxCallback callback = defaultCallback; ///< Generic callback for all boxes
|
||||
Vector<CallbackEntry> callbacks; ///< List of type-specific callbacks
|
||||
SingleBuffer<uint8_t> buffer; ///< Buffer for incoming data
|
||||
Vector<size_t> levelStack; ///< Stack for container box levels
|
||||
size_t parseOffset = 0; ///< Current parse offset in buffer
|
||||
uint64_t fileOffset = 0; ///< Current file offset
|
||||
void* ref = this; ///< Reference pointer for callbacks
|
||||
Box box; ///< Current box being processed
|
||||
bool is_error = false; ///< True if an error occurred
|
||||
|
||||
/**
|
||||
* @brief Structure for container box information.
|
||||
*/
|
||||
struct ContainerInfo {
|
||||
const char* name = nullptr; ///< Name of the container box
|
||||
int start = 0; ///< Offset of child boxes
|
||||
};
|
||||
Vector<ContainerInfo> containers; ///< List of container box info
|
||||
protected:
|
||||
bool box_in_progress =
|
||||
false; ///< True if currently parsing a box incrementally
|
||||
size_t box_bytes_received = 0; ///< Bytes received so far for the current box
|
||||
size_t box_bytes_expected = 0; ///< Total expected bytes for the current box
|
||||
char box_type[5] = {0}; ///< Current box type
|
||||
int box_level = 0; ///< Current box level (nesting)
|
||||
int box_seq = 0;
|
||||
size_t incremental_offset = 0;
|
||||
|
||||
/**
|
||||
* @brief Main parsing loop. Handles incremental and complete boxes.
|
||||
*/
|
||||
void parse() {
|
||||
while (true) {
|
||||
size_t bufferSize = buffer.available();
|
||||
if (!box_in_progress) {
|
||||
if (!tryStartNewBox(bufferSize)) break;
|
||||
} else {
|
||||
if (!continueIncrementalBox()) break;
|
||||
}
|
||||
popLevels();
|
||||
}
|
||||
finalizeParse();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Try to start parsing a new box. Returns false if not enough data.
|
||||
* @param bufferSize Number of bytes available in the buffer.
|
||||
* @return True if a box was started, false otherwise.
|
||||
*/
|
||||
bool tryStartNewBox(size_t bufferSize) {
|
||||
if (parseOffset + 8 > bufferSize) return false;
|
||||
char type[5];
|
||||
box_seq = 0;
|
||||
|
||||
// get basic box information
|
||||
parseOffset = checkParseOffset();
|
||||
const uint8_t* p = buffer.data() + parseOffset;
|
||||
uint32_t size32 = readU32(p);
|
||||
strncpy(type, (char*)(p + 4), 4);
|
||||
type[4] = '\0';
|
||||
uint64_t boxSize = size32;
|
||||
size_t headerSize = 8;
|
||||
|
||||
if (boxSize < headerSize) return false;
|
||||
|
||||
int level = static_cast<int>(levelStack.size());
|
||||
bool is_container = isContainerBox(type);
|
||||
|
||||
if (is_container) {
|
||||
handleContainerBox(type, boxSize, level);
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t payload_size = static_cast<size_t>(boxSize - headerSize);
|
||||
if (parseOffset + boxSize <= bufferSize) {
|
||||
// start with full buffer!
|
||||
handleCompleteBox(type, p, headerSize, payload_size, level);
|
||||
parseOffset += boxSize;
|
||||
} else {
|
||||
startIncrementalBox(type, p, headerSize, payload_size, level, bufferSize);
|
||||
return false; // Wait for more data
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Handles a container box (box with children).
|
||||
* @param type Box type string.
|
||||
* @param boxSize Size of the box.
|
||||
* @param level Nesting level of the box.
|
||||
*/
|
||||
void handleContainerBox(const char* type, uint64_t boxSize, int level) {
|
||||
strcpy(box.type, type);
|
||||
box.id = ++this->box.id;
|
||||
box.data = nullptr;
|
||||
box.size = static_cast<size_t>(boxSize - 8);
|
||||
box.data_size = 0;
|
||||
box.available = 0;
|
||||
box.level = level;
|
||||
box.file_offset = fileOffset + parseOffset;
|
||||
box.is_incremental = false;
|
||||
box.is_complete = true;
|
||||
box.is_container = true;
|
||||
box.seq = 0;
|
||||
|
||||
processCallback(box);
|
||||
|
||||
uint64_t absBoxOffset = fileOffset + parseOffset;
|
||||
levelStack.push_back(absBoxOffset + boxSize);
|
||||
parseOffset += 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Handles a complete (non-incremental) box.
|
||||
* @param type Box type string.
|
||||
* @param p Pointer to the start of the box in the buffer.
|
||||
* @param headerSize Size of the box header.
|
||||
* @param payload_size Size of the box payload.
|
||||
* @param level Nesting level of the box.
|
||||
*/
|
||||
void handleCompleteBox(const char* type, const uint8_t* p, size_t headerSize,
|
||||
size_t payload_size, int level) {
|
||||
strcpy(box.type, type);
|
||||
box.id = ++this->box.id;
|
||||
box.data = p + headerSize;
|
||||
box.size = payload_size;
|
||||
box.data_size = payload_size;
|
||||
box.level = level;
|
||||
box.file_offset = fileOffset + parseOffset;
|
||||
box.is_complete = true;
|
||||
box.is_container = false;
|
||||
box.available = payload_size;
|
||||
box.is_incremental = false;
|
||||
box.seq = 0;
|
||||
|
||||
processCallback(box);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Starts parsing a box incrementally.
|
||||
* @param type Box type string.
|
||||
* @param p Pointer to the start of the box in the buffer.
|
||||
* @param headerSize Size of the box header.
|
||||
* @param payload_size Size of the box payload.
|
||||
* @param level Nesting level of the box.
|
||||
* @param bufferSize Number of bytes available in the buffer.
|
||||
*/
|
||||
void startIncrementalBox(const char* type, const uint8_t* p,
|
||||
size_t headerSize, size_t payload_size, int level,
|
||||
size_t bufferSize) {
|
||||
box_in_progress = true;
|
||||
box_bytes_received = 0;
|
||||
box_bytes_expected = payload_size;
|
||||
strncpy(box_type, type, 5);
|
||||
box_level = level;
|
||||
box_seq = 0;
|
||||
|
||||
size_t available_payload = bufferSize - parseOffset - headerSize;
|
||||
incremental_offset = fileOffset + parseOffset;
|
||||
if (available_payload > 0) {
|
||||
box_bytes_received += available_payload;
|
||||
strcpy(box.type, box_type);
|
||||
box.id = ++this->box.id;
|
||||
box.data = p + headerSize;
|
||||
box.size = box_bytes_expected;
|
||||
box.data_size = box_bytes_expected;
|
||||
box.available = available_payload;
|
||||
box.level = box_level;
|
||||
box.file_offset = incremental_offset;
|
||||
box.seq = 0;
|
||||
box.is_incremental = true;
|
||||
box.is_complete = false;
|
||||
box.is_container = false;
|
||||
processCallback(box);
|
||||
}
|
||||
// fileOffset += (bufferSize - buffer.available());
|
||||
fileOffset += (parseOffset + payload_size + 8);
|
||||
incremental_offset += available_payload;
|
||||
buffer.clear();
|
||||
parseOffset = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Continue filling an incremental box. Returns false if not enough
|
||||
* data.
|
||||
* @return False if more data was processed, true otherwise.
|
||||
*/
|
||||
bool continueIncrementalBox() {
|
||||
size_t to_read = std::min((size_t)box_bytes_expected - box_bytes_received,
|
||||
(size_t)buffer.available());
|
||||
if (to_read == 0) return true;
|
||||
strcpy(box.type, box_type);
|
||||
box.id = ++this->box.id;
|
||||
box.data = buffer.data();
|
||||
box.size = box_bytes_expected;
|
||||
box.data_size = box_bytes_expected;
|
||||
box.available = to_read;
|
||||
box.level = box_level;
|
||||
box.file_offset = incremental_offset;
|
||||
box.is_complete = (box_bytes_received + to_read == box_bytes_expected);
|
||||
box.is_container = false;
|
||||
box.is_incremental = true;
|
||||
box.seq = ++box_seq;
|
||||
processCallback(box);
|
||||
box_bytes_received += to_read;
|
||||
// fileOffset += to_read;
|
||||
buffer.clearArray(to_read);
|
||||
incremental_offset += to_read;
|
||||
|
||||
if (box_bytes_received >= box_bytes_expected) {
|
||||
box_in_progress = false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Finalizes parsing, updating file offset and clearing buffer.
|
||||
*/
|
||||
void finalizeParse() {
|
||||
if (parseOffset > 0) {
|
||||
fileOffset += parseOffset;
|
||||
buffer.clearArray(parseOffset);
|
||||
parseOffset = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the current file offset (absolute position in file).
|
||||
* @return Current file offset.
|
||||
*/
|
||||
uint64_t currentFileOffset() { return fileOffset + parseOffset; }
|
||||
|
||||
/**
|
||||
* @brief Reads a 32-bit big-endian unsigned integer from a buffer.
|
||||
* @param p Pointer to buffer.
|
||||
* @return 32-bit unsigned integer.
|
||||
*/
|
||||
static uint32_t readU32(const uint8_t* p) {
|
||||
return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Reads a 64-bit big-endian unsigned integer from a buffer.
|
||||
* @param p Pointer to buffer.
|
||||
* @return 64-bit unsigned integer.
|
||||
*/
|
||||
static uint64_t readU64(const uint8_t* p) {
|
||||
return ((uint64_t)readU32(p) << 32) | readU32(p + 4);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Pops levels from the stack if we've passed their bounds.
|
||||
*/
|
||||
void popLevels() {
|
||||
// Pop levels if we've passed their bounds (absolute file offset)
|
||||
while (!levelStack.empty() &&
|
||||
(fileOffset + parseOffset) >= levelStack.back()) {
|
||||
levelStack.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Processes the callback for a box.
|
||||
* Calls the type-specific callback if present, and the generic callback if
|
||||
* allowed.
|
||||
* @param box The box being processed.
|
||||
*/
|
||||
void processCallback(Box& box) {
|
||||
bool is_called = false;
|
||||
bool call_generic = true;
|
||||
for (const auto& entry : callbacks) {
|
||||
if (strncmp(entry.type, box.type, 4) == 0) {
|
||||
entry.cb(box, ref);
|
||||
is_called = true;
|
||||
if (!entry.callGeneric) call_generic = false;
|
||||
}
|
||||
}
|
||||
/// call generic callback if allowed
|
||||
if ((!is_called || call_generic) && callback) callback(box, ref);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if a box type is a container box.
|
||||
* @param type Box type string.
|
||||
* @return true if container box, false otherwise.
|
||||
*/
|
||||
bool isContainerBox(const char* type) {
|
||||
// fill with default values if nothing has been defined
|
||||
if (containers.empty()) {
|
||||
// pure containers
|
||||
static const char* containers_str[] = {
|
||||
"moov", "trak", "mdia", "minf", "stbl", "edts", "dinf", "udta",
|
||||
"ilst", "moof", "traf", "mfra", "tref", "iprp", "sinf", "schi"};
|
||||
for (const char* c : containers_str) {
|
||||
ContainerInfo info;
|
||||
info.name = c;
|
||||
info.start = 0;
|
||||
containers.push_back(info);
|
||||
}
|
||||
// container with data
|
||||
ContainerInfo info;
|
||||
info.name = "meta";
|
||||
info.start = 4; // 4 bytes: version (1 byte) + flags (3 bytes)
|
||||
containers.push_back(info);
|
||||
}
|
||||
// find the container by name
|
||||
for (auto& cont : containers) {
|
||||
if (StrView(type) == cont.name) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the start offset for a subcontainer.
|
||||
* @param type Box type string.
|
||||
* @return Offset of the subcontainer.
|
||||
*/
|
||||
int getContainerDataLength(const char* type) {
|
||||
for (auto& cont : containers) {
|
||||
if (StrView(type) == cont.name) return cont.start;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if a type string is a valid 4-character box type.
|
||||
* @param type Pointer to type string.
|
||||
* @param offset Offset in the string.
|
||||
* @return true if valid, false otherwise.
|
||||
*/
|
||||
bool isValidType(const char* type, int offset = 0) const {
|
||||
// Check if the type is a valid 4-character string
|
||||
return (type != nullptr && isalnum(type[offset]) &&
|
||||
isalnum(type[offset + 1]) && isalnum(type[offset + 2]) &&
|
||||
isalnum(type[offset + 3]));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks and adjusts the parse offset for valid box types.
|
||||
* @return Adjusted parse offset.
|
||||
*/
|
||||
size_t checkParseOffset() {
|
||||
size_t current = parseOffset;
|
||||
const char* type = (char*)(buffer.data() + parseOffset + 4);
|
||||
for (int j = 0; j < buffer.available() - parseOffset - 4; j += 4) {
|
||||
if (isValidType(type, j)) {
|
||||
if (j != 0) {
|
||||
// report the data under the last valid box
|
||||
box.size = 0;
|
||||
box.data_size = j;
|
||||
box.level = static_cast<int>(levelStack.size()) + 1;
|
||||
box.data = buffer.data() + parseOffset;
|
||||
processCallback(box);
|
||||
}
|
||||
|
||||
return j + parseOffset;
|
||||
}
|
||||
}
|
||||
return parseOffset;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
378
libraries/audio-tools/src/AudioTools/AudioCodecs/MultiDecoder.h
Normal file
378
libraries/audio-tools/src/AudioTools/AudioCodecs/MultiDecoder.h
Normal file
@@ -0,0 +1,378 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
#include "AudioTools/Communication/HTTP/AbstractURLStream.h"
|
||||
#include "AudioTools/CoreAudio/AudioMetaData/MimeDetector.h"
|
||||
#include "AudioTools/AudioCodecs/StreamingDecoder.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Manage multiple AudioDecoders with automatic format detection
|
||||
*
|
||||
* This class automatically detects the audio format from incoming data and
|
||||
* selects the appropriate decoder from a collection of registered decoders.
|
||||
* The format detection is performed using the MimeDetector on the first chunk
|
||||
* of data written to the decoder.
|
||||
*
|
||||
* Key features:
|
||||
* - Automatic format detection using MimeDetector
|
||||
* - Support for multiple decoder registration
|
||||
* - Custom MIME type detection logic support
|
||||
* - External MIME source integration (e.g., HTTP headers)
|
||||
* - Lazy decoder initialization for memory efficiency
|
||||
* - Seamless integration with existing AudioDecoder architecture
|
||||
*
|
||||
* The actual decoder is only opened when it has been selected, which allows
|
||||
* for memory-efficient operation when dealing with multiple possible formats.
|
||||
* The relevant decoder is determined dynamically at the first write() call
|
||||
* based on the determined MIME type.
|
||||
*
|
||||
* @note This class uses a write-based interface, unlike StreamingDecoder
|
||||
* which uses a pull-based approach. For streaming scenarios with direct
|
||||
* access to input/output streams, consider using MultiStreamingDecoder.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MultiDecoder : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
MultiDecoder() = default;
|
||||
|
||||
/**
|
||||
* @brief Constructor with external MIME source
|
||||
*
|
||||
* Creates a MultiDecoder that uses an external source for MIME type
|
||||
* determination, such as HTTP Content-Type headers. This can be more
|
||||
* efficient than automatic detection as it avoids analyzing data content.
|
||||
*
|
||||
* @param mimeSource Reference to a MimeSource that provides MIME type information
|
||||
*/
|
||||
MultiDecoder(MimeSource& mimeSource) { setMimeSource(mimeSource); }
|
||||
|
||||
#ifdef USE_EXPERIMENTAL
|
||||
/**
|
||||
* @brief Destructor
|
||||
*
|
||||
* Cleans up any internally created DecoderAdapter instances.
|
||||
*/
|
||||
~MultiDecoder() {
|
||||
// Clean up any adapters we created
|
||||
for (auto* adapter : adapters) {
|
||||
delete adapter;
|
||||
}
|
||||
adapters.clear();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief Starts the processing and enables automatic MIME type determination
|
||||
*
|
||||
* Initializes the MIME detector and prepares the MultiDecoder for format
|
||||
* detection. This method must be called before any write() operations.
|
||||
*
|
||||
* @return true if initialization was successful, false if no output is defined
|
||||
*/
|
||||
bool begin() override {
|
||||
mime_detector.begin();
|
||||
is_first = true;
|
||||
if (p_print == nullptr) {
|
||||
LOGE("No output defined");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Releases resources and closes the active decoder
|
||||
*
|
||||
* Stops the currently active decoder and resets the MultiDecoder state
|
||||
* for potential reuse. After calling end(), begin() must be called again
|
||||
* before the decoder can process new data.
|
||||
*/
|
||||
void end() override {
|
||||
if (actual_decoder.decoder != nullptr && actual_decoder.is_open) {
|
||||
actual_decoder.decoder->end();
|
||||
}
|
||||
actual_decoder.is_open = false;
|
||||
actual_decoder.decoder = nullptr;
|
||||
actual_decoder.mime = nullptr;
|
||||
is_first = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adds a decoder that will be selected by its MIME type
|
||||
*
|
||||
* Registers an AudioDecoder that will be automatically selected when
|
||||
* the corresponding MIME type is detected in the input data.
|
||||
*
|
||||
* @param decoder The AudioDecoder to register
|
||||
* @param mime The MIME type string to associate with this decoder
|
||||
*/
|
||||
void addDecoder(AudioDecoder& decoder, const char* mime) {
|
||||
DecoderInfo info{mime, &decoder};
|
||||
decoder.addNotifyAudioChange(*this);
|
||||
decoders.push_back(info);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adds a decoder with custom MIME detection logic
|
||||
*
|
||||
* Registers an AudioDecoder with a specific MIME type and provides custom
|
||||
* logic for detecting that MIME type from raw data. This allows for
|
||||
* specialized format detection beyond the standard MimeDetector capabilities.
|
||||
*
|
||||
* @param decoder The AudioDecoder to register
|
||||
* @param mime The MIME type string to associate with this decoder
|
||||
* @param check Custom function that analyzes data to detect this MIME type.
|
||||
* Should return true if the data matches this format.
|
||||
*/
|
||||
void addDecoder(AudioDecoder& decoder, const char* mime,
|
||||
bool (*check)(uint8_t* data, size_t len)) {
|
||||
addDecoder(decoder, mime);
|
||||
mime_detector.setCheck(mime, check);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets the output stream for decoded audio data
|
||||
*
|
||||
* Defines where the decoded PCM audio data will be written to.
|
||||
* This output will be automatically configured for the selected decoder.
|
||||
*
|
||||
* @param out_stream The Print stream to write decoded audio data to
|
||||
*/
|
||||
void setOutput(Print& out_stream) override {
|
||||
p_print = &out_stream;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets an external MIME source for format detection
|
||||
*
|
||||
* Provides an alternative to automatic MIME detection by allowing an external
|
||||
* source to provide the MIME type information. This is particularly useful
|
||||
* when the MIME type is available from HTTP headers or other metadata sources.
|
||||
*
|
||||
* When a MIME source is set, it takes precedence over automatic detection,
|
||||
* making the decoder selection process more efficient.
|
||||
*
|
||||
* @param mimeSource Reference to a MimeSource that provides MIME type information
|
||||
*
|
||||
* @note The MimeSource object must remain valid for the lifetime of this
|
||||
* MultiDecoder instance, as only a reference is stored.
|
||||
*/
|
||||
void setMimeSource(MimeSource& mimeSource) { p_mime_source = &mimeSource; }
|
||||
|
||||
/**
|
||||
* @brief Selects the actual decoder by MIME type
|
||||
*
|
||||
* Searches through registered decoders to find one that matches the
|
||||
* specified MIME type, then initializes it for use. This method is
|
||||
* usually called automatically from the determined MIME type during
|
||||
* the first write() operation.
|
||||
*
|
||||
* @param mime The MIME type string to match against registered decoders
|
||||
* @return true if a matching decoder was found and initialized, false otherwise
|
||||
*/
|
||||
bool selectDecoder(const char* mime) {
|
||||
bool result = false;
|
||||
if (mime == nullptr) return false;
|
||||
// do nothing if no change
|
||||
if (StrView(mime).equals(actual_decoder.mime)) {
|
||||
is_first = false;
|
||||
return true;
|
||||
}
|
||||
// close actual decoder
|
||||
if (actual_decoder.decoder != this) end();
|
||||
|
||||
// find the corresponding decoder
|
||||
selected_mime = nullptr;
|
||||
for (int j = 0; j < decoders.size(); j++) {
|
||||
DecoderInfo info = decoders[j];
|
||||
if (StrView(info.mime).equals(mime)) {
|
||||
LOGI("Using decoder for %s (%s)", info.mime, mime);
|
||||
actual_decoder = info;
|
||||
// define output if it has not been defined
|
||||
if (p_print != nullptr && actual_decoder.decoder != this
|
||||
&& actual_decoder.decoder->getOutput() == nullptr) {
|
||||
actual_decoder.decoder->setOutput(*p_print);
|
||||
}
|
||||
if (!*actual_decoder.decoder) {
|
||||
actual_decoder.decoder->begin();
|
||||
LOGI("Decoder %s started", actual_decoder.mime);
|
||||
}
|
||||
result = true;
|
||||
selected_mime = mime;
|
||||
break;
|
||||
}
|
||||
}
|
||||
is_first = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the MIME type that was detected and selected
|
||||
*
|
||||
* @return The MIME type string that was detected and used to select
|
||||
* the current decoder, or nullptr if no decoder has been selected
|
||||
*/
|
||||
const char* selectedMime() { return selected_mime; }
|
||||
|
||||
/**
|
||||
* @brief Writes encoded audio data to be decoded
|
||||
*
|
||||
* On the first call, this method performs MIME type detection to select
|
||||
* the appropriate decoder. Subsequent calls delegate to the selected
|
||||
* decoder's write() method to process the audio data.
|
||||
*
|
||||
* The MIME detection process uses either an external MIME source (if set)
|
||||
* or analyzes the provided data to determine the audio format.
|
||||
*
|
||||
* @param data Buffer containing encoded audio data
|
||||
* @param len Number of bytes to write
|
||||
* @return Number of bytes actually written to the selected decoder
|
||||
*/
|
||||
size_t write(const uint8_t* data, size_t len) override {
|
||||
if (is_first) {
|
||||
const char* mime = nullptr;
|
||||
if (p_mime_source != nullptr) {
|
||||
// get content type from http header
|
||||
mime = p_mime_source->mime();
|
||||
if (mime) LOGI("mime from http request: %s", mime);
|
||||
}
|
||||
if (mime == nullptr) {
|
||||
// use the mime detector
|
||||
mime_detector.write((uint8_t*)data, len);
|
||||
mime = mime_detector.mime();
|
||||
if (mime) LOGI("mime from mime_detector: %s", mime);
|
||||
}
|
||||
if (mime != nullptr) {
|
||||
// select the decoder based on the detemined mime type
|
||||
if (!selectDecoder(mime)) {
|
||||
LOGE("The decoder could not be found for %s", mime);
|
||||
actual_decoder.decoder = &nop;
|
||||
actual_decoder.is_open = true;
|
||||
}
|
||||
}
|
||||
is_first = false;
|
||||
}
|
||||
// check if we have a decoder
|
||||
if (actual_decoder.decoder == nullptr) return 0;
|
||||
// decode the data
|
||||
return actual_decoder.decoder->write(data, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if the decoder is active and ready
|
||||
*
|
||||
* @return true if a decoder is selected and active, or if format detection
|
||||
* hasn't been performed yet; false if no suitable decoder was found
|
||||
*/
|
||||
virtual operator bool() override {
|
||||
if (actual_decoder.decoder == &nop) return false;
|
||||
return is_first || actual_decoder.is_open;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Sets codec-specific configuration data
|
||||
*
|
||||
* Forwards codec configuration data to the currently selected decoder.
|
||||
* This method can only be called after a decoder has been selected.
|
||||
*
|
||||
* @param data Buffer containing codec configuration data
|
||||
* @param len Length of the configuration data
|
||||
* @return true if the configuration was successfully applied, false otherwise
|
||||
*/
|
||||
bool setCodecConfig(const uint8_t* data, size_t len) override {
|
||||
if (actual_decoder.decoder == nullptr) {
|
||||
LOGE("No decoder defined, cannot set codec config");
|
||||
return false;
|
||||
}
|
||||
return actual_decoder.decoder->setCodecConfig(data, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides access to the internal MIME detector
|
||||
*
|
||||
* Returns a reference to the MimeDetector instance used for automatic
|
||||
* format detection. This allows direct access to configure custom MIME
|
||||
* detection logic or to query detection results.
|
||||
*
|
||||
* @return Reference to the internal MimeDetector instance
|
||||
*/
|
||||
MimeDetector& mimeDetector() { return mime_detector; }
|
||||
|
||||
#ifdef USE_EXPERIMENTAL
|
||||
|
||||
/**
|
||||
* @brief Adds a StreamingDecoder that will be selected by its MIME type
|
||||
*
|
||||
* Registers a StreamingDecoder that will be automatically selected when
|
||||
* the corresponding MIME type is detected in the input data. The
|
||||
* StreamingDecoder is wrapped in a DecoderAdapter to provide compatibility
|
||||
* with the write-based AudioDecoder interface used by MultiDecoder.
|
||||
*
|
||||
* @param decoder The StreamingDecoder to register
|
||||
* @param mime The MIME type string to associate with this decoder
|
||||
* @param bufferSize Buffer size for the adapter (default: 1024 bytes)
|
||||
*/
|
||||
void addDecoder(StreamingDecoder& decoder, const char* mime,
|
||||
int bufferSize = 1024) {
|
||||
if (mime != nullptr) {
|
||||
// Create a DecoderAdapter to wrap the StreamingDecoder
|
||||
decoder.addNotifyAudioChange(*this);
|
||||
auto adapter = new DecoderAdapter(decoder, bufferSize);
|
||||
adapters.push_back(adapter); // Store for cleanup
|
||||
|
||||
// Add the adapter as a regular AudioDecoder
|
||||
addDecoder(*adapter, mime);
|
||||
} else {
|
||||
LOGE("MIME type is nullptr - cannot add StreamingDecoder");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief Information about a registered decoder
|
||||
*/
|
||||
struct DecoderInfo {
|
||||
const char* mime = nullptr; ///< MIME type for this decoder
|
||||
AudioDecoder* decoder = nullptr; ///< Pointer to the decoder instance
|
||||
bool is_open = false; ///< Whether the decoder is currently active
|
||||
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
DecoderInfo() = default;
|
||||
|
||||
/**
|
||||
* @brief Constructor with parameters
|
||||
*
|
||||
* @param mime MIME type string
|
||||
* @param decoder Pointer to AudioDecoder instance
|
||||
*/
|
||||
DecoderInfo(const char* mime, AudioDecoder* decoder) {
|
||||
this->mime = mime;
|
||||
this->decoder = decoder;
|
||||
}
|
||||
} actual_decoder; ///< Currently active decoder information
|
||||
|
||||
Vector<DecoderInfo> decoders{0}; ///< Collection of registered decoders
|
||||
#ifdef USE_EXPERIMENTAL
|
||||
Vector<DecoderAdapter*> adapters{0}; ///< Collection of internally created adapters
|
||||
#endif
|
||||
MimeDetector mime_detector; ///< MIME type detection engine
|
||||
CodecNOP nop; ///< No-operation codec for unsupported formats
|
||||
MimeSource* p_mime_source = nullptr; ///< Optional external MIME source
|
||||
bool is_first = true; ///< Flag for first write() call
|
||||
const char* selected_mime = nullptr; ///< MIME type that was selected
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,39 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/AudioCodecs/VorbisDecoder.h"
|
||||
#include "AudioTools/AudioCodecs/ContainerOgg.h"
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Ogg Vorbis Decoder
|
||||
*
|
||||
* This class wraps VorbisDecoder in an Ogg container decoder, allowing
|
||||
* decoding of Ogg Vorbis streams with automatic packet extraction.
|
||||
*
|
||||
* Usage:
|
||||
* 1. Instantiate OggVorbisDecoder.
|
||||
* 2. Feed Ogg Vorbis data to the decoder.
|
||||
* 3. PCM output is provided via the underlying VorbisDecoder.
|
||||
*
|
||||
* @author Phil Schatzmann
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class OggVorbisDecoder : public OggContainerDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor for OggVorbisDecoder
|
||||
* Initializes the decoder and sets the underlying VorbisDecoder.
|
||||
*/
|
||||
OggVorbisDecoder() : OggContainerDecoder() { setDecoder(&vorbis); }
|
||||
|
||||
protected:
|
||||
/** @brief Underlying Vorbis decoder */
|
||||
VorbisDecoder vorbis;
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,3 @@
|
||||
|
||||
This directory contains different alternative API implementations for encoders and decoders.
|
||||
Usually you need to install some additional libraries.
|
||||
@@ -0,0 +1,992 @@
|
||||
#pragma once
|
||||
#include <new>
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
#include "AudioTools/CoreAudio/AudioMetaData/MimeDetector.h"
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
#include "AudioTools/CoreAudio/BaseStream.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief A Streaming Decoder where we provide both the input and output
|
||||
* as streams.
|
||||
*
|
||||
* This is the base class for all streaming decoders that process audio data
|
||||
* by reading from an input stream and writing decoded PCM data to an output
|
||||
* stream. Unlike AudioDecoder which uses a write-based interface,
|
||||
* StreamingDecoder uses a pull-based approach where you call copy() to process
|
||||
* data.
|
||||
*
|
||||
* @note This is more efficient than the write-based AudioDecoder interface
|
||||
* for streaming scenarios where you have direct access to input and output
|
||||
* streams.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class StreamingDecoder : public AudioInfoSource, public AudioInfoSupport {
|
||||
public:
|
||||
|
||||
virtual ~StreamingDecoder() = default;
|
||||
|
||||
/**
|
||||
* @brief Starts the processing
|
||||
*
|
||||
* Initializes the decoder and prepares it for processing audio data.
|
||||
* Must be called before any copy() operations.
|
||||
*
|
||||
* @return true if initialization was successful, false otherwise
|
||||
*/
|
||||
virtual bool begin() = 0;
|
||||
|
||||
/**
|
||||
* @brief Releases the reserved memory
|
||||
*
|
||||
* Cleans up any resources allocated by the decoder and stops processing.
|
||||
*/
|
||||
virtual void end() = 0;
|
||||
|
||||
/**
|
||||
* @brief Defines the output Stream
|
||||
*
|
||||
* Sets where the decoded PCM audio data will be written to.
|
||||
*
|
||||
* @param out_stream The Print stream to write decoded audio data to
|
||||
*/
|
||||
virtual void setOutput(Print& out_stream) { p_print = &out_stream; }
|
||||
|
||||
/**
|
||||
* @brief Defines the output streams and register to be notified
|
||||
*
|
||||
* Sets the output stream and registers for audio info change notifications.
|
||||
*
|
||||
* @param out_stream The AudioStream to write decoded audio data to
|
||||
*/
|
||||
virtual void setOutput(AudioStream& out_stream) {
|
||||
Print* p_print = &out_stream;
|
||||
setOutput(*p_print);
|
||||
addNotifyAudioChange(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines the output streams and register to be notified
|
||||
*
|
||||
* Sets the output stream and registers for audio info change notifications.
|
||||
*
|
||||
* @param out_stream The AudioOutput to write decoded audio data to
|
||||
*/
|
||||
virtual void setOutput(AudioOutput& out_stream) {
|
||||
Print* p_print = &out_stream;
|
||||
setOutput(*p_print);
|
||||
addNotifyAudioChange(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Stream Interface: Decode directly by taking data from the stream
|
||||
*
|
||||
* This is more efficient than feeding the decoder with write: just call
|
||||
* copy() in the loop to process data from the input stream.
|
||||
*
|
||||
* @param inStream The input stream containing encoded audio data
|
||||
*/
|
||||
void setInput(Stream& inStream) { this->p_input = &inStream; }
|
||||
|
||||
/**
|
||||
* @brief Provides the audio information for the current stream
|
||||
*
|
||||
* Returns audio format information such as sample rate, channels, and
|
||||
* bits per sample that was determined from the decoded audio stream.
|
||||
*
|
||||
* @return AudioInfo structure containing format information
|
||||
*/
|
||||
virtual AudioInfo audioInfo() = 0;
|
||||
|
||||
/**
|
||||
* @brief Checks if the class is active
|
||||
*
|
||||
* @return true if the decoder is ready and active, false otherwise
|
||||
*/
|
||||
virtual operator bool() = 0;
|
||||
|
||||
/**
|
||||
* @brief Process a single read operation - to be called in the loop
|
||||
*
|
||||
* Reads a chunk of data from the input stream, decodes it, and writes
|
||||
* the decoded PCM data to the output stream.
|
||||
*
|
||||
* @return true if data was processed successfully, false if no more data
|
||||
* is available or an error occurred
|
||||
*/
|
||||
virtual bool copy() = 0;
|
||||
|
||||
/**
|
||||
* @brief Process all available data
|
||||
*
|
||||
* Convenience method that calls copy() repeatedly until all available
|
||||
* data has been processed.
|
||||
*
|
||||
* @return true if any data was processed, false if no data was available
|
||||
*/
|
||||
bool copyAll() {
|
||||
bool result = false;
|
||||
while (copy()) {
|
||||
result = true;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides the MIME type of the audio format handled by this decoder
|
||||
*
|
||||
* @return C-string containing the MIME type (e.g., "audio/mpeg",
|
||||
* "audio/flac")
|
||||
*/
|
||||
virtual const char* mime() = 0;
|
||||
|
||||
protected:
|
||||
/**
|
||||
* @brief Reads bytes from the input stream
|
||||
*
|
||||
* Derived classes must implement this to read data from their input source.
|
||||
*
|
||||
* @param data Buffer to store the read data
|
||||
* @param len Maximum number of bytes to read
|
||||
* @return Number of bytes actually read
|
||||
*/
|
||||
virtual size_t readBytes(uint8_t* data, size_t len) = 0;
|
||||
|
||||
void setAudioInfo(AudioInfo newInfo) override {
|
||||
TRACED();
|
||||
if (this->info != newInfo) {
|
||||
this->info = newInfo;
|
||||
notifyAudioChange(info);
|
||||
}
|
||||
}
|
||||
|
||||
Print* p_print = nullptr; ///< Output stream for decoded PCM data
|
||||
Stream* p_input = nullptr; ///< Input stream for encoded audio data
|
||||
AudioInfo info;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Converts any AudioDecoder to a StreamingDecoder
|
||||
*
|
||||
* This adapter class allows you to use any existing AudioDecoder with the
|
||||
* StreamingDecoder interface. It handles the conversion between the write-based
|
||||
* AudioDecoder API and the stream-based StreamingDecoder API by using an
|
||||
* internal buffer.
|
||||
*
|
||||
* @note The adapter reads data from the input stream into a buffer, then
|
||||
* feeds that data to the wrapped AudioDecoder using its write() method.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class StreamingDecoderAdapter : public StreamingDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor
|
||||
*
|
||||
* @param decoder The AudioDecoder to wrap
|
||||
* @param mimeStr The MIME type string for this decoder
|
||||
* @param copySize Buffer size for data transfer (default:
|
||||
* DEFAULT_BUFFER_SIZE)
|
||||
*/
|
||||
StreamingDecoderAdapter(AudioDecoder& decoder, const char* mimeStr,
|
||||
int copySize = DEFAULT_BUFFER_SIZE) {
|
||||
p_decoder = &decoder;
|
||||
p_decoder->addNotifyAudioChange(*this);
|
||||
mime_str = mimeStr;
|
||||
if (copySize > 0) resize(copySize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Starts the processing
|
||||
*
|
||||
* Initializes the wrapped decoder.
|
||||
*
|
||||
* @return true if initialization was successful, false otherwise
|
||||
*/
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
if (p_decoder == nullptr) return false;
|
||||
if (p_input == nullptr) return false;
|
||||
return p_decoder->begin();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Releases the reserved memory
|
||||
*
|
||||
* Calls end() on the wrapped decoder to clean up resources.
|
||||
*/
|
||||
void end() override { p_decoder->end(); }
|
||||
|
||||
/**
|
||||
* @brief Defines the output Stream
|
||||
*
|
||||
* Sets the output stream for the wrapped decoder.
|
||||
*
|
||||
* @param out_stream The output stream for decoded audio data
|
||||
*/
|
||||
void setOutput(Print& out_stream) override {
|
||||
p_decoder->setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides the audio information
|
||||
*
|
||||
* Delegates to the wrapped decoder's audioInfo() method.
|
||||
*
|
||||
* @return AudioInfo from the wrapped decoder
|
||||
*/
|
||||
AudioInfo audioInfo() override { return p_decoder->audioInfo(); }
|
||||
|
||||
/**
|
||||
* @brief Checks if the class is active
|
||||
*
|
||||
* @return true if the wrapped decoder is active, false otherwise
|
||||
*/
|
||||
virtual operator bool() override { return *p_decoder; }
|
||||
|
||||
/**
|
||||
* @brief Process a single read operation - to be called in the loop
|
||||
*
|
||||
* Reads data from the input stream into the internal buffer, then feeds
|
||||
* it to the wrapped AudioDecoder for processing.
|
||||
*
|
||||
* @return true if data was processed successfully, false otherwise
|
||||
*/
|
||||
virtual bool copy() override {
|
||||
int read = readBytes(buffer.data(), buffer.size());
|
||||
int written = 0;
|
||||
if (read > 0) written = p_decoder->write(&buffer[0], read);
|
||||
bool rc = written > 0;
|
||||
LOGI("copy: %s", rc ? "success" : "failure");
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adjust the buffer size
|
||||
*
|
||||
* Changes the internal buffer size. The existing content of the buffer is
|
||||
* lost!
|
||||
*
|
||||
* @param bufferSize New buffer size in bytes
|
||||
*/
|
||||
void resize(int bufferSize) { buffer.resize(bufferSize); }
|
||||
|
||||
/**
|
||||
* @brief Provides the MIME type
|
||||
*
|
||||
* Returns the MIME type that was defined in the constructor.
|
||||
*
|
||||
* @return MIME type string
|
||||
*/
|
||||
const char* mime() override { return mime_str; }
|
||||
|
||||
protected:
|
||||
AudioDecoder* p_decoder = nullptr; ///< Wrapped AudioDecoder instance
|
||||
Vector<uint8_t> buffer{0}; ///< Internal buffer for data transfer
|
||||
const char* mime_str = nullptr; ///< MIME type string
|
||||
|
||||
/**
|
||||
* @brief Reads bytes from the input stream
|
||||
*
|
||||
* @param data Buffer to store the read data
|
||||
* @param len Maximum number of bytes to read
|
||||
* @return Number of bytes actually read
|
||||
*/
|
||||
size_t readBytes(uint8_t* data, size_t len) override {
|
||||
if (p_input == nullptr) return 0;
|
||||
return p_input->readBytes(data, len);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Manage multiple StreamingDecoders with automatic format detection
|
||||
*
|
||||
* This class automatically detects the audio format from incoming streaming
|
||||
* data and selects the appropriate decoder from a collection of registered
|
||||
* decoders. The format detection is performed using the MimeDetector on the
|
||||
* first chunk of data, and the detected data is preserved for the selected
|
||||
* decoder using a buffered stream approach.
|
||||
*
|
||||
* Key features:
|
||||
* - Automatic format detection using MimeDetector
|
||||
* - Support for multiple decoder registration
|
||||
* - Data preservation during format detection
|
||||
* - Custom mime type detection logic support
|
||||
* - Seamless integration with existing streaming architecture
|
||||
*
|
||||
* @note The first call to copy() will consume some data for format detection,
|
||||
* but this data is preserved and made available to the selected decoder through
|
||||
* a BufferedPrefixStream mechanism.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MultiStreamingDecoder : public StreamingDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
MultiStreamingDecoder() = default;
|
||||
|
||||
/**
|
||||
* @brief Destructor
|
||||
*
|
||||
* Cleans up any internally created StreamingDecoderAdapter instances.
|
||||
*/
|
||||
~MultiStreamingDecoder() {
|
||||
// Clean up any adapters we created
|
||||
for (auto* adapter : adapters) {
|
||||
delete adapter;
|
||||
}
|
||||
adapters.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Starts the processing
|
||||
*
|
||||
* Initializes the MIME detector and prepares for format detection.
|
||||
*
|
||||
* @return true if initialization was successful, false if no output is
|
||||
* defined
|
||||
*/
|
||||
bool begin() override {
|
||||
mime_detector.begin();
|
||||
is_first = true;
|
||||
if (p_print == nullptr) {
|
||||
LOGE("No output defined");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Releases the reserved memory
|
||||
*
|
||||
* Stops the currently active decoder and resets the state for next use.
|
||||
*/
|
||||
void end() override {
|
||||
if (actual_decoder.decoder != nullptr && actual_decoder.is_open) {
|
||||
actual_decoder.decoder->end();
|
||||
}
|
||||
actual_decoder.is_open = false;
|
||||
actual_decoder.decoder = nullptr;
|
||||
actual_decoder.mime = nullptr;
|
||||
is_first = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines the output Stream
|
||||
*
|
||||
* @param out_stream The output stream for decoded audio data
|
||||
*/
|
||||
void setOutput(Print& out_stream) override {
|
||||
StreamingDecoder::setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines the output streams and register to be notified
|
||||
*
|
||||
* @param out_stream The AudioStream for decoded audio data
|
||||
*/
|
||||
void setOutput(AudioStream& out_stream) override {
|
||||
StreamingDecoder::setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines the output streams and register to be notified
|
||||
*
|
||||
* @param out_stream The AudioOutput for decoded audio data
|
||||
*/
|
||||
void setOutput(AudioOutput& out_stream) override {
|
||||
StreamingDecoder::setOutput(out_stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Stream Interface: Decode directly by taking data from the stream
|
||||
*
|
||||
* @param inStream The input stream containing encoded audio data
|
||||
*/
|
||||
void setInput(Stream& inStream) {
|
||||
StreamingDecoder::setInput(inStream);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adds a decoder that will be selected by its MIME type
|
||||
*
|
||||
* Registers a StreamingDecoder that will be automatically selected when
|
||||
* the corresponding MIME type is detected in the input stream.
|
||||
*
|
||||
* @param decoder The StreamingDecoder to register
|
||||
*/
|
||||
void addDecoder(StreamingDecoder& decoder) {
|
||||
decoder.addNotifyAudioChange(*this);
|
||||
const char* mime = decoder.mime();
|
||||
if (mime != nullptr) {
|
||||
DecoderInfo info{mime, &decoder};
|
||||
decoders.push_back(info);
|
||||
} else {
|
||||
LOGE("Decoder mime() returned nullptr - cannot add decoder");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adds a decoder with explicit MIME type
|
||||
*
|
||||
* Registers a StreamingDecoder with a specific MIME type, which may be
|
||||
* different from what the decoder's mime() method returns.
|
||||
*
|
||||
* @param decoder The StreamingDecoder to register
|
||||
* @param mime The MIME type string to associate with this decoder
|
||||
*/
|
||||
void addDecoder(StreamingDecoder& decoder, const char* mime) {
|
||||
if (mime != nullptr) {
|
||||
decoder.addNotifyAudioChange(*this);
|
||||
DecoderInfo info{mime, &decoder};
|
||||
decoders.push_back(info);
|
||||
} else {
|
||||
LOGE("Decoder mime() returned nullptr - cannot add decoder");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Adds an AudioDecoder with explicit MIME type
|
||||
*
|
||||
* Wraps an AudioDecoder in a StreamingDecoderAdapter and registers it with
|
||||
* the specified MIME type. This allows using traditional AudioDecoder
|
||||
* instances with the MultiStreamingDecoder's automatic format detection.
|
||||
*
|
||||
* @param decoder The AudioDecoder to wrap and register
|
||||
* @param mime The MIME type string to associate with this decoder
|
||||
* @param bufferSize Buffer size for the adapter (default:
|
||||
* DEFAULT_BUFFER_SIZE)
|
||||
*
|
||||
* @note The created StreamingDecoderAdapter is stored internally and will be
|
||||
* automatically managed by the MultiStreamingDecoder.
|
||||
*/
|
||||
void addDecoder(AudioDecoder& decoder, const char* mime,
|
||||
int bufferSize = DEFAULT_BUFFER_SIZE) {
|
||||
if (mime != nullptr) {
|
||||
// Create a StreamingDecoderAdapter to wrap the AudioDecoder
|
||||
decoder.addNotifyAudioChange(*this);
|
||||
auto adapter = new StreamingDecoderAdapter(decoder, mime, bufferSize);
|
||||
adapters.push_back(adapter); // Store for cleanup
|
||||
|
||||
DecoderInfo info{mime, adapter};
|
||||
decoders.push_back(info);
|
||||
} else {
|
||||
LOGE("MIME type is nullptr - cannot add AudioDecoder");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Checks if the class is active
|
||||
*
|
||||
* @return true if a decoder is selected and active, or if format detection
|
||||
* hasn't been performed yet
|
||||
*/
|
||||
virtual operator bool() override {
|
||||
if (actual_decoder.decoder == nullptr) return false;
|
||||
return is_first || actual_decoder.is_open;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Process a single read operation - to be called in the loop
|
||||
*
|
||||
* On the first call, this method reads data for format detection, selects
|
||||
* the appropriate decoder, and sets up a buffered stream. Subsequent calls
|
||||
* delegate to the selected decoder's copy() method.
|
||||
*
|
||||
* @return true if data was processed successfully, false if no data is
|
||||
* available or format detection/decoding failed
|
||||
*/
|
||||
virtual bool copy() override {
|
||||
if (p_input == nullptr) return false;
|
||||
|
||||
// Automatically select decoder if not already selected
|
||||
if (is_first) {
|
||||
// determine the mime and select the decoder
|
||||
if (!selectDecoder()) {
|
||||
return false;
|
||||
}
|
||||
is_first = false;
|
||||
}
|
||||
|
||||
// Check if we have a decoder
|
||||
if (actual_decoder.decoder == nullptr) return false;
|
||||
|
||||
// Use the selected decoder to process data
|
||||
return actual_decoder.decoder->copy();
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Selects the actual decoder by MIME type
|
||||
*
|
||||
* Searches through registered decoders to find one that matches the
|
||||
* detected MIME type, then initializes it for use.
|
||||
*
|
||||
* @param mime The MIME type string to match
|
||||
* @return true if a matching decoder was found and initialized, false
|
||||
* otherwise
|
||||
*/
|
||||
bool selectDecoder(const char* mime) {
|
||||
TRACEI();
|
||||
bool result = false;
|
||||
|
||||
// Guard against null MIME type - cannot proceed without valid MIME
|
||||
if (mime == nullptr) {
|
||||
LOGE("mime is null");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Optimization: Check if the requested MIME type is already active
|
||||
// This avoids unnecessary decoder switching when the same format is detected
|
||||
if (StrView(mime).equals(actual_decoder.mime)) {
|
||||
is_first = false; // Mark initialization as complete
|
||||
return true; // Already using the correct decoder
|
||||
}
|
||||
|
||||
// Clean shutdown of currently active decoder before switching
|
||||
// This ensures proper resource cleanup and state reset
|
||||
if (actual_decoder.decoder != nullptr) {
|
||||
actual_decoder.decoder->end();
|
||||
actual_decoder.is_open = false; // Mark as inactive
|
||||
}
|
||||
|
||||
// Search through all registered decoders to find one that handles this MIME type
|
||||
selected_mime = nullptr; // Clear previous selection
|
||||
for (int j = 0; j < decoders.size(); j++) {
|
||||
DecoderInfo info = decoders[j];
|
||||
|
||||
// Check if this decoder supports the detected MIME type
|
||||
if (StrView(info.mime).equals(mime)) {
|
||||
LOGI("Using Decoder %s for %s", toStr(info.mime), toStr(mime));
|
||||
|
||||
// Switch to the matching decoder
|
||||
actual_decoder = info;
|
||||
|
||||
// Configure the decoder's output stream to match our output
|
||||
// This ensures decoded audio data flows to the correct destination
|
||||
if (p_print != nullptr) {
|
||||
actual_decoder.decoder->setOutput(*p_print);
|
||||
}
|
||||
|
||||
// Initialize the selected decoder and mark it as active
|
||||
LOGI("available: %d", p_data_source->available());
|
||||
assert(p_data_source != nullptr);
|
||||
actual_decoder.decoder->setInput(*p_data_source);
|
||||
actual_decoder.decoder->clearNotifyAudioChange();
|
||||
actual_decoder.decoder->addNotifyAudioChange(*this);
|
||||
if (actual_decoder.decoder->begin()) {
|
||||
actual_decoder.is_open = true;
|
||||
LOGI("StreamingDecoder %s started", toStr(actual_decoder.mime));
|
||||
} else {
|
||||
// Decoder failed to start - this is a critical error
|
||||
LOGE("Failed to start StreamingDecoder %s", toStr(actual_decoder.mime));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Successfully found and initialized a decoder
|
||||
result = true;
|
||||
selected_mime = mime; // Store the MIME type that was selected
|
||||
break; // Stop searching once we find a match
|
||||
}
|
||||
}
|
||||
|
||||
// Mark initialization phase as complete regardless of success/failure
|
||||
is_first = false;
|
||||
return result; // true if decoder was found and started, false otherwise
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Provides the MIME type of the selected decoder
|
||||
* @return MIME type string of the currently active decoder, or nullptr
|
||||
* if no decoder is selected
|
||||
*/
|
||||
const char* mime() override {
|
||||
// fallback to actual decoder
|
||||
if (actual_decoder.decoder != nullptr) {
|
||||
return actual_decoder.decoder->mime();
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
 * @brief Returns the MIME type that was detected and selected
 *
 * @return the MIME string stored by selectDecoder(), or nullptr when no
 * selection has taken place yet
 */
const char* selectedMime() { return selected_mime; }
|
||||
|
||||
/**
|
||||
* @brief Provides the audio information from the selected decoder
|
||||
*
|
||||
* @return AudioInfo from the currently active decoder, or empty AudioInfo
|
||||
* if no decoder is selected
|
||||
*/
|
||||
AudioInfo audioInfo() override {
|
||||
if (actual_decoder.decoder != nullptr) {
|
||||
return actual_decoder.decoder->audioInfo();
|
||||
}
|
||||
AudioInfo empty;
|
||||
return empty;
|
||||
}
|
||||
|
||||
/**
 * @brief Provides access to the internal MIME detector
 *
 * Returns a reference to the MimeDetector instance used for automatic
 * format detection, e.g. to add custom detection logic or callbacks.
 *
 * @note Typically only used for advanced configuration before begin();
 * modifying the detector after detection has occurred may lead to
 * unexpected behavior.
 *
 * @return Reference to the internal MimeDetector instance
 * @see MimeDetector::setCheck() for adding custom detection logic
 * @see MimeDetector::setMimeCallback() for detection notifications
 */
MimeDetector& mimeDetector() { return mime_detector; }
|
||||
|
||||
/**
 * @brief Sets an external MIME source for format detection
 *
 * Allows an external source (e.g. HTTP Content-Type headers, file
 * extensions, playlist metadata) to provide the MIME type. When set, the
 * automatic content-based detection is bypassed, so no stream data needs
 * to be read and analyzed before decoding starts.
 *
 * @param mimeSource Reference to a MimeSource object whose mime() method
 * supplies the MIME type
 *
 * @note Only a pointer is stored: the MimeSource must remain valid for
 * the lifetime of this object.
 * @note A set MIME source takes precedence over auto-detection; to revert,
 * its mime() method would need to return nullptr.
 *
 * @see MimeSource interface for implementing custom MIME providers
 * @see selectDecoder() for how MIME detection and selection works
 */
void setMimeSource(MimeSource& mimeSource) { p_mime_source = &mimeSource; }
|
||||
|
||||
protected:
|
||||
|
||||
/**
|
||||
* @brief Information about a registered decoder
|
||||
*/
|
||||
struct DecoderInfo {
|
||||
const char* mime = nullptr; ///< MIME type for this decoder
|
||||
StreamingDecoder* decoder = nullptr; ///< Pointer to the decoder instance
|
||||
bool is_open = false; ///< Whether the decoder is currently active
|
||||
|
||||
/**
|
||||
* @brief Default constructor
|
||||
*/
|
||||
DecoderInfo() = default;
|
||||
|
||||
/**
|
||||
* @brief Constructor with parameters
|
||||
*
|
||||
* @param mime MIME type string
|
||||
* @param decoder Pointer to StreamingDecoder instance
|
||||
*/
|
||||
DecoderInfo(const char* mime, StreamingDecoder* decoder) {
|
||||
this->mime = mime;
|
||||
this->decoder = decoder;
|
||||
}
|
||||
} actual_decoder; ///< Currently active decoder information
|
||||
|
||||
Vector<DecoderInfo> decoders{0};  ///< Collection of registered decoders
Vector<StreamingDecoderAdapter*> adapters{0};  ///< Internally created adapters (for AudioDecoder registrations)
MimeDetector mime_detector;  ///< MIME type detection engine
Vector<uint8_t> detection_buffer{0};  ///< Scratch buffer holding the peeked bytes used for format detection
bool is_first = true;  ///< true until the first copy() has run the detection
const char* selected_mime = nullptr;  ///< MIME type that was selected (points at caller/detector-owned string)
MimeSource* p_mime_source = nullptr;  ///< Optional external MIME provider; bypasses auto-detection when set
Stream *p_data_source = nullptr;  ///< effective data source handed to the decoder (p_input or buffered_stream)
BufferedStream buffered_stream{0};  ///< Buffered wrapper so detection bytes are not lost to the decoder
|
||||
/// Maps nullptr to "" so values can be passed safely to printf-style logging
const char* toStr(const char* str) {
  if (str == nullptr) return "";
  return str;
}
|
||||
|
||||
/**
 * @brief Automatically detects MIME type and selects appropriate decoder
 *
 * Performs format detection and decoder selection when no decoder is
 * active yet. Two modes are supported:
 * 1. External MIME source - uses the provided MimeSource for the format
 * 2. Auto-detection - peeks stream content to determine the audio format
 *
 * In auto-detection mode the method peeks (without consuming) up to 160
 * bytes from a BufferedStream wrapped around the input, so the bytes used
 * for identification remain available to the selected decoder and no
 * audio data is lost.
 *
 * @note Called automatically by copy() on the first invocation. If a
 * decoder is already active, only its input is (re)connected.
 *
 * @return true if a decoder was successfully selected and initialized, or
 * if a decoder was already active; false if MIME detection failed or no
 * matching decoder was found
 *
 * @see selectDecoder(const char* mime) for explicit decoder selection
 * @see setMimeSource() for providing external MIME type information
 * @see MimeDetector for details on automatic format detection
 */
bool selectDecoder() {
  // Only perform MIME detection and decoder selection if no decoder is
  // active yet; prevents re-detection during the same stream
  if (actual_decoder.decoder == nullptr) {
    const char* mime = nullptr;
    p_data_source = nullptr;

    // Two methods for MIME determination: external source or auto-detection
    if (p_mime_source != nullptr) {
      // Option 1: externally provided MIME (e.g. from HTTP headers) -
      // avoids reading and analyzing stream data
      mime = p_mime_source->mime();
      LOGI("mime from source: %s", toStr(mime));
      assert(p_input != nullptr);
      // decoder can read the raw input directly
      p_data_source = p_input;
    } else {
      // Option 2: auto-detect by analyzing stream content; the decoder
      // later reads from the buffered stream so peeked data is preserved
      assert(p_input != nullptr);
      buffered_stream.setStream(*p_input);
      buffered_stream.resize(DEFAULT_BUFFER_SIZE);
      p_data_source = &buffered_stream;

      // Peek a 160-byte sample to identify the format (non-consuming)
      detection_buffer.resize(160);
      size_t bytesRead = buffered_stream.peekBytes(detection_buffer.data(), detection_buffer.size()); // If no data is available, we cannot proceed with detection
      if (bytesRead == 0) return false;

      // Feed the sample to the MIME detector (examines headers/magic numbers)
      mime_detector.write(detection_buffer.data(), bytesRead);
      mime = mime_detector.mime();
      LOGI("mime from detector: %s", toStr(mime));

    }

    // Process the detected/provided MIME type
    if (mime != nullptr) {
      // Delegate to selectDecoder(mime) to find and start the decoder
      if (!selectDecoder(mime)) {
        LOGE("The decoder could not be selected for %s", toStr(mime));
        return false; // No registered decoder can handle this format
      }
    } else {
      // MIME detection failed - format is unknown or unsupported
      LOGE("Could not determine mime type");
      return false;
    }
  } else {
    // Decoder already active: just (re)connect the input
    LOGI("Decoder already selected: %s", toStr(actual_decoder.mime));
    assert(p_input != nullptr);
    actual_decoder.decoder->setInput(*p_input);
  }

  // Success: decoder was already selected or selection completed
  return true;
}
|
||||
|
||||
/**
|
||||
* @brief Reads bytes from the input stream
|
||||
*
|
||||
* @param data Buffer to store read data
|
||||
* @param len Maximum number of bytes to read
|
||||
* @return Number of bytes actually read
|
||||
*/
|
||||
size_t readBytes(uint8_t* data, size_t len) override {
|
||||
if (p_input == nullptr) return 0;
|
||||
return p_input->readBytes(data, len);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Adapter class which allows the AudioDecoder API on a StreamingDecoder
|
||||
*
|
||||
* This adapter provides the reverse functionality of StreamingDecoderAdapter:
|
||||
* it allows you to use a StreamingDecoder with the write-based AudioDecoder
|
||||
* API. It uses a ring buffer and queue to convert write() calls into a stream
|
||||
* that the StreamingDecoder can read from.
|
||||
*
|
||||
* @note This is useful when you have a StreamingDecoder but need to integrate
|
||||
* it into code that expects the AudioDecoder write-based interface.
|
||||
*
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class DecoderAdapter : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor
|
||||
*
|
||||
* @param dec The StreamingDecoder to wrap
|
||||
* @param bufferSize Size of the internal ring buffer for data transfer
|
||||
*/
|
||||
DecoderAdapter(StreamingDecoder& dec, int bufferSize) {
|
||||
TRACED();
|
||||
p_dec = &dec;
|
||||
p_dec->setInput(queue);
|
||||
resize(bufferSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines the output Stream
|
||||
*
|
||||
* Sets the output stream for the wrapped StreamingDecoder.
|
||||
*
|
||||
* @param out The output stream for decoded audio data
|
||||
*/
|
||||
void setOutput(Print& out) override { p_dec->setOutput(out); }
|
||||
|
||||
/**
|
||||
* @brief Sets the input stream for the wrapped decoder
|
||||
*
|
||||
* @param in The input stream containing encoded audio data
|
||||
*/
|
||||
void setInput(Stream& in) { p_dec->setInput(in); }
|
||||
|
||||
/**
|
||||
* @brief Starts the processing
|
||||
*
|
||||
* Initializes the wrapped StreamingDecoder and marks this adapter as active.
|
||||
*
|
||||
* @return true if the StreamingDecoder was started successfully
|
||||
*/
|
||||
bool begin() override {
|
||||
TRACED();
|
||||
active = true;
|
||||
bool rc = p_dec->begin();
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Stops the processing
|
||||
*
|
||||
* Marks this adapter as inactive. The wrapped StreamingDecoder is not
|
||||
* explicitly stopped to allow continued use.
|
||||
*/
|
||||
void end() override {
|
||||
TRACED();
|
||||
active = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Resizes the internal buffer
|
||||
*
|
||||
* Changes the size of the ring buffer used for data transfer.
|
||||
* The buffer is only allocated when first needed (lazy setup).
|
||||
*
|
||||
* @param size New buffer size in bytes
|
||||
*/
|
||||
void resize(int size) {
|
||||
buffer_size = size;
|
||||
// setup the buffer only if needed
|
||||
if (is_setup) rbuffer.resize(size);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Writes encoded audio data to be decoded
|
||||
*
|
||||
* The data is written to an internal queue, which is then processed
|
||||
* by calling copy() on the wrapped StreamingDecoder.
|
||||
*
|
||||
* @param data Buffer containing encoded audio data
|
||||
* @param len Number of bytes to write
|
||||
* @return Number of bytes actually written
|
||||
*/
|
||||
size_t write(const uint8_t* data, size_t len) override {
|
||||
TRACED();
|
||||
setupLazy();
|
||||
size_t result = queue.write((uint8_t*)data, len);
|
||||
// Trigger processing - process all available data
|
||||
while (p_dec->copy());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Gets the wrapped StreamingDecoder
|
||||
*
|
||||
* Provides direct access to the underlying StreamingDecoder for
|
||||
* advanced use cases.
|
||||
*
|
||||
* @return Pointer to the wrapped StreamingDecoder
|
||||
*/
|
||||
StreamingDecoder* getStreamingDecoder() { return p_dec; }
|
||||
|
||||
/**
|
||||
* @brief Checks if the adapter is active
|
||||
*
|
||||
* @return true if the adapter is active, false otherwise
|
||||
*/
|
||||
operator bool() override { return active; }
|
||||
|
||||
protected:
|
||||
bool active = false; ///< Whether the adapter is active
|
||||
bool is_setup = false; ///< Whether lazy setup has been performed
|
||||
int buffer_size; ///< Size of the ring buffer
|
||||
StreamingDecoder* p_dec = nullptr; ///< Wrapped StreamingDecoder instance
|
||||
RingBuffer<uint8_t> rbuffer{0}; ///< Ring buffer for data storage
|
||||
QueueStream<uint8_t> queue{rbuffer}; ///< Stream interface to the ring buffer
|
||||
|
||||
/**
|
||||
* @brief Performs lazy initialization of the ring buffer
|
||||
*
|
||||
* The ring buffer is only allocated when first needed to save memory.
|
||||
*/
|
||||
void setupLazy() {
|
||||
if (!is_setup) {
|
||||
rbuffer.resize(buffer_size);
|
||||
queue.begin();
|
||||
is_setup = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * @brief Type alias for DecoderAdapter
 *
 * Alternative name kept for backward compatibility.
 */
using DecoderFromStreaming = DecoderAdapter;
|
||||
|
||||
} // namespace audio_tools
|
||||
231
libraries/audio-tools/src/AudioTools/AudioCodecs/VorbisDecoder.h
Normal file
231
libraries/audio-tools/src/AudioTools/AudioCodecs/VorbisDecoder.h
Normal file
@@ -0,0 +1,231 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
|
||||
#include <vorbis.h>
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Vorbis Audio Decoder using low-level libvorbis API
|
||||
*
|
||||
* This decoder expects Ogg Vorbis packets to be provided via the write()
|
||||
* method. It parses the Vorbis headers, initializes the decoder, and outputs
|
||||
* PCM audio.
|
||||
*
|
||||
* Usage:
|
||||
* 1. Call begin() to reset the decoder.
|
||||
* 2. Feed the first three Vorbis header packets via write().
|
||||
* 3. Feed subsequent audio packets via write().
|
||||
* 4. Use setOutput() to set the PCM output destination.
|
||||
* 5. Call audioInfo() to retrieve stream parameters after header parsing.
|
||||
*
|
||||
* @author Phil Schatzmann
|
||||
* @ingroup codecs
|
||||
* @ingroup decoder
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class VorbisDecoder : public AudioDecoder {
|
||||
public:
|
||||
/**
|
||||
* @brief Constructor for VorbisDecoder
|
||||
* @param buffer_size Size of the PCM output buffer (default: 256)
|
||||
* @param header_packets Number of Vorbis header packets (default: 3)
|
||||
*
|
||||
* Initializes the decoder and allocates the PCM output buffer.
|
||||
*/
|
||||
VorbisDecoder(size_t buffer_size = 256, int header_packets = 3)
|
||||
: pcm_buffer_size(buffer_size), num_header_packets(header_packets) {}
|
||||
|
||||
/**
|
||||
* @brief Destructor for VorbisDecoder
|
||||
*
|
||||
* Cleans up all decoder resources.
|
||||
*/
|
||||
~VorbisDecoder() { end(); }
|
||||
|
||||
/**
|
||||
* @brief Resets decoder state and prepares for new Vorbis stream
|
||||
*
|
||||
* This method clears all decoder state, resizes the PCM output buffer,
|
||||
* and initializes Vorbis structures. Call this before feeding header packets.
|
||||
* @return true if successful
|
||||
*/
|
||||
bool begin() override {
|
||||
end();
|
||||
pcmout_buffer.resize(pcm_buffer_size);
|
||||
vorbis_info_init(&vi);
|
||||
vorbis_comment_init(&vc);
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Cleans up all Vorbis decoder structures
|
||||
*/
|
||||
void end() override {
|
||||
vorbis_block_clear(&vb);
|
||||
vorbis_dsp_clear(&vd);
|
||||
vorbis_comment_clear(&vc);
|
||||
vorbis_info_clear(&vi);
|
||||
header_packets = 0;
|
||||
decoder_initialized = false;
|
||||
active = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Feeds a Vorbis packet (header or audio) to the decoder
|
||||
*
|
||||
* The first three packets must be Vorbis headers. Subsequent packets are
|
||||
* audio. PCM output is written to the Print stream set via setOutput().
|
||||
*
|
||||
* @param data Pointer to packet data
|
||||
* @param len Length of packet data
|
||||
* @return Number of PCM bytes written to output
|
||||
*/
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
ogg_packet packet;
|
||||
packet.packet = (unsigned char *)data;
|
||||
packet.bytes = len;
|
||||
packet.b_o_s = (header_packets == 0) ? 1 : 0;
|
||||
packet.e_o_s = 0;
|
||||
packet.granulepos = 0;
|
||||
packet.packetno = header_packets;
|
||||
|
||||
if (num_header_packets == 0 && !decoder_initialized) {
|
||||
if (!initDecoder()) return 0;
|
||||
decoder_initialized = true;
|
||||
}
|
||||
if (header_packets < num_header_packets) {
|
||||
if (!parseHeaderPacket(packet, header_packets)) return 0;
|
||||
header_packets++;
|
||||
if (header_packets == num_header_packets) {
|
||||
if (!initDecoder()) return 0;
|
||||
decoder_initialized = true;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (header_packets == num_header_packets) {
|
||||
notifyAudioChange(audioInfo());
|
||||
}
|
||||
if (!decoder_initialized) return 0;
|
||||
return decodeAudioPacket(packet);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns audio stream info (sample rate, channels, bits per sample)
|
||||
* @return AudioInfo struct with stream parameters
|
||||
*/
|
||||
AudioInfo audioInfo() override {
|
||||
AudioInfo info;
|
||||
if (vi.channels > 0 && vi.rate > 0) {
|
||||
info.sample_rate = vi.rate;
|
||||
info.channels = vi.channels;
|
||||
info.bits_per_sample = 16;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns true if decoder is active
|
||||
*/
|
||||
operator bool() override { return active; }
|
||||
|
||||
protected:
|
||||
/** @brief Vorbis stream info (channels, sample rate, etc.) */
|
||||
vorbis_info vi{};
|
||||
/** @brief Vorbis comment metadata */
|
||||
vorbis_comment vc{};
|
||||
/** @brief Decoder state for synthesis */
|
||||
vorbis_dsp_state vd{};
|
||||
/** @brief Block structure for synthesis */
|
||||
vorbis_block vb{};
|
||||
/** @brief Output stream for PCM audio */
|
||||
Print *p_print = nullptr;
|
||||
/** @brief Decoder active state */
|
||||
bool active = false;
|
||||
/** @brief PCM output buffer size */
|
||||
size_t pcm_buffer_size = 256;
|
||||
/** @brief Number of Vorbis header packets */
|
||||
int num_header_packets = 3;
|
||||
/** @brief Buffer for interleaved PCM output */
|
||||
Vector<int16_t> pcmout_buffer;
|
||||
int header_packets = 0;
|
||||
bool decoder_initialized = false;
|
||||
/**
|
||||
* @brief Parses a Vorbis header packet
|
||||
* @param packet Ogg Vorbis header packet
|
||||
* @param header_packets Index of header packet (0, 1, 2)
|
||||
* @return true if successful
|
||||
*/
|
||||
bool parseHeaderPacket(ogg_packet &packet, int header_packets) {
|
||||
if (vorbis_synthesis_headerin(&vi, &vc, &packet) != 0) {
|
||||
LOGE("Header packet %d invalid", header_packets);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Initializes the Vorbis decoder after header parsing
|
||||
* @return true if successful
|
||||
*/
|
||||
bool initDecoder() {
|
||||
if (vorbis_synthesis_init(&vd, &vi) != 0) {
|
||||
LOGE("vorbis_synthesis_init failed");
|
||||
return false;
|
||||
}
|
||||
vorbis_block_init(&vd, &vb);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Decodes an audio packet and writes PCM to output
|
||||
* @param packet Ogg Vorbis audio packet
|
||||
* @return Number of PCM bytes written
|
||||
*/
|
||||
size_t decodeAudioPacket(ogg_packet &packet) {
|
||||
size_t total_written = 0;
|
||||
if (vorbis_synthesis(&vb, &packet) == 0) {
|
||||
vorbis_synthesis_blockin(&vd, &vb);
|
||||
float **pcm = nullptr;
|
||||
int samples = vorbis_synthesis_pcmout(&vd, &pcm);
|
||||
while (samples > 0 && pcm) {
|
||||
int chunk = (samples > pcm_buffer_size) ? pcm_buffer_size : samples;
|
||||
convertFloatToInt16PCM(pcm, chunk, vi.channels);
|
||||
if (!pcmout_buffer.empty() && p_print) {
|
||||
p_print->write((uint8_t *)pcmout_buffer.data(),
|
||||
pcmout_buffer.size() * sizeof(int16_t));
|
||||
total_written += pcmout_buffer.size() * sizeof(int16_t);
|
||||
pcmout_buffer.clear();
|
||||
}
|
||||
vorbis_synthesis_read(&vd, chunk);
|
||||
samples = vorbis_synthesis_pcmout(&vd, &pcm);
|
||||
}
|
||||
}
|
||||
return total_written;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Converts float PCM to interleaved int16 PCM and stores in
|
||||
* pcmout_buffer
|
||||
* @param pcm Pointer to float PCM array [channels][samples]
|
||||
* @param samples Number of samples
|
||||
* @param channels Number of channels
|
||||
*/
|
||||
void convertFloatToInt16PCM(float **pcm, int samples, int channels) {
|
||||
for (int i = 0; i < samples; ++i) {
|
||||
for (int ch = 0; ch < channels; ++ch) {
|
||||
float val = pcm[ch][i];
|
||||
int16_t sample = (int16_t)(val * 32767.0f);
|
||||
if (sample > 32767) sample = 32767;
|
||||
if (sample < -32768) sample = -32768;
|
||||
pcmout_buffer.push_back(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
#warning("obsolete: use AudioTools/Communication/A2DPStream.h")
|
||||
#include "AudioTools/Communication/A2DPStream.h"
|
||||
@@ -0,0 +1,421 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/AudioLibs/I2SCodecStream.h"
|
||||
#include "AudioTools/CoreAudio/AudioActions.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief New functionality which replaces the AudioKitStream that is based on
|
||||
* the legacy AudioKit library. This functionality uses the new
|
||||
* arduino-audio-driver library! It is the same as I2SCodecStream extended by
|
||||
* some AudioActions and some method calls to determine defined pin values.
|
||||
* See https://github.com/pschatzmann/arduino-audio-driver
|
||||
* @ingroup io
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioBoardStream : public I2SCodecStream {
|
||||
struct AudioBoardAction : public AudioActions::Action {
|
||||
AudioBoardAction(AudioBoard &board, AudioDriverKey key) {
|
||||
this->key = key;
|
||||
this->p_board = &board;
|
||||
}
|
||||
AudioDriverKey key;
|
||||
AudioBoard *p_board;
|
||||
int id() override { return key | 0x400; }
|
||||
bool readValue() override { return p_board->isKeyPressed(key); }
|
||||
};
|
||||
|
||||
public:
|
||||
/**
|
||||
* @brief Default constructor: for available AudioBoard values check
|
||||
* the audioboard variables in
|
||||
* https://pschatzmann.github.io/arduino-audio-driver/html/group__audio__driver.html
|
||||
* Further information can be found in
|
||||
* https://github.com/pschatzmann/arduino-audio-driver/wiki
|
||||
*/
|
||||
AudioBoardStream(audio_driver::AudioBoard &board) : I2SCodecStream(board) {
|
||||
// pin mode already set up by driver library
|
||||
actions.setPinMode(false);
|
||||
}
|
||||
|
||||
/// Starts the stream with the default configuration (delegates to I2SCodecStream)
bool begin() override { return I2SCodecStream::begin(); }
|
||||
|
||||
/// Starts the stream with the provided configuration (delegates to I2SCodecStream)
bool begin(I2SCodecConfig cfg) override { return I2SCodecStream::begin(cfg); }
|
||||
|
||||
/**
|
||||
* @brief Process input keys and pins
|
||||
*
|
||||
*/
|
||||
void processActions() {
|
||||
// TRACED();
|
||||
actions.processActions();
|
||||
delay(1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @brief Defines a new action that is executed when the Button is pressed
|
||||
*/
|
||||
void addAction(AudioDriverKey key, void (*action)(bool, int, void *),
|
||||
void *ref = nullptr) {
|
||||
AudioBoardAction *abo = new AudioBoardAction(board(), key);
|
||||
abo->actionOn = action;
|
||||
abo->ref = (ref == nullptr) ? this : ref;
|
||||
actions.add(*abo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines a new action that is executed when the Button is pressed and released
|
||||
*/
|
||||
void addAction(AudioDriverKey key, void (*actionOn)(bool, int, void *),
|
||||
void (*actionOff)(bool, int, void *),
|
||||
void *ref = nullptr) {
|
||||
|
||||
AudioBoardAction *abo = new AudioBoardAction(board(), key);
|
||||
abo->actionOn = actionOn;
|
||||
abo->actionOn = actionOff;
|
||||
abo->ref = (ref == nullptr) ? this : ref;
|
||||
actions.add(*abo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines a new action that is executed when the indicated pin is
|
||||
* active
|
||||
*
|
||||
* @param pin
|
||||
* @param action
|
||||
* @param ref
|
||||
*/
|
||||
void addAction(int pin, void (*action)(bool, int, void *),
|
||||
void *ref = nullptr) {
|
||||
TRACEI();
|
||||
// determine logic from config
|
||||
AudioActions::ActiveLogic activeLogic = getActionLogic(pin);
|
||||
actions.add(pin, action, activeLogic, ref == nullptr ? this : ref);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Defines a new action that is executed when the indicated pin is
|
||||
* active
|
||||
*
|
||||
* @param pin
|
||||
* @param action
|
||||
* @param activeLogic
|
||||
* @param ref
|
||||
*/
|
||||
void addAction(int pin, void (*action)(bool, int, void *),
|
||||
AudioActions::ActiveLogic activeLogic, void *ref = nullptr) {
|
||||
TRACEI();
|
||||
actions.add(pin, action, activeLogic, ref == nullptr ? this : ref);
|
||||
}
|
||||
|
||||
/// Provides access to the AudioActions
AudioActions &audioActions() { return actions; }

/// Provides access to the AudioActions (alias of audioActions())
AudioActions &getActions() { return actions; }
|
||||
|
||||
/**
|
||||
* @brief Relative volume control
|
||||
*
|
||||
* @param vol
|
||||
*/
|
||||
void incrementVolume(float inc) {
|
||||
float current_volume = getVolume();
|
||||
float new_volume = current_volume + inc;
|
||||
LOGI("incrementVolume: %f -> %f", current_volume, new_volume);
|
||||
setVolume(new_volume);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Increase the volume
|
||||
*
|
||||
*/
|
||||
static void actionVolumeUp(bool, int, void *ref) {
|
||||
TRACEI();
|
||||
AudioBoardStream *self = (AudioBoardStream *)ref;
|
||||
self->incrementVolume(+self->actionVolumeIncrementValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Decrease the volume
|
||||
*
|
||||
*/
|
||||
static void actionVolumeDown(bool, int, void *ref) {
|
||||
TRACEI();
|
||||
AudioBoardStream *self = (AudioBoardStream *)ref;
|
||||
self->incrementVolume(-self->actionVolumeIncrementValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Toggle start stop
|
||||
*
|
||||
*/
|
||||
static void actionStartStop(bool, int, void *ref) {
|
||||
TRACEI();
|
||||
AudioBoardStream *self = (AudioBoardStream *)ref;
|
||||
self->active = !self->active;
|
||||
self->setActive(self->active);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Start
|
||||
*
|
||||
*/
|
||||
static void actionStart(bool, int, void *ref) {
|
||||
TRACEI();
|
||||
AudioBoardStream *self = (AudioBoardStream *)ref;
|
||||
self->active = true;
|
||||
self->setActive(self->active);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Stop
|
||||
*/
|
||||
static void actionStop(bool, int, void *ref) {
|
||||
TRACEI();
|
||||
AudioBoardStream *self = (AudioBoardStream *)ref;
|
||||
self->active = false;
|
||||
self->setActive(self->active);
|
||||
}
|
||||
|
||||
/**
 * @brief Switch off the power amplifier (PA) when a headphone is plugged
 * in and switch it on again when the headphone is unplugged.
 * Intended to be registered as an action callback (e.g. on the headphone
 * detect pin); it is a no-op when the board defines no such pin.
 */
static void actionHeadphoneDetection(bool, int, void *ref) {
  AudioBoardStream *self = (AudioBoardStream *)ref;
  // only act when the board actually provides a headphone-detect pin
  if (self->pinHeadphoneDetect() >= 0) {
    // detect changes of the jack state
    bool isConnected = self->headphoneStatus();
    if (self->headphoneIsConnected != isConnected) {
      self->headphoneIsConnected = isConnected;

      // speaker amp is the inverse of the headphone state
      bool powerActive = !isConnected;
      LOGW("Headphone jack has been %s",
           isConnected ? "inserted" : "removed");
      self->setSpeakerActive(powerActive);
    }
  }
  delay(1);
}
|
||||
|
||||
/**
|
||||
* @brief Get the gpio number for auxin detection
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others gpio number
|
||||
*/
|
||||
GpioPin pinAuxin() { return getPinID(PinFunction::AUXIN_DETECT); }  // -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief Get the gpio number for headphone detection
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others gpio number
|
||||
*/
|
||||
GpioPin pinHeadphoneDetect() {
  // -1 if the board has no headphone-detect gpio
  return getPinID(PinFunction::HEADPHONE_DETECT);
}
|
||||
|
||||
/**
|
||||
* @brief Get the gpio number for PA enable
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others gpio number
|
||||
*/
|
||||
GpioPin pinPaEnable() { return getPinID(PinFunction::PA); }  // -1 if not defined
|
||||
|
||||
// /**
|
||||
// * @brief Get the gpio number for adc detection
|
||||
// *
|
||||
// * @return -1 non-existent
|
||||
// * Others gpio number
|
||||
// */
|
||||
// GpioPin pinAdcDetect() { return getPin(AUXIN_DETECT); }
|
||||
|
||||
/**
|
||||
* @brief Get the record-button id for adc-button
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others button id
|
||||
*/
|
||||
GpioPin pinInputRec() { return getPinID(PinFunction::KEY, 1); }  // key 1; -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief Get the number for mode-button
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others number
|
||||
*/
|
||||
GpioPin pinInputMode() { return getPinID(PinFunction::KEY, 2); }  // key 2; -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief Get number for set function
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others number
|
||||
*/
|
||||
GpioPin pinInputSet() { return getPinID(PinFunction::KEY, 4); }  // key 4; -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief Get number for play function
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others number
|
||||
*/
|
||||
GpioPin pinInputPlay() { return getPinID(PinFunction::KEY, 3); }  // key 3; -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief number for volume up function
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others number
|
||||
*/
|
||||
GpioPin pinVolumeUp() { return getPinID(PinFunction::KEY, 6); }  // key 6; -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief Get number for volume down function
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others number
|
||||
*/
|
||||
GpioPin pinVolumeDown() { return getPinID(PinFunction::KEY, 5); }  // key 5; -1 if not defined
|
||||
|
||||
/**
|
||||
* @brief Get LED pin
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others gpio number
|
||||
*/
|
||||
GpioPin pinLed(int idx) { return getPinID(PinFunction::LED, idx); }  // LED idx; -1 if not defined
|
||||
|
||||
/// the same as setPAPower()
|
||||
void setSpeakerActive(bool active) { setPAPower(active); }  // forwards to setPAPower()
|
||||
|
||||
/**
|
||||
* @brief Returns true if the headphone was detected
|
||||
*
|
||||
* @return true
|
||||
* @return false
|
||||
*/
|
||||
bool headphoneStatus() {
|
||||
int headphoneGpioPin = pinHeadphoneDetect();
|
||||
return headphoneGpioPin > 0 ? !digitalRead(headphoneGpioPin) : false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief The oposite of setMute(): setActive(true) calls setMute(false)
|
||||
*/
|
||||
void setActive(bool active) { setMute(!active); }  // setActive(true) == setMute(false)
|
||||
|
||||
/// add start/stop on inputMode
|
||||
void addStartStopAction() {
  // pin conflicts for pinInputMode() with the SD CS pin for AIThinker and
  // buttons
  int sd_cs = getSdCsPin();
  int input_mode = pinInputMode();
  // register only when the mode key exists and does not collide with an
  // active SD card's chip-select line
  if (input_mode != -1 && (input_mode != sd_cs || !cfg.sd_active)) {
    LOGD("actionInputMode")
    addAction(input_mode, actionStartStop);
  }
}
|
||||
|
||||
/// add volume up and volume down action
|
||||
void addVolumeActions() {
  // pin conflicts with SD Lyrat SD CS GpioPin and buttons / Conflict on
  // Audiokit V. 2957
  int sd_cs = getSdCsPin();
  int vol_up = pinVolumeUp();
  int vol_down = pinVolumeDown();
  // register only when both keys exist and neither collides with an
  // active SD card's chip-select line
  if ((vol_up != -1 && vol_down != -1) &&
      (!cfg.sd_active || (vol_down != sd_cs && vol_up != sd_cs))) {
    LOGD("actionVolumeDown")
    addAction(vol_down, actionVolumeDown);
    LOGD("actionVolumeUp")
    addAction(vol_up, actionVolumeUp);
  } else {
    // NOTE: also taken when the keys are simply undefined (-1), not only on
    // a real conflict
    LOGW("Volume Buttons ignored because of conflict: %d ", pinVolumeDown());
  }
}
|
||||
|
||||
/// Adds headphone determination
|
||||
void addHeadphoneDetectionAction() {
  // pin conflicts with AIThinker A101: key6 and headphone detection
  int head_phone = pinHeadphoneDetect();
  if (head_phone != -1 && (getPinID(PinFunction::KEY, 6) != head_phone)) {
    // trigger on any level change so that both plug and unplug are reported
    actions.add(head_phone, actionHeadphoneDetection,
                AudioActions::ActiveChange, this);
  }
}
|
||||
|
||||
/**
|
||||
* @brief Setup the supported default actions (volume, start/stop, headphone
|
||||
* detection)
|
||||
*/
|
||||
void addDefaultActions() {
  TRACEI();
  // registers headphone plug detection, start/stop key and volume keys
  addHeadphoneDetectionAction();
  addStartStopAction();
  addVolumeActions();
}
|
||||
|
||||
/// Defines the increment value used by actionVolumeDown/actionVolumeUp
|
||||
void setActionVolumeIncrementValue(float value) {
  // step applied per key press by actionVolumeUp/actionVolumeDown
  action_increment_value = value;
}
|
||||
|
||||
float actionVolumeIncrementValue() { return action_increment_value; }  // current volume step
|
||||
|
||||
bool isKeyPressed(int key) {
  // no board driver available -> no keys can be pressed
  if (!board()) return false;
  return board().isKeyPressed(key);
}
|
||||
|
||||
protected:
|
||||
AudioActions actions;
|
||||
bool headphoneIsConnected = false;
|
||||
bool active = true;
|
||||
float action_increment_value = 0.02;
|
||||
|
||||
int getSdCsPin() {
  // Determines the SD chip-select pin; result is cached in a function-local
  // static so the lookup (and the sd_active side effect) runs only once.
  static GpioPin sd_cs = -2;  // -2 == not determined yet
  // execute only once
  if (sd_cs != -2) return sd_cs;

  auto sd_opt = getPins().getSPIPins(PinFunction::SD);
  if (sd_opt) {
    sd_cs = sd_opt.value().cs;
  } else {
    // no spi -> no sd: also disables sd usage in the configuration
    LOGI("No sd defined -> sd_active=false")
    cfg.sd_active = false;
    sd_cs = -1;
  }
  return sd_cs;
}
|
||||
|
||||
/// Determines the action logic (ActiveLow or ActiveTouch) for the pin
|
||||
AudioActions::ActiveLogic getActionLogic(int pin) {
  // look up how this pin was declared; fall back to plain Input
  PinLogic logic = PinLogic::Input;
  auto opt = board().getPins().getPin(pin);
  if (opt) logic = opt.value().pin_logic;

  // map the declared pin logic onto the action trigger logic
  if (logic == PinLogic::InputActiveHigh) return AudioActions::ActiveHigh;
  if (logic == PinLogic::InputActiveTouch) return AudioActions::ActiveTouch;
  // Input, InputActiveLow and anything else are treated as active-low buttons
  return AudioActions::ActiveLow;
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
#WARNING("Obsolete: Use AudioTools/Communication/AudioClientRTSP555.h")
|
||||
#include "AudioTools/Communication/AudioClientRTSP555.h"
|
||||
138
libraries/audio-tools/src/AudioTools/AudioLibs/AudioCmsisFFT.h
Normal file
138
libraries/audio-tools/src/AudioTools/AudioLibs/AudioCmsisFFT.h
Normal file
@@ -0,0 +1,138 @@
|
||||
#pragma once
|
||||
#include "AudioFFT.h"
|
||||
#ifdef STM32
|
||||
# include "CMSIS_DSP.h"
|
||||
#endif
|
||||
#if defined(ARDUINO_ARCH_RENESAS) || defined(ARDUINO_ARCH_RP2040)
|
||||
# include "arm_vec_fft.h"
|
||||
#endif
|
||||
/**
|
||||
* @defgroup fft-cmsis CMSIS
|
||||
* @ingroup fft
|
||||
* @brief FFT using CMSIS
|
||||
**/
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Driver for Cmsis-FFT see https://arm-software.github.io/CMSIS_5/DSP
|
||||
* @ingroup fft-cmsis
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FFTDriverCmsisFFT : public FFTDriver {
|
||||
public:
|
||||
bool begin(int len) override {
|
||||
TRACEI();
|
||||
this->len = len;
|
||||
input = new float[len];
|
||||
output = new float[len*2];
|
||||
output_magn = new float[len];
|
||||
status = arm_rfft_fast_init_f32(&fft_instance, len);
|
||||
if (status!=ARM_MATH_SUCCESS){
|
||||
LOGE("arm_rfft_fast_init_f32: %d", status);
|
||||
}
|
||||
assert(input!=nullptr);
|
||||
assert(output!=nullptr);
|
||||
assert(output_magn != nullptr);
|
||||
return input!=nullptr && output != nullptr && output_magn != nullptr;
|
||||
}
|
||||
void end()override{
|
||||
TRACEI();
|
||||
if (input!=nullptr) delete input;
|
||||
if (output!=nullptr) delete output;
|
||||
if (output_magn!=nullptr) delete output_magn;
|
||||
input = nullptr;
|
||||
output = nullptr;
|
||||
output_magn = nullptr;
|
||||
}
|
||||
|
||||
void setValue(int idx, float value) override{
|
||||
input[idx] = value;
|
||||
}
|
||||
|
||||
void fft() override {
|
||||
TRACED();
|
||||
arm_rfft_fast_f32(&fft_instance, input, output, false);
|
||||
arm_cmplx_mag_f32(output, output_magn, len / 2);
|
||||
/* Calculates maxValue and returns corresponding BIN value */
|
||||
arm_max_f32(output_magn, len / 2, &result_max_value, &result_index);
|
||||
TRACED();
|
||||
};
|
||||
|
||||
void rfft() override {
|
||||
arm_rfft_fast_f32(&fft_instance, output, input, true);
|
||||
}
|
||||
|
||||
float magnitude(int idx) override {
|
||||
return output_magn[idx];
|
||||
}
|
||||
|
||||
/// same as magnitude
|
||||
float magnitudeFast(int idx) override {
|
||||
return output_magn[idx];
|
||||
}
|
||||
|
||||
float getValue(int idx) override { return input[idx];}
|
||||
|
||||
bool setBin(int pos, float real, float img) override {
|
||||
if (pos>=len) return false;
|
||||
output[pos*2] = real;
|
||||
output[pos*2+1] = img;
|
||||
return true;
|
||||
}
|
||||
bool getBin(int pos, FFTBin &bin) override {
|
||||
if (pos>=len) return false;
|
||||
bin.real = output[pos*2];
|
||||
bin.img = output[pos*2+1];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isReverseFFT() override {return true;}
|
||||
|
||||
bool isValid() override{ return status==ARM_MATH_SUCCESS; }
|
||||
|
||||
arm_rfft_fast_instance_f32 fft_instance;
|
||||
arm_status status;
|
||||
int len;
|
||||
float *input=nullptr;
|
||||
float *output_magn=nullptr;
|
||||
float *output=nullptr;
|
||||
float result_max_value;
|
||||
uint32_t result_index = 0;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief AudioFFT for ARM processors that provided Cmsis DSP
|
||||
* @ingroup fft-cmsis
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioCmsisFFT : public AudioFFTBase {
|
||||
public:
|
||||
AudioCmsisFFT():AudioFFTBase(new FFTDriverCmsisFFT()) {}
|
||||
|
||||
/// Provides the result array returned by CMSIS FFT
|
||||
float* array() {
|
||||
return driverEx()->output;
|
||||
}
|
||||
|
||||
float* magnitudes() {
|
||||
return driverEx()->output_magn;
|
||||
}
|
||||
|
||||
AudioFFTResult result() {
|
||||
AudioFFTResult ret_value;
|
||||
ret_value.magnitude = driverEx()->result_max_value;
|
||||
ret_value.bin = driverEx()->result_index;
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
FFTDriverCmsisFFT* driverEx() {
|
||||
return (FFTDriverCmsisFFT*)driver();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioFFT.h"
|
||||
#include "fft.h"
|
||||
|
||||
/**
|
||||
* @defgroup fft-esp32 esp32-fft
|
||||
* @ingroup fft
|
||||
* @brief FFT using esp32-fft
|
||||
**/
|
||||
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Driver for ESP32-FFT https://github.com/pschatzmann/esp32-fft
|
||||
* @ingroup fft-esp32
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FFTDriverESP32FFT : public FFTDriver {
|
||||
public:
|
||||
bool begin(int len) override {
|
||||
this->len = len;
|
||||
if (p_fft_object==nullptr) p_fft_object = fft_init(len, FFT_REAL, FFT_FORWARD, NULL, NULL);
|
||||
assert(p_fft_object!=nullptr);
|
||||
return p_fft_object!=nullptr;
|
||||
}
|
||||
void end()override{
|
||||
if (p_fft_object!=nullptr) fft_destroy(p_fft_object);
|
||||
}
|
||||
void setValue(int idx, float value) override{
|
||||
p_fft_object->input[idx] = value;
|
||||
}
|
||||
|
||||
void fft() override{
|
||||
fft_execute(p_fft_object);
|
||||
};
|
||||
|
||||
void rfft() override {
|
||||
irfft(p_fft_object->input, p_fft_object->output, p_fft_object->twiddle_factors, p_fft_object->size);
|
||||
}
|
||||
|
||||
float magnitude(int idx) override {
|
||||
return sqrt(magnitudeFast(idx));
|
||||
}
|
||||
|
||||
/// magnitude w/o sqrt
|
||||
float magnitudeFast(int idx) override {
|
||||
return (pow(p_fft_object->output[2*idx],2) + pow(p_fft_object->output[2*idx+1],2));
|
||||
}
|
||||
|
||||
float getValue(int idx) { return p_fft_object->input[idx];}
|
||||
|
||||
bool setBin(int pos, float real, float img) override {
|
||||
if (pos>=len) return false;
|
||||
p_fft_object->output[2*pos] = real;
|
||||
p_fft_object->output[2*pos+1] = img;
|
||||
return true;
|
||||
}
|
||||
bool getBin(int pos, FFTBin &bin) override {
|
||||
if (pos>=len) return false;
|
||||
bin.real = p_fft_object->output[2*pos];
|
||||
bin.img = p_fft_object->output[2*pos+1];
|
||||
return true;
|
||||
}
|
||||
|
||||
bool isReverseFFT() override {return true;}
|
||||
|
||||
bool isValid() override{ return p_fft_object!=nullptr; }
|
||||
|
||||
fft_config_t *p_fft_object=nullptr;
|
||||
int len;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief AudioFFT using https://github.com/pschatzmann/esp32-fft
|
||||
* @ingroup fft-esp32
|
||||
* @author Phil Schatzmann
|
||||
* Warning: This does not work as expected yet: I did not get the expected results...
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioESP32FFT : public AudioFFTBase {
|
||||
public:
|
||||
AudioESP32FFT():AudioFFTBase(new FFTDriverESP32FFT()) {}
|
||||
|
||||
/// Provides the result array returned by the FFT: The real part of a magnitude at a frequency is followed by the corresponding imaginary part in the output*/
|
||||
float* array() {
|
||||
return driverEx()->p_fft_object->output;
|
||||
}
|
||||
|
||||
FFTDriverESP32FFT* driverEx() {
|
||||
return (FFTDriverESP32FFT*)driver();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
363
libraries/audio-tools/src/AudioTools/AudioLibs/AudioESP32ULP.h
Normal file
363
libraries/audio-tools/src/AudioTools/AudioLibs/AudioESP32ULP.h
Normal file
@@ -0,0 +1,363 @@
|
||||
/**
|
||||
* @file AudioEsp32ULP.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief Outputs to ESP32 DAC through the ULP, freeing I2S for other uses
|
||||
* @version 0.1
|
||||
* @date 2023-03-26
|
||||
* @copyright (C) 2020 Martin Laclaustra, based on bitluni's code
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#ifndef ESP32
|
||||
#error Only the ESP32 supports ULP audio output
|
||||
#endif
|
||||
#include "AudioLogger.h"
|
||||
#include "AudioTools/CoreAudio/AudioTypes.h"
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
#include <driver/dac.h>
|
||||
#include <driver/rtc_io.h>
|
||||
#include <esp32/ulp.h>
|
||||
#include <math.h>
|
||||
#include <soc/rtc.h>
|
||||
#include "soc/rtc_io_reg.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
enum UlpDac { ULP_DAC1 = 1, ULP_DAC2 = 2 };
|
||||
|
||||
/**
|
||||
* @brief Outputs to ESP32 DAC through the ULP (Ultra> Low Power coprocessor),
|
||||
* freeing I2S for other uses. Connect left channel on pin 25 Connect right
|
||||
* channel on pin 26
|
||||
* @ingroup io
|
||||
* @version 0.1
|
||||
* @date 2023-03-26
|
||||
* @copyright (C) 2020 Martin Laclaustra, based on bitluni's code
|
||||
*/
|
||||
class AudioESP32ULP : public AudioOutput {
|
||||
public:
|
||||
AudioInfo defaultConfig() {
|
||||
AudioInfo cfg(44100, 2, 16);
|
||||
return cfg;
|
||||
}
|
||||
|
||||
/// Selects the DAC when we have a mono signal
|
||||
void setMonoDAC(UlpDac dac){
|
||||
selected_mono_dac = dac;
|
||||
}
|
||||
|
||||
/// Selects the limit for the availableForWrite to report the data
|
||||
void setMinWriteBytes(int bytes){
|
||||
min_write_bytes = bytes;
|
||||
}
|
||||
|
||||
/// Starts the processing. I the output is mono, we can determine the output pin by selecting DAC1 (gpio25) or DAC2 (gpio26)
|
||||
bool begin(AudioInfo info) {
|
||||
TRACEI();
|
||||
cfg = info;
|
||||
stereoOutput = info.channels == 2;
|
||||
activeDACs = stereoOutput ? 3 : selected_mono_dac;
|
||||
hertz = cfg.sample_rate;
|
||||
|
||||
if (info.bits_per_sample != 16) {
|
||||
LOGE("Unsupported bits_per_sample: %d", info.bits_per_sample);
|
||||
return false;
|
||||
}
|
||||
return setup();
|
||||
}
|
||||
|
||||
size_t write(const uint8_t *data, size_t len) {
|
||||
TRACED();
|
||||
int16_t *data_16 = (int16_t *)data;
|
||||
size_t result = 0;
|
||||
int16_t stereo[2];
|
||||
int frameSize = cfg.channels * sizeof(int16_t);
|
||||
int frames = len / frameSize;
|
||||
for (int j = 0; j < frames; j++) {
|
||||
int pos = j * cfg.channels;
|
||||
stereo[0] = data_16[pos];
|
||||
stereo[1] = stereoOutput ? data_16[pos + 1] : data_16[pos];
|
||||
// blocking write
|
||||
while (!writeFrame(stereo)) {
|
||||
delay(20);
|
||||
}
|
||||
result += frameSize;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int availableForWrite() {
|
||||
int result = totalSampleWords-lastFilledWord;
|
||||
return result < min_write_bytes ? 0 : result;
|
||||
}
|
||||
|
||||
void end() {
|
||||
TRACEI();
|
||||
const ulp_insn_t stopulp[] = {// stop the timer
|
||||
I_END(),
|
||||
// end the program
|
||||
I_HALT()};
|
||||
|
||||
size_t load_addr = 0;
|
||||
size_t size = sizeof(stopulp) / sizeof(ulp_insn_t);
|
||||
ulp_process_macros_and_load(load_addr, stopulp, &size);
|
||||
|
||||
// start
|
||||
ulp_run(0);
|
||||
|
||||
if (activeDACs & 1) {
|
||||
dac_output_voltage(DAC_CHANNEL_1, 128);
|
||||
}
|
||||
if (activeDACs & 2) {
|
||||
dac_output_voltage(DAC_CHANNEL_2, 128);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
int lastFilledWord = 0;
|
||||
int hertz;
|
||||
int min_write_bytes = 128;
|
||||
UlpDac selected_mono_dac = ULP_DAC1;
|
||||
uint8_t bufferedOddSample = 128;
|
||||
bool waitingOddSample = true; // must be set to false for mono output
|
||||
int activeDACs = 3; // 1:DAC1; 2:DAC2; 3:both;
|
||||
bool stereoOutput = true;
|
||||
const int opcodeCount = 20;
|
||||
const uint32_t dacTableStart1 = 2048 - 512;
|
||||
const uint32_t dacTableStart2 = dacTableStart1 - 512;
|
||||
uint32_t totalSampleWords =
|
||||
2048 - 512 - 512 - (opcodeCount + 1); // add 512 for mono
|
||||
const int totalSamples = totalSampleWords * 2;
|
||||
const uint32_t indexAddress = opcodeCount;
|
||||
const uint32_t bufferStart = indexAddress + 1;
|
||||
|
||||
bool setup() {
|
||||
TRACED();
|
||||
if (!stereoOutput) {
|
||||
waitingOddSample = false;
|
||||
// totalSampleWords += 512;
|
||||
// dacTableStart2 = dacTableStart1;
|
||||
}
|
||||
|
||||
// calculate the actual ULP clock
|
||||
unsigned long rtc_8md256_period = rtc_clk_cal(RTC_CAL_8MD256, 1000);
|
||||
unsigned long rtc_fast_freq_hz =
|
||||
1000000ULL * (1 << RTC_CLK_CAL_FRACT) * 256 / rtc_8md256_period;
|
||||
|
||||
// initialize DACs
|
||||
if (activeDACs & 1) {
|
||||
dac_output_enable(DAC_CHANNEL_1);
|
||||
dac_output_voltage(DAC_CHANNEL_1, 128);
|
||||
}
|
||||
if (activeDACs & 2) {
|
||||
dac_output_enable(DAC_CHANNEL_2);
|
||||
dac_output_voltage(DAC_CHANNEL_2, 128);
|
||||
}
|
||||
|
||||
int retAddress1 = 9;
|
||||
int retAddress2 = 14;
|
||||
|
||||
int loopCycles = 134;
|
||||
int loopHalfCycles1 = 90;
|
||||
int loopHalfCycles2 = 44;
|
||||
|
||||
LOGI("Real RTC clock: %d", rtc_fast_freq_hz);
|
||||
|
||||
uint32_t dt = (rtc_fast_freq_hz / hertz) - loopCycles;
|
||||
uint32_t dt2 = 0;
|
||||
if (!stereoOutput) {
|
||||
dt = (rtc_fast_freq_hz / hertz) - loopHalfCycles1;
|
||||
dt2 = (rtc_fast_freq_hz / hertz) - loopHalfCycles2;
|
||||
}
|
||||
|
||||
LOGI("dt: %d", dt);
|
||||
LOGI("dt2: %d", dt2);
|
||||
|
||||
const ulp_insn_t stereo[] = {
|
||||
// reset offset register
|
||||
I_MOVI(R3, 0),
|
||||
// delay to get the right sampling rate
|
||||
I_DELAY(dt), // 6 + dt
|
||||
// reset sample index
|
||||
I_MOVI(R0, 0), // 6
|
||||
// write the index back to memory for the main cpu
|
||||
I_ST(R0, R3, indexAddress), // 8
|
||||
// load the samples
|
||||
I_LD(R1, R0, bufferStart), // 8
|
||||
// mask the lower 8 bits
|
||||
I_ANDI(R2, R1, 0x00ff), // 6
|
||||
// multiply by 2
|
||||
I_LSHI(R2, R2, 1), // 6
|
||||
// add start position
|
||||
I_ADDI(R2, R2, dacTableStart1), // 6
|
||||
// jump to the dac opcode
|
||||
I_BXR(R2), // 4
|
||||
// back from first dac
|
||||
// delay between the two samples in mono rendering
|
||||
I_DELAY(dt2), // 6 + dt2
|
||||
// mask the upper 8 bits
|
||||
I_ANDI(R2, R1, 0xff00), // 6
|
||||
// shift the upper bits to right and multiply by 2
|
||||
I_RSHI(R2, R2, 8 - 1), // 6
|
||||
// add start position of second dac table
|
||||
I_ADDI(R2, R2, dacTableStart2), // 6
|
||||
// jump to the dac opcode
|
||||
I_BXR(R2), // 4
|
||||
// here we get back from writing the second sample
|
||||
// load 0x8080 as sample
|
||||
I_MOVI(R1, 0x8080), // 6
|
||||
// write 0x8080 in the sample buffer
|
||||
I_ST(R1, R0, indexAddress), // 8
|
||||
// increment the sample index
|
||||
I_ADDI(R0, R0, 1), // 6
|
||||
// if reached end of the buffer, jump relative to index reset
|
||||
I_BGE(-16, totalSampleWords), // 4
|
||||
// wait to get the right sample rate (2 cycles more to compensate the
|
||||
// index reset)
|
||||
I_DELAY((unsigned int)dt + 2), // 8 + dt
|
||||
// if not, jump absolute to where index is written to memory
|
||||
I_BXI(3) // 4
|
||||
};
|
||||
// write io and jump back another 12 + 4 + 12 + 4
|
||||
|
||||
size_t load_addr = 0;
|
||||
size_t size = sizeof(stereo) / sizeof(ulp_insn_t);
|
||||
ulp_process_macros_and_load(load_addr, stereo, &size);
|
||||
// this is how to get the opcodes
|
||||
// for(int i = 0; i < size; i++)
|
||||
// Serial.println(RTC_SLOW_MEM[i], HEX);
|
||||
|
||||
// create DAC opcode tables
|
||||
switch (activeDACs) {
|
||||
case 1:
|
||||
for (int i = 0; i < 256; i++) {
|
||||
RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
|
||||
RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac1: 0x1D4C0121 | (i << 10)
|
||||
RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
|
||||
create_I_BXI(retAddress1); // 0x80000000 + retAddress1 * 4
|
||||
RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
|
||||
RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac2: 0x1D4C0122 | (i << 10)
|
||||
RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
|
||||
create_I_BXI(retAddress2); // 0x80000000 + retAddress2 * 4
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
for (int i = 0; i < 256; i++) {
|
||||
RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
|
||||
RTC_IO_PAD_DAC2_REG, 19, 26, i); // dac1: 0x1D4C0121 | (i << 10)
|
||||
RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
|
||||
create_I_BXI(retAddress1); // 0x80000000 + retAddress1 * 4
|
||||
RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
|
||||
RTC_IO_PAD_DAC2_REG, 19, 26, i); // dac2: 0x1D4C0122 | (i << 10)
|
||||
RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
|
||||
create_I_BXI(retAddress2); // 0x80000000 + retAddress2 * 4
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (int i = 0; i < 256; i++) {
|
||||
RTC_SLOW_MEM[dacTableStart1 + i * 2] = create_I_WR_REG(
|
||||
RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac1: 0x1D4C0121 | (i << 10)
|
||||
RTC_SLOW_MEM[dacTableStart1 + 1 + i * 2] =
|
||||
create_I_BXI(retAddress1); // 0x80000000 + retAddress1 * 4
|
||||
RTC_SLOW_MEM[dacTableStart2 + i * 2] = create_I_WR_REG(
|
||||
RTC_IO_PAD_DAC1_REG, 19, 26, i); // dac2: 0x1D4C0122 | (i << 10)
|
||||
RTC_SLOW_MEM[dacTableStart2 + 1 + i * 2] =
|
||||
create_I_BXI(retAddress2); // 0x80000000 + retAddress2 * 4
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// set all samples to 128 (silence)
|
||||
for (int i = 0; i < totalSampleWords; i++)
|
||||
RTC_SLOW_MEM[bufferStart + i] = 0x8080;
|
||||
|
||||
// start
|
||||
RTC_SLOW_MEM[indexAddress] = 0;
|
||||
ulp_run(0);
|
||||
|
||||
// wait until ULP starts using samples and the index of output sample
|
||||
// advances
|
||||
while (RTC_SLOW_MEM[indexAddress] == 0)
|
||||
delay(1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool writeFrame(int16_t sample[2]) {
|
||||
TRACED();
|
||||
int16_t ms[2];
|
||||
ms[0] = sample[0];
|
||||
ms[1] = sample[1];
|
||||
|
||||
// TODO: needs improvement (counting is different here with respect to ULP
|
||||
// code)
|
||||
int currentSample = RTC_SLOW_MEM[indexAddress] & 0xffff;
|
||||
int currentWord = currentSample >> 1;
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
ms[i] = ((ms[i] >> 8) + 128) & 0xff;
|
||||
}
|
||||
if (!stereoOutput) // mix both channels
|
||||
ms[0] =
|
||||
(uint16_t)(((uint32_t)((int32_t)(ms[0]) + (int32_t)(ms[1])) >> 1) &
|
||||
0xff);
|
||||
|
||||
if (waitingOddSample) { // always true for stereo because samples are
|
||||
// consumed in pairs
|
||||
if (lastFilledWord !=
|
||||
currentWord) // accept sample if writing index lastFilledWord has not
|
||||
// reached index of output sample
|
||||
{
|
||||
unsigned int w;
|
||||
if (stereoOutput) {
|
||||
w = ms[0];
|
||||
w |= ms[1] << 8;
|
||||
} else {
|
||||
w = bufferedOddSample;
|
||||
w |= ms[0] << 8;
|
||||
bufferedOddSample = 128;
|
||||
waitingOddSample = false;
|
||||
}
|
||||
RTC_SLOW_MEM[bufferStart + lastFilledWord] = w;
|
||||
lastFilledWord++;
|
||||
if (lastFilledWord == totalSampleWords)
|
||||
lastFilledWord = 0;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
bufferedOddSample = ms[0];
|
||||
waitingOddSample = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t create_I_WR_REG(uint32_t reg, uint32_t low_bit, uint32_t high_bit,
|
||||
uint32_t val) {
|
||||
typedef union {
|
||||
ulp_insn_t ulp_ins;
|
||||
uint32_t ulp_bin;
|
||||
} ulp_union;
|
||||
const ulp_insn_t singleinstruction[] = {
|
||||
I_WR_REG(reg, low_bit, high_bit, val)};
|
||||
ulp_union recover_ins;
|
||||
recover_ins.ulp_ins = singleinstruction[0];
|
||||
return (uint32_t)(recover_ins.ulp_bin);
|
||||
}
|
||||
|
||||
uint32_t create_I_BXI(uint32_t imm_pc) {
|
||||
typedef union {
|
||||
ulp_insn_t ulp_ins;
|
||||
uint32_t ulp_bin;
|
||||
} ulp_union;
|
||||
const ulp_insn_t singleinstruction[] = {I_BXI(imm_pc)};
|
||||
ulp_union recover_ins;
|
||||
recover_ins.ulp_ins = singleinstruction[0];
|
||||
return (uint32_t)(recover_ins.ulp_bin);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
100
libraries/audio-tools/src/AudioTools/AudioLibs/AudioESP8266.h
Normal file
100
libraries/audio-tools/src/AudioTools/AudioLibs/AudioESP8266.h
Normal file
@@ -0,0 +1,100 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "AudioTools/CoreAudio/Buffers.h"
|
||||
#include "AudioOutput.h"
|
||||
#include "SoundData.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief ESP8266Audio AudioOutput class which stores the data in a temporary
|
||||
* buffer. The buffer can be consumed e.g. by a callback function by calling
|
||||
* read();
|
||||
* Dependencies: ESP8266Audio Library
|
||||
* Dependencies: ESP32-A2DP Library
|
||||
*
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioOutputWithCallback : public ::AudioOutput, public BufferedStream {
|
||||
public:
|
||||
// Default constructor
|
||||
AudioOutputWithCallback(int bufferSize, int bufferCount)
|
||||
: BufferedStream(bufferSize) {
|
||||
callback_buffer_ptr = new NBuffer<Frame>(bufferSize, bufferCount);
|
||||
}
|
||||
|
||||
virtual ~AudioOutputWithCallback() { delete callback_buffer_ptr; }
|
||||
|
||||
/// Activates the output
|
||||
virtual bool begin() {
|
||||
active = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// puts the sample into a buffer
|
||||
virtual bool ConsumeSample(int16_t sample[2]) {
|
||||
Frame c;
|
||||
c.channel1 = sample[0];
|
||||
c.channel2 = sample[1];
|
||||
return callback_buffer_ptr->write(c);
|
||||
};
|
||||
|
||||
/// stops the processing
|
||||
virtual bool stop() {
|
||||
active = false;
|
||||
return true;
|
||||
};
|
||||
|
||||
/// Provides the data from the internal buffer to the callback
|
||||
size_t read(Frame *src, size_t len) {
|
||||
return active ? this->callback_buffer_ptr->readArray(src, len) : 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
NBuffer<Frame> *callback_buffer_ptr;
|
||||
bool active;
|
||||
|
||||
virtual size_t writeExt(const uint8_t *data, size_t len) {
|
||||
return callback_buffer_ptr->writeArray((Frame *)data, len / sizeof(Frame));
|
||||
}
|
||||
|
||||
virtual size_t readExt(uint8_t *data, size_t len) {
|
||||
return callback_buffer_ptr->readArray((Frame *)data, len / sizeof(Frame));
|
||||
;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Stream Adapter for ESP8288-Audio AudioOutput
|
||||
*
|
||||
*/
|
||||
class ESP3288AudioOutput : public AudioStream {
 public:
  // NOTE(review): the class name looks like a typo for "ESP8266AudioOutput";
  // kept because renaming would break existing callers
  ESP3288AudioOutput(::AudioOutput &out, int channels) {
    p_out = &out;
    this->channels = channels;
  }

  // Forwards 16-bit PCM data to the wrapped ESP8266Audio output.
  // Only stereo (2 channels) is supported; returns 0 otherwise.
  virtual size_t write(const uint8_t *data, size_t len) {
    size_t result = 0;
    int16_t *v = (int16_t *)data;
    if (channels == 2) {
      // len is in bytes, so len / 2 is the number of int16 samples
      // NOTE(review): confirm ConsumeSamples() expects a sample count and not
      // a frame count - for stereo frames that would be len / 4
      result = p_out->ConsumeSamples(v, len / 2);
    } else {
      LOGE("Only 2 Channels are supported");
      result = 0;
    }
    return result;
  }

 protected:
  ::AudioOutput *p_out = nullptr;  // borrowed, not owned
  int channels;
};
|
||||
} // namespace audio_tools
|
||||
1593
libraries/audio-tools/src/AudioTools/AudioLibs/AudioEffectsSuite.h
Normal file
1593
libraries/audio-tools/src/AudioTools/AudioLibs/AudioEffectsSuite.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,151 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioFFT.h"
|
||||
#include "esp_dsp.h"
|
||||
|
||||
/**
|
||||
* @defgroup fft-dsp esp32-dsp
|
||||
* @ingroup fft
|
||||
* @brief FFT using esp32 esp-dsp library
|
||||
**/
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief fft Driver for espressif dsp library: https://espressif-docs.readthedocs-hosted.com/projects/esp-dsp/en/latest/esp-dsp-apis.html
|
||||
* @ingroup fft-dsp
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FFTDriverEspressifFFT : public FFTDriver {
 public:
  // Allocates the interleaved complex work buffer (re,im per bin) and the
  // twiddle-factor table, then initializes the esp-dsp library.
  bool begin(int len) override {
    this->len = len;
    int alloc_size = len * 2;  // complex: 2 floats per sample
    fft_data.resize(alloc_size);
    table_buffer.resize(CONFIG_DSP_MAX_FFT_SIZE);
    assert(table_buffer.data() != nullptr);
    assert(fft_data.data() != nullptr);
    ret = dsps_fft2r_init_fc32(table_buffer.data(), CONFIG_DSP_MAX_FFT_SIZE);
    if (ret != ESP_OK){
      LOGE("dsps_fft2r_init_fc32 %d", ret);
    }
    return fft_data.data()!=nullptr && ret == ESP_OK;
  }

  // Deinitializes esp-dsp and releases the buffers
  void end() override {
    dsps_fft2r_deinit_fc32();
    fft_data.resize(0);
    table_buffer.resize(0);
  }

  // Stores a real input value (imaginary part is cleared)
  void setValue(int idx, float value) override {
    if (idx<len){
      fft_data[idx*2] = value;
      fft_data[idx*2 + 1] = 0.0f;
    }
  }

  // Provides the real part at the indicated position
  float getValue(int idx) override { return fft_data[idx * 2]; }

  // Forward FFT: in-place fft, then bit-reversal and conversion of the
  // packed complex spectrum (esp-dsp processing order)
  void fft() override {
    ret = dsps_fft2r_fc32(fft_data.data(), len);
    if (ret != ESP_OK){
      LOGE("dsps_fft2r_fc32 %d", ret);
    }
    // Bit reverse
    ret = dsps_bit_rev_fc32(fft_data.data(), len);
    if (ret != ESP_OK){
      LOGE("dsps_bit_rev_fc32 %d", ret);
    }
    // Convert one complex vector to two complex vectors
    ret = dsps_cplx2reC_fc32(fft_data.data(), len);
    if (ret != ESP_OK){
      LOGE("dsps_cplx2reC_fc32 %d", ret);
    }
  };

  // Inverse FFT via the conjugate trick: ifft(x) = conj(fft(conj(x)))
  // NOTE(review): no 1/N scaling is applied here - confirm whether callers
  // expect a scaled inverse transform
  void rfft() override {
    conjugate();
    ret = dsps_fft2r_fc32(fft_data.data(), len);
    if (ret != ESP_OK){
      LOGE("dsps_fft2r_fc32 %d", ret);
    }
    conjugate();
    // Bit reverse
    ret = dsps_bit_rev_fc32(fft_data.data(), len);
    if (ret != ESP_OK){
      LOGE("dsps_bit_rev_fc32 %d", ret);
    }
    // Convert one complex vector to two complex vectors
    ret = dsps_cplx2reC_fc32(fft_data.data(), len);
    if (ret != ESP_OK){
      LOGE("dsps_cplx2reC_fc32 %d", ret);
    }
  }

  // Conjugates every bin in place (negates the imaginary parts)
  void conjugate(){
    FFTBin bin;
    for (int j=0;j<len;j++){
      getBin(j, bin);
      bin.conjugate();
      setBin(j, bin);
    }
  }

  float magnitude(int idx) override {
    return sqrt(magnitudeFast(idx));
  }

  /// magnitude w/o sqrt
  float magnitudeFast(int idx) override {
    return (fft_data[idx*2] * fft_data[idx*2] + fft_data[idx*2+1] * fft_data[idx*2+1]);
  }

  // Writes a complex bin; false when pos is out of range
  bool setBin(int pos, float real, float img) override {
    if (pos>=len) return false;
    fft_data[pos*2] = real;
    fft_data[pos*2+1] = img;
    return true;
  }

  // re-exposes the base-class overload (hidden by the override above)
  bool setBin(int pos, FFTBin &bin) { return FFTDriver::setBin(pos, bin);}

  // Reads a complex bin; false when pos is out of range
  bool getBin(int pos, FFTBin &bin) override {
    if (pos>=len) return false;
    bin.real = fft_data[pos*2];
    bin.img = fft_data[pos*2+1];
    return true;
  }

  bool isReverseFFT() override {return true;}

  bool isValid() override{ return fft_data.data()!=nullptr && ret==ESP_OK; }

  esp_err_t ret;                  // last esp-dsp return code
  Vector<float> fft_data{0};      // interleaved complex data: re, im, ...
  Vector<float> table_buffer{0};  // twiddle-factor table for esp-dsp
  int len=0;
};
|
||||
/**
|
||||
* @brief AudioFFT using FFTReal. The only specific functionality is the access to the dataArray
|
||||
* @ingroup fft-dsp
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioEspressifFFT : public AudioFFTBase {
|
||||
public:
|
||||
AudioEspressifFFT():AudioFFTBase(new FFTDriverEspressifFFT()) {}
|
||||
|
||||
/// Provides the complex array returned by the FFT
|
||||
float *dataArray() {
|
||||
return driverEx()->fft_data.data();
|
||||
}
|
||||
|
||||
FFTDriverEspressifFFT* driverEx() {
|
||||
return (FFTDriverEspressifFFT*)driver();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
793
libraries/audio-tools/src/AudioTools/AudioLibs/AudioFFT.h
Normal file
793
libraries/audio-tools/src/AudioTools/AudioLibs/AudioFFT.h
Normal file
@@ -0,0 +1,793 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioLibs/FFT/FFTWindows.h"
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "AudioTools/CoreAudio/MusicalNotes.h"
|
||||
|
||||
/**
|
||||
* @defgroup fft FFT
|
||||
* @ingroup dsp
|
||||
* @brief Fast Fourier Transform
|
||||
**/
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
// forward declaration
|
||||
class AudioFFTBase;
|
||||
static MusicalNotes AudioFFTNotes;
|
||||
|
||||
/**
 * @brief Result of the FFT: the bin index, its magnitude and the
 * corresponding frequency in Hz.
 * @ingroup fft
 */
struct AudioFFTResult {
  int bin = 0;              // index of the FFT bin
  float magnitude = 0.0f;   // magnitude of this bin
  float frequency = 0.0f;   // frequency in Hz for this bin

  /// Frequency rounded to the closest integer Hz value
  int frequencyAsInt() { return round(frequency); }
  /// Closest musical note for the frequency (via AudioFFTNotes)
  const char *frequencyAsNote() { return AudioFFTNotes.note(frequency); }
  /// Closest musical note; diff receives the frequency deviation
  const char *frequencyAsNote(float &diff) {
    return AudioFFTNotes.note(frequency, diff);
  }
};
|
||||
|
||||
/**
 * @brief Configuration for AudioFFT. If there is more than 1 channel the
 * channel_used is defining which channel is used to perform the fft on.
 * @ingroup fft
 */
struct AudioFFTConfig : public AudioInfo {
  AudioFFTConfig() {
    // defaults: 16 bit stereo at 44.1 kHz
    channels = 2;
    bits_per_sample = 16;
    sample_rate = 44100;
  }
  /// Callback method which is called after we got a new result
  void (*callback)(AudioFFTBase &fft) = nullptr;
  /// Channel which is used as input
  uint8_t channel_used = 0;
  /// Number of samples per FFT (must be a power of 2)
  int length = 8192;
  /// Step size in samples between FFT runs; 0 selects length (no overlap)
  int stride = 0;
  /// Optional window function for both fft and ifft
  WindowFunction *window_function = nullptr;
  /// Optional window function for fft only
  WindowFunction *window_function_fft = nullptr;
  /// Optional window function for ifft only
  WindowFunction *window_function_ifft = nullptr;
  /// TX_MODE = FFT, RX_MODE = IFFT
  RxTxMode rxtx_mode = TX_MODE;
  /// caller
  void *ref = nullptr;
};
|
||||
|
||||
/// An individual FFT bin: one complex value (real and imaginary part)
struct FFTBin {
  float real;
  float img;

  FFTBin() = default;

  /// Creates a bin from a real and an imaginary component
  FFTBin(float r, float i) : real(r), img(i) {}

  /// Scales both components by the given factor
  void multiply(float f) {
    real = real * f;
    img = img * f;
  }

  /// Complex conjugate: negates the imaginary part
  void conjugate() { img = -img; }

  /// Resets both components to zero
  void clear() {
    real = 0.0f;
    img = 0.0f;
  }
};
|
||||
|
||||
/// Inverse FFT overlap-add: accumulates windowed ifft frames and releases
/// stride-sized, rescaled chunks of audio
class FFTInverseOverlapAdder {
 public:
  FFTInverseOverlapAdder(int size = 0) {
    if (size > 0) resize(size);
  }

  /// Initialize the data by defining a new size; clears all accumulated values
  void resize(int size) {
    // reset max for new scaling
    rfft_max = 0.0;
    // define new size
    len = size;
    data.resize(size);
    for (int j = 0; j < data.size(); j++) {
      data[j] = 0.0;
    }
  }

  // adds the value at pos to the accumulator (applying the window function
  // first, when one is provided)
  void add(float value, int pos, WindowFunction *window_function) {
    float add_value = value;
    if (window_function != nullptr) {
      add_value = value * window_function->factor(pos);
    }
    assert(pos < len);
    data[pos] += add_value;
  }

  // gets the scaled audio data for the first `stride` samples, then shifts
  // the accumulator left by stride and clears the freed tail
  void getStepData(float *result, int stride, float maxResult) {
    for (int j = 0; j < stride; j++) {
      // determine max value to scale
      // NOTE(review): only positive peaks update rfft_max and it is never
      // reset between steps; if all data is <= 0 on the first call the
      // division below divides by zero — confirm intended
      if (data[j] > rfft_max) rfft_max = data[j];
    }
    for (int j = 0; j < stride; j++) {
      // scale so that the observed peak maps to maxResult
      result[j] = data[j] / rfft_max * maxResult;
      // clip to [-maxResult, maxResult]
      if (result[j] > maxResult) {
        result[j] = maxResult;
      }
      if (result[j] < -maxResult) {
        result[j] = -maxResult;
      }
    }
    // copy data to head (shift the overlap region to the front)
    for (int j = 0; j < len - stride; j++) {
      data[j] = data[j + stride];
    }
    // clear tail
    for (int j = len - stride; j < len; j++) {
      data[j] = 0.0;
    }
  }

  /// provides the actual size
  int size() { return data.size(); }

 protected:
  Vector<float> data{0};  // accumulation buffer of len samples
  int len = 0;            // logical size of the accumulator
  float rfft_max = 0;     // running positive peak used for output scaling
};
|
||||
|
||||
/**
 * @brief Abstract Class which defines the basic FFT functionality
 * @ingroup fft
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class FFTDriver {
 public:
  /// Initializes the driver for FFTs of the indicated length
  virtual bool begin(int len) = 0;
  /// Releases the driver resources
  virtual void end() = 0;
  /// Sets the real value
  virtual void setValue(int pos, float value) = 0;
  /// Perform FFT
  virtual void fft() = 0;
  /// Calculate the magnitude (fft result) at index: sqrt(i^2 + r^2)
  virtual float magnitude(int idx) = 0;
  /// Calculate the magnitude w/o square root
  virtual float magnitudeFast(int idx) = 0;
  /// Returns true when the driver is initialized and usable
  virtual bool isValid() = 0;
  /// Returns true if reverse FFT is supported
  virtual bool isReverseFFT() { return false; }
  /// Calculate reverse FFT
  virtual void rfft() { LOGE("Not implemented"); }
  /// Get result value from Reverse FFT
  virtual float getValue(int pos) = 0;
  /// sets the value of a bin (default implementation does nothing)
  virtual bool setBin(int idx, float real, float img) { return false; }
  /// sets the value of a bin from a FFTBin struct
  bool setBin(int pos, FFTBin &bin) { return setBin(pos, bin.real, bin.img); }
  /// gets the value of a bin (default implementation does nothing)
  virtual bool getBin(int pos, FFTBin &bin) { return false; }
};
|
||||
|
||||
/**
 * @brief Executes FFT using audio data provided by write() and/or an inverse
 * FFT where the samples are made available via readBytes(). The Driver which
 * is passed in the constructor selects a specific FFT implementation.
 * @ingroup fft
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class AudioFFTBase : public AudioStream {
 public:
  /// Default Constructor. The len needs to be of the power of 2 (e.g. 512,
  /// 1024, 2048, 4096, 8192)
  AudioFFTBase(FFTDriver *driver) { p_driver = driver; }

  ~AudioFFTBase() { end(); }

  /// Provides the default configuration
  AudioFFTConfig defaultConfig(RxTxMode mode = TX_MODE) {
    AudioFFTConfig info;
    info.rxtx_mode = mode;
    return info;
  }

  /// starts the processing with the provided configuration
  bool begin(AudioFFTConfig info) {
    cfg = info;
    return begin();
  }

  /// starts the processing with the current configuration
  bool begin() override {
    bins = cfg.length / 2;
    // define window functions: the fft/ifft specific functions fall back to
    // the shared window_function when not set explicitly
    if (cfg.window_function_fft == nullptr)
      cfg.window_function_fft = cfg.window_function;
    if (cfg.window_function_ifft == nullptr)
      cfg.window_function_ifft = cfg.window_function;
    // define default stride value if not defined
    if (cfg.stride == 0) cfg.stride = cfg.length;

    if (!isPowerOfTwo(cfg.length)) {
      LOGE("Len must be of the power of 2: %d", cfg.length);
      return false;
    }
    // NOTE(review): a failed driver begin() only logs here; the failure is
    // surfaced through p_driver->isValid() at the end of this method
    if (!p_driver->begin(cfg.length)) {
      LOGE("Not enough memory");
    }

    if (cfg.window_function_fft != nullptr) {
      cfg.window_function_fft->begin(cfg.length);
    }
    if (cfg.window_function_ifft != nullptr &&
        cfg.window_function_ifft != cfg.window_function_fft) {
      cfg.window_function_ifft->begin(cfg.length);
    }

    bool is_valid_rxtx = false;
    if (cfg.rxtx_mode == TX_MODE || cfg.rxtx_mode == RXTX_MODE) {
      // holds last N bytes that need to be reprocessed
      stride_buffer.resize((cfg.length) * bytesPerSample());
      is_valid_rxtx = true;
    }
    if (cfg.rxtx_mode == RX_MODE || cfg.rxtx_mode == RXTX_MODE) {
      // output buffers for the inverse FFT path
      rfft_data.resize(cfg.channels * bytesPerSample() * cfg.stride);
      rfft_add.resize(cfg.length);
      step_data.resize(cfg.stride);
      is_valid_rxtx = true;
    }

    if (!is_valid_rxtx) {
      LOGE("Invalid rxtx_mode");
      return false;
    }

    current_pos = 0;
    return p_driver->isValid();
  }

  /// Just resets the current_pos e.g. to start a new cycle
  void reset() {
    current_pos = 0;
    if (cfg.window_function_fft != nullptr) {
      cfg.window_function_fft->begin(cfg.length);
    }
    if (cfg.window_function_ifft != nullptr) {
      cfg.window_function_ifft->begin(cfg.length);
    }
  }

  /// true while a driver is assigned and reports a valid state
  operator bool() override {
    return p_driver != nullptr && p_driver->isValid();
  }

  /// Notify change of audio information: updates cfg and restarts processing
  void setAudioInfo(AudioInfo info) override {
    cfg.bits_per_sample = info.bits_per_sample;
    cfg.sample_rate = info.sample_rate;
    cfg.channels = info.channels;
    begin(cfg);
  }

  /// Release the allocated memory
  void end() override {
    p_driver->end();
    l_magnitudes.resize(0);
    rfft_data.resize(0);
    rfft_add.resize(0);
    step_data.resize(0);
  }

  /// Provide the audio data as FFT input
  size_t write(const uint8_t *data, size_t len) override {
    size_t result = 0;
    if (p_driver->isValid()) {
      result = len;
      // dispatch on the sample size; len is converted from bytes to samples
      switch (cfg.bits_per_sample) {
        case 8:
          processSamples<int8_t>(data, len);
          break;
        case 16:
          processSamples<int16_t>(data, len / 2);
          break;
        case 24:
          processSamples<int24_t>(data, len / 3);
          break;
        case 32:
          processSamples<int32_t>(data, len / 4);
          break;
        default:
          LOGE("Unsupported bits_per_sample: %d", cfg.bits_per_sample);
          break;
      }
    }
    return result;
  }

  /// Provides the result of a reverse FFT
  size_t readBytes(uint8_t *data, size_t len) override {
    TRACED();
    if (rfft_data.size() == 0) return 0;

    // get data via callback if there is no more data
    if (cfg.rxtx_mode == RX_MODE && cfg.callback != nullptr &&
        rfft_data.available() == 0) {
      cfg.callback(*this);
    }

    // execute rfft when we consumed all data
    if (has_rfft_data && rfft_data.available() == 0) {
      rfft();
    }
    return rfft_data.readArray(data, len);
  }

  /// We try to fill the buffer at once
  int availableForWrite() override {
    return cfg.length * cfg.channels * bytesPerSample();
  }

  /// Data available for reverse fft
  int available() override {
    assert(cfg.stride != 0);
    return cfg.stride * cfg.channels * bytesPerSample();
  }

  /// The number of bins used by the FFT which are relevant for the result
  int size() { return bins; }

  /// The number of samples
  int length() { return cfg.length; }

  /// time after the fft: time when the last result was provided - you can poll
  /// this to check if we have a new result
  unsigned long resultTime() { return timestamp; }
  /// time before the fft
  unsigned long resultTimeBegin() { return timestamp_begin; }

  /// Determines the result values in the max magnitude bin
  AudioFFTResult result() {
    AudioFFTResult ret_value;
    ret_value.magnitude = 0.0f;
    ret_value.bin = 0;
    // find max value and index
    for (int j = 0; j < size(); j++) {
      float m = magnitude(j);
      if (m > ret_value.magnitude) {
        ret_value.magnitude = m;
        ret_value.bin = j;
      }
    }
    ret_value.frequency = frequency(ret_value.bin);
    return ret_value;
  }

  /// Determines the N biggest result values
  template <int N>
  void resultArray(AudioFFTResult (&result)[N]) {
    // initialize to negative value
    for (int j = 0; j < N; j++) {
      result[j].magnitude = -1000000;
    }
    // find top n values
    AudioFFTResult act;
    for (int j = 0; j < size(); j++) {
      act.magnitude = magnitude(j);
      act.bin = j;
      act.frequency = frequency(j);
      insertSorted<N>(result, act);
    }
  }

  /// Convert the FFT result to MEL spectrum. Returns a pointer to n_bins
  /// values owned by this object (valid until the next call)
  float *toMEL(int n_bins, float min_freq = 0.0f, float max_freq = 0.0f) {
    // calculate mel bins; fall back to defaults for unspecified arguments
    if (n_bins <= 0) n_bins = size();
    if (min_freq <= 0.0f) min_freq = frequency(0);
    if (max_freq <= 0.0f) max_freq = frequency(size() - 1);
    mel_bins.resize(n_bins);

    // Convert min and max frequencies to MEL scale
    float min_mel = 2595.0f * log10(1.0f + (min_freq / 700.0f));
    float max_mel = 2595.0f * log10(1.0f + (max_freq / 700.0f));

    // Create equally spaced points in the MEL scale
    Vector<float> mel_points;
    mel_points.resize(n_bins + 2);  // +2 for the endpoints

    float mel_step = (max_mel - min_mel) / (n_bins + 1);
    for (int i = 0; i < n_bins + 2; i++) {
      mel_points[i] = min_mel + i * mel_step;
    }

    // Convert MEL points back to frequency
    Vector<float> freq_points;
    freq_points.resize(n_bins + 2);
    for (int i = 0; i < n_bins + 2; i++) {
      freq_points[i] = 700.0f * (pow(10.0f, mel_points[i] / 2595.0f) - 1.0f);
    }

    // Convert frequency points to FFT bin indices
    Vector<int> bin_indices;
    bin_indices.resize(n_bins + 2);
    for (int i = 0; i < n_bins + 2; i++) {
      bin_indices[i] = round(freq_points[i] * cfg.length / cfg.sample_rate);
      // Ensure bin index is within valid range
      if (bin_indices[i] >= bins) bin_indices[i] = bins - 1;
      if (bin_indices[i] < 0) bin_indices[i] = 0;
    }

    // Create and apply triangular filters
    for (int i = 0; i < n_bins; i++) {
      float mel_sum = 0.0f;

      int start_bin = bin_indices[i];
      int mid_bin = bin_indices[i + 1];
      int end_bin = bin_indices[i + 2];

      // Apply first half of triangle filter (ascending)
      for (int j = start_bin; j < mid_bin; j++) {
        if (j >= bins) break;
        float weight = (j - start_bin) / float(mid_bin - start_bin);
        mel_sum += magnitude(j) * weight;
      }

      // Apply second half of triangle filter (descending)
      for (int j = mid_bin; j < end_bin; j++) {
        if (j >= bins) break;
        float weight = (end_bin - j) / float(end_bin - mid_bin);
        mel_sum += magnitude(j) * weight;
      }

      mel_bins[i] = mel_sum;
    }

    return mel_bins.data();
  }

  /**
   * @brief Convert MEL spectrum back to linear frequency spectrum
   *
   * @param values Pointer to MEL spectrum values
   * @param n_bins Number of MEL bins
   * @return bool Success status
   */
  bool fromMEL(float *values, int n_bins, float min_freq = 0.0f,
               float max_freq = 0.0f) {
    if (n_bins <= 0 || values == nullptr) return false;

    // Use default frequency range if not specified
    if (min_freq <= 0.0f) min_freq = frequency(0);
    if (max_freq <= 0.0f) max_freq = frequency(size() - 1);

    // Clear the current magnitude array
    for (int i = 0; i < bins; i++) {
      FFTBin bin;
      bin.clear();
      setBin(i, bin);
    }

    // Convert min and max frequencies to MEL scale
    float min_mel = 2595.0f * log10(1.0f + (min_freq / 700.0f));
    float max_mel = 2595.0f * log10(1.0f + (max_freq / 700.0f));

    // Create equally spaced points in the MEL scale
    Vector<float> mel_points;
    mel_points.resize(n_bins + 2);  // +2 for the endpoints

    float mel_step = (max_mel - min_mel) / (n_bins + 1);
    for (int i = 0; i < n_bins + 2; i++) {
      mel_points[i] = min_mel + i * mel_step;
    }

    // Convert MEL points back to frequency
    Vector<float> freq_points;
    freq_points.resize(n_bins + 2);
    for (int i = 0; i < n_bins + 2; i++) {
      freq_points[i] = 700.0f * (pow(10.0f, mel_points[i] / 2595.0f) - 1.0f);
    }

    // Convert frequency points to FFT bin indices
    Vector<int> bin_indices;
    bin_indices.resize(n_bins + 2);
    for (int i = 0; i < n_bins + 2; i++) {
      bin_indices[i] = round(freq_points[i] * cfg.length / cfg.sample_rate);
      // Ensure bin index is within valid range
      if (bin_indices[i] >= bins) bin_indices[i] = bins - 1;
      if (bin_indices[i] < 0) bin_indices[i] = 0;
    }

    // Distribute MEL energy back to linear frequency bins
    Vector<float> linear_magnitudes;
    linear_magnitudes.resize(bins);

    for (int i = 0; i < n_bins; i++) {
      int start_bin = bin_indices[i];
      int mid_bin = bin_indices[i + 1];
      int end_bin = bin_indices[i + 2];

      // Apply first half of triangle (ascending)
      for (int j = start_bin; j < mid_bin; j++) {
        if (j >= bins) break;
        float weight = (j - start_bin) / float(mid_bin - start_bin);
        linear_magnitudes[j] += values[i] * weight;
      }

      // Apply second half of triangle (descending)
      for (int j = mid_bin; j < end_bin; j++) {
        if (j >= bins) break;
        float weight = (end_bin - j) / float(end_bin - mid_bin);
        linear_magnitudes[j] += values[i] * weight;
      }
    }

    // Set magnitude values and create simple phase (all zeros)
    for (int i = 0; i < bins; i++) {
      if (linear_magnitudes[i] > 0) {
        FFTBin bin;
        bin.real = linear_magnitudes[i];
        bin.img = 0.0f;
        setBin(i, bin);
      }
    }

    return true;
  }

  /// provides access to the FFTDriver which implements the basic FFT
  /// functionality
  FFTDriver *driver() { return p_driver; }

  /// Determines the frequency of the indicated bin
  float frequency(int bin) {
    if (bin >= bins) {
      LOGE("Invalid bin %d", bin);
      return 0;
    }
    return static_cast<float>(bin) * cfg.sample_rate / cfg.length;
  }

  /// Determine the bin number from the frequency
  int frequencyToBin(int freq) {
    int max_freq = cfg.sample_rate / 2;
    return map(freq, 0, max_freq, 0, size());
  }

  /// Calculates the magnitude of the fft result to determine the max value
  /// (bin is 0 to size())
  float magnitude(int bin) {
    if (bin >= bins) {
      LOGE("Invalid bin %d", bin);
      return 0;
    }
    return p_driver->magnitude(bin);
  }

  /// Magnitude without the square root (faster)
  float magnitudeFast(int bin) {
    if (bin >= bins) {
      LOGE("Invalid bin %d", bin);
      return 0;
    }
    return p_driver->magnitudeFast(bin);
  }

  /// calculates the phase: atan2(imaginary, real)
  float phase(int bin) {
    FFTBin fft_bin;
    getBin(bin, fft_bin);
    return atan2(fft_bin.img, fft_bin.real);
  }

  /// Provides the magnitudes as array of size size(). Please note that this
  /// method is allocating additional memory!
  float *magnitudes() {
    if (l_magnitudes.size() == 0) {
      l_magnitudes.resize(size());
    }
    for (int j = 0; j < size(); j++) {
      l_magnitudes[j] = magnitude(j);
    }
    return l_magnitudes.data();
  }

  /// Provides the magnitudes w/o calling the square root function as array of
  /// size size(). Please note that this method is allocating additional
  /// memory!
  float *magnitudesFast() {
    if (l_magnitudes.size() == 0) {
      l_magnitudes.resize(size());
    }
    for (int j = 0; j < size(); j++) {
      l_magnitudes[j] = magnitudeFast(j);
    }
    return l_magnitudes.data();
  }

  /// sets the value of a bin (and its mirror bin in the upper half)
  bool setBin(int idx, float real, float img) {
    // NOTE(review): has_rfft_data is set even when idx is rejected below;
    // also the mirror write uses the un-conjugated imaginary part and maps
    // idx==0 to bin cfg.length (rejected by the driver) — confirm intended
    has_rfft_data = true;
    if (idx < 0 || idx >= size()) return false;
    bool rc_first_half = p_driver->setBin(idx, real, img);
    bool rc_2nd_half = p_driver->setBin(cfg.length - idx, real, img);
    return rc_first_half && rc_2nd_half;
  }
  /// sets the value of a bin
  bool setBin(int pos, FFTBin &bin) { return setBin(pos, bin.real, bin.img); }
  /// gets the value of a bin
  bool getBin(int pos, FFTBin &bin) { return p_driver->getBin(pos, bin); }

  /// clears the fft data
  void clearBins() {
    FFTBin empty{0, 0};
    for (int j = 0; j < size(); j++) {
      setBin(j, empty);
    }
  }

  /// Provides the actual configuration
  AudioFFTConfig &config() { return cfg; }

 protected:
  FFTDriver *p_driver = nullptr;       // selected FFT implementation
  int current_pos = 0;                 // current write position
  int bins = 0;                        // relevant result bins (= length / 2)
  unsigned long timestamp_begin = 0l;  // millis() before the last fft
  unsigned long timestamp = 0l;        // millis() after the last fft
  AudioFFTConfig cfg;
  FFTInverseOverlapAdder rfft_add{0};  // overlap-add accumulator for the ifft
  Vector<float> l_magnitudes{0};       // lazily allocated magnitude array
  Vector<float> step_data{0};          // one stride of ifft output samples
  Vector<float> mel_bins{0};           // result buffer for toMEL()
  SingleBuffer<uint8_t> stride_buffer{0};  // collects input until length is reached
  RingBuffer<uint8_t> rfft_data{0};        // ifft output made available via readBytes
  bool has_rfft_data = false;              // true when bins hold unconsumed fft data

  // Add samples to input data p_x - and process them if full
  template <typename T>
  void processSamples(const void *data, size_t count) {
    T *dataT = (T *)data;
    T sample;
    // pick only the configured channel out of each frame
    for (int j = 0; j < count; j += cfg.channels) {
      sample = dataT[j + cfg.channel_used];
      if (writeStrideBuffer((uint8_t *)&sample, sizeof(T))) {
        // process data if buffer is full
        T *samples = (T *)stride_buffer.data();
        int sample_count = stride_buffer.size() / sizeof(T);
        assert(sample_count == cfg.length);
        for (int j = 0; j < sample_count; j++) {
          T out_sample = samples[j];
          T windowed_sample = windowedSample(out_sample, j);
          // scale the integer sample to the float range [-1, 1]
          float scaled_sample =
              1.0f / NumberConverter::maxValueT<T>() * windowed_sample;
          p_driver->setValue(j, scaled_sample);
        }

        fft<T>();

        // remove stride samples
        stride_buffer.clearArray(cfg.stride * sizeof(T));

        // validate available data in stride buffer
        if (cfg.stride == cfg.length) assert(stride_buffer.available() == 0);
      }
    }
  }

  /// Applies the fft window function (when defined) to a single sample
  template <typename T>
  T windowedSample(T sample, int pos) {
    T result = sample;
    if (cfg.window_function_fft != nullptr) {
      result = cfg.window_function_fft->factor(pos) * sample;
    }
    return result;
  }

  /// Runs the forward fft, records timing and notifies the callback
  template <typename T>
  void fft() {
    timestamp_begin = millis();
    p_driver->fft();
    has_rfft_data = true;
    timestamp = millis();
    if (cfg.callback != nullptr) {
      cfg.callback(*this);
    }
  }

  /// reverse fft
  void rfft() {
    TRACED();
    // execute reverse fft
    p_driver->rfft();
    has_rfft_data = false;
    // add data to sum buffer (windowed overlap-add)
    for (int j = 0; j < cfg.length; j++) {
      float value = p_driver->getValue(j);
      rfft_add.add(value, j, cfg.window_function_ifft);
    }
    // get result data from sum buffer
    rfftWriteData(rfft_data);
  }

  /// write reverse fft result to buffer to make it available for readBytes
  void rfftWriteData(BaseBuffer<uint8_t> &data) {
    // get data to result buffer
    // for (int j = 0; j < cfg.stride; j++) {
    //   step_data[j] = 0.0;
    // }
    rfft_add.getStepData(step_data.data(), cfg.stride,
                         NumberConverter::maxValue(cfg.bits_per_sample));

    switch (cfg.bits_per_sample) {
      case 8:
        writeIFFT<int8_t>(step_data.data(), cfg.stride);
        break;
      case 16:
        writeIFFT<int16_t>(step_data.data(), cfg.stride);
        break;
      case 24:
        writeIFFT<int24_t>(step_data.data(), cfg.stride);
        break;
      case 32:
        writeIFFT<int32_t>(step_data.data(), cfg.stride);
        break;
      default:
        LOGE("Unsupported bits: %d", cfg.bits_per_sample);
    }
  }

  /// Duplicates each mono ifft sample to all configured channels and writes
  /// the frames to the result ring buffer
  template <typename T>
  void writeIFFT(float *data, int len) {
    for (int j = 0; j < len; j++) {
      T sample = data[j];
      T out_data[cfg.channels];
      for (int ch = 0; ch < cfg.channels; ch++) {
        out_data[ch] = sample;
      }
      int result = rfft_data.writeArray((uint8_t *)out_data, sizeof(out_data));
      assert(result == sizeof(out_data));
    }
  }

  inline int bytesPerSample() { return cfg.bits_per_sample / 8; }

  /// make sure that we do not reuse already found results
  template <int N>
  void insertSorted(AudioFFTResult (&result)[N], AudioFFTResult tmp) {
    // find place where we need to insert new record
    for (int j = 0; j < N; j++) {
      // insert when bigger than the current record
      if (tmp.magnitude > result[j].magnitude) {
        // shift existing values right
        for (int i = N - 2; i >= j; i--) {
          result[i + 1] = result[i];
        }
        // insert new value
        result[j] = tmp;
        // stop after we found the correct index
        break;
      }
    }
  }

  // adds samples to stride buffer, returns true if the buffer is full
  bool writeStrideBuffer(uint8_t *buffer, size_t len) {
    assert(stride_buffer.availableForWrite() >= len);
    stride_buffer.writeArray(buffer, len);
    return stride_buffer.isFull();
  }

  bool isPowerOfTwo(uint16_t x) { return (x & (x - 1)) == 0; }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
391
libraries/audio-tools/src/AudioTools/AudioLibs/AudioFaust.h
Normal file
391
libraries/audio-tools/src/AudioTools/AudioLibs/AudioFaust.h
Normal file
@@ -0,0 +1,391 @@
|
||||
#pragma once
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "AudioTools/AudioLibs/AudioFaustDSP.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Integration into Faust DSP see https://faust.grame.fr/
|
||||
* To generate code from faust, select src and cpp
|
||||
* @ingroup dsp
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
template<class DSP>
|
||||
class FaustStream : public AudioStream {
|
||||
public:
|
||||
|
||||
/// Constructor for Faust as Audio Source
|
||||
FaustStream(bool useSeparateOutputBuffer=true) {
|
||||
with_output_buffer = useSeparateOutputBuffer;
|
||||
}
|
||||
|
||||
/// Constructor for Faust as signal Processor - changing an input signal and sending it to out
|
||||
FaustStream(Print &out, bool useSeparateOutputBuffer=true){
|
||||
p_out = &out;
|
||||
with_output_buffer = useSeparateOutputBuffer;
|
||||
}
|
||||
|
||||
~FaustStream(){
|
||||
end();
|
||||
deleteFloatBuffer();
|
||||
delete p_dsp;
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
DSP::classDestroy();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/// Provides a pointer to the actual dsp object
|
||||
dsp *getDSP(){
|
||||
return p_dsp;
|
||||
}
|
||||
|
||||
AudioInfo defaultConfig() {
|
||||
AudioInfo def;
|
||||
def.channels = 2;
|
||||
def.bits_per_sample = 16;
|
||||
def.sample_rate = 44100;
|
||||
return def;
|
||||
}
|
||||
|
||||
|
||||
/// Checks the parameters and starts the processing
|
||||
bool begin(AudioInfo cfg){
|
||||
TRACED();
|
||||
bool result = true;
|
||||
this->cfg = cfg;
|
||||
this->bytes_per_sample = cfg.bits_per_sample / 8;
|
||||
this->bytes_per_frame = bytes_per_sample * cfg.channels;
|
||||
this->float_to_int_factor = NumberConverter::maxValue(cfg.bits_per_sample);
|
||||
|
||||
if (p_dsp==nullptr){
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
DSP::fManager = new dsp_memory_manager();
|
||||
DSP::memoryInfo();
|
||||
p_dsp = DSP::create();
|
||||
#else
|
||||
p_dsp = new DSP();
|
||||
#endif
|
||||
}
|
||||
|
||||
if (p_dsp==nullptr){
|
||||
LOGE("dsp is null");
|
||||
return false;
|
||||
}
|
||||
|
||||
DSP::classInit(cfg.sample_rate);
|
||||
p_dsp->buildUserInterface(&ui);
|
||||
p_dsp->init(cfg.sample_rate);
|
||||
p_dsp->instanceInit(cfg.sample_rate);
|
||||
|
||||
// we do expect an output
|
||||
result = checkChannels();
|
||||
|
||||
// allocate array of channel data
|
||||
if (p_buffer==nullptr){
|
||||
p_buffer = new FAUSTFLOAT*[cfg.channels]();
|
||||
}
|
||||
if (with_output_buffer && p_buffer_out==nullptr){
|
||||
p_buffer_out = new FAUSTFLOAT*[cfg.channels]();
|
||||
}
|
||||
|
||||
LOGI("is_read: %s", is_read?"true":"false");
|
||||
LOGI("is_write: %s", is_write?"true":"false");
|
||||
gate_exists = ui.exists("gate");
|
||||
LOGI("gate_exists: %s", gate_exists?"true":"false");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/// Ends the processing
|
||||
void end() {
|
||||
TRACED();
|
||||
is_read = false;
|
||||
is_write = false;
|
||||
p_dsp->instanceClear();
|
||||
#ifdef USE_MEMORY_MANAGER
|
||||
DSP::destroy(p_dsp);
|
||||
p_dsp = nullptr;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/// Used if FaustStream is used as audio source
|
||||
size_t readBytes(uint8_t *data, size_t len) override {
|
||||
size_t result = 0;
|
||||
if (is_read){
|
||||
TRACED();
|
||||
result = len;
|
||||
int samples = len / bytes_per_sample;
|
||||
allocateFloatBuffer(samples, false);
|
||||
p_dsp->compute(samples, nullptr, p_buffer);
|
||||
// convert from float to int
|
||||
switch(cfg.bits_per_sample){
|
||||
case 8:
|
||||
convertFloatBufferToInt<int8_t>(samples, p_buffer, data);
|
||||
break;
|
||||
case 16:
|
||||
convertFloatBufferToInt<int16_t>(samples, p_buffer, data);
|
||||
break;
|
||||
case 24:
|
||||
convertFloatBufferToInt<int24_t>(samples, p_buffer, data);
|
||||
break;
|
||||
case 32:
|
||||
convertFloatBufferToInt<int32_t>(samples, p_buffer, data);
|
||||
break;
|
||||
default:
|
||||
TRACEE();
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Used if FaustStream is used as audio sink or filter
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
LOGD("FaustStream::write: %d", len);
|
||||
switch(cfg.bits_per_sample){
|
||||
case 8:
|
||||
return writeT<int8_t>(data, len);
|
||||
case 16:
|
||||
return writeT<int16_t>(data, len);
|
||||
case 24:
|
||||
return writeT<int24_t>(data, len);
|
||||
case 32:
|
||||
return writeT<int32_t>(data, len);
|
||||
default:
|
||||
TRACEE();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int available() override {
|
||||
return DEFAULT_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
int availableForWrite() override {
|
||||
return DEFAULT_BUFFER_SIZE / bytes_per_frame; // we limit the write size
|
||||
}
|
||||
|
||||
/// Determines the value of a parameter
|
||||
virtual FAUSTFLOAT labelValue(const char*label) {
|
||||
return ui.getValue(label);
|
||||
}
|
||||
|
||||
/// Defines the value of a parameter
|
||||
virtual bool setLabelValue(const char*label, FAUSTFLOAT value){
|
||||
if (!is_read && !is_write) LOGE("setLabelValue must be called after begin");
|
||||
bool result = ui.setValue(label, value);
|
||||
LOGI("setLabelValue('%s',%f) -> %s", label, value, result?"true":"false");
|
||||
return result;
|
||||
}
|
||||
|
||||
virtual bool setMidiNote(int note){
|
||||
FAUSTFLOAT frq = noteToFrequency(note);
|
||||
return setFrequency(frq);
|
||||
}
|
||||
|
||||
  /// Defines the synthesis frequency in Hz (Faust "freq" parameter)
  virtual bool setFrequency(FAUSTFLOAT freq){
    return setLabelValue("freq", freq);
  }

  /// Provides the current frequency (Faust "freq" parameter)
  virtual FAUSTFLOAT frequency() {
    return labelValue("freq");
  }

  /// Defines the pitch bend (Faust "bend" parameter)
  virtual bool setBend(FAUSTFLOAT bend){
    return setLabelValue("bend", bend);
  }

  /// Provides the current pitch bend (Faust "bend" parameter)
  virtual FAUSTFLOAT bend() {
    return labelValue("bend");
  }

  /// Defines the gain (Faust "gain" parameter)
  virtual bool setGain(FAUSTFLOAT gain){
    return setLabelValue("gain", gain);
  }

  /// Provides the current gain (Faust "gain" parameter)
  virtual FAUSTFLOAT gain() {
    return labelValue("gain");
  }
|
||||
|
||||
  /// Starts a note: opens the gate (when the dsp defines one), then sets
  /// frequency and gain. Note: setGain is skipped if setMidiNote fails
  /// (short-circuit evaluation).
  virtual bool midiOn(int note, FAUSTFLOAT gain){
    if (gate_exists) setLabelValue("gate",1.0);
    return setMidiNote(note) && setGain(gain);
  }

  /// Stops a note: closes the gate (when the dsp defines one) and mutes
  /// the gain. setGain is skipped if setMidiNote fails (short-circuit).
  virtual bool midiOff(int note){
    if (gate_exists) setLabelValue("gate",0.0);
    return setMidiNote(note) && setGain(0.0);
  }
|
||||
|
||||
 protected:
  bool is_init = false;        // NOTE(review): not referenced in the visible code - confirm before removal
  bool is_read = false;        // true when the dsp produces output (source scenario)
  bool is_write = false;       // true when the dsp consumes input (sink/filter scenario)
  bool gate_exists = false;    // true when the Faust UI defines a "gate" parameter
  bool with_output_buffer;     // when true writeT() computes into a separate output buffer
  int bytes_per_sample;        // size of a single sample in bytes
  int bytes_per_frame;         // presumably bytes_per_sample * channels - set outside this view, confirm
  int buffer_allocated;        // samples allocated per channel in p_buffer
  float float_to_int_factor = 32767;  // scale between float [-1,1] and integer samples
  DSP *p_dsp = nullptr;        // the Faust generated dsp implementation
  AudioInfo cfg;               // audio format: sample_rate, channels, bits_per_sample
  Print *p_out=nullptr;        // output target for the sink/filter scenario
  FAUSTFLOAT** p_buffer=nullptr;      // per-channel input/work arrays for faust
  FAUSTFLOAT** p_buffer_out=nullptr;  // per-channel output arrays (only when with_output_buffer)
  UI ui;                       // parameter registry filled by buildUserInterface()
|
||||
|
||||
/// Checks the input and output channels and updates the is_write or is_read scenario flags
|
||||
bool checkChannels() {
|
||||
bool result = true;
|
||||
|
||||
// update channels
|
||||
int num_outputs = p_dsp->getNumOutputs();
|
||||
if (cfg.channels!=num_outputs){
|
||||
cfg.channels = num_outputs;
|
||||
LOGW("Updating channels to %d", num_outputs);
|
||||
}
|
||||
|
||||
if (num_outputs>0){
|
||||
if (num_outputs==cfg.channels){
|
||||
is_read = true;
|
||||
} else {
|
||||
LOGE("NumOutputs %d is not matching with number of channels %d", num_outputs, cfg.channels);
|
||||
result = false;
|
||||
}
|
||||
if (p_dsp->getNumInputs()!=0 && p_dsp->getNumInputs()!=cfg.channels){
|
||||
LOGE("NumInputs is not matching with number of channels");
|
||||
result = false;
|
||||
}
|
||||
if (p_dsp->getNumInputs()>0){
|
||||
if (p_out!=nullptr){
|
||||
is_write = true;
|
||||
} else {
|
||||
LOGE("Faust expects input - you need to provide and AudioStream in the constructor");
|
||||
result = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Converts the float buffer to int values
|
||||
template <class T>
|
||||
void convertFloatBufferToInt(int samples, FAUSTFLOAT**p_float_in, void *data_out){
|
||||
T *dataT = (T*) data_out;
|
||||
int frameCount = samples/cfg.channels;
|
||||
for (int j=0; j<frameCount; j++){
|
||||
for (int i=0;i<cfg.channels;i++){
|
||||
float sample = p_float_in[i][j];
|
||||
// clip input
|
||||
if(sample > 1.0f){
|
||||
sample = 1.0f;
|
||||
}
|
||||
if(sample < -1.0f){
|
||||
sample = -1.0f;
|
||||
}
|
||||
dataT[(j*cfg.channels)+i] = sample * float_to_int_factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts the int buffer to float values
|
||||
template <class T>
|
||||
void convertIntBufferToFloat(int samples, void *data_in, FAUSTFLOAT**p_float_out ){
|
||||
T *dataT = (T*) data_in;
|
||||
int frameCount = samples/cfg.channels;
|
||||
for(int j=0;j<frameCount;j++){
|
||||
for(int i=0;i<cfg.channels;i++){
|
||||
p_float_out[i][j] = static_cast<FAUSTFLOAT>(dataT[(j*cfg.channels)+i]) / float_to_int_factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
  /// Used if FaustStream is used as audio sink or filter: converts the
  /// incoming integer samples to float, runs the Faust dsp and forwards the
  /// (re-converted) result to the output stream.
  template <class T>
  size_t writeT(const uint8_t *write_data, size_t len) {
    size_t result = 0;
    if (is_write){
      TRACED();
      int samples = len / bytes_per_sample;
      int frames = samples / cfg.channels;
      // prepare float input for faust
      allocateFloatBuffer(samples, with_output_buffer);
      convertIntBufferToFloat<T>(samples, (void*) write_data, p_buffer);

      // determine result: without a separate output buffer faust computes
      // in place into p_buffer
      FAUSTFLOAT** p_float_buffer = with_output_buffer ? p_buffer_out : p_buffer;
      p_dsp->compute(frames, p_buffer, p_float_buffer);

      // update buffer with data from faust
      // NOTE(review): this casts away const and overwrites the caller's
      // input buffer with the processed samples - confirm all callers allow this
      convertFloatBufferToInt<T>(samples, p_float_buffer, (void*) write_data);
      // write data to final output
      result = p_out->write(write_data, len);
    }
    return result;
  }
|
||||
|
||||
|
||||
  /// Allocate the buffer that is needed by faust: grows the per-channel
  /// sample arrays lazily when more samples are requested than currently
  /// allocated. p_buffer (and optionally p_buffer_out) must already point to
  /// the zero-initialized channel-pointer arrays created in begin().
  void allocateFloatBuffer(int samples, bool allocate_out){
    // release the old arrays when they are too small; they are re-created below
    if (samples>buffer_allocated){
      if (p_buffer[0]!=nullptr){
        for (int j=0;j<cfg.channels;j++){
          delete[]p_buffer[j];
          p_buffer[j] = nullptr;
        }
      }
      if (p_buffer_out!=nullptr && p_buffer_out[0]!=nullptr){
        for (int j=0;j<cfg.channels;j++){
          delete[]p_buffer_out[j];
          p_buffer_out[j] = nullptr;
        }
      }
    }
    // (re)allocate the input/work arrays and record the new capacity
    if (p_buffer[0]==nullptr){
      const int ch = cfg.channels;
      for (int j=0;j<ch;j++){
        p_buffer[j] = new FAUSTFLOAT[samples];
      }
      buffer_allocated = samples;
    }
    // optionally (re)allocate the separate output arrays
    if (allocate_out){
      if (p_buffer_out[0]==nullptr){
        const int ch = cfg.channels;
        for (int j=0;j<ch;j++){
          p_buffer_out[j] = new FAUSTFLOAT[samples];
        }
      }
    }
  }
|
||||
|
||||
void deleteFloatBuffer() {
|
||||
if (p_buffer!=nullptr) {
|
||||
for (int j=0;j<cfg.channels;j++){
|
||||
if (p_buffer[j]!=nullptr) delete p_buffer[j];
|
||||
}
|
||||
delete[] p_buffer;
|
||||
p_buffer = nullptr;
|
||||
}
|
||||
if (p_buffer_out!=nullptr) {
|
||||
for (int j=0;j<cfg.channels;j++){
|
||||
if (p_buffer_out[j]!=nullptr) delete p_buffer_out[j];
|
||||
}
|
||||
delete[] p_buffer_out;
|
||||
p_buffer_out = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
FAUSTFLOAT noteToFrequency(uint8_t x) {
|
||||
FAUSTFLOAT note = x;
|
||||
return 440.0 * pow(2.0f, (note-69)/12);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
255
libraries/audio-tools/src/AudioTools/AudioLibs/AudioFaustDSP.h
Normal file
255
libraries/audio-tools/src/AudioTools/AudioLibs/AudioFaustDSP.h
Normal file
@@ -0,0 +1,255 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Collections.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Float16.h"
|
||||
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
|
||||
|
||||
#ifndef FAUSTFLOAT
|
||||
#define FAUSTFLOAT float
|
||||
#endif
|
||||
|
||||
#ifndef PSRAM_LIMIT
|
||||
#define PSRAM_LIMIT 1024
|
||||
#endif
|
||||
|
||||
// forward declarations
|
||||
class UI;
|
||||
|
||||
|
||||
/**
|
||||
* @brief minimal dsp base class needed by Faust
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class dsp {
|
||||
public:
|
||||
virtual void init(int sample_rate) = 0;
|
||||
virtual void compute(int count, FAUSTFLOAT** inputs, FAUSTFLOAT** outputs) = 0;
|
||||
virtual void instanceClear() = 0;
|
||||
virtual int getNumInputs() = 0;
|
||||
virtual int getNumOutputs() = 0;
|
||||
virtual void buildUserInterface(UI* ui_interface) = 0;
|
||||
};
|
||||
|
||||
/**
 * @brief minimal implementation of Meta which just ignores the data
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class Meta {
 public:
  /// Faust reports each metadata key/value pair here; we simply discard it
  void declare(const char*, const char*) {}
};
|
||||
|
||||
// Soundfiles are not supported: use an opaque alias
using Soundfile = void;
|
||||
|
||||
/**
|
||||
* @brief Minimum implementation of UI parameters. We only support the setting and getting of values
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class UI {
|
||||
struct Entry {
|
||||
const char* label=nullptr;
|
||||
FAUSTFLOAT* zone=nullptr;
|
||||
bool withLimits;
|
||||
FAUSTFLOAT min;
|
||||
FAUSTFLOAT max;
|
||||
};
|
||||
|
||||
public:
|
||||
// set and get values
|
||||
virtual FAUSTFLOAT getValue(const char*label) {
|
||||
Entry *e = findEntry(label);
|
||||
if (e==nullptr){
|
||||
LOGE("Label '%s' not found", label);
|
||||
}
|
||||
return e!=nullptr ? *(e->zone) :(FAUSTFLOAT) 0.0;
|
||||
}
|
||||
virtual bool setValue(const char* label, FAUSTFLOAT value){
|
||||
bool result = false;
|
||||
Entry* e = findEntry(label);
|
||||
if (e!=nullptr){
|
||||
if (e->withLimits){
|
||||
if (value>=e->min && value<=e->max){
|
||||
*(e->zone) = value;
|
||||
result = true;
|
||||
} else {
|
||||
LOGE("Value '%s' outsde limits %f (%f-%f)", e->label, value, e->min, e->max);
|
||||
}
|
||||
} else {
|
||||
*(e->zone) = value;
|
||||
result = true;
|
||||
}
|
||||
} else {
|
||||
LOGE("Label '%s' not found", label);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// -- widget's layouts
|
||||
virtual void openTabBox(const char* label) {}
|
||||
virtual void openHorizontalBox(const char* label) {}
|
||||
virtual void openVerticalBox(const char* label) {}
|
||||
virtual void closeBox() {}
|
||||
|
||||
// -- active widgets
|
||||
virtual void addButton(const char* label, FAUSTFLOAT* zone) {
|
||||
addEntry(label, zone);
|
||||
}
|
||||
virtual void addCheckButton(const char* label, FAUSTFLOAT* zone) {
|
||||
addEntry(label, zone);
|
||||
}
|
||||
virtual void addVerticalSlider(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT init, FAUSTFLOAT min, FAUSTFLOAT max, FAUSTFLOAT step) {
|
||||
addEntry(label, zone, true, min, max);
|
||||
*zone = init;
|
||||
}
|
||||
virtual void addHorizontalSlider(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT init, FAUSTFLOAT min, FAUSTFLOAT max, FAUSTFLOAT step) {
|
||||
addEntry(label, zone, true, min, max);
|
||||
*zone = init;
|
||||
}
|
||||
virtual void addNumEntry(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT init, FAUSTFLOAT min, FAUSTFLOAT max, FAUSTFLOAT step) {
|
||||
addEntry(label, zone, true, min, max);
|
||||
*zone = init;
|
||||
}
|
||||
|
||||
// -- passive widgets
|
||||
virtual void addHorizontalBargraph(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT min, FAUSTFLOAT max) {}
|
||||
virtual void addVerticalBargraph(const char* label, FAUSTFLOAT* zone, FAUSTFLOAT min, FAUSTFLOAT max) {}
|
||||
|
||||
// -- soundfiles
|
||||
virtual void addSoundfile(const char* label, const char* filename, Soundfile** sf_zone) {}
|
||||
|
||||
// -- metadata declarations
|
||||
virtual void declare(FAUSTFLOAT* zone, const char* key, const char* val) {}
|
||||
|
||||
/// checks if a label exists
|
||||
virtual bool exists(const char*label){
|
||||
return findEntry(label)!=nullptr;
|
||||
}
|
||||
|
||||
/// Returns the number of label entries
|
||||
virtual size_t size() {
|
||||
return entries.size();
|
||||
}
|
||||
|
||||
/// Returns the label at the indicated position. nullptr is returned if the index is too big
|
||||
const char* label(int idx){
|
||||
if (idx<size()){
|
||||
return entries[idx].label;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
audio_tools::Vector<Entry> entries;
|
||||
|
||||
Entry *findEntry(const char* name){
|
||||
StrView nameStr(name);
|
||||
for (int j=0; j<entries.size();j++){
|
||||
if (nameStr.equals(entries[j].label)){
|
||||
return &entries[j];
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void addEntry(const char*label,FAUSTFLOAT* zone, bool withLimits=false, FAUSTFLOAT min=0, FAUSTFLOAT max=0){
|
||||
Entry e;
|
||||
e.label = label;
|
||||
e.zone = zone;
|
||||
e.withLimits = withLimits;
|
||||
if (withLimits){
|
||||
e.min = min;
|
||||
e.max = max;
|
||||
LOGI("Label: %s value: %f range: %f - %f", label, *zone, min, max);
|
||||
} else {
|
||||
LOGI("Label: %s value: %f", label, *zone);
|
||||
}
|
||||
entries.push_back(e);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Memory manager which uses psram when it is available
|
||||
*
|
||||
*/
|
||||
class dsp_memory_manager {
|
||||
public:
|
||||
virtual ~dsp_memory_manager() {}
|
||||
|
||||
/**
|
||||
* Inform the Memory Manager with the number of expected memory zones.
|
||||
* @param count - the number of memory zones
|
||||
*/
|
||||
virtual bool begin(size_t count){
|
||||
this->count = count;
|
||||
total = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Give the Memory Manager information on a given memory zone.
|
||||
* @param size - the size in bytes of the memory zone
|
||||
* @param reads - the number of Read access to the zone used to compute one frame
|
||||
* @param writes - the number of Write access to the zone used to compute one frame
|
||||
*/
|
||||
virtual void info(size_t size, size_t reads, size_t writes) {
|
||||
LOGD("info %d", size);
|
||||
total+=size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Inform the Memory Manager that all memory zones have been described,
|
||||
* to possibly start a 'compute the best allocation strategy' step.
|
||||
*/
|
||||
virtual void end(){
|
||||
#ifdef ESP32
|
||||
is_psram = total>2000 && ESP.getFreePsram()>0;
|
||||
#endif
|
||||
LOGI("use PSRAM: %s", is_psram?"true":"false");
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a memory zone.
|
||||
* @param size - the memory zone size in bytes
|
||||
*/
|
||||
virtual void* allocate(size_t size) {
|
||||
LOGD("allocate %d", size);
|
||||
#ifdef ESP32
|
||||
void* result = is_psram && size > PSRAM_LIMIT ? ps_malloc(size) : malloc(size);
|
||||
#else
|
||||
void* result = malloc(size);
|
||||
#endif
|
||||
if (result!=nullptr){
|
||||
memset(result, size, 0);
|
||||
} else {
|
||||
LOGE("allocate %u bytes - failed", (unsigned) size);
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Destroy a memory zone.
|
||||
* @param ptr - the memory zone pointer to be deallocated
|
||||
*/
|
||||
virtual void destroy(void* ptr) {
|
||||
LOGD("destroy");
|
||||
free(ptr);
|
||||
};
|
||||
|
||||
private:
|
||||
size_t count;
|
||||
size_t total;
|
||||
bool is_psram = false;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
109
libraries/audio-tools/src/AudioTools/AudioLibs/AudioKissFFT.h
Normal file
109
libraries/audio-tools/src/AudioTools/AudioLibs/AudioKissFFT.h
Normal file
@@ -0,0 +1,109 @@
|
||||
#pragma once
|
||||
|
||||
#include "kiss_fix.h"
|
||||
#include "AudioFFT.h"
|
||||
|
||||
/**
|
||||
* @defgroup fft-kiss KISS
|
||||
* @ingroup fft
|
||||
* @brief FFT using KISS
|
||||
**/
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Driver for RealFFT
|
||||
* @ingroup fft-kiss
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FFTDriverKissFFT : public FFTDriver {
|
||||
public:
|
||||
bool begin(int len) override {
|
||||
this->len = len;
|
||||
k_data.resize(len);
|
||||
if (p_fft_object==nullptr) p_fft_object = cpp_kiss_fft_alloc(len,false,nullptr,nullptr);
|
||||
assert(p_fft_object!=nullptr);
|
||||
return p_fft_object!=nullptr;
|
||||
}
|
||||
|
||||
void end() override {
|
||||
if (p_fft_object!=nullptr) kiss_fft_free(p_fft_object);
|
||||
if (p_fft_object_inv!=nullptr) kiss_fft_free(p_fft_object_inv);
|
||||
|
||||
p_fft_object = nullptr;
|
||||
k_data.resize(0);
|
||||
}
|
||||
void setValue(int idx, float value) override {
|
||||
k_data[idx].r = value;
|
||||
}
|
||||
|
||||
void fft() override {
|
||||
cpp_kiss_fft (p_fft_object, k_data.data(), k_data.data());
|
||||
};
|
||||
|
||||
void rfft() override {
|
||||
if(p_fft_object_inv==nullptr) {
|
||||
p_fft_object_inv = cpp_kiss_fft_alloc(len,true,nullptr,nullptr);
|
||||
}
|
||||
cpp_kiss_fft (p_fft_object_inv, k_data.data(), k_data.data());
|
||||
};
|
||||
|
||||
float magnitude(int idx) override {
|
||||
return sqrt(magnitudeFast(idx));
|
||||
}
|
||||
|
||||
/// magnitude w/o sqrt
|
||||
float magnitudeFast(int idx) override {
|
||||
return (k_data[idx].r * k_data[idx].r + k_data[idx].i * k_data[idx].i);
|
||||
}
|
||||
|
||||
bool isValid() override{ return p_fft_object!=nullptr; }
|
||||
|
||||
bool isReverseFFT() override {return true;}
|
||||
|
||||
float getValue(int idx) override { return k_data[idx].r; }
|
||||
|
||||
bool setBin(int pos, FFTBin &bin) { return FFTDriver::setBin(pos, bin);}
|
||||
|
||||
bool setBin(int pos, float real, float img) override {
|
||||
if (pos>=len) return false;
|
||||
k_data[pos].r = real;
|
||||
k_data[pos].i = img;
|
||||
return true;
|
||||
}
|
||||
bool getBin(int pos, FFTBin &bin) override {
|
||||
if (pos>=len) return false;
|
||||
bin.real = k_data[pos].r;
|
||||
bin.img = k_data[pos].i;
|
||||
return true;
|
||||
}
|
||||
|
||||
kiss_fft_cfg p_fft_object=nullptr;
|
||||
kiss_fft_cfg p_fft_object_inv=nullptr;
|
||||
Vector<kiss_fft_cpx> k_data{0}; // real
|
||||
int len = 0;
|
||||
|
||||
};
|
||||
/**
|
||||
* @brief AudioFFT using FFTReal. The only specific functionality is the access to the dataArray
|
||||
* @ingroup fft-kiss
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioKissFFT : public AudioFFTBase {
|
||||
public:
|
||||
AudioKissFFT():AudioFFTBase(new FFTDriverKissFFT()) {}
|
||||
|
||||
/// Provides the complex array returned by the FFT
|
||||
kiss_fft_cpx *dataArray() {
|
||||
return driverEx()->k_data.data();
|
||||
}
|
||||
|
||||
FFTDriverKissFFT* driverEx() {
|
||||
return (FFTDriverKissFFT*)driver();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
635
libraries/audio-tools/src/AudioTools/AudioLibs/AudioKit.h
Normal file
635
libraries/audio-tools/src/AudioTools/AudioLibs/AudioKit.h
Normal file
@@ -0,0 +1,635 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools.h"
|
||||
#include "AudioKitHAL.h"
|
||||
#include "AudioTools/CoreAudio/AudioI2S/I2SConfig.h"
|
||||
#include "AudioTools/CoreAudio/AudioActions.h"
|
||||
|
||||
#ifndef AUDIOKIT_V1
|
||||
#error Upgrade the AudioKit library
|
||||
#endif
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
class AudioKitStream;
|
||||
static AudioKitStream *pt_AudioKitStream = nullptr;
|
||||
|
||||
/**
|
||||
* @brief Configuration for AudioKitStream: we use as subclass of I2SConfig
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
class AudioKitStreamConfig : public I2SConfig {
|
||||
|
||||
friend class AudioKitStream;
|
||||
|
||||
public:
|
||||
AudioKitStreamConfig(RxTxMode mode=RXTX_MODE) { setupI2SPins(mode); };
|
||||
// set adc channel with audio_hal_adc_input_t
|
||||
audio_hal_adc_input_t input_device = AUDIOKIT_DEFAULT_INPUT;
|
||||
// set dac channel
|
||||
audio_hal_dac_output_t output_device = AUDIOKIT_DEFAULT_OUTPUT;
|
||||
bool sd_active = true;
|
||||
bool default_actions_active = true;
|
||||
audio_kit_pins pins;
|
||||
audio_hal_func_t driver = AUDIO_DRIVER;
|
||||
|
||||
/// convert to config object needed by HAL
|
||||
AudioKitConfig toAudioKitConfig() {
|
||||
TRACED();
|
||||
audiokit_config.driver = driver;
|
||||
audiokit_config.pins = pins;
|
||||
audiokit_config.i2s_num = (i2s_port_t)port_no;
|
||||
audiokit_config.adc_input = input_device;
|
||||
audiokit_config.dac_output = output_device;
|
||||
audiokit_config.codec_mode = toCodecMode();
|
||||
audiokit_config.master_slave_mode = toMode();
|
||||
audiokit_config.fmt = toFormat();
|
||||
audiokit_config.sample_rate = toSampleRate();
|
||||
audiokit_config.bits_per_sample = toBits();
|
||||
#if defined(ESP32)
|
||||
audiokit_config.buffer_size = buffer_size;
|
||||
audiokit_config.buffer_count = buffer_count;
|
||||
#endif
|
||||
// we use the AudioKit library only to set up the codec
|
||||
audiokit_config.i2s_active = false;
|
||||
#if AUDIOKIT_SETUP_SD
|
||||
audiokit_config.sd_active = sd_active;
|
||||
#else
|
||||
// SD has been deactivated in the AudioKitConfig.h file
|
||||
audiokit_config.sd_active = false;
|
||||
#endif
|
||||
LOGW("sd_active = %s", sd_active ? "true" : "false" );
|
||||
|
||||
return audiokit_config;
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
AudioKitConfig audiokit_config;
|
||||
board_driver board;
|
||||
|
||||
/// Defines the pins based on the information provided by the AudioKit project
|
||||
void setupI2SPins(RxTxMode rxtx_mode) {
|
||||
TRACED();
|
||||
this->rx_tx_mode = rxtx_mode;
|
||||
i2s_pin_config_t i2s_pins = {};
|
||||
board.setup(pins);
|
||||
board.get_i2s_pins((i2s_port_t)port_no, &i2s_pins);
|
||||
pin_mck = i2s_pins.mck_io_num;
|
||||
pin_bck = i2s_pins.bck_io_num;
|
||||
pin_ws = i2s_pins.ws_io_num;
|
||||
if (rx_tx_mode == RX_MODE){
|
||||
pin_data = i2s_pins.data_in_num;
|
||||
pin_data_rx = I2S_PIN_NO_CHANGE;
|
||||
} else {
|
||||
pin_data = i2s_pins.data_out_num;
|
||||
pin_data_rx = i2s_pins.data_in_num;
|
||||
}
|
||||
};
|
||||
|
||||
// convert to audio_hal_iface_samples_t
|
||||
audio_hal_iface_bits_t toBits() {
|
||||
TRACED();
|
||||
static const int ia[] = {16, 24, 32};
|
||||
static const audio_hal_iface_bits_t oa[] = {AUDIO_HAL_BIT_LENGTH_16BITS,
|
||||
AUDIO_HAL_BIT_LENGTH_24BITS,
|
||||
AUDIO_HAL_BIT_LENGTH_32BITS};
|
||||
for (int j = 0; j < 3; j++) {
|
||||
if (ia[j] == bits_per_sample) {
|
||||
LOGD("-> %d",ia[j])
|
||||
return oa[j];
|
||||
}
|
||||
}
|
||||
LOGE("Bits per sample not supported: %d", bits_per_sample);
|
||||
return AUDIO_HAL_BIT_LENGTH_16BITS;
|
||||
}
|
||||
|
||||
/// Convert to audio_hal_iface_samples_t
|
||||
audio_hal_iface_samples_t toSampleRate() {
|
||||
TRACED();
|
||||
static const int ia[] = {8000, 11025, 16000, 22050,
|
||||
24000, 32000, 44100, 48000};
|
||||
static const audio_hal_iface_samples_t oa[] = {
|
||||
AUDIO_HAL_08K_SAMPLES, AUDIO_HAL_11K_SAMPLES, AUDIO_HAL_16K_SAMPLES,
|
||||
AUDIO_HAL_22K_SAMPLES, AUDIO_HAL_24K_SAMPLES, AUDIO_HAL_32K_SAMPLES,
|
||||
AUDIO_HAL_44K_SAMPLES, AUDIO_HAL_48K_SAMPLES};
|
||||
int diff = 99999;
|
||||
int result = 0;
|
||||
for (int j = 0; j < 8; j++) {
|
||||
if (ia[j] == sample_rate) {
|
||||
LOGD("-> %d",ia[j])
|
||||
return oa[j];
|
||||
} else {
|
||||
int new_diff = abs((int)(oa[j] - sample_rate));
|
||||
if (new_diff < diff) {
|
||||
result = j;
|
||||
diff = new_diff;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOGE("Sample Rate not supported: %d - using %d", sample_rate, ia[result]);
|
||||
return oa[result];
|
||||
}
|
||||
|
||||
/// Convert to audio_hal_iface_format_t
|
||||
audio_hal_iface_format_t toFormat() {
|
||||
TRACED();
|
||||
static const int ia[] = {I2S_STD_FORMAT,
|
||||
I2S_LSB_FORMAT,
|
||||
I2S_MSB_FORMAT,
|
||||
I2S_PHILIPS_FORMAT,
|
||||
I2S_RIGHT_JUSTIFIED_FORMAT,
|
||||
I2S_LEFT_JUSTIFIED_FORMAT,
|
||||
I2S_PCM};
|
||||
static const audio_hal_iface_format_t oa[] = {
|
||||
AUDIO_HAL_I2S_NORMAL, AUDIO_HAL_I2S_LEFT, AUDIO_HAL_I2S_RIGHT,
|
||||
AUDIO_HAL_I2S_NORMAL, AUDIO_HAL_I2S_RIGHT, AUDIO_HAL_I2S_LEFT,
|
||||
AUDIO_HAL_I2S_DSP};
|
||||
for (int j = 0; j < 8; j++) {
|
||||
if (ia[j] == i2s_format) {
|
||||
LOGD("-> %d",j)
|
||||
return oa[j];
|
||||
}
|
||||
}
|
||||
LOGE("Format not supported: %d", i2s_format);
|
||||
return AUDIO_HAL_I2S_NORMAL;
|
||||
}
|
||||
|
||||
/// Determine if ESP32 is master or slave - this is just the oposite of the
|
||||
/// HAL device
|
||||
audio_hal_iface_mode_t toMode() {
|
||||
return (is_master) ? AUDIO_HAL_MODE_SLAVE : AUDIO_HAL_MODE_MASTER;
|
||||
}
|
||||
|
||||
/// Convert to audio_hal_codec_mode_t
|
||||
audio_hal_codec_mode_t toCodecMode() {
|
||||
switch (rx_tx_mode) {
|
||||
case TX_MODE:
|
||||
LOGD("-> %s","AUDIO_HAL_CODEC_MODE_DECODE");
|
||||
return AUDIO_HAL_CODEC_MODE_DECODE;
|
||||
case RX_MODE:
|
||||
LOGD("-> %s","AUDIO_HAL_CODEC_MODE_ENCODE");
|
||||
return AUDIO_HAL_CODEC_MODE_ENCODE;
|
||||
default:
|
||||
LOGD("-> %s","AUDIO_HAL_CODEC_MODE_BOTH");
|
||||
return AUDIO_HAL_CODEC_MODE_BOTH;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief AudioKit Stream which uses the
|
||||
* https://github.com/pschatzmann/arduino-audiokit library
|
||||
* @ingroup io
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioKitStream : public AudioStream {
|
||||
public:
|
||||
  // register this instance so the static action callbacks can reach it
  AudioKitStream() { pt_AudioKitStream = this; }

  /// Provides the default configuration
  AudioKitStreamConfig defaultConfig(RxTxMode mode = RXTX_MODE) {
    TRACED();
    AudioKitStreamConfig result{mode};
    result.rx_tx_mode = mode;
    return result;
  }
|
||||
|
||||
/// Starts the processing
|
||||
bool begin(AudioKitStreamConfig config) {
|
||||
TRACED();
|
||||
cfg = config;
|
||||
|
||||
AudioStream::setAudioInfo(config);
|
||||
cfg.logInfo("AudioKitStream");
|
||||
|
||||
// start codec
|
||||
auto kit_cfg = cfg.toAudioKitConfig();
|
||||
if (!kit.begin(kit_cfg)){
|
||||
LOGE("begin faild: please verify your AUDIOKIT_BOARD setting: %d", AUDIOKIT_BOARD);
|
||||
stop();
|
||||
}
|
||||
|
||||
// start i2s
|
||||
i2s_stream.begin(cfg);
|
||||
|
||||
// Volume control and headphone detection
|
||||
if (cfg.default_actions_active){
|
||||
setupActions();
|
||||
}
|
||||
|
||||
// set initial volume
|
||||
setVolume(volume_value);
|
||||
is_started = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
  // restart after end with initial parameters
  bool begin() override {
    return begin(cfg);
  }

  /// Stops the processing: shuts down the codec and the I2S driver
  void end() override {
    TRACED();
    kit.end();
    i2s_stream.end();
    is_started = false;
  }
|
||||
|
||||
  /// We get the data via I2S - we expect to fill one buffer size
  /// (0 in TX only mode where nothing can be read)
  int available() {
    return cfg.rx_tx_mode == TX_MODE ? 0 : DEFAULT_BUFFER_SIZE;
  }

  /// Writes the audio data to the I2S output
  size_t write(const uint8_t *data, size_t len) override {
    return i2s_stream.write(data, len);
  }

  /// Reads the audio data
  size_t readBytes(uint8_t *data, size_t len) override {
    return i2s_stream.readBytes(data, len);
  }
|
||||
|
||||
  /// Update the audio info with new values: e.g. new sample_rate,
  /// bits_per_samples or channels. A pure sample rate change is applied on
  /// the fly; any other change restarts the I2S driver and the codec.
  void setAudioInfo(AudioInfo info) override {
    TRACEI();

    if (cfg.sample_rate != info.sample_rate
    && cfg.bits_per_sample == info.bits_per_sample
    && cfg.channels == info.channels
    && is_started) {
      // update sample rate only
      LOGW("Update sample rate: %d", info.sample_rate);
      cfg.sample_rate = info.sample_rate;
      i2s_stream.setAudioInfo(cfg);
      kit.setSampleRate(cfg.toSampleRate());
    } else if (cfg.sample_rate != info.sample_rate
    || cfg.bits_per_sample != info.bits_per_sample
    || cfg.channels != info.channels
    || !is_started) {
      // more has changed and we need to start the processing
      cfg.sample_rate = info.sample_rate;
      cfg.bits_per_sample = info.bits_per_sample;
      cfg.channels = info.channels;
      cfg.logInfo("AudioKit");

      // Stop first
      if(is_started){
        end();
      }
      // start kit with new config
      i2s_stream.begin(cfg);
      kit.begin(cfg.toAudioKitConfig());
      is_started = true;
    }
  }
|
||||
|
||||
  /// Provides access to the current configuration
  AudioKitStreamConfig &config() { return cfg; }

  /// Sets the codec active / inactive
  bool setActive(bool active) { return kit.setActive(active); }

  /// Mutes the output
  bool setMute(bool mute) { return kit.setMute(mute); }

  /// Defines the Volume: Range 0 to 100
  bool setVolume(int vol) {
    if (vol>100) LOGW("Volume is > 100: %d",vol);
    // update variable, so if called before begin we set the default value
    volume_value = vol;
    return kit.setVolume(vol);
  }

  /// Defines the Volume: Range 0 to 1.0
  bool setVolume(float vol) {
    if (vol>1.0) LOGW("Volume is > 1.0: %f",vol);
    // update variable, so if called before begin we set the default value
    volume_value = 100.0 * vol;
    return kit.setVolume(volume_value);
  }

  /// Defines the Volume: Range 0 to 1.0
  bool setVolume(double vol) {
    return setVolume((float)vol);
  }

  /// Determines the volume
  int volume() { return kit.volume(); }

  /// Activates/Deactives the speaker
  /// @param active
  void setSpeakerActive (bool active){
    kit.setSpeakerActive(active);
  }

  /// @brief Returns true if the headphone was detected
  /// @return
  bool headphoneStatus() {
    return kit.headphoneStatus();
  }
|
||||
|
||||
  /**
   * @brief Process input keys and pins: call this regularly from the main
   * loop so the registered AudioActions are evaluated
   */
  void processActions() {
    // TRACED();
    actions.processActions();
    // give other tasks a chance to run
    yield();
  }
|
||||
|
||||
  /**
   * @brief Defines a new action that is executed when the indicated pin is
   * active; the active logic is derived from the pin configuration
   *
   * @param pin
   * @param action
   * @param ref optional context passed back to the action callback
   */
  void addAction(int pin, void (*action)(bool,int,void*), void* ref=nullptr ) {
    TRACEI();
    // determine logic from config
    AudioActions::ActiveLogic activeLogic = getActionLogic(pin);
    actions.add(pin, action, activeLogic, ref);
  }

  /**
   * @brief Defines a new action that is executed when the indicated pin is
   * active, with an explicitly provided active logic
   *
   * @param pin
   * @param action
   * @param activeLogic
   * @param ref optional context passed back to the action callback
   */
  void addAction(int pin, void (*action)(bool,int,void*), AudioActions::ActiveLogic activeLogic, void* ref=nullptr ) {
    TRACEI();
    actions.add(pin, action, activeLogic, ref);
  }

  /// Provides access to the AudioActions
  AudioActions &audioActions() {
    return actions;
  }
|
||||
|
||||
/**
|
||||
* @brief Relative volume control
|
||||
*
|
||||
* @param vol
|
||||
*/
|
||||
void incrementVolume(int vol) {
|
||||
volume_value += vol;
|
||||
LOGI("incrementVolume: %d -> %d",vol, volume_value);
|
||||
kit.setVolume(volume_value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Increase the volume
|
||||
*
|
||||
*/
|
||||
static void actionVolumeUp(bool, int, void*) {
|
||||
TRACEI();
|
||||
pt_AudioKitStream->incrementVolume(+2);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Decrease the volume
|
||||
*
|
||||
*/
|
||||
static void actionVolumeDown(bool, int, void*) {
|
||||
TRACEI();
|
||||
pt_AudioKitStream->incrementVolume(-2);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Toggle start stop
|
||||
*
|
||||
*/
|
||||
static void actionStartStop(bool, int, void*) {
|
||||
TRACEI();
|
||||
pt_AudioKitStream->active = !pt_AudioKitStream->active;
|
||||
pt_AudioKitStream->setActive(pt_AudioKitStream->active);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Start
|
||||
*
|
||||
*/
|
||||
static void actionStart(bool, int, void*) {
|
||||
TRACEI();
|
||||
pt_AudioKitStream->active = true;
|
||||
pt_AudioKitStream->setActive(pt_AudioKitStream->active);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Stop
|
||||
*
|
||||
*/
|
||||
static void actionStop(bool, int, void*) {
|
||||
TRACEI();
|
||||
pt_AudioKitStream->active = false;
|
||||
pt_AudioKitStream->setActive(pt_AudioKitStream->active);
|
||||
}
|
||||
|
||||
/**
 * @brief Switch off the PA if the headphone is plugged in
 * and switch it on again if the headphone is unplugged.
 * Delegates entirely to the driver-level AudioKit implementation.
 */
static void actionHeadphoneDetection(bool, int, void*) {
  AudioKit::actionHeadphoneDetection();
}
|
||||
|
||||
|
||||
/// @brief Get the gpio number for auxin detection
/// @return gpio number, or -1 if the board has none
int8_t pinAuxin() { return kit.pinAuxin(); }
|
||||
|
||||
/// @brief Get the gpio number for headphone detection
/// @return gpio number, or -1 if the board has none
int8_t pinHeadphoneDetect() { return kit.pinHeadphoneDetect(); }
|
||||
|
||||
/// @brief Get the gpio number for PA (power amplifier) enable
/// @return gpio number, or -1 if the board has none
int8_t pinPaEnable() { return kit.pinPaEnable(); }
|
||||
|
||||
/// @brief Get the gpio number for adc detection
/// @return gpio number, or -1 if the board has none
int8_t pinAdcDetect() { return kit.pinAdcDetect(); }
|
||||
|
||||
/// @brief Get the mclk gpio number of the ES7243 codec
/// @return gpio number, or -1 if the board has none
int8_t pinEs7243Mclk() { return kit.pinEs7243Mclk(); }
|
||||
|
||||
/// @brief Get the record-button id for adc-button
/// @return button id, or -1 if the board has none
int8_t pinInputRec() { return kit.pinInputRec(); }
|
||||
|
||||
/// @brief Get the id for the mode-button
/// @return button id, or -1 if the board has none
int8_t pinInputMode() { return kit.pinInputMode(); }
|
||||
|
||||
/**
|
||||
* @brief Get number for set function
|
||||
*
|
||||
* @return -1 non-existent
|
||||
* Others number
|
||||
*/
|
||||
int8_t pinInputSet() { return kit.pinInputSet(); };
|
||||
|
||||
/// @brief Get the id for the play function button
/// @return button id, or -1 if the board has none
int8_t pinInputPlay() { return kit.pinInputPlay(); }
|
||||
|
||||
/// @brief Get the id for the volume-up function button
/// @return button id, or -1 if the board has none
int8_t pinVolumeUp() { return kit.pinVolumeUp(); }
|
||||
|
||||
/// @brief Get the id for the volume-down function button
/// @return button id, or -1 if the board has none
int8_t pinVolumeDown() { return kit.pinVolumeDown(); }
|
||||
|
||||
/// @brief Get the codec reset gpio number
/// (previous comment wrongly said "green led")
/// @return gpio number, or -1 if the board has none
int8_t pinResetCodec() { return kit.pinResetCodec(); }
|
||||
|
||||
/// @brief Get the board reset gpio number
/// (previous comment wrongly said "DSP reset")
/// @return gpio number, or -1 if the board has none
int8_t pinResetBoard() { return kit.pinResetBoard(); }
|
||||
|
||||
/// @brief Get the green led gpio number
/// (previous comment wrongly said "DSP reset")
/// @return gpio number, or -1 if the board has none
int8_t pinGreenLed() { return kit.pinGreenLed(); }
|
||||
|
||||
/// @brief Get the blue led gpio number
/// (previous comment wrongly said "green led")
/// @return gpio number, or -1 if the board has none
int8_t pinBlueLed() { return kit.pinBlueLed(); }
|
||||
|
||||
protected:
  AudioKit kit;                 // underlying board driver
  I2SStream i2s_stream;         // I2S transport used for audio data
  AudioKitStreamConfig cfg = defaultConfig(RXTX_MODE);  // active configuration
  AudioActions actions;         // registered button/pin actions
  int volume_value = 40;        // current volume (driver scale)
  bool active = true;           // start/stop toggle state
  bool is_started = false;      // set once begin() has completed
|
||||
|
||||
/// Determines the action logic (ActiveLow, ActiveHigh or ActiveTouch) for the
/// given pin from the board's key table; falls back to ActiveLow when the pin
/// is not listed or when USE_EXT_BUTTON_LOGIC is not compiled in.
AudioActions::ActiveLogic getActionLogic(int pin){
#if defined(USE_EXT_BUTTON_LOGIC)
  // board-provided table mapping key ids to peripheral types
  input_key_service_info_t input_key_info[] = INPUT_KEY_DEFAULT_INFO();
  int size = sizeof(input_key_info) / sizeof(input_key_info[0]);
  for (int j=0; j<size; j++){
    if (pin == input_key_info[j].act_id){
      switch(input_key_info[j].type){
        case PERIPH_ID_ADC_BTN:
          LOGD("getActionLogic for pin %d -> %d", pin, AudioActions::ActiveHigh);
          return AudioActions::ActiveHigh;
        case PERIPH_ID_BUTTON:
          LOGD("getActionLogic for pin %d -> %d", pin, AudioActions::ActiveLow);
          return AudioActions::ActiveLow;
        case PERIPH_ID_TOUCH:
          LOGD("getActionLogic for pin %d -> %d", pin, AudioActions::ActiveTouch);
          return AudioActions::ActiveTouch;
      }
    }
  }
  LOGW("Undefined ActionLogic for pin: %d ",pin);
#endif
  // default when no table entry matches (or table support is disabled)
  return AudioActions::ActiveLow;
}
|
||||
|
||||
/// Setup the supported default actions (start/stop, headphone detection,
/// volume up/down), skipping any pin that conflicts with an active SD card
/// on the respective board variant.
void setupActions() {
  TRACEI();

  // pin conflicts with the SD CS pin for AIThinker and buttons
  if (! (cfg.sd_active && (AUDIOKIT_BOARD==5 || AUDIOKIT_BOARD==6))){
    LOGD("actionStartStop")
    addAction(kit.pinInputMode(), actionStartStop);
  } else {
    LOGW("Mode Button ignored because of conflict: %d ",kit.pinInputMode());
  }

  // pin conflicts with AIThinker A101 and headphone detection
  if (! (cfg.sd_active && AUDIOKIT_BOARD==6)) {
    LOGD("actionHeadphoneDetection pin:%d",kit.pinHeadphoneDetect())
    // ActiveChange: react to both plug and unplug events
    actions.add(kit.pinHeadphoneDetect(), actionHeadphoneDetection, AudioActions::ActiveChange);
  } else {
    LOGW("Headphone detection ignored because of conflict: %d ",kit.pinHeadphoneDetect());
  }

  // pin conflicts with SD Lyrat SD CS GpioPinand buttons / Conflict on Audiokit V. 2957
  if (! (cfg.sd_active && (AUDIOKIT_BOARD==1 || AUDIOKIT_BOARD==7))){
    LOGD("actionVolumeDown")
    addAction(kit.pinVolumeDown(), actionVolumeDown);
    LOGD("actionVolumeUp")
    addAction(kit.pinVolumeUp(), actionVolumeUp);
  } else {
    LOGW("Volume Buttons ignored because of conflict: %d ",kit.pinVolumeDown());
  }
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
113
libraries/audio-tools/src/AudioTools/AudioLibs/AudioMP34DT05.h
Normal file
113
libraries/audio-tools/src/AudioTools/AudioLibs/AudioMP34DT05.h
Normal file
@@ -0,0 +1,113 @@
|
||||
#pragma once
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "PDM.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
 * @brief Config for MP34DT05 Microphone. Supported sample rates 16000, 41667;
 * supported bits_per_sample only 16.
 */
struct AudioMP34DT05Config : public AudioInfo {
  AudioMP34DT05Config() {
    // defaults matching the PDM library capabilities
    channels = 1;
    sample_rate = 16000;
    bits_per_sample = 16;
  }
  int gain = 20; // value of DEFAULT_PDM_GAIN
  int buffer_size = 512;  // bytes per ring-buffer segment
  int buffer_count = 2;   // number of ring-buffer segments
  // define pins
  // int pin_data = PIN_PDM_DIN;
  // int pin_clk = PIN_PDM_CLK;
  // int pin_pwr = PIN_PDM_PWR;

  /// Logs the configuration (audio info, gain and buffer size)
  void logInfo() {
    AudioInfo::logInfo();
    LOGI("gain: %d", gain);
    LOGI("buffer_size: %d", buffer_size);
  }
};
|
||||
|
||||
// Global self-pointer used by the static PDM callback (onReceiveStatic).
// NOTE(review): a non-inline global definition in a header risks an ODR
// violation if this header is included from multiple translation units —
// confirm single-TU usage or make it inline/static.
class AudioMP34DT05 *selfAudioMP34DT05 = nullptr;
|
||||
|
||||
/**
|
||||
* @brief MP34DT05 Microphone of Nano BLE Sense. We provide a proper Stream
|
||||
* implementation. See https://github.com/arduino/ArduinoCore-nRF528x-mbedos
|
||||
* @ingroup io
|
||||
*/
|
||||
class AudioMP34DT05 : public AudioStream {
|
||||
public:
|
||||
AudioMP34DT05() { selfAudioMP34DT05 = this; };
|
||||
virtual ~AudioMP34DT05() {
|
||||
if (p_buffer != nullptr) delete p_buffer;
|
||||
};
|
||||
|
||||
AudioMP34DT05Config defaultConfig(int mode = RX_MODE) {
|
||||
AudioMP34DT05Config cfg;
|
||||
if (mode != RX_MODE) {
|
||||
LOGE("TX_MODE is not supported");
|
||||
}
|
||||
return cfg;
|
||||
}
|
||||
|
||||
bool begin() { return begin(config); }
|
||||
|
||||
bool begin(AudioMP34DT05Config cfg) {
|
||||
TRACEI();
|
||||
config = cfg;
|
||||
cfg.logInfo();
|
||||
if (p_buffer == nullptr) {
|
||||
p_buffer = new NBuffer<uint8_t>(cfg.buffer_size, cfg.buffer_count);
|
||||
}
|
||||
p_mic->setBufferSize(cfg.buffer_size);
|
||||
p_mic->onReceive(onReceiveStatic);
|
||||
LOGD("begin(%d,%d)", cfg.channels, cfg.sample_rate);
|
||||
bool result = p_mic->begin(cfg.channels, cfg.sample_rate);
|
||||
if (!result) {
|
||||
LOGE("begin(%d,%d)", cfg.channels, cfg.sample_rate);
|
||||
}
|
||||
LOGD("setGain: %d", cfg.gain);
|
||||
p_mic->setGain(cfg.gain);
|
||||
return result;
|
||||
}
|
||||
|
||||
void end() {
|
||||
TRACEI();
|
||||
if (p_mic != nullptr) {
|
||||
p_mic->end();
|
||||
}
|
||||
|
||||
delete p_buffer;
|
||||
p_buffer=nullptr;
|
||||
}
|
||||
|
||||
size_t readBytes(uint8_t *data, size_t len) override {
|
||||
if (p_buffer == nullptr) return 0;
|
||||
return p_buffer->readArray(data, len);
|
||||
}
|
||||
|
||||
int available() override {
|
||||
if (p_buffer == nullptr) return 0;
|
||||
return p_buffer->available();
|
||||
}
|
||||
|
||||
protected:
|
||||
PDMClass *p_mic = &PDM;
|
||||
NBuffer<uint8_t> *p_buffer = nullptr;
|
||||
AudioMP34DT05Config config;
|
||||
|
||||
/// for some strange reasons available provides only the right result after
|
||||
/// onReceive, so unfortunately we need to use an additional buffer
|
||||
void onReceive() {
|
||||
int bytesAvailable = p_mic->available();
|
||||
// Read into the sample buffer
|
||||
uint8_t sampleBuffer[bytesAvailable]={0};
|
||||
int read = PDM.read(sampleBuffer, bytesAvailable);
|
||||
p_buffer->writeArray(sampleBuffer, read);
|
||||
}
|
||||
|
||||
static void onReceiveStatic() { selfAudioMP34DT05->onReceive(); }
|
||||
};
|
||||
|
||||
} // namespace
|
||||
114
libraries/audio-tools/src/AudioTools/AudioLibs/AudioRealFFT.h
Normal file
114
libraries/audio-tools/src/AudioTools/AudioLibs/AudioRealFFT.h
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioFFT.h"
|
||||
#include "FFT/FFTReal.h"
|
||||
|
||||
/**
|
||||
* @defgroup fft-real Real
|
||||
* @ingroup fft
|
||||
* @brief FFT using Real FFT
|
||||
**/
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Driver for RealFFT
|
||||
* @ingroup fft-real
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class FFTDriverRealFFT : public FFTDriver {
|
||||
public:
|
||||
bool begin(int len) override {
|
||||
this->len = len;
|
||||
v_x.resize(len);
|
||||
v_f.resize(len);
|
||||
if (p_fft_object==nullptr) p_fft_object = new ffft::FFTReal<float>(len);
|
||||
assert(p_fft_object!=nullptr);
|
||||
return p_fft_object!=nullptr;
|
||||
}
|
||||
void end()override{
|
||||
if (p_fft_object!=nullptr) {
|
||||
delete p_fft_object;
|
||||
p_fft_object = nullptr;
|
||||
}
|
||||
v_x.resize(0);
|
||||
v_f.resize(0);
|
||||
}
|
||||
void setValue(int idx, float value) override{
|
||||
v_x[idx] = value;
|
||||
}
|
||||
|
||||
void fft() override{
|
||||
memset(v_f.data(),0,len*sizeof(float));
|
||||
p_fft_object->do_fft(v_f.data(), v_x.data());
|
||||
};
|
||||
|
||||
/// Inverse fft - convert fft result back to time domain (samples)
|
||||
void rfft() override{
|
||||
// ifft
|
||||
p_fft_object->do_ifft(v_f.data(), v_x.data());
|
||||
}
|
||||
|
||||
bool isReverseFFT() override { return true;}
|
||||
|
||||
float magnitude(int idx) override {
|
||||
return sqrt(magnitudeFast(idx));
|
||||
}
|
||||
|
||||
/// magnitude w/o sqrt
|
||||
float magnitudeFast(int idx) override {
|
||||
return ((v_x[idx] * v_x[idx]) + (v_f[idx] * v_f[idx]));
|
||||
}
|
||||
|
||||
bool isValid() override{ return p_fft_object!=nullptr; }
|
||||
|
||||
/// get Real value
|
||||
float getValue(int idx) override { return v_x[idx];}
|
||||
|
||||
bool setBin(int pos, float real, float img) override {
|
||||
if (pos < 0 || pos >= len) return false;
|
||||
v_x[pos] = real;
|
||||
v_f[pos] = img;
|
||||
return true;
|
||||
}
|
||||
bool getBin(int pos, FFTBin &bin) override {
|
||||
if (pos>=len) return false;
|
||||
bin.real = v_x[pos];
|
||||
bin.img = v_f[pos];
|
||||
return true;
|
||||
}
|
||||
|
||||
ffft::FFTReal <float> *p_fft_object=nullptr;
|
||||
Vector<float> v_x{0}; // real
|
||||
Vector<float> v_f{0}; // complex
|
||||
int len;
|
||||
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief AudioFFT using RealFFT
|
||||
* @ingroup fft-real
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class AudioRealFFT : public AudioFFTBase {
 public:
  // installs a FFTDriverRealFFT; the base class takes ownership semantics
  // as defined by AudioFFTBase
  AudioRealFFT() : AudioFFTBase(new FFTDriverRealFFT()) {}

  /// Provides the real array returned by the FFT
  float* realArray() {
    return driverEx()->v_x.data();
  }

  /// Provides the complex array returned by the FFT
  float *imgArray() {
    return driverEx()->v_f.data();
  }

  /// Access to the concrete driver (always a FFTDriverRealFFT here)
  FFTDriverRealFFT* driverEx() {
    return (FFTDriverRealFFT*)driver();
  }
};
|
||||
|
||||
}
|
||||
318
libraries/audio-tools/src/AudioTools/AudioLibs/AudioSTK.h
Normal file
318
libraries/audio-tools/src/AudioTools/AudioLibs/AudioSTK.h
Normal file
@@ -0,0 +1,318 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/CoreAudio/AudioEffects/AudioEffect.h"
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "AudioToolsConfig.h"
|
||||
|
||||
#ifdef ESP32
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#endif
|
||||
#include "StkAll.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief The Synthesis ToolKit in C++ (STK) is a set of open source audio
|
||||
* signal processing and algorithmic synthesis classes written in the C++
|
||||
* programming language. You need to install
|
||||
* https://github.com/pschatzmann/Arduino-STK
|
||||
*
|
||||
* You can find further informarmation in the original Readme of the STK Project
|
||||
*
|
||||
* Like many other sound libraries it originates from an University (Princeton)
|
||||
* and can look back at a very long history: it was created in 1995. In the 90s
|
||||
* the computers had limited processor power and memory available. In todays
|
||||
* world we can get some cheap Microcontrollers, which provide almost the same
|
||||
* capabilities.
|
||||
*
|
||||
* @ingroup generator
|
||||
* @tparam T
|
||||
*/
|
||||
|
||||
template <class StkCls, class T>
class STKGenerator : public SoundGenerator<T> {
 public:
  STKGenerator() = default;

  // Creates an STKGenerator for an instrument
  STKGenerator(StkCls& instrument) : SoundGenerator<T>() {
    this->p_instrument = &instrument;
  }

  /// Defines/replaces the STK instrument to sample from
  void setInput(StkCls& instrument) { this->p_instrument = &instrument; }

  /// provides the default configuration (stereo, STK sample rate)
  AudioInfo defaultConfig() {
    AudioInfo info;
    info.channels = 2;
    info.bits_per_sample = sizeof(T) * 8;
    info.sample_rate = stk::Stk::sampleRate();
    return info;
  }

  /// Starts the processing; synchronizes the STK global sample rate
  /// with the configured one
  bool begin(AudioInfo cfg) {
    TRACEI();
    cfg.logInfo();
    SoundGenerator<T>::begin(cfg);
    // scale factor from STK's [-1,1] float output to the integer range of T
    max_value = NumberConverter::maxValue(sizeof(T) * 8);
    stk::Stk::setSampleRate(SoundGenerator<T>::info.sample_rate);
    return true;
  }

  /// Provides a single sample (0 while no instrument is assigned)
  T readSample() {
    T result = 0;
    if (p_instrument != nullptr) {
      result = p_instrument->tick() * max_value;
    }
    return result;
  }

 protected:
  StkCls* p_instrument = nullptr;  // not owned
  T max_value;  // set in begin(); scale factor for tick() output
};
|
||||
|
||||
/**
|
||||
* @brief STK Stream for Instrument
|
||||
* @ingroup dsp
|
||||
*/
|
||||
template <class StkCls, class T>
class STKInstrument : public STKGenerator<StkCls, T> {
 public:
  STKInstrument() = default;

  STKInstrument(StkCls& instrument) : STKGenerator<StkCls, T>(instrument) {}

  /// sets the frequency (triggers noteOn with the stored amplitude)
  void setFrequency(float frequency) override {
    this->p_instrument->noteOn(frequency, amplitude);
  }

  /// Starts a note at the given frequency and volume
  void noteOn(float freq, float vol) { this->p_instrument->noteOn(freq, vol); }

  /// Stops the current note
  void noteOff() { this->p_instrument->noteOff(); }

  /// Defines the amplitude (0.0 ... 1.0); values outside are clamped
  void setAmplitude(float amplitude) {
    this->amplitude = amplitude;
    if (this->amplitude > 1.0) this->amplitude = 1.0;
    if (this->amplitude < 0.0) this->amplitude = 0.0;
  }

 protected:
  float amplitude = 1.0;  // used by setFrequency() for noteOn volume
};
|
||||
|
||||
/**
|
||||
* @brief STK Stream for Instrument or Voicer
|
||||
* @ingroup dsp
|
||||
*/
|
||||
template <class StkCls>
class STKStream : public GeneratedSoundStream<int16_t> {
 public:
  STKStream() { GeneratedSoundStream<int16_t>::setInput(generator); };

  STKStream(StkCls& instrument) {
    generator.setInput(instrument);
    GeneratedSoundStream<int16_t>::setInput(generator);
  }

  /// Defines/replaces the instrument (by reference)
  void setInput(StkCls& instrument) {
    generator.setInput(instrument);
    GeneratedSoundStream<int16_t>::setInput(generator);
  }

  /// Defines/replaces the instrument (by pointer; must not be null)
  void setInput(StkCls* instrument) {
    generator.setInput(*instrument);
    GeneratedSoundStream<int16_t>::setInput(generator);
  }

  /// provides the default configuration (mono int16 at the STK sample rate)
  AudioInfo defaultConfig() {
    AudioInfo info;
    info.channels = 1;
    info.bits_per_sample = 16;
    info.sample_rate = stk::Stk::sampleRate();
    return info;
  }

 protected:
  STKGenerator<StkCls, int16_t> generator;  // produces the samples
};
|
||||
|
||||
/**
|
||||
* @brief Use any effect from the STK framework: e.g. Chorus, Echo, FreeVerb,
|
||||
* JCRev, PitShift... https://github.com/pschatzmann/Arduino-STK
|
||||
*
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKEffect : public AudioEffect {
 public:
  // wraps any stk::Effect (not owned)
  STKEffect(stk::Effect& stkEffect) { p_effect = &stkEffect; }

  /// Processes one int16 sample through the wrapped STK effect
  virtual effect_t process(effect_t in) {
    // just convert between int16 and float
    float value = static_cast<float>(in) / 32767.0;
    return p_effect->tick(value) * 32767.0;
  }

 protected:
  stk::Effect* p_effect = nullptr;  // not owned
};
|
||||
|
||||
/**
|
||||
* @brief Chorus Effect
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKChorus : public AudioEffect, public stk::Chorus {
|
||||
public:
|
||||
STKChorus(float baseDelay = 6000) : stk::Chorus(baseDelay) {}
|
||||
STKChorus(const STKChorus& copy) = default;
|
||||
|
||||
AudioEffect* clone() override { return new STKChorus(*this); }
|
||||
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::Chorus::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Echo Effect
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKEcho : public AudioEffect, public stk::Echo {
|
||||
public:
|
||||
STKEcho(unsigned long maximumDelay = (unsigned long)Stk::sampleRate())
|
||||
: stk::Echo(maximumDelay) {}
|
||||
STKEcho(const STKEcho& copy) = default;
|
||||
|
||||
AudioEffect* clone() override { return new STKEcho(*this); }
|
||||
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::Echo::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Jezar at Dreampoint's FreeVerb, implemented in STK.
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKFreeVerb : public AudioEffect, public stk::FreeVerb {
|
||||
public:
|
||||
STKFreeVerb() = default;
|
||||
STKFreeVerb(const STKFreeVerb& copy) = default;
|
||||
AudioEffect* clone() override { return new STKFreeVerb(*this); }
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::FreeVerb::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief John Chowning's reverberator class.
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKChowningReverb : public AudioEffect, public stk::JCRev {
|
||||
public:
|
||||
STKChowningReverb() = default;
|
||||
STKChowningReverb(const STKChowningReverb& copy) = default;
|
||||
AudioEffect* clone() override { return new STKChowningReverb(*this); }
|
||||
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::JCRev::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief CCRMA's NRev reverberator class.
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKNReverb : public AudioEffect, public stk::NRev {
|
||||
public:
|
||||
STKNReverb(float t60 = 1.0) : NRev(t60) {}
|
||||
STKNReverb(const STKNReverb& copy) = default;
|
||||
AudioEffect* clone() override { return new STKNReverb(*this); }
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::NRev::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Perry's simple reverberator class
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKPerryReverb : public AudioEffect, public stk::PRCRev {
|
||||
public:
|
||||
STKPerryReverb(float t60 = 1.0) : PRCRev(t60) {}
|
||||
STKPerryReverb(const STKPerryReverb& copy) = default;
|
||||
AudioEffect* clone() override { return new STKPerryReverb(*this); }
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::PRCRev::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Pitch shifter effect class based on the Lent algorithm
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKLentPitShift : public AudioEffect, public stk::LentPitShift {
|
||||
public:
|
||||
STKLentPitShift(float periodRatio = 1.0, int tMax = 512)
|
||||
: stk::LentPitShift(periodRatio, tMax) {}
|
||||
STKLentPitShift(const STKLentPitShift& copy) = default;
|
||||
|
||||
AudioEffect* clone() override { return new STKLentPitShift(*this); }
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::LentPitShift::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Simple Pitch shifter effect class: This class implements a simple
|
||||
* pitch shifter using a delay line.
|
||||
* @ingroup effects
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class STKPitShift : public AudioEffect, public stk::PitShift {
|
||||
public:
|
||||
STKPitShift() = default;
|
||||
STKPitShift(const STKPitShift& copy) = default;
|
||||
|
||||
AudioEffect* clone() override { return new STKPitShift(*this); }
|
||||
virtual effect_t process(effect_t in) {
|
||||
// just convert between int16 and float
|
||||
float value = static_cast<float>(in) / 32767.0;
|
||||
return stk::PitShift::tick(value) * 32767.0;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
#warning("obsolete: use AudioTools/Communication/AudioServerEx.h")
|
||||
#include "AudioTools/Communication/AudioServerEx.h"
|
||||
@@ -0,0 +1,7 @@
|
||||
#pragma once
|
||||
/**
|
||||
* @defgroup concurrency Concurrency
|
||||
* @ingroup main
|
||||
* @brief Multicore support
|
||||
*/
|
||||
#include "AudioTools/Concurrency/RTOS.h"
|
||||
@@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
#define VFS_SD SD
|
||||
#include "AudioTools/Disk/VFSFile.h"
|
||||
#include "AudioTools/Disk/VFS.h"
|
||||
|
||||
// We allow the access to the files via the global SD object
|
||||
|
||||
namespace audio_tools {

/// @brief Desktop file system compatibility alias
/// @ingroup io
using File = VFSFile;

/// @brief Desktop file system compatibility alias
/// @ingroup io
using FS = VFS;

// Global object for compatibility with Arduino code.
// NOTE(review): `static` in a header gives each translation unit its own SD
// instance — presumably intentional for single-TU sketches; confirm.
static FS SD;

}
|
||||
@@ -0,0 +1,220 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioLibs/Desktop/NoArduino.h"
|
||||
#include "AudioTools/CoreAudio/AudioStreams.h"
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
#include "AudioTools/AudioCodecs/CodecWAV.h"
|
||||
#include <string.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <filesystem>
|
||||
#include <stdio.h>
|
||||
#include "nlohmann/json.hpp"
|
||||
#include "xtl/xbase64.hpp"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Simple layer for Print object to write to a c++ file
|
||||
*/
|
||||
class FileOutput : public Print {
|
||||
public:
|
||||
FileOutput(std::fstream &stream){
|
||||
p_audio_stream = &stream;
|
||||
}
|
||||
size_t write(const uint8_t *data, size_t len) override {
|
||||
p_audio_stream->write((const char*)data,len);
|
||||
return len;
|
||||
}
|
||||
int availableForWrite() override {
|
||||
return 1024;
|
||||
}
|
||||
protected:
|
||||
std::fstream *p_audio_stream=nullptr;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Displays audio in a Jupyter as chart
|
||||
* Just wrapps a stream to provide the chart data
|
||||
*/
|
||||
template <typename T>
class ChartT {
 public:
  /// Defines the WAV file to read and which channel to plot
  void setup(std::string fName, int channelCount, int channelNo) {
    this->fname = fName;
    this->channels = channelCount;
    if (this->channels==0){
      LOGE("Setting channels to 0");
    }
    this->channel = channelNo;
  }

  int getChannels() {
    return this->channels;
  }

  int getChannel() {
    return this->channel;
  }

  /// Provides data as svg polyline (reads the WAV file, skipping the header)
  const std::string chartData() {
    str.clear();
    str.str("");
    // reset buffer;
    if (channel<channels){
      ifstream is;
      is.open(fname, is.binary);
      is.seekg(wav_header_size, is.beg);
      std::list<int16_t> audioList;
      // NOTE(review): variable-length array is non-standard C++
      T buffer[channels];
      size_t rec_size = channels*sizeof(T);
      while(is.read((char *)buffer, rec_size)){
        audioList.push_back(transform(buffer[channel]));
      }
      // NOTE(review): chart width hard-coded; the commented-out expression
      // suggests it was meant to be audioList.size() — confirm intent
      string str_size = "102400"; //std::to_string(audioList.size());
      str << "<style>div.x-svg {width: "<< str_size <<"px; }</style>";
      str << "<div class='x-svg'><svg viewBox='0 0 "<< str_size << " 100'> <polyline fill='none' stroke='blue' stroke-width='1' points ='";
      // copy data from input stream
      size_t idx = 0;
      for(int16_t sample: audioList){
        str << idx++ << "," << sample << " ";
      }
      str << "'/></svg></div>";
    } else {
      str << "<p>Channel " << channel << " of " << channels << " does not exist!</p>";
    }
    return str.str();
  }

 protected:
  std::stringstream str;           // reused output buffer
  std::string fname;               // WAV file path
  const int wav_header_size = 44;  // bytes skipped before the sample data
  int channels=0;
  int channel=0;

  /// Maps a raw sample into the 0..100 SVG viewBox range
  int transform(int x){
    int result = x / 1000; // scale -32 to 32
    result += 60; // shift down
    return result;
  }
};
|
||||
|
||||
/// @brief Default chart type for Jupyter integration
|
||||
/// @ingroup io
|
||||
using Chart = ChartT<int16_t>;
|
||||
|
||||
/**
|
||||
* @brief Output to Jupyter. We write the data just to a file from where we can
|
||||
* load the data again for different representations.
|
||||
*/
|
||||
template <typename T>
class JupyterAudioT : public AudioStream {
 public:
  /// Captures audio from the given stream into fileName; an existing file
  /// with the same name is removed first
  JupyterAudioT(const char* fileName, AudioStream &stream, int bufferCount=20, int bufferSize=1024) {
    buffer_count = bufferCount;
    p_audio_stream = &stream;
    cfg = stream.audioInfo();
    copier.resize(bufferSize);
    fname = fileName;
    if (fileExists()){
      remove(fileName);
    }
  }

  /// Provides a chart for the indicated channel (creates the WAV file first)
  ChartT<T> &chart(int channel=0) {
    createWAVFile();
    assert(cfg.channels>0);
    chrt.setup(fname, cfg.channels, channel);
    return chrt;
  }

  // provide the file name
  const std::string &name() const {
    return fname;
  }

  // provides the absolute file path as string
  const std::string path() const {
    std::filesystem::path p = fname;
    std::string result = std::filesystem::absolute(p);
    return result;
  }

  // fills a wav file with data once, the first time it was requested
  void createWAVFile(){
    try{
      if (!fileExists()){
        std::fstream fstream(fname, fstream.binary | fstream.trunc | fstream.out);
        FileOutput fp(fstream);
        wave_encoder.setAudioInfo(audioInfo());
        out.setOutput(&fp);
        out.setEncoder(&wave_encoder);
        out.begin(); // output to decoder
        copier.begin(out, *p_audio_stream);
        copier.copyN(buffer_count);
        fstream.close();
      }
    } catch(const std::exception& ex){
      std::cerr << ex.what();
    }
  }

  /// Checks whether the capture file already exists
  bool fileExists() {
    ifstream f(fname.c_str());
    return f.good();
  }

  int bufferCount(){
    return buffer_count;
  }

  // provides the wav data as base64 encoded string
  std::string audio() {
    std::ifstream fin(fname, std::ios::binary);
    std::stringstream m_buffer;
    m_buffer << fin.rdbuf();
    return xtl::base64encode(m_buffer.str());
  }

  // Provides the audio information
  AudioInfo audioInfo() {
    return cfg;
  }

 protected:
  AudioStream *p_audio_stream=nullptr;  // source of the audio (not owned)
  ChartT<T> chrt;                       // reusable chart view of the file
  WAVEncoder wave_encoder;
  EncodedAudioOutput out;
  StreamCopyT<T> copier;
  AudioInfo cfg;
  string fname;
  size_t buffer_count=0;                // number of buffers copied into the file
};
|
||||
|
||||
/// @brief Default Jupyter audio output with 16-bit samples
|
||||
/// @ingroup io
|
||||
using JupyterAudio = JupyterAudioT<int16_t>;
|
||||
|
||||
} // namespace audio_tools
|
||||
|
||||
/// Display Chart in Jupyterlab xeus: renders the SVG chart as text/html
nl::json mime_bundle_repr(Chart &in) {
  auto bundle = nl::json::object();
  bundle["text/html"] = in.chartData();
  return bundle;
}
|
||||
|
||||
/// Display Audio player in Jupyterlab xeus: embeds the WAV data as a
/// base64 data-URI inside an HTML <audio> element
nl::json mime_bundle_repr(JupyterAudio &in) {
  auto bundle = nl::json::object();
  in.createWAVFile();
  bundle["text/html"] = "<audio controls "
                        "src='data:audio/wav;base64," +
                        in.audio() + "'/>";
  return bundle;
}
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* Generic main for desktop arduino emulation
|
||||
*/
|
||||
#ifndef NO_MAIN
|
||||
|
||||
#pragma once
|
||||
void loop();
|
||||
void setup();
|
||||
|
||||
int main (void) {
|
||||
setup();
|
||||
while(true){
|
||||
loop();
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,240 @@
|
||||
#pragma once
|
||||
/**
|
||||
* @file NoArduino.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief If you want to use the framework w/o Arduino you need to provide the
|
||||
* implementation of a couple of classes and methods!
|
||||
* @version 0.1
|
||||
* @date 2022-09-19
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#include "AudioToolsConfig.h"
|
||||
#ifdef IS_DESKTOP
|
||||
# error We should not get here!
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm> // std::max
|
||||
#include <chrono>
|
||||
|
||||
|
||||
#define IS_NOARDUINO
|
||||
|
||||
#ifndef PSTR
|
||||
#define PSTR(fmt) fmt
|
||||
#endif
|
||||
|
||||
#ifndef PI
|
||||
#define PI 3.14159265359f
|
||||
#endif
|
||||
|
||||
#ifndef INPUT
|
||||
#define INPUT 0x0
|
||||
#endif
|
||||
|
||||
#ifndef OUTPUT
|
||||
#define OUTPUT 0x1
|
||||
#endif
|
||||
|
||||
#ifndef INPUT_PULLUP
|
||||
#define INPUT_PULLUP 0x2
|
||||
#endif
|
||||
|
||||
#ifndef HIGH
|
||||
#define HIGH 0x1
|
||||
#endif
|
||||
#ifndef LOW
|
||||
#define LOW 0x0
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
enum PrintCharFmt { DEC=10, HEX=16 };
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
class Print {
|
||||
public:
|
||||
#ifndef DOXYGEN
|
||||
virtual size_t write(uint8_t ch) {
|
||||
// not implememnted: to be overritten
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual size_t write(const char *str) {
|
||||
return write((const uint8_t *)str, strlen(str));
|
||||
}
|
||||
|
||||
virtual size_t write(const char *buffer, size_t size) {
|
||||
return write((const uint8_t *)buffer, size);
|
||||
}
|
||||
|
||||
virtual int print(const char *msg) {
|
||||
int result = strlen(msg);
|
||||
return write(msg, result);
|
||||
}
|
||||
|
||||
virtual int println(const char *msg = "") {
|
||||
int result = print(msg);
|
||||
write('\n');
|
||||
return result + 1;
|
||||
}
|
||||
|
||||
virtual int println(float number) {
|
||||
char buffer[120];
|
||||
snprintf(buffer, 120, "%f", number);
|
||||
return println(buffer);
|
||||
}
|
||||
|
||||
virtual int print(float number) {
|
||||
char buffer[120];
|
||||
snprintf(buffer, 120, "%f", number);
|
||||
return print(buffer);
|
||||
}
|
||||
|
||||
virtual int print(int number) {
|
||||
char buffer[80];
|
||||
snprintf(buffer, 80, "%d", number);
|
||||
return print(buffer);
|
||||
}
|
||||
|
||||
virtual int print(char c, PrintCharFmt spec) {
|
||||
char result[5];
|
||||
switch (spec) {
|
||||
case DEC:
|
||||
snprintf(result, 3, "%c", c);
|
||||
return print(result);
|
||||
case HEX:
|
||||
snprintf(result, 3, "%x", c);
|
||||
return print(result);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int println(int value, PrintCharFmt fmt) {
|
||||
return print(value, fmt) + println();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
if (data == nullptr) return 0;
|
||||
for (size_t j = 0; j < len; j++) {
|
||||
write(data[j]);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
virtual int availableForWrite() { return 1024; }
|
||||
|
||||
virtual void flush() { /* Empty implementation for backward compatibility */ }
|
||||
|
||||
protected:
|
||||
int _timeout = 10;
|
||||
};
|
||||
|
||||
/// Readable stream on top of Print; default implementations behave as an
/// always-empty source and are meant to be overridden
class Stream : public Print {

 public:
  virtual ~Stream() = default;
  /// Number of bytes available to read (stub: none)
  virtual int available() { return 0; }
  /// Reads up to len bytes (stub: reads nothing)
  virtual size_t readBytes(uint8_t *data, size_t len) { return 0; }
#ifndef DOXYGEN
  /// Reads one byte; -1 when nothing is available
  virtual int read() { return -1; }
  /// Next byte without consuming it; -1 when nothing is available
  virtual int peek() { return -1; }
  /// Read timeout (ignored by this stub)
  virtual void setTimeout(size_t timeoutMs) {}
  /// Reads until the terminator (which is consumed but not stored) or until
  /// length bytes were stored; returns the number of bytes written to buffer
  size_t readBytesUntil(char terminator, char *buffer, size_t length) {
    for (size_t j = 0; j < length; j++) {  // size_t: avoids signed/unsigned mix
      int val = read();
      // BUGFIX: previously returned j-1 on end-of-data, which under-reported
      // the count by one and underflowed to a huge size_t when j == 0
      if (val == -1) return j;
      if (val == terminator) return j;
      buffer[j] = val;
    }
    return length;
  }
  size_t readBytesUntil(char terminator, uint8_t *buffer, size_t length) {
    return readBytesUntil(terminator, (char *)buffer, length);
  }

#endif
  /// A stream is always considered valid
  operator bool() { return true; }
};
|
||||
|
||||
/// Network client stub: never connects and never delivers any data
class Client : public Stream {
 public:
  /// Closes the (non-existent) connection - no-op
  void stop() {}
  /// Reads into a buffer: this stub has no data
  virtual int read(uint8_t *buffer, size_t len) { return 0; }
  /// Reads one byte: this stub has no data
  virtual int read() { return 0; }
  /// This stub is never connected
  bool connected() { return false; }
  /// Connecting always fails in this stub
  bool connect(const char *ip, int port) { return false; }
  /// Evaluates to false: there is no usable connection
  virtual operator bool() { return false; }
};
|
||||
|
||||
class HardwareSerial : public Stream {
|
||||
public:
|
||||
size_t write(uint8_t ch) override { return putchar(ch); }
|
||||
virtual operator bool() { return true; }
|
||||
bool begin(long baudrate, int config=0) { return true; }
|
||||
};
|
||||
|
||||
static HardwareSerial Serial;
|
||||
|
||||
/// Re-maps x from the input range [in_min, in_max] to the output range
/// [out_min, out_max] using integer arithmetic (result is truncated)
inline long map(long x, long in_min, long in_max, long out_min, long out_max) {
  const long in_span = in_max - in_min;
  const long out_span = out_max - out_min;
  return out_min + ((x - in_min) * out_span) / in_span;
}
|
||||
|
||||
} // namespace audio_tools
|
||||
|
||||
#if defined(ESP32)
|
||||
#include "driver/gpio.h"
|
||||
#include "freertos/FreeRTOS.h" // needed for ESP Arduino < 2.0
|
||||
#include "freertos/FreeRTOSConfig.h"
|
||||
|
||||
/// Arduino-style digitalRead on top of ESP-IDF (e.g. for AudioActions):
/// returns the current GPIO input level (0 or 1)
inline int digitalRead(int pin) {
// NOTE(review): logs on every read - presumably left in for debugging; confirm intended
printf("digitalRead:%d\n", pin);
return gpio_get_level((gpio_num_t)pin);
}
|
||||
|
||||
/// Arduino-style digitalWrite on top of ESP-IDF: sets the GPIO output level
inline void digitalWrite(int pin, int value) {
gpio_set_level((gpio_num_t)pin, value);
}
|
||||
|
||||
/// Arduino-style pinMode on top of ESP-IDF: resets the pin, then configures
/// its direction for INPUT, OUTPUT or INPUT_PULLUP; any other mode falls back
/// to bidirectional input/output
inline void pinMode(int pin, int mode) {
gpio_num_t gpio_pin = (gpio_num_t)pin;
printf("pinMode(%d,%d)\n", pin, mode);

// clear any previous configuration before applying the new direction
gpio_reset_pin(gpio_pin);
switch (mode) {
case INPUT:
gpio_set_direction(gpio_pin, GPIO_MODE_INPUT);
break;
case OUTPUT:
gpio_set_direction(gpio_pin, GPIO_MODE_OUTPUT);
break;
case INPUT_PULLUP:
// input with the internal pull-up resistor enabled
gpio_set_direction(gpio_pin, GPIO_MODE_INPUT);
gpio_set_pull_mode(gpio_pin, GPIO_PULLUP_ONLY);
break;
default:
gpio_set_direction(gpio_pin, GPIO_MODE_INPUT_OUTPUT);
break;
}
}
|
||||
|
||||
/// Blocks the calling FreeRTOS task for ms milliseconds (tick granularity)
inline void delay(uint32_t ms){ vTaskDelay(ms / portTICK_PERIOD_MS);}
/// Milliseconds since the FreeRTOS scheduler started (tick based)
inline uint32_t millis() {return (xTaskGetTickCount() * portTICK_PERIOD_MS);}
/// Busy-waits for the given number of microseconds (parameter name 'ms' is historic)
inline void delayMicroseconds(uint32_t ms) {esp_rom_delay_us(ms);}
/// Microseconds since scheduler start; NOTE(review): derived from the tick count,
/// so the effective resolution is one tick (typically ~1 ms) - confirm acceptable
inline uint64_t micros() { return xTaskGetTickCount() * portTICK_PERIOD_MS * 1000;}
|
||||
|
||||
// delay and millis has been defined
|
||||
#define DESKTOP_MILLIS_DEFINED
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,45 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioLibs/Desktop/NoArduino.h"
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#ifndef DESKTOP_MILLIS_DEFINED
|
||||
#define DESKTOP_MILLIS_DEFINED
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/// Returns the milliseconds since the system clock epoch (desktop stand-in for
/// Arduino millis()); truncated to uint32_t, so the value wraps periodically
inline uint32_t millis(){
  using namespace std::chrono;
  // duration_cast does the whole conversion; an unused local type alias was removed
  return static_cast<uint32_t>(
      duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count());
}
|
||||
|
||||
/// Sleeps for ms milliseconds (desktop stand-in for Arduino delay())
/// BUGFIX: marked inline - this function is defined in a header, so a
/// non-inline definition violates the ODR when included from several
/// translation units (the sibling functions here were already inline)
inline void delay(unsigned long ms){
  std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}
|
||||
|
||||
/// Sleeps for us microseconds (desktop stand-in for Arduino delayMicroseconds())
/// BUGFIX: marked inline to avoid ODR violations when this header is included
/// from multiple translation units
inline void delayMicroseconds(unsigned int us){
  std::this_thread::sleep_for(std::chrono::microseconds(us));
}
|
||||
|
||||
/// Returns the microseconds since the system clock epoch (desktop stand-in for
/// Arduino micros()); truncated to unsigned long
inline unsigned long micros(void){
  using namespace std::chrono;
  // duration_cast does the whole conversion; an unused local type alias was removed
  return static_cast<unsigned long>(
      duration_cast<microseconds>(system_clock::now().time_since_epoch()).count());
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
3987
libraries/audio-tools/src/AudioTools/AudioLibs/FFT/FFTReal.h
Normal file
3987
libraries/audio-tools/src/AudioTools/AudioLibs/FFT/FFTReal.h
Normal file
File diff suppressed because it is too large
Load Diff
265
libraries/audio-tools/src/AudioTools/AudioLibs/FFT/FFTWindows.h
Normal file
265
libraries/audio-tools/src/AudioTools/AudioLibs/FFT/FFTWindows.h
Normal file
@@ -0,0 +1,265 @@
|
||||
/**
|
||||
* @file FFTWindows.h
|
||||
* @author Phil Schatzmann
|
||||
* @brief Different Window functions that can be used by FFT
|
||||
* @version 0.1
|
||||
* @date 2022-04-29
|
||||
*
|
||||
* @copyright Copyright (c) 2022
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <math.h>
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief FFT Window Function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
/// Base class for FFT window functions: subclasses implement factor_internal()
/// for the first half of the window; factor() mirrors it onto the second half.
/// Protected member names are part of the contract used by the subclasses.
class WindowFunction {
 public:
  WindowFunction() = default;

  /// Initializes the window for the given FFT length (number of samples)
  virtual void begin(int samples) {
    this->i_samples = samples;
    this->i_half_samples = samples / 2;
    this->samples_minus_1 = static_cast<float>(samples) - 1.0f;
  }

  /// Window coefficient at idx; the second half mirrors the first half and the
  /// result is capped at 1.0
  inline float factor(int idx) {
    assert(i_half_samples == i_samples / 2);
    int mirrored_idx = (idx <= i_half_samples) ? idx : (i_samples - idx - 1);
    float value = factor_internal(mirrored_idx);
    return (value > 1.0f) ? 1.0f : value;
  }

  /// Number of samples (fft length)
  inline int samples() { return i_samples; }

  /// Name of the concrete window function
  virtual const char* name() = 0;

 protected:
  float samples_minus_1 = 0.0f;
  int i_samples = 0;
  int i_half_samples = 0;
  const float twoPi = 6.28318531f;
  const float fourPi = 12.56637061f;
  const float sixPi = 18.84955593f;

  /// Subclasses provide the window formula for the first (unmirrored) half
  virtual float factor_internal(int idx) = 0;

  /// idx normalized by (samples - 1)
  inline float ratio(int idx) {
    return static_cast<float>(idx) / samples_minus_1;
  }
};
|
||||
|
||||
/**
|
||||
* @brief Buffered window function, so that we do not need to re-calculate the
|
||||
* values
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class BufferedWindow : public WindowFunction {
 public:
  // wf: the window function whose coefficients are cached (not owned)
  BufferedWindow(WindowFunction* wf) { p_wf = wf; }

  // "Buffered " prefix + name of the wrapped window
  // NOTE(review): uses a shared static buffer - not reentrant; confirm single-threaded use
  const char* name() override {
    static char buffer[80] = "Buffered ";
    strncpy(buffer + 9, p_wf->name(), 69);
    return buffer;
  }

  // Recomputes the cache only when the wrapped window's length changed
  virtual void begin(int samples) override {
    // process only if there is a change
    WindowFunction::begin(samples);
    if (p_wf->samples() != samples) {
      p_wf->begin(samples);
      // only indexes 0..i_half_samples are stored; factor() mirrors the rest
      int to_be_size = i_half_samples + 1;
      if (buffer.size() != to_be_size) {
        buffer.resize(to_be_size);
        for (int j = 0; j <= i_half_samples; j++) {
          buffer[j] = p_wf->factor(j);
        }
      }
    }
  }

 protected:
  WindowFunction* p_wf = nullptr;  // wrapped window function (not owned)
  Vector<float> buffer{0};         // cached coefficients for the first half

  // Serves factor() from the cache; out-of-range indexes yield 0
  float factor_internal(int idx) override {
    if (idx < 0 || idx > i_half_samples) return 0.0;
    return buffer[idx];
  }
};
|
||||
|
||||
/**
|
||||
* @brief Rectange FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class Rectange : public WindowFunction {
|
||||
public:
|
||||
Rectange() = default;
|
||||
float factor_internal(int idx) {
|
||||
if (idx < 0 || idx >= i_samples) return 0;
|
||||
return 1.0f;
|
||||
}
|
||||
const char* name() { return "Rectange"; }
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Hamming FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class Hamming : public WindowFunction {
|
||||
public:
|
||||
Hamming() = default;
|
||||
float factor_internal(int idx) {
|
||||
return 0.54f - (0.46f * cos(twoPi * ratio(idx)));
|
||||
}
|
||||
const char* name() { return "Hamming"; }
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Hann FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class Hann : public WindowFunction {
|
||||
public:
|
||||
Hann() = default;
|
||||
const char* name() { return "Hann"; }
|
||||
|
||||
float factor_internal(int idx) {
|
||||
return 0.54f * (1.0f - cos(twoPi * ratio(idx)));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Triangle FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class Triangle : public WindowFunction {
|
||||
public:
|
||||
Triangle() = default;
|
||||
const char* name() { return "Triangle"; }
|
||||
float factor_internal(int idx) {
|
||||
return 1.0f - ((2.0f * fabs((idx - 1) -
|
||||
(static_cast<float>(i_samples - 1) / 2.0f))) /
|
||||
samples_minus_1);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Nuttall FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
// Nuttall window: 4-term cosine sum evaluated at idx/(N-1)
class Nuttall : public WindowFunction {
 public:
  Nuttall() = default;
  const char* name() override { return "Nuttall"; }
  float factor_internal(int idx) override {
    float r = ratio(idx);
    return 0.355768f - (0.487396f * (cos(twoPi * r))) +
           (0.144232f * (cos(fourPi * r))) - (0.012604f * (cos(sixPi * r)));
  }
};
|
||||
|
||||
/**
|
||||
* @brief Blackman FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
// Blackman window: 3-term cosine sum evaluated at idx/(N-1)
class Blackman : public WindowFunction {
 public:
  Blackman() = default;
  const char* name() override { return "Blackman"; }
  float factor_internal(int idx) override {
    float r = ratio(idx);
    return 0.42323f - (0.49755f * (cos(twoPi * r))) +
           (0.07922f * (cos(fourPi * r)));
  }
};
|
||||
|
||||
/**
|
||||
* @brief BlackmanNuttall FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
// Blackman-Nuttall window: 4-term cosine sum evaluated at idx/(N-1)
class BlackmanNuttall : public WindowFunction {
 public:
  BlackmanNuttall() = default;
  const char* name() override { return "BlackmanNuttall"; }
  float factor_internal(int idx) override {
    float r = ratio(idx);
    return 0.3635819f - (0.4891775f * (cos(twoPi * r))) +
           (0.1365995f * (cos(fourPi * r))) - (0.0106411f * (cos(sixPi * r)));
  }
};
|
||||
|
||||
/**
|
||||
* @brief BlackmanHarris FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
// Blackman-Harris window: 4-term cosine sum evaluated at idx/(N-1)
class BlackmanHarris : public WindowFunction {
 public:
  BlackmanHarris() = default;
  const char* name() override { return "BlackmanHarris"; }
  float factor_internal(int idx) override {
    float r = ratio(idx);
    return 0.35875f - (0.48829f * (cos(twoPi * r))) +
           (0.14128f * (cos(fourPi * r))) - (0.01168f * (cos(sixPi * r)));
  }
};
|
||||
|
||||
/**
|
||||
* @brief FlatTop FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
// Flat-top window (3-term cosine sum)
// NOTE(review): common flat-top definitions use 5 terms; confirm these
// coefficients are the intended variant
class FlatTop : public WindowFunction {
 public:
  FlatTop() = default;
  const char* name() override { return "FlatTop"; }
  float factor_internal(int idx) override {
    float r = ratio(idx);
    return 0.2810639f - (0.5208972f * cos(twoPi * r)) +
           (0.1980399f * cos(fourPi * r));
  }
};
|
||||
|
||||
/**
|
||||
* @brief Welch FFT Window function
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
// Welch (parabolic) window
// NOTE(review): uses (idx - 1) rather than idx - confirm whether the
// one-sample offset is intended
class Welch : public WindowFunction {
 public:
  Welch() = default;
  const char* name() override { return "Welch"; }
  float factor_internal(int idx) override {
    float tmp =
        (((idx - 1) - samples_minus_1 / 2.0f) / (samples_minus_1 / 2.0f));
    return 1.0f - (tmp * tmp);
  }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
90
libraries/audio-tools/src/AudioTools/AudioLibs/FFTDisplay.h
Normal file
90
libraries/audio-tools/src/AudioTools/AudioLibs/FFTDisplay.h
Normal file
@@ -0,0 +1,90 @@
|
||||
#pragma once
|
||||
#include "AudioTools/AudioLibs/AudioFFT.h"
|
||||
#include "AudioTools/Concurrency/LockGuard.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
class FFTDisplay;
|
||||
static FFTDisplay *selfFFTDisplay = nullptr;
|
||||
#if defined(USE_CONCURRENCY)
|
||||
// fft mutex
|
||||
static Mutex fft_mux;
|
||||
#endif
|
||||
/**
|
||||
* Display FFT result: we can define a start bin and group susequent bins for a
|
||||
* combined result.
|
||||
*/
|
||||
|
||||
class FFTDisplay {
 public:
  // Registers this instance in the file-level selfFFTDisplay pointer so the
  // static fftCallback can reach it (so only one FFTDisplay can be active)
  FFTDisplay(AudioFFTBase &fft) {
    p_fft = &fft;
    selfFFTDisplay = this;
  }

  /// start bin which is displayed
  int fft_start_bin = 0;
  /// group result by adding subsequent bins
  int fft_group_bin = 1;
  /// Influences the sensitivity
  float fft_max_magnitude = 700.0f;

  // Installs the FFT callback and zeroes the magnitude snapshot
  void begin() {
    // assign fft callback
    AudioFFTConfig &fft_cfg = p_fft->config();
    fft_cfg.callback = fftCallback;

    // number of bins
    magnitudes.resize(p_fft->size());
    for (int j = 0; j < p_fft->size(); j++) {
      magnitudes[j] = 0;
    }
  }

  /// Returns the magnitude for the indicated led x position. We might
  /// need to combine values from the magnitudes array if this is much bigger.
  float getMagnitude(int x) {
    // get magnitude from fft
    float total = 0;
    for (int j = 0; j < fft_group_bin; j++) {
      int idx = fft_start_bin + (x * fft_group_bin) + j;
      // clamp to the last bin when the request runs past the available data
      if (idx >= magnitudes.size()) {
        idx = magnitudes.size() - 1;
      }
      total += magnitudes[idx];
    }
    return total / fft_group_bin;
  }

  // Magnitude scaled linearly into [0, max]; logs when fft_max_magnitude clips
  int getMagnitudeScaled(int x, int max) {
    int result = mapT<float>(getMagnitude(x), 0, fft_max_magnitude, 0.0f,
                             static_cast<float>(max));
    if (result > max){
      LOGD("fft_max_magnitude too small: current value is %f", getMagnitude(x))
    }
    // limit value to max
    return min(result, max);
  }

  /// callback method which provides updated data from fft
  static void fftCallback(AudioFFTBase &fft) {
    selfFFTDisplay->loadMangnitudes();
  };

 protected:
  AudioFFTBase *p_fft = nullptr;  // source FFT (not owned)
  Vector<float> magnitudes{0};    // last snapshot of per-bin magnitudes

  // Copies the current magnitudes out of the FFT; guarded by fft_mux when
  // concurrency support is compiled in
  void loadMangnitudes() {
    // just save magnitudes to be displayed
#if defined(USE_CONCURRENCY)
    LockGuard guard(fft_mux);
#endif
    for (int j = 0; j < p_fft->size(); j++) {
      float value = p_fft->magnitude(j);
      magnitudes[j] = value;
    }
  }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
274
libraries/audio-tools/src/AudioTools/AudioLibs/FFTEffects.h
Normal file
274
libraries/audio-tools/src/AudioTools/AudioLibs/FFTEffects.h
Normal file
@@ -0,0 +1,274 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioTools/AudioLibs/AudioRealFFT.h" // using RealFFT
|
||||
#include "AudioTools/CoreAudio/AudioOutput.h"
|
||||
#include "AudioTools/CoreAudio/StreamCopy.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
static Hann fft_effects_hann;
|
||||
static BufferedWindow fft_effects_buffered_window{&fft_effects_hann};
|
||||
|
||||
/**
|
||||
* @brief Common configuration for FFT effects
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
struct FFTEffectConfig : public AudioInfo {
  int length = 1024;  // FFT length in samples
  int stride = 512;   // hop size between consecutive FFT frames
  // window applied before the FFT (defaults to the shared pre-buffered Hann)
  WindowFunction *window_function = &fft_effects_buffered_window;
};
|
||||
|
||||
/**
|
||||
* @brief Abstract class for common Logic for FFT based effects. The effect is
|
||||
* applied after the fft to the frequency domain before executing the ifft.
|
||||
* Please note that this is quite processing time intensitive: so you might keep
|
||||
* the sample rate quite low if the processor is not fast enough!
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
|
||||
class FFTEffect : public AudioOutput {
 public:
  // out: destination for the processed (inverse-fft) audio (not owned)
  FFTEffect(Print &out) {
    p_out = &out;
    // make this instance reachable from the static effect_callback
    fft_cfg.ref = this;
  }

  FFTEffectConfig defaultConfig() {
    FFTEffectConfig c;
    return c;
  }

  // Applies the provided configuration and starts processing
  bool begin(FFTEffectConfig info) {
    copier.setLogName("ifft");
    setAudioInfo(info);
    fft_cfg.length = info.length;
    // a stride <= 0 means non-overlapping frames (stride == length)
    fft_cfg.stride = info.stride > 0 ? info.stride : info.length;
    fft_cfg.window_function = info.window_function;
    return begin();
  }

  // Starts processing with the current configuration
  bool begin() override {
    TRACED();
    // copy result to output
    copier.begin(*p_out, fft);

    // setup fft
    fft_cfg.copyFrom(audioInfo());
    fft_cfg.callback = effect_callback;
    LOGI("length: %d", fft_cfg.length);
    LOGI("stride: %d", fft_cfg.stride);
    LOGI("window_function: %s", (fft_cfg.window_function != nullptr)
                                    ? fft_cfg.window_function->name()
                                    : "-");
    return fft.begin(fft_cfg);
  }

  // Feeds time-domain audio into the fft; the effect and the output copy
  // happen inside effect_callback
  size_t write(const uint8_t *data, size_t len) override {
    TRACED();
    return fft.write(data, len);
  }

 protected:
  Print *p_out = nullptr;  // final audio destination (not owned)
  AudioRealFFT fft;        // forward/inverse FFT engine
  AudioFFTConfig fft_cfg{fft.defaultConfig(RXTX_MODE)};
  // NOTE(review): hann/buffered appear unused here - the default window comes
  // from the file-level statics; confirm they can be removed
  Hann hann;
  BufferedWindow buffered{&hann};
  StreamCopy copier;  // copies the ifft result into p_out

  // Subclasses transform the frequency-domain data (between fft and ifft)
  virtual void effect(AudioFFTBase &fft) = 0;

  // Static trampoline invoked by the fft after each frame
  static void effect_callback(AudioFFTBase &fft) {
    TRACED();
    FFTEffect *ref = (FFTEffect *)fft.config().ref;
    // execute effect
    ref->effect(fft);
    // write ifft to output
    ref->processOutput();
  }

  // Drains the ifft result into the output
  void processOutput() {
    TRACED();
    while (copier.copy());
  }
};
|
||||
|
||||
/**
|
||||
* @brief Apply Robotize FFT Effect on frequency domain data. See
|
||||
* https://learn.bela.io/tutorials/c-plus-plus-for-real-time-audio-programming/phase-vocoder-part-3/
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
class FFTRobotize : public FFTEffect {
|
||||
friend FFTEffect;
|
||||
|
||||
public:
|
||||
FFTRobotize(AudioStream &out) : FFTEffect(out) { addNotifyAudioChange(out); };
|
||||
FFTRobotize(AudioOutput &out) : FFTEffect(out) { addNotifyAudioChange(out); };
|
||||
FFTRobotize(Print &out) : FFTEffect(out) {};
|
||||
|
||||
protected:
|
||||
/// Robotise the output
|
||||
void effect(AudioFFTBase &fft) {
|
||||
TRACED();
|
||||
AudioFFTResult best = fft.result();
|
||||
|
||||
FFTBin bin;
|
||||
for (int n = 0; n < fft.size(); n++) {
|
||||
float amplitude = fft.magnitude(n);
|
||||
|
||||
// update new bin value
|
||||
bin.real = amplitude / best.magnitude;
|
||||
bin.img = 0.0;
|
||||
Serial.println(bin.real);
|
||||
|
||||
fft.setBin(n, bin);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Apply Robotize FFT Effect on frequency domain data. See
|
||||
* https://learn.bela.io/tutorials/c-plus-plus-for-real-time-audio-programming/phase-vocoder-part-3/
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
class FFTWhisper : public FFTEffect {
  friend FFTEffect;

 public:
  FFTWhisper(AudioStream &out) : FFTEffect(out) { addNotifyAudioChange(out); };
  FFTWhisper(AudioOutput &out) : FFTEffect(out) { addNotifyAudioChange(out); };
  FFTWhisper(Print &out) : FFTEffect(out) {};

 protected:
  /// Whisper effect: keeps each bin's magnitude but replaces its phase with a
  /// random value, destroying the pitch information
  void effect(AudioFFTBase &fft) {
    TRACED();
    FFTBin bin;
    for (int n = 0; n < fft.size(); n++) {
      float amplitude = fft.magnitude(n);
      float phase = rand() / (float)RAND_MAX * 2.f * PI;

      // update new bin value
      bin.real = cosf(phase) * amplitude;
      bin.img = sinf(phase) * amplitude;
      fft.setBin(n, bin);
    }
  }
};
|
||||
|
||||
/**
|
||||
* @brief Apply FFT and IFFT w/o any changes to the frequency domain
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
|
||||
class FFTNop : public FFTEffect {
|
||||
friend FFTEffect;
|
||||
|
||||
public:
|
||||
FFTNop(AudioStream &out) : FFTEffect(out) { addNotifyAudioChange(out); };
|
||||
FFTNop(AudioOutput &out) : FFTEffect(out) { addNotifyAudioChange(out); };
|
||||
FFTNop(Print &out) : FFTEffect(out) {};
|
||||
|
||||
protected:
|
||||
/// Do nothing
|
||||
void effect(AudioFFTBase &fft) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Pitch Shift FFT Effect Configuration
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
|
||||
struct FFTPitchShiftConfig : public FFTEffectConfig {
  // number of bins to shift: > 0 shifts up (higher pitch), < 0 shifts down
  int shift = 1;
};
|
||||
|
||||
/**
|
||||
* @brief Apply Pitch Shift FFT Effect on frequency domain data: we just move
|
||||
* the bins up or down
|
||||
* @ingroup transform
|
||||
* @author phil schatzmann
|
||||
*/
|
||||
class FFTPitchShift : public FFTEffect {
  friend FFTEffect;

 public:
  FFTPitchShift(AudioStream &out) : FFTEffect(out) {
    addNotifyAudioChange(out);
  };
  FFTPitchShift(AudioOutput &out) : FFTEffect(out) {
    addNotifyAudioChange(out);
  };
  FFTPitchShift(Print &out) : FFTEffect(out) {};

  FFTPitchShiftConfig defaultConfig() {
    FFTPitchShiftConfig result;
    result.shift = shift;
    return result;
  }

  // Applies the configuration (including the shift) and starts processing
  // NOTE(review): FFTEffect::begin(psConfig) already ends by calling begin(),
  // so begin() runs twice here - confirm the double initialization is intended
  bool begin(FFTPitchShiftConfig psConfig) {
    setShift(psConfig.shift);
    FFTEffect::begin(psConfig);
    return begin();
  }

  bool begin() override {
    bool rc = FFTEffect::begin();
    // you can not shift more then you have bins
    assert(abs(shift) < fft.size());
    return rc;
  }

  /// defines how many bins should be shifted up (>0) or down (<0);
  void setShift(int bins) { shift = bins; }

 protected:
  int shift = 1;  // number of bins to shift; sign selects the direction

  /// Pitch Shift: moves every bin up or down by 'shift' positions and clears
  /// the bins that no longer have a source
  void effect(AudioFFTBase &fft) override {
    TRACED();
    FFTBin bin;
    int max = fft.size();

    if (shift < 0) {
      // copy bins: left shift (iterate upward so sources are read before overwritten)
      for (int n = -shift; n < max; n++) {
        int to_bin = n + shift;
        assert(to_bin >= 0);
        assert(to_bin < max);
        fft.getBin(n, bin);
        fft.setBin(to_bin, bin);
      }
      // clear tail
      bin.clear();
      for (int n = max + shift; n < max; n++) {
        fft.setBin(n, bin);
      }
    } else if (shift > 0) {
      // copy bins: right shift (iterate downward so sources are read before overwritten)
      for (int n = max - shift - 1; n >= 0; n--) {
        int to_bin = n + shift;
        assert(to_bin >= 0);
        assert(to_bin < max);
        fft.getBin(n, bin);
        fft.setBin(to_bin, bin);
      }
      // clear head
      bin.clear();
      for (int n = 0; n < shift; n++) {
        fft.setBin(n, bin);
      }
    }
  }
};
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,3 @@
|
||||
#pragma once
|
||||
#WARNING("Obsolete - use /AudioTools/Communication/HLSStream.h")
|
||||
#include "AudioTools/Communication/HLSStream.h"
|
||||
387
libraries/audio-tools/src/AudioTools/AudioLibs/I2SCodecStream.h
Normal file
387
libraries/audio-tools/src/AudioTools/AudioLibs/I2SCodecStream.h
Normal file
@@ -0,0 +1,387 @@
|
||||
#pragma once
|
||||
#include "AudioBoard.h" // install audio-driver library
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/CoreAudio/AudioI2S/I2SStream.h"
|
||||
|
||||
//#pragma GCC diagnostic ignored "-Wclass-memaccess"
|
||||
|
||||
// Added to be compatible with the AudioKitStream.h
|
||||
#ifndef PIN_AUDIO_KIT_SD_CARD_CS
|
||||
#define PIN_AUDIO_KIT_SD_CARD_CS 13
|
||||
#define PIN_AUDIO_KIT_SD_CARD_MISO 2
|
||||
#define PIN_AUDIO_KIT_SD_CARD_MOSI 15
|
||||
#define PIN_AUDIO_KIT_SD_CARD_CLK 14
|
||||
#endif
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
|
||||
* @brief Configuration for I2SCodecStream
|
||||
* @ingroup io
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
struct I2SCodecConfig : public I2SConfig {
  input_device_t input_device = ADC_INPUT_LINE1;   // codec ADC input line
  output_device_t output_device = DAC_OUTPUT_ALL;  // codec DAC output line(s)
  // to be compatible with the AudioKitStream -> do not activate SD spi if false
  bool sd_active = true;
  // define pin source in driver configuration
  PinFunction i2s_function = PinFunction::UNDEFINED; //CODEC;
  // Equal when input/output devices match and the AudioInfo part matches
  // NOTE(review): sd_active and i2s_function are ignored by this comparison -
  // confirm that is intended
  bool operator==(I2SCodecConfig alt) {
    return input_device == alt.input_device &&
           output_device == alt.output_device && *((AudioInfo *)this) == alt;
  }

  bool operator!=(I2SCodecConfig alt) { return !(*this == alt); }
};
|
||||
|
||||
/**
|
||||
* @brief I2S Stream which also sets up a codec chip and i2s
|
||||
* @ingroup io
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class I2SCodecStream : public AudioStream, public VolumeSupport {
|
||||
public:
|
||||
/// Default Constructor (w/o codec)
|
||||
I2SCodecStream() = default;
|
||||
/**
|
||||
* @brief Default constructor: for available AudioBoard values check
|
||||
* audioboard variables in
|
||||
* https://pschatzmann.github.io/arduino-audio-driver/html/group__audio__driver.html
|
||||
* Further information can be found in
|
||||
* https://github.com/pschatzmann/arduino-audio-driver/wiki
|
||||
*/
|
||||
I2SCodecStream(AudioBoard &board) { setBoard(board); }
|
||||
/// Provide board via pointer
|
||||
I2SCodecStream(AudioBoard *board) { setBoard(board); }
|
||||
|
||||
/// Provides the default configuration
|
||||
I2SCodecConfig defaultConfig(RxTxMode mode = TX_MODE) {
|
||||
auto cfg1 = i2s.defaultConfig(mode);
|
||||
I2SCodecConfig cfg;
|
||||
memcpy(&cfg, &cfg1, sizeof(cfg1));
|
||||
cfg.input_device = ADC_INPUT_LINE1;
|
||||
cfg.output_device = DAC_OUTPUT_ALL;
|
||||
cfg.sd_active = true;
|
||||
cfg.rx_tx_mode = mode;
|
||||
return cfg;
|
||||
}
|
||||
|
||||
bool begin() {
|
||||
TRACED();
|
||||
return begin(cfg);
|
||||
}
|
||||
|
||||
/// Starts the I2S interface
|
||||
virtual bool begin(I2SCodecConfig cfg) {
|
||||
TRACED();
|
||||
this->cfg = cfg;
|
||||
this->info = cfg;
|
||||
return begin1();
|
||||
}
|
||||
|
||||
/// Stops the I2S interface
|
||||
void end() {
|
||||
TRACED();
|
||||
if (p_board) p_board->end();
|
||||
i2s.end();
|
||||
is_active = false;
|
||||
}
|
||||
|
||||
/// updates the sample rate dynamically
|
||||
virtual void setAudioInfo(AudioInfo info) {
|
||||
TRACEI();
|
||||
AudioStream::setAudioInfo(info);
|
||||
i2s.setAudioInfo(info);
|
||||
|
||||
cfg.sample_rate = info.sample_rate;
|
||||
cfg.bits_per_sample = info.bits_per_sample;
|
||||
cfg.channels = info.channels;
|
||||
|
||||
// update codec_cfg
|
||||
codec_cfg.i2s.bits = toCodecBits(cfg.bits_per_sample);
|
||||
codec_cfg.i2s.rate = toRate(cfg.sample_rate);
|
||||
|
||||
// return if we we are not ready
|
||||
if (!is_active || p_board == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
// return if there is nothing to do
|
||||
if (cfg.sample_rate == info.sample_rate &&
|
||||
cfg.bits_per_sample == info.bits_per_sample &&
|
||||
cfg.channels == info.channels) {
|
||||
return;
|
||||
}
|
||||
|
||||
// update cfg
|
||||
p_board->setConfig(codec_cfg);
|
||||
}
|
||||
|
||||
/// Writes the audio data to I2S
|
||||
virtual size_t write(const uint8_t *data, size_t len) {
|
||||
LOGD("I2SStream::write: %d", len);
|
||||
return i2s.write(data, len);
|
||||
}
|
||||
|
||||
/// Reads the audio data
|
||||
virtual size_t readBytes(uint8_t *data, size_t len) override {
|
||||
return i2s.readBytes(data, len);
|
||||
}
|
||||
|
||||
  /// Provides the number of bytes available to read
  virtual int available() override { return i2s.available(); }

  /// Provides the number of bytes that can be written
  virtual int availableForWrite() override { return i2s.availableForWrite(); }
|
||||
|
||||
  /// sets the volume (range 0.0f - 1.0f); the codec driver expects percent
  bool setVolume(float vol) override {
    // remember the value even if we are not active yet: begin1() applies it
    VolumeSupport::setVolume(vol);
    if (!is_active || p_board == nullptr) return false;
    return p_board->setVolume(vol * 100.0);
  }

  /// Provides the actual volume (0.0f - 1.0f) as reported by the codec
  float volume() override {
    if (p_board == nullptr) return 0.0f;
    return static_cast<float>(p_board->getVolume()) / 100.0f;
  }

  /// legacy: same as volume()
  float getVolume() { return volume(); }
|
||||
|
||||
  /// Mute / unmute (all lines)
  bool setMute(bool mute) {
    if (p_board == nullptr) return false;
    return p_board->setMute(mute);
  }

  /// Mute / unmute of an individual line (codec)
  bool setMute(bool mute, int line) {
    if (p_board == nullptr) return false;
    return p_board->setMute(mute, line);
  }

  /// Sets the output of the PA Power Pin
  bool setPAPower(bool active) {
    if (p_board == nullptr) return false;
    return p_board->setPAPower(active);
  }
|
||||
|
||||
  /// Sets the volume of the microphone (if available); range 0.0f - 1.0f,
  /// converted to percent for the codec driver
  bool setInputVolume(float vol){
    if (!is_active || p_board == nullptr) return false;
    return p_board->setInputVolume(100.0 * vol);
  }
|
||||
|
||||
  /// Provides the board
  // NOTE(review): dereferences p_board without a null check — call
  // hasBoard() first; confirm callers do so
  AudioBoard &board() { return *p_board; }

  /// (re)defines the board
  void setBoard(AudioBoard &board) { p_board = &board; }

  /// (re)defines the board
  void setBoard(AudioBoard *board) { p_board = board; }

  /// checks if a board has been defined
  bool hasBoard() { return p_board != nullptr; }

  /// Provides the gpio for the indicated function (-1 if no board is defined)
  GpioPin getPinID(PinFunction function) {
    if (p_board == nullptr) return -1;
    return p_board->getPins().getPinID(function);
  }

  /// Provides the gpio for the indicated function and position
  /// (-1 if no board is defined)
  GpioPin getPinID(PinFunction function, int pos) {
    if (p_board == nullptr) return -1;
    return p_board->getPins().getPinID(function, pos);
  }

  /// Provides the gpio for the indicated key pos
  GpioPin getKey(int pos) { return getPinID(PinFunction::KEY, pos); }

  /// Provides access to the pin information
  // NOTE(review): unlike getPinID() this dereferences p_board unchecked
  DriverPins &getPins() { return p_board->getPins(); }

  /// Provides the i2s driver
  I2SDriver *driver() { return i2s.driver(); }
|
||||
|
||||
 protected:
  I2SStream i2s;                  // underlying i2s stream
  I2SCodecConfig cfg;             // active configuration
  CodecConfig codec_cfg;          // configuration passed to the codec driver
  AudioBoard *p_board = nullptr;  // audio board (codec) driver
  bool is_active = false;         // true after a successful begin()
||||
|
||||
  /// Common begin logic: determines the i2s function and pins, starts the
  /// codec driver and then the i2s interface
  bool begin1() {
    TRACED();
    setupI2SFunction();
    setupI2SPins();
    if (!beginCodec(cfg)) {
      TRACEE();
      is_active = false;
      return false;
    }
    is_active = i2s.begin(cfg);

    // if setVolume() was called before begin() apply the pending value now
    float tobeVol = VolumeSupport::volume();
    if (is_active && tobeVol >= 0.0f) {
      setVolume(tobeVol);
    }
    return is_active;
  }
|
||||
|
||||
  /// if the cfg.i2s_function was not defined we determine the "correct"
  /// default value
  // NOTE(review): dereferences p_board without a null check in the RX branch
  // — confirm begin() is only reached with a board defined
  void setupI2SFunction() {
    if (cfg.i2s_function == PinFunction::UNDEFINED){
      if (cfg.rx_tx_mode == RX_MODE){
        // prefer the dedicated ADC i2s port if the board defines one
        auto i2s = p_board->getPins().getI2SPins(PinFunction::CODEC_ADC);
        if (i2s){
          cfg.i2s_function = PinFunction::CODEC_ADC;
          LOGI("using i2s_function: CODEC_ADC");
        } else {
          cfg.i2s_function = PinFunction::CODEC;
        }
      } else {
        cfg.i2s_function = PinFunction::CODEC;
      }
    }
  }
|
||||
|
||||
  /// We use the board pins if they are available
  void setupI2SPins() {
    TRACED();
    // determine relevant I2S pins from driver configuration
    auto i2s = getI2SPins();
    if (i2s) {
      // determine i2s pins from board definition
      PinsI2S i2s_pins = i2s.value();
      cfg.pin_bck = i2s_pins.bck;
      cfg.pin_mck = i2s_pins.mclk;
      cfg.pin_ws = i2s_pins.ws;
      // the data pin depends on the transfer direction
      switch (cfg.rx_tx_mode) {
        case RX_MODE:
          cfg.pin_data = i2s_pins.data_in;
          break;
        case TX_MODE:
          cfg.pin_data = i2s_pins.data_out;
          break;
        default:
          // duplex: use both data pins
          cfg.pin_data = i2s_pins.data_out;
          cfg.pin_data_rx = i2s_pins.data_in;
          break;
      }
    }
  }
|
||||
|
||||
audio_driver_local::Optional<PinsI2S> getI2SPins(){
|
||||
TRACED();
|
||||
audio_driver_local::Optional<PinsI2S> i2s;
|
||||
// Deterine I2S pins
|
||||
return p_board->getPins().getI2SPins(cfg.i2s_function);
|
||||
}
|
||||
|
||||
  /// Translates the i2s/codec settings into a CodecConfig and starts the
  /// codec driver; returns false when no board is defined
  bool beginCodec(I2SCodecConfig info) {
    TRACED();
    // select the active input/output devices depending on the direction
    switch (cfg.rx_tx_mode) {
      case RX_MODE:
        codec_cfg.input_device = info.input_device;
        codec_cfg.output_device = DAC_OUTPUT_NONE;
        break;
      case TX_MODE:
        codec_cfg.output_device = info.output_device;
        codec_cfg.input_device = ADC_INPUT_NONE;
        break;
      default:
        // duplex: both directions active
        codec_cfg.input_device = info.input_device;
        codec_cfg.output_device = info.output_device;
        break;
    }
    codec_cfg.sd_active = info.sd_active;
    LOGD("input: %d", info.input_device);
    LOGD("output: %d", info.output_device);
    codec_cfg.i2s.bits = toCodecBits(info.bits_per_sample);
    codec_cfg.i2s.rate = toRate(info.sample_rate);
    codec_cfg.i2s.fmt = toFormat(info.i2s_format);
    codec_cfg.i2s.signal_type = (signal_t) info.signal_type;
    // use reverse logic for codec setting: when the MCU is i2s master the
    // codec must be slave and vice versa
    codec_cfg.i2s.mode = info.is_master ? MODE_SLAVE : MODE_MASTER;
    if (p_board == nullptr) return false;

    // setup driver only on changes
    return p_board->begin(codec_cfg);
  }
|
||||
|
||||
sample_bits_t toCodecBits(int bits) {
|
||||
switch (bits) {
|
||||
case 16:
|
||||
LOGD("BIT_LENGTH_16BITS");
|
||||
return BIT_LENGTH_16BITS;
|
||||
case 24:
|
||||
LOGD("BIT_LENGTH_24BITS");
|
||||
return BIT_LENGTH_24BITS;
|
||||
case 32:
|
||||
LOGD("BIT_LENGTH_32BITS");
|
||||
return BIT_LENGTH_32BITS;
|
||||
}
|
||||
LOGE("Unsupported bits: %d", bits);
|
||||
return BIT_LENGTH_16BITS;
|
||||
}
|
||||
samplerate_t toRate(int rate) {
|
||||
if (rate <= 8000) {
|
||||
LOGD("RATE_8K");
|
||||
return RATE_8K;
|
||||
}
|
||||
if (rate <= 11000) {
|
||||
LOGD("RATE_11K");
|
||||
return RATE_11K;
|
||||
}
|
||||
if (rate <= 16000) {
|
||||
LOGD("RATE_16K");
|
||||
return RATE_16K;
|
||||
}
|
||||
if (rate <= 22050) {
|
||||
LOGD("RATE_22K");
|
||||
return RATE_22K;
|
||||
}
|
||||
if (rate <= 32000) {
|
||||
LOGD("RATE_32K");
|
||||
return RATE_32K;
|
||||
}
|
||||
if (rate <= 44100) {
|
||||
LOGD("RATE_44K");
|
||||
return RATE_44K;
|
||||
}
|
||||
if (rate <= 48000 || rate > 48000) {
|
||||
LOGD("RATE_48K");
|
||||
return RATE_44K;
|
||||
}
|
||||
LOGE("Invalid rate: %d using 44K", rate);
|
||||
return RATE_44K;
|
||||
}
|
||||
|
||||
i2s_format_t toFormat(I2SFormat fmt) {
|
||||
switch (fmt) {
|
||||
case I2S_PHILIPS_FORMAT:
|
||||
case I2S_STD_FORMAT:
|
||||
LOGD("I2S_NORMAL");
|
||||
return I2S_NORMAL;
|
||||
case I2S_LEFT_JUSTIFIED_FORMAT:
|
||||
case I2S_MSB_FORMAT:
|
||||
LOGD("I2S_LEFT");
|
||||
return I2S_LEFT;
|
||||
case I2S_RIGHT_JUSTIFIED_FORMAT:
|
||||
case I2S_LSB_FORMAT:
|
||||
LOGD("I2S_RIGHT");
|
||||
return I2S_RIGHT;
|
||||
case I2S_PCM:
|
||||
LOGD("I2S_DSP");
|
||||
return I2S_DSP;
|
||||
default:
|
||||
LOGE("unsupported mode");
|
||||
return I2S_NORMAL;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
4
libraries/audio-tools/src/AudioTools/AudioLibs/Jupyter.h
Normal file
4
libraries/audio-tools/src/AudioTools/AudioLibs/Jupyter.h
Normal file
@@ -0,0 +1,4 @@
|
||||
#pragma once
|
||||
#include "Desktop/Time.h"
|
||||
#include "Desktop/JupyterAudio.h"
|
||||
#include "Desktop/File.h"
|
||||
282
libraries/audio-tools/src/AudioTools/AudioLibs/LEDOutput.h
Normal file
282
libraries/audio-tools/src/AudioTools/AudioLibs/LEDOutput.h
Normal file
@@ -0,0 +1,282 @@
|
||||
#pragma once
|
||||
#include <FastLED.h>
|
||||
|
||||
#include "AudioTools/CoreAudio/AudioBasic/Collections/Vector.h"
|
||||
#include "AudioTools/AudioLibs/AudioFFT.h"
|
||||
#include "FFTDisplay.h"
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
class LEDOutput;
|
||||
struct LEDOutputConfig;
|
||||
|
||||
// default callback function which implements led update
|
||||
void fftLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix);
|
||||
// led update for volume
|
||||
void volumeLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix);
|
||||
// default color
|
||||
CHSV getDefaultColor(int x, int y, int magnitude);
|
||||
|
||||
/**
 * LED Matrix Configuration. Provide the number of leds in x and y direction and
 * the data pin.
 * @author Phil Schatzmann
 */
struct LEDOutputConfig {
  /// Number of leds in x direction
  int x = 0;
  /// Number of leds in y direction
  int y = 1;
  /// optional custom logic to provide the CHSV color: providing a 'rainbow'
  /// color with hue 0-255, saturation 0-255, and brightness (value) 0-255 (v2)
  CHSV (*color_callback)(int x, int y, int magnitude) = getDefaultColor;
  /// Custom callback logic to update the LEDs - by default we use
  /// fftLEDOutput()
  void (*update_callback)(LEDOutputConfig *cfg, LEDOutput *matrix) = nullptr;
  /// Update the leds only every nth call
  int update_frequency = 1;  // update every call
  /// serpentine wiring: odd rows/columns run in the opposite direction
  bool is_serpentine_layout = true;
  /// the matrix is wired column by column instead of row by row
  bool is_matrix_vertical = true;
  /// Influences the sensitivity
  int max_magnitude = 700;
};
|
||||
|
||||
/**
 * @brief LED output using the FastLED library.
 * @author Phil Schatzmann
 */
class LEDOutput {
 public:
  /// @brief Default Constructor
  LEDOutput() = default;

  /// @brief Constructor for FFT scenario: update() renders the fft result
  /// as a bar chart
  /// @param fft
  LEDOutput(FFTDisplay &fft) {
    p_fft = &fft;
    cfg.update_callback = fftLEDOutput;
  }

  /// @brief Constructor for VolumeMeter scenario: update() renders the
  /// volume as a moving bar chart
  /// @param vol
  LEDOutput(VolumeMeter &vol) {
    p_vol = &vol;
    cfg.update_callback = volumeLEDOutput;
  }

  /// Provides the default config object
  LEDOutputConfig defaultConfig() { return cfg; }

  /// Setup Led matrix: allocates the led array and clears the display
  bool begin(LEDOutputConfig config) {
    cfg = config;
    if (ledCount() == 0) {
      LOGE("x or y == 0");
      return false;
    }

    // allocate leds
    leds.resize(ledCount());
    for (int j = 0; j < ledCount(); j++) {
      led(j) = CRGB::Black;
    }

    // clear LED
    FastLED.clear();  // clear all pixel data

    if (p_fft != nullptr) {
      p_fft->begin();
    }

    max_column = -1;

    return true;
  }

  /// Provides the number of LEDs: call begin() first!
  int ledCount() {
    int num_leds = cfg.x * cfg.y;
    return num_leds;
  }

  /// Provides the address of the CRGB array: call begin() first!
  CRGB *ledData() {
    if (ledCount() == 0) {
      LOGE("x or y == 0");
      return nullptr;
    }
    // leds.resize(ledCount());
    return leds.data();
  }

  /// Updates the display: call this method in your loop
  virtual void update() {
    if (cfg.update_callback != nullptr && count++ % cfg.update_frequency == 0) {
      // use custom update logic defined in config
      cfg.update_callback(&cfg, this);
    } else {
      display();
    }
  }

  /// Determine the led with the help of the x and y pos
  // NOTE(review): the clamps use '>' where '>=' looks intended, and the
  // '< 0' checks are no-ops on uint8_t — confirm bounds handling
  CRGB &ledXY(uint8_t x, uint8_t y) {
    if (x > cfg.x) x = cfg.x - 1;
    if (x < 0) x = 0;
    if (y > cfg.y) y = cfg.y - 1;
    if (y < 0) y = 0;
    int index = xy(x, y);
    return leds[index];
  }

  /// Determine the led with the help of the index pos
  // NOTE(review): 'index > cfg.x * cfg.y' still allows index == count —
  // presumably '>=' was intended; verify
  CRGB &led(uint8_t index) {
    if (index > cfg.x * cfg.y) return not_valid;
    return leds[index];
  }

  /// Update the indicated column with the indicated bar
  void setColumnBar(int x, int currY) {
    // update vertical bar
    for (uint8_t y = 0; y < currY; y++) {
      // determine color
      CHSV color = cfg.color_callback(x, y, currY);
      // update LED
      ledXY(x, y) = color;
    }
    // clear the rest of the column above the bar
    for (uint8_t y = currY; y < cfg.y; y++) {
      ledXY(x, y) = CRGB::Black;
    }
    if (x > max_column) max_column = x;
  }

  /// Update the last column with the indicated bar
  void setColumnBar(int currY) { setColumnBar(cfg.x - 1, currY); }

  /// Append a bar as new last column, scrolling the content left when full
  void addColumnBar(int currY) {
    max_column++;
    if (max_column >= cfg.x) {
      addEmptyColumn();
    }
    if (max_column > cfg.x - 1) {
      max_column = cfg.x - 1;
    }
    setColumnBar(max_column, currY);
  }

  /// Provides access to the actual config object. E.g. to change the update logic
  LEDOutputConfig &config() { return cfg; }

  /// Provides the max magnitude for both the VolumeMeter and FFT scenario
  virtual float getMaxMagnitude() {
    // prefer the volume meter when available
    if (p_vol != nullptr) {
      return p_vol->volume();
    }
    float max = 0;
    if (p_fft != nullptr) {
      for (int j = 0; j < cfg.x; j++) {
        float value = p_fft->getMagnitude(j);
        if (value > max) {
          max = value;
        }
      }
    }
    return max;
  }

  /// Update the led_matrix (calling FastLED.show())
  void display() {
    FastLED.show();
  }

  /// Provides access to the FFTDisplay object
  FFTDisplay &fftDisplay() { return *p_fft; }

 protected:
  friend class AudioFFTBase;
  CRGB not_valid;              // returned by led() for out of range indexes
  Vector<CRGB> leds{0};        // pixel data (cfg.x * cfg.y entries)
  LEDOutputConfig cfg;
  VolumeMeter *p_vol = nullptr;
  FFTDisplay *p_fft = nullptr;
  uint64_t count = 0;          // update() call counter for update_frequency
  int max_column = -1;         // last column that was filled

  /// Adds an empty column to the end shifting the content to the left
  void addEmptyColumn() {
    for (int x = 1; x < cfg.x; x++) {
      for (int y = 0; y < cfg.y; y++) {
        ledXY(x - 1, y) = ledXY(x, y);
      }
    }
    for (int y = 0; y < cfg.y; y++) {
      ledXY(cfg.x - 1, y) = CRGB::Black;
    }
  }

  /// Maps the x,y position to the index in the led array, considering the
  /// serpentine and vertical layout options
  uint16_t xy(uint8_t x, uint8_t y) {
    uint16_t i;

    if (cfg.is_serpentine_layout == false) {
      if (cfg.is_matrix_vertical == false) {
        i = (y * cfg.x) + x;
      } else {
        i = cfg.y * (cfg.x - (x + 1)) + y;
      }
    }

    if (cfg.is_serpentine_layout == true) {
      if (cfg.is_matrix_vertical == false) {
        if (y & 0x01) {
          // Odd rows run backwards
          uint8_t reverseX = (cfg.x - 1) - x;
          i = (y * cfg.x) + reverseX;
        } else {
          // Even rows run forwards
          i = (y * cfg.x) + x;
        }
      } else {  // vertical positioning
        if (x & 0x01) {
          i = cfg.y * (cfg.x - (x + 1)) + y;
        } else {
          i = cfg.y * (cfg.x - x) - (y + 1);
        }
      }
    }

    return i;
  }
};
|
||||
|
||||
/// Default update implementation which provides the fft result as "barchart":
/// one vertical bar per column, scaled to the matrix height
void fftLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix) {
  // process horizontal; guard against concurrent fft updates
  LockGuard guard(fft_mux);
  for (int x = 0; x < cfg->x; x++) {
    // max y determined by magnitude
    int currY = matrix->fftDisplay().getMagnitudeScaled(x, cfg->y);
    LOGD("x: %d, y: %d", x, currY);
    matrix->setColumnBar(x, currY);
  }
  FastLED.show();
}
|
||||
|
||||
/// Default update implementation which provides the current volume as a
/// moving "barchart"
void volumeLEDOutput(LEDOutputConfig *cfg, LEDOutput *matrix) {
  float vol = matrix->getMaxMagnitude();
  // scale the volume (0..max_magnitude) to a bar height (0..y)
  int currY = mapT<float>(vol, 0,
                          cfg->max_magnitude, 0.0f,
                          static_cast<float>(cfg->y));
  matrix->addColumnBar(currY);
  FastLED.show();
}
|
||||
|
||||
/// Default logic to update the color for the indicated x,y position:
/// maps the magnitude (0..7) to a hue (255..0)
CHSV getDefaultColor(int x, int y, int magnitude) {
  int color = map(magnitude, 0, 7, 255, 0);
  return CHSV(color, 255, 100);  // blue CHSV(160, 255, 255)
}
|
||||
|
||||
} // namespace audio_tools
|
||||
193
libraries/audio-tools/src/AudioTools/AudioLibs/LEDOutputUnoR4.h
Normal file
193
libraries/audio-tools/src/AudioTools/AudioLibs/LEDOutputUnoR4.h
Normal file
@@ -0,0 +1,193 @@
|
||||
#pragma once
|
||||
#include "Arduino_LED_Matrix.h"
|
||||
#include "AudioTools/AudioLibs/AudioFFT.h"
|
||||
#include "FFTDisplay.h"
|
||||
|
||||
namespace audio_tools {
|
||||
class LEDOutputUnoR4;
|
||||
struct LEDOutputUnoR4Config;
|
||||
|
||||
// default callback function which implements led update based on fft
|
||||
void fftLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix);
|
||||
// led update for volume
|
||||
void volumeLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix);
|
||||
|
||||
/**
 * LED Matrix Configuration. Provide the number of leds in x and y direction and
 * the data pin.
 * @author Phil Schatzmann
 */
struct LEDOutputUnoR4Config {
  /// Custom callback logic to update the LEDs when update() is called
  void (*update_callback)(LEDOutputUnoR4Config *cfg,
                          LEDOutputUnoR4 *matrix) = nullptr;
  /// Update the leds only every nth call
  int update_frequency = 1;  // update every call
  /// Number of LEDs in a row
  int x = 12;
  /// Number of LEDs in a column
  int y = 8;
  /// when true 0,0 is in the lower left corner
  bool y_mirror = true;
  /// Influences the sensitivity
  int max_magnitude = 700;
};
|
||||
|
||||
/**
 * @brief LED output using the R4 LED matrix library.
 * @ingroup io
 * @author Phil Schatzmann
 */
class LEDOutputUnoR4 {

 public:
  /// @brief Default Constructor
  LEDOutputUnoR4() = default;

  /// @brief Constructor for FFT scenario: update() renders the fft result
  /// as a bar chart
  /// @param fft
  LEDOutputUnoR4(FFTDisplay &fft) {
    p_fft = &fft;
    cfg.update_callback = fftLEDOutputUnoR4;
  }

  /// @brief Constructor for VolumeMeter scenario: update() renders the
  /// volume as a moving bar chart
  /// @param vol
  LEDOutputUnoR4(VolumeMeter &vol) {
    p_vol = &vol;
    cfg.update_callback = volumeLEDOutputUnoR4;
  }

  /// Provides the default config object
  LEDOutputUnoR4Config defaultConfig() { return cfg; }

  /// Starts the processing with the default configuration
  bool begin() { return begin(defaultConfig()); }

  /// Setup Led matrix: allocates the frame buffer and starts the driver
  bool begin(LEDOutputUnoR4Config config) {
    cfg = config;
    frame.resize(cfg.x * cfg.y);
    led_matrix.begin();
    max_column = -1;
    return true;
  }

  /// Updates the display by calling the update callback method: call this method in your loop
  virtual void update() {
    if (cfg.update_callback != nullptr && count++ % cfg.update_frequency == 0) {
      // use custom update logic defined in config
      cfg.update_callback(&cfg, this);
    } else {
      display();
    }
  }

  /// Determine the led with the help of the x and y pos
  bool &ledXY(uint8_t x, uint8_t y) {
    // mirror the y axis so that 0,0 is in the lower left corner
    if (cfg.y_mirror) y = cfg.y - y - 1;
    return frame[x + (y * cfg.x)];
  }

  /// Provides the max magnitude for the VolumeMeter and FFT scenario
  virtual float getMaxMagnitude() {
    // prefer the volume meter when available
    if (p_vol != nullptr) {
      return p_vol->volume();
    }
    float max = 0;
    if (p_fft != nullptr) {
      for (int j = 0; j < cfg.x; j++) {
        float value = p_fft->getMagnitude(j);
        if (value > max) {
          max = value;
        }
      }
    }
    return max;
  }

  /// Update the indicated column with the indicated bar
  void setColumnBar(int x, int currY) {
    // update vertical bar
    for (uint8_t y = 0; y < currY; y++) {
      // update LED
      ledXY(x, y) = true;
    }
    // clear the rest of the column above the bar
    for (uint8_t y = currY; y < cfg.y; y++) {
      ledXY(x, y) = false;
    }
    if (x > max_column) max_column = x;
  }

  /// Append a bar as new last column, scrolling the content left when full
  void addColumnBar(int currY) {
    max_column++;
    if (max_column >= cfg.x) {
      addEmptyColumn();
    }
    if (max_column > cfg.x - 1) {
      max_column = cfg.x - 1;
    }
    setColumnBar(max_column, currY);
  }

  /// Provides access to the actual config object. E.g. to change the update logic
  LEDOutputUnoR4Config &config() { return cfg; }

  /// Update the led_matrix
  void display() {
    led_matrix.loadPixels((uint8_t *)frame.data(), cfg.x * cfg.y);
  }

  /// Provides access to the FFTDisplay object
  FFTDisplay& fftDisplay() {
    return *p_fft;
  }

 protected:
  friend class AudioFFTBase;
  LEDOutputUnoR4Config cfg;
  FFTDisplay *p_fft = nullptr;
  VolumeMeter *p_vol = nullptr;
  uint64_t count = 0;           // update() call counter for update_frequency
  ArduinoLEDMatrix led_matrix;  // R4 LED matrix driver
  Vector<bool> frame{0};        // one flag per LED (cfg.x * cfg.y entries)
  int max_column = -1;          // last column that was filled

  /// Adds an empty column to the end shifting the content to the left
  void addEmptyColumn() {
    for (int x = 1; x < cfg.x; x++) {
      for (int y = 0; y < cfg.y; y++) {
        ledXY(x - 1, y) = ledXY(x, y);
      }
    }
    for (int y = 0; y < cfg.y; y++) {
      ledXY(cfg.x - 1, y) = false;
    }
  }
};
|
||||
|
||||
/// Default update implementation which provides the fft result as "barchart":
/// one vertical bar per column, scaled to the matrix height
void fftLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix) {
  // process horizontal
  for (int x = 0; x < cfg->x; x++) {
    // max y determined by magnitude
    int currY = matrix->fftDisplay().getMagnitudeScaled(x, cfg->y);
    LOGD("x: %d, y: %d", x, currY);
    matrix->setColumnBar(x, currY);
  }
  matrix->display();
}
|
||||
|
||||
/// Default update implementation which provides the current volume as a
/// moving "barchart"
void volumeLEDOutputUnoR4(LEDOutputUnoR4Config *cfg, LEDOutputUnoR4 *matrix) {
  float vol = matrix->getMaxMagnitude();
  // scale the volume (0..max_magnitude) to a bar height (0..y)
  int currY = mapT<float>(vol, 0.0,
                          cfg->max_magnitude, 0.0f,
                          static_cast<float>(cfg->y));
  matrix->addColumnBar(currY);
  matrix->display();
}
|
||||
|
||||
} // namespace audio_tools
|
||||
@@ -0,0 +1,78 @@
|
||||
#pragma once
|
||||
#include "AudioToolsConfig.h"
|
||||
#include "AudioTools/CoreAudio/AudioTypes.h"
|
||||
#include "maximilian.h"
|
||||
#include "libs/maxiClock.h"
|
||||
|
||||
// Maximilian play function - return an array of 2 channels
|
||||
void play(maxi_float_t *channels);//run dac!
|
||||
void play1(maxi_float_t *channels);//run dac!
|
||||
void play2(maxi_float_t *channels);//run dac!
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
 * @brief AudioTools integration with Maximilian
 * @ingroup dsp
 */
class Maximilian : public VolumeSupport {
 public:
  /// @param out audio sink which receives the generated pcm data
  /// @param bufferSize size of the transfer buffer in bytes
  /// @param callback Maximilian play function filling one frame of channels
  Maximilian(Print &out, int bufferSize=DEFAULT_BUFFER_SIZE, void (*callback)(maxi_float_t *channels)=play){
    buffer_size = bufferSize;
    p_sink = &out;
    this->callback = callback;
  }

  ~Maximilian() {
  }

  /// Setup Maximilian with audio parameters
  // NOTE(review): passes DEFAULT_BUFFER_SIZE to maxiSettings::setup even when
  // a custom buffer_size was provided — confirm this is intended
  void begin(AudioInfo cfg){
    this->cfg = cfg;
    buffer.resize(buffer_size);
    maxiSettings::setup(cfg.sample_rate, cfg.channels, DEFAULT_BUFFER_SIZE);
  }

  /// Defines the volume. The values are between 0.0 and 1.0; out of range
  /// values are clipped and reported with a false return value
  bool setVolume(float f) override{
    if (f>1.0f){
      VolumeSupport::setVolume(1.0f);
      return false;
    }
    if (f<0.0f){
      VolumeSupport::setVolume(0.0f);
      return false;
    }
    VolumeSupport::setVolume(f);
    return true;
  }

  /// Copies the audio data from maximilian to the audio sink, Call this method from the Arduino Loop.
  void copy() {
    // fill buffer with data
    maxi_float_t out[cfg.channels];  // one frame: one sample per channel (VLA)
    uint16_t samples = buffer_size / sizeof(uint16_t);  // int16 samples that fit
    int16_t *p_samples = (int16_t *) buffer.data();
    for (uint16_t j=0;j<samples;j+=cfg.channels){
      callback(out);
      // convert all channels to int16, applying the volume
      for (int ch=0;ch<cfg.channels;ch++){
        p_samples[j+ch] = volume() * out[ch] * 32767.0f;
      }
    }
    // write buffer to audio sink
    unsigned int result = p_sink->write(buffer.data(), buffer_size);
    LOGI("bytes written %u", result)
  }

 protected:
  Vector<uint8_t> buffer;   // pcm transfer buffer (buffer_size bytes)
  int buffer_size=256;      // overwritten by the constructor argument
  Print *p_sink=nullptr;    // audio sink receiving the pcm data
  AudioInfo cfg;
  void (*callback)(maxi_float_t *channels);  // maximilian play function
};
|
||||
|
||||
|
||||
} // namespace
|
||||
@@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
|
||||
#include "AudioLogger.h"
|
||||
#ifdef ESP32
|
||||
#include "esp_heap_caps.h"
|
||||
#endif
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
 * @brief MemoryManager which activates the use of external SPIRAM memory.
 * When external memory is in use, the allocation strategy is to initially try
 * to satisfy smaller allocation requests with internal memory and larger requests
 * with external memory. This sets the limit between the two, as well as generally
 * enabling allocation in external memory.
 * @ingroup memorymgmt
 */
class MemoryManager {
 public:
  /// Default Constructor - call begin() to activate PSRAM
  MemoryManager() = default;
  /// Constructor which activates PSRAM. This constructor automatically calls begin()
  MemoryManager(int limit) {
    begin(limit);
  };
  /// Activate the PSRAM for allocated memory > limit
  /// @return true on ESP32 (PSRAM support available), false on other targets
  bool begin(int limit = 10000) {
#ifdef ESP32
    LOGI("Activate PSRAM from %d bytes", limit);
    // allocations >= limit bytes are satisfied from external memory
    heap_caps_malloc_extmem_enable(limit);
    return true;
#else
    return false;
#endif
  }
};
|
||||
|
||||
}
|
||||
439
libraries/audio-tools/src/AudioTools/AudioLibs/MiniAudioStream.h
Normal file
439
libraries/audio-tools/src/AudioTools/AudioLibs/MiniAudioStream.h
Normal file
@@ -0,0 +1,439 @@
|
||||
#pragma once
|
||||
/**
|
||||
* @brief MiniAudio
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
|
||||
#include "AudioTools.h"
|
||||
#include <mutex>
|
||||
#include <atomic>
|
||||
|
||||
#define MINIAUDIO_IMPLEMENTATION
|
||||
#include "miniaudio.h"
|
||||
|
||||
#define MA_BUFFER_COUNT 10
|
||||
#define MA_BUFFER_SIZE 1200
|
||||
#define MA_START_COUNT 2
|
||||
#define MA_DELAY 10
|
||||
|
||||
namespace audio_tools {
|
||||
|
||||
/**
 * @brief Configuration for MiniAudio
 * @author Phil Schatzmann
 * @copyright GPLv3
 */
class MiniAudioConfig : public AudioInfo {
 public:
  MiniAudioConfig() {
    sample_rate = 44100;
    channels = 2;
    bits_per_sample = 16;
  };
  MiniAudioConfig(const MiniAudioConfig &) = default;
  /// Conversion from a plain AudioInfo: copies the audio parameters
  MiniAudioConfig(const AudioInfo &in) {
    sample_rate = in.sample_rate;
    channels = in.channels;
    bits_per_sample = in.bits_per_sample;
  }

  bool is_input = false;   // capture (recording) active
  bool is_output = true;   // playback active
  int delay_ms_if_buffer_full = MA_DELAY;
  int buffer_size = MA_BUFFER_SIZE;
  int buffer_count = MA_BUFFER_COUNT;
  int buffer_start_count = MA_START_COUNT;
  bool auto_restart_on_underrun = true; // Automatically restart after buffer underrun
  int underrun_tolerance = 5; // Number of empty reads before stopping playback
};
|
||||
|
||||
/**
|
||||
* @brief MiniAudio: https://miniaud.io/
|
||||
* @ingroup io
|
||||
* @author Phil Schatzmann
|
||||
* @copyright GPLv3
|
||||
*/
|
||||
class MiniAudioStream : public AudioStream {
|
||||
public:
|
||||
MiniAudioStream() = default;
|
||||
~MiniAudioStream() { end(); };
|
||||
|
||||
MiniAudioConfig defaultConfig(RxTxMode mode = RXTX_MODE) {
|
||||
MiniAudioConfig info;
|
||||
info.sample_rate = 44100;
|
||||
info.channels = 2;
|
||||
info.bits_per_sample = 16;
|
||||
switch (mode) {
|
||||
case RX_MODE:
|
||||
info.is_input = true;
|
||||
info.is_output = false;
|
||||
break;
|
||||
case TX_MODE:
|
||||
info.is_input = false;
|
||||
info.is_output = true;
|
||||
break;
|
||||
case RXTX_MODE:
|
||||
info.is_input = true;
|
||||
info.is_output = true;
|
||||
break;
|
||||
default:
|
||||
info.is_input = false;
|
||||
info.is_output = false;
|
||||
break;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
void setAudioInfo(AudioInfo in) override {
|
||||
AudioStream::setAudioInfo(in);
|
||||
if (in.sample_rate != config.sample_rate ||
|
||||
in.channels != config.channels ||
|
||||
in.bits_per_sample != config.bits_per_sample) {
|
||||
config.copyFrom(in);
|
||||
if (is_active.load()) {
|
||||
is_active.store(false);
|
||||
is_playing.store(false);
|
||||
// This will stop the device so no need to do that manually.
|
||||
ma_device_uninit(&device_ma);
|
||||
|
||||
begin();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool begin(MiniAudioConfig info) {
|
||||
AudioStream::setAudioInfo(info);
|
||||
this->config = info;
|
||||
return begin();
|
||||
}
|
||||
|
||||
bool begin() override {
|
||||
TRACEI();
|
||||
setupBuffers(config.buffer_size);
|
||||
if (config.is_output && !config.is_input)
|
||||
config_ma = ma_device_config_init(ma_device_type_playback);
|
||||
else if (!config.is_output && config.is_input)
|
||||
config_ma = ma_device_config_init(ma_device_type_capture);
|
||||
else if (config.is_output && config.is_input)
|
||||
config_ma = ma_device_config_init(ma_device_type_duplex);
|
||||
else if (!config.is_output && !config.is_input)
|
||||
config_ma = ma_device_config_init(ma_device_type_loopback);
|
||||
|
||||
config_ma.pUserData = this;
|
||||
config_ma.playback.channels = config.channels;
|
||||
config_ma.sampleRate = config.sample_rate;
|
||||
config_ma.dataCallback = data_callback;
|
||||
switch (config.bits_per_sample) {
|
||||
case 8:
|
||||
config_ma.playback.format = ma_format_u8;
|
||||
break;
|
||||
case 16:
|
||||
config_ma.playback.format = ma_format_s16;
|
||||
break;
|
||||
case 24:
|
||||
config_ma.playback.format = ma_format_s24;
|
||||
break;
|
||||
case 32:
|
||||
config_ma.playback.format = ma_format_s32;
|
||||
break;
|
||||
default:
|
||||
LOGE("Invalid format");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ma_device_init(NULL, &config_ma, &device_ma) != MA_SUCCESS) {
|
||||
// Failed to initialize the device.
|
||||
return false;
|
||||
}
|
||||
|
||||
// The device is sleeping by default so you'll need to start it manually.
|
||||
if (ma_device_start(&device_ma) != MA_SUCCESS) {
|
||||
// Failed to initialize the device.
|
||||
ma_device_uninit(&device_ma);
|
||||
return false;
|
||||
}
|
||||
|
||||
is_active.store(true);
|
||||
return is_active.load();
|
||||
}
|
||||
|
||||
void end() override {
|
||||
is_active.store(false);
|
||||
is_playing.store(false);
|
||||
// This will stop the device so no need to do that manually.
|
||||
ma_device_uninit(&device_ma);
|
||||
// release buffer memory
|
||||
buffer_in.resize(0);
|
||||
buffer_out.resize(0);
|
||||
is_buffers_setup.store(false);
|
||||
}
|
||||
|
||||
int availableForWrite() override {
|
||||
return buffer_out.size() == 0 ? 0 : DEFAULT_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
/// Queues PCM bytes for playback into the output ring buffer.
/// Blocks with a bounded retry loop while the buffer is full, then manages
/// the playback gate: playback starts once buffer_start_count chunks are
/// buffered and is paused again when the buffer drains empty.
/// @param data  PCM bytes in the format configured in begin()
/// @param len   number of bytes to write
/// @return number of bytes actually queued (may be < len on timeout)
/// Thread-safety: buffer access is serialized with write_mtx, which is also
/// taken by the playback side of data_callback().
size_t write(const uint8_t *data, size_t len) override {
  // Input validation
  if (!data || len == 0) {
    LOGW("Invalid write parameters: data=%p, len=%zu", data, len);
    return 0;
  }

  // buffer_out is only allocated by setupBuffers(); size 0 means begin()
  // has not (successfully) run for an output stream
  if (buffer_out.size() == 0) {
    LOGW("Output buffer not initialized");
    return 0;
  }

  if (!is_active.load()) {
    LOGW("Stream not active");
    return 0;
  }

  LOGD("write: %zu", len);

  // write data to buffer; `open` counts the bytes still to be queued
  int open = len;
  int written = 0;
  int retry_count = 0;
  const int max_retries = 1000; // Prevent infinite loops

  while (open > 0 && retry_count < max_retries) {
    size_t result = 0;
    {
      // hold the lock only for the copy itself, never across doWait()
      std::lock_guard<std::mutex> guard(write_mtx);
      result = buffer_out.writeArray(data + written, open);
      open -= result;
      written += result;
    }

    if (result == 0) {
      // buffer full: back off and let the callback drain it
      retry_count++;
      doWait();
    } else {
      retry_count = 0; // Reset on successful write
    }
  }

  if (retry_count >= max_retries) {
    LOGE("Write timeout after %d retries, written %d of %zu bytes", max_retries, written, len);
  }

  // activate playing
  // if (!is_playing && buffer_out.bufferCountFilled()>=MA_START_COUNT) {
  // buffer_size is 0 until setupBuffers() ran, which disables the gate below
  int current_buffer_size = buffer_size.load();
  bool should_start_playing = false;

  // Start playing if we have enough data and either:
  // 1. We're not playing yet, or
  // 2. We stopped due to buffer underrun but now have data again
  if (current_buffer_size > 0) {
    int available_data = buffer_out.available();
    // pre-buffer buffer_start_count chunks before opening the playback gate
    int threshold = config.buffer_start_count * current_buffer_size;

    if (!is_playing.load() && available_data >= threshold) {
      should_start_playing = true;
    } else if (is_playing.load() && available_data == 0) {
      // Stop playing if buffer is completely empty (helps with long delays)
      LOGW("Buffer empty, pausing playback");
      is_playing.store(false);
    }
  }

  if (should_start_playing) {
    LOGI("starting audio playback");
    is_playing.store(true);
  }

  // std::this_thread::yield();
  return written;
}
|
||||
|
||||
int available() override {
|
||||
return buffer_in.size() == 0 ? 0 : buffer_in.available();
|
||||
}
|
||||
|
||||
/// Copies captured audio out of the input ring buffer.
/// @param data  destination for the PCM bytes
/// @param len   maximum number of bytes to copy
/// @return number of bytes actually copied (0 on any precondition failure)
/// Thread-safety: serialized with read_mtx against the capture side of
/// data_callback().
size_t readBytes(uint8_t *data, size_t len) override {
  const bool bad_args = (data == nullptr) || (len == 0);
  if (bad_args) {
    LOGW("Invalid read parameters: data=%p, len=%zu", data, len);
    return 0;
  }

  // a zero-sized buffer means begin() never set this stream up for input
  if (buffer_in.size() == 0) {
    LOGW("Input buffer not initialized");
    return 0;
  }

  if (!is_active.load()) {
    LOGW("Stream not active");
    return 0;
  }

  LOGD("read: %zu", len);
  std::lock_guard<std::mutex> guard(read_mtx);
  return buffer_in.readArray(data, len);
}
|
||||
|
||||
/// Manually restart playback (useful after long delays)
|
||||
void restartPlayback() {
|
||||
if (!is_active.load()) {
|
||||
LOGW("Cannot restart playback - stream not active");
|
||||
return;
|
||||
}
|
||||
|
||||
int current_buffer_size = buffer_size.load();
|
||||
if (current_buffer_size > 0 && buffer_out.available() > 0) {
|
||||
LOGI("Manually restarting playback");
|
||||
is_playing.store(true);
|
||||
} else {
|
||||
LOGW("Cannot restart playback - no data available");
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if playback is currently active
|
||||
bool isPlaying() const {
|
||||
return is_playing.load();
|
||||
}
|
||||
|
||||
protected:
  MiniAudioConfig config;                      // settings captured in begin()
  ma_device_config config_ma;                  // miniaudio device configuration
  ma_device device_ma;                         // the miniaudio device handle
  std::atomic<bool> is_playing{false};         // playback gate for the callback
  std::atomic<bool> is_active{false};          // set by begin(), cleared by end()
  std::atomic<bool> is_buffers_setup{false};   // one-shot guard for setupBuffers()
  RingBuffer<uint8_t> buffer_out{0};           // PCM queued for playback
  RingBuffer<uint8_t> buffer_in{0};            // PCM captured from the device
  std::mutex write_mtx;                        // guards buffer_out (and setup)
  std::mutex read_mtx;                         // guards buffer_in
  std::atomic<int> buffer_size{0};             // chunk size chosen by setupBuffers()

  // NOTE: the comment below describes the miniaudio contract implemented by
  // data_callback() further down:
  // In playback mode copy data to pOutput. In capture mode read data from
  // pInput. In full-duplex mode, both pOutput and pInput will be valid and
  // you can move data from pInput into pOutput. Never process more than
  // frameCount frames.
|
||||
|
||||
void setupBuffers(int size = MA_BUFFER_SIZE) {
|
||||
std::lock_guard<std::mutex> guard(write_mtx);
|
||||
if (is_buffers_setup.load()) return;
|
||||
|
||||
// Validate buffer size
|
||||
if (size <= 0 || size > 1024 * 1024) { // Max 1MB per buffer chunk
|
||||
LOGE("Invalid buffer size: %d", size);
|
||||
return;
|
||||
}
|
||||
|
||||
buffer_size.store(size);
|
||||
int buffer_count = config.buffer_count;
|
||||
|
||||
// Validate total buffer size to prevent excessive memory allocation
|
||||
size_t total_size = static_cast<size_t>(size) * buffer_count;
|
||||
if (total_size > 100 * 1024 * 1024) { // Max 100MB total
|
||||
LOGE("Buffer size too large: %zu bytes", total_size);
|
||||
return;
|
||||
}
|
||||
|
||||
LOGI("setupBuffers: %d * %d = %zu bytes", size, buffer_count, total_size);
|
||||
|
||||
if (buffer_out.size() == 0 && config.is_output) {
|
||||
if (!buffer_out.resize(size * buffer_count)) {
|
||||
LOGE("Failed to resize output buffer");
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (buffer_in.size() == 0 && config.is_input) {
|
||||
if (!buffer_in.resize(size * buffer_count)) {
|
||||
LOGE("Failed to resize input buffer");
|
||||
return;
|
||||
}
|
||||
}
|
||||
is_buffers_setup.store(true);
|
||||
}
|
||||
|
||||
/// Backoff used while a ring buffer is full: sleeps for the configured
/// number of milliseconds before the caller retries.
void doWait() {
  // alternatives considered:
  //   std::this_thread::yield();
  //   std::this_thread::sleep_for(std::chrono::milliseconds(MA_DELAY));
  delay(config.delay_ms_if_buffer_full);
}
|
||||
|
||||
/// miniaudio real-time callback: moves captured frames (pInput) into
/// buffer_in and fills pOutput from buffer_out. Registered via
/// config_ma.dataCallback; `this` travels through pDevice->pUserData.
/// Capture side may retry briefly; playback side never blocks long and
/// outputs silence on underrun, optionally pausing playback entirely.
static void data_callback(ma_device *pDevice, void *pOutput,
                          const void *pInput, ma_uint32 frameCount) {
  MiniAudioStream *self = (MiniAudioStream *)pDevice->pUserData;
  if (!self || !self->is_active.load()) {
    return; // Safety check
  }

  AudioInfo cfg = self->audioInfo();
  if (cfg.channels == 0 || cfg.bits_per_sample == 0) {
    LOGE("Invalid audio configuration in callback");
    return;
  }

  // bytes requested by miniaudio for this callback invocation
  int bytes = frameCount * cfg.channels * cfg.bits_per_sample / 8;
  if (bytes <= 0 || bytes > 1024 * 1024) { // Sanity check
    LOGE("Invalid byte count in callback: %d", bytes);
    return;
  }

  // no-op after the first successful call (guarded by is_buffers_setup)
  self->setupBuffers(bytes);

  // --- capture: push device input into buffer_in ---
  if (pInput && self->buffer_in.size() > 0) {
    int open = bytes;
    int processed = 0;
    int retry_count = 0;
    const int max_retries = 100;

    while (open > 0 && retry_count < max_retries && self->is_active.load()) {
      int len = 0;
      {
        // lock scope covers only the copy; doWait() happens unlocked
        std::unique_lock<std::mutex> guard(self->read_mtx);
        len = self->buffer_in.writeArray((uint8_t *)pInput + processed, open);
        open -= len;
        processed += len;
      }
      if (len == 0) {
        // buffer_in full: wait for readBytes() to drain it
        retry_count++;
        self->doWait();
      } else {
        retry_count = 0;
      }
    }
  }

  // --- playback: fill pOutput from buffer_out ---
  if (pOutput) {
    // pre-fill with silence so partial reads / paused playback stay clean
    memset(pOutput, 0, bytes);
    if (self->is_playing.load() && self->buffer_out.size() > 0) {
      int open = bytes;
      int processed = 0;
      int consecutive_failures = 0;
      const int max_failures = self->config.underrun_tolerance;

      while (open > 0 && self->is_active.load()) {
        size_t len = 0;
        {
          std::lock_guard<std::mutex> guard(self->write_mtx);
          len = self->buffer_out.readArray((uint8_t *)pOutput + processed, open);
          open -= len;
          processed += len;
        }

        if (len == 0) {
          consecutive_failures++;
          // If we can't get data for too long, stop playing to prevent issues
          if (consecutive_failures >= max_failures && self->config.auto_restart_on_underrun) {
            LOGW("Buffer underrun detected, stopping playback");
            self->is_playing.store(false);
            break;
          }
          // Don't wait in callback for too long - just output silence
          break;
        } else {
          consecutive_failures = 0;
        }
      }
    }
  }
}
|
||||
};
|
||||
|
||||
} // namespace audio_tools
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user