This commit is contained in:
2026-02-12 21:00:02 -08:00
parent cb1f2b0efd
commit 40714a3a68
1141 changed files with 1010880 additions and 2 deletions

View File

@@ -0,0 +1,687 @@
#pragma once
#include "AudioTools/AudioCodecs/AudioCodecsBase.h"
#include "AudioTools/AudioCodecs/AudioEncoded.h"
#include "AudioTools/AudioCodecs/AudioFormat.h"
#include "AudioTools/CoreAudio/AudioBasic/StrView.h"
#define READ_BUFFER_SIZE 512
#define MAX_WAV_HEADER_LEN 200
namespace audio_tools {
/**
* @brief Sound information which is available in the WAV header
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
struct WAVAudioInfo : AudioInfo {
WAVAudioInfo() = default;
WAVAudioInfo(const AudioInfo &from) {
sample_rate = from.sample_rate;
channels = from.channels;
bits_per_sample = from.bits_per_sample;
}
AudioFormat format = AudioFormat::PCM;
int byte_rate = 0;
int block_align = 0;
bool is_streamed = true;
bool is_valid = false;
uint32_t data_length = 0;
uint32_t file_size = 0;
int offset = 0;
};
static const char *wav_mime = "audio/wav";
/**
* @brief Parser for Wav header data
* for details see https://de.wikipedia.org/wiki/RIFF_WAVE
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
class WAVHeader {
public:
WAVHeader() = default;
/// Adds data to the 44 byte wav header data buffer and make it available for
/// parsing
int write(uint8_t *data, size_t data_len) {
return buffer.writeArray(data, data_len);
}
/// Call begin when header data is complete to parse the data
bool parse() {
LOGI("WAVHeader::begin: %u", (unsigned)buffer.available());
this->data_pos = 0l;
memset((void *)&headerInfo, 0, sizeof(WAVAudioInfo));
if (!setPos("RIFF")) return false;
headerInfo.file_size = read_int32();
if (!setPos("WAVE")) return false;
if (!setPos("fmt ")) return false;
int fmt_length = read_int32();
headerInfo.format = (AudioFormat)read_int16();
headerInfo.channels = read_int16();
headerInfo.sample_rate = read_int32();
headerInfo.byte_rate = read_int32();
headerInfo.block_align = read_int16();
headerInfo.bits_per_sample = read_int16();
if (!setPos("data")) return false;
headerInfo.data_length = read_int32();
if (headerInfo.data_length == 0 || headerInfo.data_length >= 0x7fff0000) {
headerInfo.is_streamed = true;
headerInfo.data_length = ~0;
}
logInfo();
buffer.clear();
return true;
}
/// Returns true if the header is complete (containd data tag)
bool isDataComplete() {
int pos = getDataPos();
return pos > 0 && buffer.available() >= pos;
}
/// number of bytes available in the header buffer
size_t available() { return buffer.available(); }
/// Determines the data start position using the data tag
int getDataPos() {
int pos =
StrView((char *)buffer.data(), MAX_WAV_HEADER_LEN, buffer.available())
.indexOf("data");
return pos > 0 ? pos + 8 : 0;
}
/// provides the info from the header
WAVAudioInfo &audioInfo() { return headerInfo; }
/// Sets the info in the header
void setAudioInfo(WAVAudioInfo info) { headerInfo = info; }
/// Just write a wav header to the indicated outputbu
int writeHeader(Print *out) {
writeRiffHeader(buffer);
writeFMT(buffer);
writeDataHeader(buffer);
int len = buffer.available();
out->write(buffer.data(), buffer.available());
return len;
}
/// Reset internal stored header information and buffer
void clear() {
data_pos = 0;
WAVAudioInfo empty;
empty.sample_rate = 0;
empty.channels = 0;
empty.bits_per_sample = 0;
headerInfo = empty;
buffer.setClearWithZero(true);
buffer.reset();
}
/// Debug helper: dumps header bytes as printable characters
void dumpHeader() {
char msg[buffer.available() + 1];
memset(msg, 0, buffer.available() + 1);
for (int j = 0; j < buffer.available(); j++) {
char c = (char)buffer.data()[j];
if (!isalpha(c)) {
c = '.';
}
msg[j] = c;
}
LOGI("Header: %s", msg);
}
protected:
struct WAVAudioInfo headerInfo;
SingleBuffer<uint8_t> buffer{MAX_WAV_HEADER_LEN};
size_t data_pos = 0;
bool setPos(const char *id) {
int id_len = strlen(id);
int pos = indexOf(id);
if (pos < 0) return false;
data_pos = pos + id_len;
return true;
}
int indexOf(const char *str) {
return StrView((char *)buffer.data(), MAX_WAV_HEADER_LEN,
buffer.available())
.indexOf(str);
}
uint32_t read_tag() {
uint32_t tag = 0;
tag = (tag << 8) | getChar();
tag = (tag << 8) | getChar();
tag = (tag << 8) | getChar();
tag = (tag << 8) | getChar();
return tag;
}
uint32_t getChar32() { return getChar(); }
uint32_t read_int32() {
uint32_t value = 0;
value |= getChar32() << 0;
value |= getChar32() << 8;
value |= getChar32() << 16;
value |= getChar32() << 24;
return value;
}
uint16_t read_int16() {
uint16_t value = 0;
value |= getChar() << 0;
value |= getChar() << 8;
return value;
}
void skip(int n) {
int i;
for (i = 0; i < n; i++) getChar();
}
int getChar() {
if (data_pos < buffer.size())
return buffer.data()[data_pos++];
else
return -1;
}
void seek(long int offset, int origin) {
if (origin == SEEK_SET) {
data_pos = offset;
} else if (origin == SEEK_CUR) {
data_pos += offset;
}
}
size_t tell() { return data_pos; }
bool eof() { return data_pos >= buffer.size() - 1; }
void logInfo() {
LOGI("WAVHeader sound_pos: %d", getDataPos());
LOGI("WAVHeader channels: %d ", headerInfo.channels);
LOGI("WAVHeader bits_per_sample: %d", headerInfo.bits_per_sample);
LOGI("WAVHeader sample_rate: %d ", (int)headerInfo.sample_rate);
LOGI("WAVHeader format: %d", (int)headerInfo.format);
}
void writeRiffHeader(BaseBuffer<uint8_t> &buffer) {
buffer.writeArray((uint8_t *)"RIFF", 4);
write32(buffer, headerInfo.file_size - 8);
buffer.writeArray((uint8_t *)"WAVE", 4);
}
void writeFMT(BaseBuffer<uint8_t> &buffer) {
uint16_t fmt_len = 16;
buffer.writeArray((uint8_t *)"fmt ", 4);
write32(buffer, fmt_len);
write16(buffer, (uint16_t)headerInfo.format); // PCM
write16(buffer, headerInfo.channels);
write32(buffer, headerInfo.sample_rate);
write32(buffer, headerInfo.byte_rate);
write16(buffer, headerInfo.block_align); // frame size
write16(buffer, headerInfo.bits_per_sample);
}
void write32(BaseBuffer<uint8_t> &buffer, uint64_t value) {
buffer.writeArray((uint8_t *)&value, 4);
}
void write16(BaseBuffer<uint8_t> &buffer, uint16_t value) {
buffer.writeArray((uint8_t *)&value, 2);
}
void writeDataHeader(BaseBuffer<uint8_t> &buffer) {
buffer.writeArray((uint8_t *)"data", 4);
write32(buffer, headerInfo.file_size);
int offset = headerInfo.offset;
if (offset > 0) {
uint8_t empty[offset];
memset(empty, 0, offset);
buffer.writeArray(empty, offset); // resolve issue with wrong aligment
}
}
};
/**
* @brief A simple WAVDecoder: We parse the header data on the first record to
* determine the format. If no AudioDecoderExt is specified we just write the
* PCM data to the output that is defined by calling setOutput(). You can define
* a ADPCM decoder to decode WAV files that contain ADPCM data.
*
* Optionally, if the input WAV file contains 8-bit PCM data, you can enable automatic
* conversion to 16-bit PCM output by calling setConvert8to16(true). This will convert
* unsigned 8-bit samples to signed 16-bit samples before writing to the output stream,
* and the reported bits_per_sample in audioInfo() will be 16 when conversion is active.
* The same is valid for the 24 bit conversion which converts 24 bit (3 byte) to 32 bit
* (4 byte).
*
* Please note that you need to call begin() everytime you process a new file to let the decoder
* know that we start with a new header.
*
* @ingroup codecs
* @ingroup decoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class WAVDecoder : public AudioDecoder {
public:
/**
* @brief Construct a new WAVDecoder object for PCM data
*/
WAVDecoder() = default;
/**
* @brief Construct a new WAVDecoder object for ADPCM data
*
*/
WAVDecoder(AudioDecoderExt &dec, AudioFormat fmt) { setDecoder(dec, fmt); }
/// Defines an optional decoder if the format is not PCM
void setDecoder(AudioDecoderExt &dec, AudioFormat fmt) {
TRACED();
decoder_format = fmt;
p_decoder = &dec;
}
/// Defines the output Stream
void setOutput(Print &out_stream) override { this->p_print = &out_stream; }
/// Prepare decoder for a new WAV stream
bool begin() override {
TRACED();
header.clear();
setupEncodedAudio();
byte_buffer.reset();
buffer24.reset();
isFirst = true;
active = true;
return true;
}
/// Finish decoding and release temporary buffers
void end() override {
TRACED();
byte_buffer.reset();
buffer24.reset();
active = false;
}
/// Provides MIME type "audio/wav"
const char *mime() { return wav_mime; }
/// Extended WAV specific info (original header values)
WAVAudioInfo &audioInfoEx() { return header.audioInfo(); }
/// Exposed AudioInfo (may reflect conversion flags)
AudioInfo audioInfo() override {
WAVAudioInfo info = header.audioInfo();
if (convert8to16 && info.format == AudioFormat::PCM &&
info.bits_per_sample == 8) {
info.bits_per_sample = 16;
}
// 32 bits gives better result
if (convert24 && info.format == AudioFormat::PCM &&
info.bits_per_sample == 24) {
info.bits_per_sample = 32;
}
return info;
}
/// Write incoming WAV data (header + PCM) into output
virtual size_t write(const uint8_t *data, size_t len) override {
TRACED();
size_t result = 0;
if (active) {
if (isFirst) {
int data_start = decodeHeader((uint8_t *)data, len);
// we do not have the complete header yet: need more data
if (data_start == 0) return len;
// process the outstanding data
result = data_start +
write_out((uint8_t *)data + data_start, len - data_start);
} else if (isValid) {
result = write_out((uint8_t *)data, len);
}
}
return result;
}
/// Check if the decoder is active
virtual operator bool() override { return active; }
/// Convert 8 bit to 16 bit PCM data (default: enabled)
void setConvert8Bit(bool enable) {
convert8to16 = enable;
}
/// Convert 24 bit (3 byte) to 32 bit (4 byte) PCM data (default: enabled)
void setConvert24Bit(bool enable) {
convert24 = enable;
}
protected:
WAVHeader header;
bool isFirst = true;
bool isValid = true;
bool active = false;
AudioFormat decoder_format = AudioFormat::PCM;
AudioDecoderExt *p_decoder = nullptr;
EncodedAudioOutput dec_out;
SingleBuffer<uint8_t> byte_buffer{0};
SingleBuffer<int32_t> buffer24{0};
bool convert8to16 = true; // Optional conversion flag
bool convert24 = true; // Optional conversion flag
const size_t batch_size = 256;
Print &out() { return p_decoder == nullptr ? *p_print : dec_out; }
virtual size_t write_out(const uint8_t *in_ptr, size_t in_size) {
// check if we need to convert int24 data from 3 bytes to 4 bytes
size_t result = 0;
if (convert24 && header.audioInfo().format == AudioFormat::PCM &&
header.audioInfo().bits_per_sample == 24 && sizeof(int24_t) == 4) {
write_out_24(in_ptr, in_size);
result = in_size;
} else if (convert8to16 && header.audioInfo().format == AudioFormat::PCM &&
header.audioInfo().bits_per_sample == 8) {
result = write_out_8to16(in_ptr, in_size);
} else {
result = out().write(in_ptr, in_size);
}
return result;
}
/// Convert 8-bit PCM to 16-bit PCM and write out
size_t write_out_8to16(const uint8_t *in_ptr, size_t in_size) {
size_t total_written = 0;
size_t samples_remaining = in_size;
size_t offset = 0;
int16_t out_buf[batch_size];
while (samples_remaining > 0) {
size_t current_batch =
samples_remaining > batch_size ? batch_size : samples_remaining;
for (size_t i = 0; i < current_batch; ++i) {
out_buf[i] = ((int16_t)in_ptr[offset + i] - 128) << 8;
}
writeDataT<int16_t>(&out(), out_buf, current_batch);
offset += current_batch;
samples_remaining -= current_batch;
}
return in_size;
}
/// convert 3 byte int24 to 4 byte int32
size_t write_out_24(const uint8_t *in_ptr, size_t in_size) {
// store 1 sample
buffer24.resize(batch_size);
byte_buffer.resize(3);
for (size_t i = 0; i < in_size; i++) {
// Add byte to buffer
byte_buffer.write(in_ptr[i]);
// Process complete sample when buffer is full
if (byte_buffer.isFull()) {
int24_3bytes_t sample24{byte_buffer.data()};
int32_t converted_sample = sample24.scale32();
buffer24.write(converted_sample);
if (buffer24.isFull()) {
writeDataT<int32_t>(&out(), buffer24.data(), buffer24.available());
buffer24.reset();
}
byte_buffer.reset();
}
}
return in_size;
}
/// Decodes the header data: Returns the start pos of the data
int decodeHeader(uint8_t *in_ptr, size_t in_size) {
int result = in_size;
// we expect at least the full header
int written = header.write(in_ptr, in_size);
if (!header.isDataComplete()) {
LOGW("WAV header misses 'data' section in len: %d",
(int)header.available());
header.dumpHeader();
return 0;
}
// parse header
if (!header.parse()) {
LOGE("WAV header parsing failed");
return 0;
}
isFirst = false;
isValid = header.audioInfo().is_valid;
LOGI("WAV sample_rate: %d", (int)header.audioInfo().sample_rate);
LOGI("WAV data_length: %u", (unsigned)header.audioInfo().data_length);
LOGI("WAV is_streamed: %d", header.audioInfo().is_streamed);
LOGI("WAV is_valid: %s", header.audioInfo().is_valid ? "true" : "false");
// check format
AudioFormat format = header.audioInfo().format;
isValid = format == decoder_format;
if (isValid) {
// update blocksize
if (p_decoder != nullptr) {
int block_size = header.audioInfo().block_align;
p_decoder->setBlockSize(block_size);
}
// update sampling rate if the target supports it
AudioInfo bi = audioInfo();
notifyAudioChange(bi);
} else {
LOGE("WAV format not supported: %d", (int)format);
}
return header.getDataPos();
}
void setupEncodedAudio() {
if (p_decoder != nullptr) {
assert(p_print != nullptr);
dec_out.setOutput(p_print);
dec_out.setDecoder(p_decoder);
dec_out.begin(info);
}
}
};
/**
* @brief A simple WAV file encoder. If no AudioEncoderExt is specified the WAV
* file contains PCM data, otherwise it is encoded as ADPCM. The WAV header is
* written with the first writing of audio data. Calling begin() is making sure
* that the header is written again.
* @ingroup codecs
* @ingroup encoder
* @author Phil Schatzmann
* @copyright GPLv3
*/
class WAVEncoder : public AudioEncoder {
public:
/**
* @brief Construct a new WAVEncoder object for PCM data
*/
WAVEncoder() = default;
/**
* @brief Construct a new WAVEncoder object for ADPCM data
*/
WAVEncoder(AudioEncoderExt &enc, AudioFormat fmt) { setEncoder(enc, fmt); };
/// Associates an external encoder for non-PCM formats
void setEncoder(AudioEncoderExt &enc, AudioFormat fmt) {
TRACED();
wav_info.format = fmt;
p_encoder = &enc;
}
/// Defines the otuput stream
void setOutput(Print &out) override {
TRACED();
p_print = &out;
}
/// Provides "audio/wav"
const char *mime() override { return wav_mime; }
/// Provides the default configuration
WAVAudioInfo defaultConfig() {
WAVAudioInfo info;
info.format = AudioFormat::PCM;
info.sample_rate = DEFAULT_SAMPLE_RATE;
info.bits_per_sample = DEFAULT_BITS_PER_SAMPLE;
info.channels = DEFAULT_CHANNELS;
info.is_streamed = true;
info.is_valid = true;
info.data_length = 0x7fff0000;
info.file_size = info.data_length + 36;
return info;
}
/// Update actual WAVAudioInfo
virtual void setAudioInfo(AudioInfo from) override {
wav_info.sample_rate = from.sample_rate;
wav_info.channels = from.channels;
wav_info.bits_per_sample = from.bits_per_sample;
// recalculate byte rate, block align...
setAudioInfo(wav_info);
}
/// Defines the WAVAudioInfo
virtual void setAudioInfo(WAVAudioInfo ai) {
AudioEncoder::setAudioInfo(ai);
if (p_encoder) p_encoder->setAudioInfo(ai);
wav_info = ai;
LOGI("sample_rate: %d", (int)wav_info.sample_rate);
LOGI("channels: %d", wav_info.channels);
// bytes per second
wav_info.byte_rate = wav_info.sample_rate * wav_info.channels *
wav_info.bits_per_sample / 8;
if (wav_info.format == AudioFormat::PCM) {
wav_info.block_align =
wav_info.bits_per_sample / 8 * wav_info.channels;
}
if (wav_info.is_streamed || wav_info.data_length == 0 ||
wav_info.data_length >= 0x7fff0000) {
LOGI("is_streamed! because length is %u",
(unsigned)wav_info.data_length);
wav_info.is_streamed = true;
wav_info.data_length = ~0;
} else {
size_limit = wav_info.data_length;
LOGI("size_limit is %d", (int)size_limit);
}
}
/// starts the processing
bool begin(WAVAudioInfo ai) {
header.clear();
setAudioInfo(ai);
return begin();
}
/// starts the processing using the actual WAVAudioInfo
virtual bool begin() override {
TRACED();
setupEncodedAudio();
header_written = false;
is_open = true;
return true;
}
/// stops the processing
void end() override { is_open = false; }
/// Writes PCM data to be encoded as WAV
virtual size_t write(const uint8_t *data, size_t len) override {
if (!is_open) {
LOGE("The WAVEncoder is not open - please call begin()");
return 0;
}
if (p_print == nullptr) {
LOGE("No output stream was provided");
return 0;
}
if (!header_written) {
LOGI("Writing Header");
header.setAudioInfo(wav_info);
int len = header.writeHeader(p_print);
wav_info.file_size -= len;
header_written = true;
}
int32_t result = 0;
Print *p_out = p_encoder == nullptr ? p_print : &enc_out;
;
if (wav_info.is_streamed) {
result = p_out->write((uint8_t *)data, len);
} else if (size_limit > 0) {
size_t write_size = min((size_t)len, (size_t)size_limit);
result = p_out->write((uint8_t *)data, write_size);
size_limit -= result;
if (size_limit <= 0) {
LOGI("The defined size was written - so we close the WAVEncoder now");
is_open = false;
}
}
return result;
}
/// Check if encoder is active and ready to write
operator bool() override { return is_open; }
/// Check if encoder is open
bool isOpen() { return is_open; }
/// Adds n empty bytes at the beginning of the data
void setDataOffset(uint16_t offset) { wav_info.offset = offset; }
protected:
WAVHeader header;
Print *p_print = nullptr; // final output CopyEncoder copy; // used for PCM
AudioEncoderExt *p_encoder = nullptr;
EncodedAudioOutput enc_out;
WAVAudioInfo wav_info = defaultConfig();
int64_t size_limit = 0;
bool header_written = false;
volatile bool is_open = false;
void setupEncodedAudio() {
if (p_encoder != nullptr) {
assert(p_print != nullptr);
enc_out.setOutput(p_print);
enc_out.setEncoder(p_encoder);
enc_out.setAudioInfo(wav_info);
enc_out.begin();
// block size only available after begin(): update block size
wav_info.block_align = p_encoder->blockSize();
}
}
};
} // namespace audio_tools