This commit is contained in:
2026-02-12 21:00:02 -08:00
parent 77f8236347
commit 8bdbf227ca
1141 changed files with 1010880 additions and 2 deletions

View File

@@ -0,0 +1,5 @@
# Text to Speech Examples
I already provided text to speech examples as part of the webserver examples, but I decided to add some more examples that demonstrate how to do TTS using the regular audio-tools output streams!
Please note that the readme of each example specifies which additional library you need to install!

View File

@@ -0,0 +1,48 @@
/**
* @file streams-azure_tts-i2s.ino
* @author Kevin Saye
* @copyright GPLv3
*
*/
#include "AudioTools.h"
#include "AudioTools/Communication/AudioHttp.h"
// Azure Speech Service access: the key is sent in the "Ocp-Apim-Subscription-Key" request header,
// the region becomes the host prefix of url_str.
String speechKey = "...."; // deploy a Speech Service in Azure and get both the key and the region. info here: https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/
String spechregion = "....";
// Voice selection: voice, gender and language are inserted into the SSML document that is built in setup()
String voice = "en-US-JennyNeural"; // for voice, gender and language choose from: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt#prebuilt-neural-voices
String gender = "Female";
String language = "en-US";
// REST endpoint of the text to speech service in the configured region
String url_str = "https://" + spechregion + ".tts.speech.microsoft.com/cognitiveservices/v1";
// The text that will be spoken
String msg = "This is a demonstration of Phil Schatzmann's AudioTools integrating with the Azure Speech Service. Hope you like it.";
// Stream which executes the request; "ssid" and "pwd" are placeholders: replace them with your WiFi credentials
URLStream AzureURLStream("ssid", "pwd");
I2SStream i2s; // audio sink
StreamCopy copier(i2s, AzureURLStream); // copies the data from AzureURLStream to i2s
void setup(){
Serial.begin(115200);
AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);
// setup i2s
auto config = i2s.defaultConfig(TX_MODE);
config.sample_rate = 8000;
config.bits_per_sample = 16;
config.channels = 1;
config.pin_ws = GPIO_NUM_12; //LCK
config.pin_bck = GPIO_NUM_13; //BCK
config.pin_data = GPIO_NUM_21; //DIN
i2s.begin(config);
// Source: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech?tabs=windows%2Cterminal&pivots=programming-language-rest
String ssml = "<speak version='1.0' xml:lang='" + language + "'><voice xml:lang='" + language + "' xml:gender='" + gender + "' name='" + voice + "'>" + msg + "</voice></speak>";
AzureURLStream.addRequestHeader("Ocp-Apim-Subscription-Key", speechKey.c_str());
AzureURLStream.addRequestHeader("X-Microsoft-OutputFormat", "raw-8khz-16bit-mono-pcm"); // if you change this, change the settings for i2s and the decoder
AzureURLStream.addRequestHeader(USER_AGENT, String("Arduino with Audiotools version:" + String(AUDIOTOOLS_VERSION)).c_str());
AzureURLStream.begin(url_str.c_str(), "audio/wav", POST, "application/ssml+xml", ssml.c_str());
}
/// Arduino loop: copies the next chunk of the http response to the I2S output.
void loop() { copier.copy(); }

View File

@@ -0,0 +1,3 @@
# Name, Type, SubType, Offset, Size, Flags
nvs, data, nvs, 36K, 20K,
factory, app, factory, 64K, 4000K,
1 # Name Type SubType Offset Size Flags
2 nvs data nvs 36K 20K
3 factory app factory 64K 4000K

View File

@@ -0,0 +1,41 @@
/**
* @file espeak-min.ino
* @author Phil Schatzmann
* @brief Arduino C++ API - minimum example. The espeak-ng-data is stored in
* progmem with the arduino-posix-fs library and we output audio to I2S with the
* help of the AudioTools library
* @version 0.1
* @date 2022-10-27
*
* @copyright Copyright (c) 2022
*/
#include "AudioTools.h" // https://github.com/pschatzmann/arduino-audio-tools
#include "AudioTools/AudioLibs/AudioBoardStream.h" // https://github.com/pschatzmann/arduino-audio-driver
#include "FileSystems.h" // https://github.com/pschatzmann/arduino-posix-fs
#include "espeak.h" // https://github.com/pschatzmann/arduino-espeak-ng
// Audio sink: audio board selected with AudioKitEs8388V1
AudioBoardStream i2s(AudioKitEs8388V1);
// Text to speech engine which writes the generated audio to i2s
ESpeak espeak(i2s);
void setup() {
Serial.begin(115200);
//file_systems::FSLogger.begin(file_systems::FSInfo, Serial);
// setup espeak
espeak.begin();
// setup output
audio_info espeak_info = espeak.audioInfo();
auto cfg = i2s.defaultConfig();
cfg.channels = espeak_info.channels; // 1
cfg.sample_rate = espeak_info.sample_rate; // 22050
cfg.bits_per_sample = espeak_info.bits_per_sample; // 16
i2s.begin(cfg);
}
/// Arduino loop: speaks the greeting and then pauses for 5 seconds.
void loop() {
  espeak.say("Hello world!");

  delay(5000);
}

View File

@@ -0,0 +1,3 @@
# Name, Type, SubType, Offset, Size, Flags
nvs, data, nvs, 36K, 20K,
factory, app, factory, 64K, 4000K,
1 # Name Type SubType Offset Size Flags
2 nvs data nvs 36K 20K
3 factory app factory 64K 4000K

View File

@@ -0,0 +1,40 @@
/**
* @file espeak-min.ino
* @author Phil Schatzmann
* @brief Arduino C++ API - minimum example. The espeak-ng-data is stored in
* progmem with the arduino-posix-fs library and we output audio to I2S with the
* help of the AudioTools library
* @version 0.1
* @date 2022-10-27
*
* @copyright Copyright (c) 2022
*/
#include "AudioTools.h" // https://github.com/pschatzmann/arduino-audio-tools
#include "FileSystems.h" // https://github.com/pschatzmann/arduino-posix-fs
#include "espeak.h" // https://github.com/pschatzmann/arduino-espeak-ng
// Audio sink
I2SStream i2s; // or replace with any other audio sink
// Text to speech engine which writes the generated audio to i2s
ESpeak espeak(i2s);
void setup() {
Serial.begin(115200);
//file_systems::FSLogger.begin(file_systems::FSInfo, Serial);
// setup espeak
espeak.begin();
// setup output
audio_info espeak_info = espeak.audioInfo();
auto cfg = i2s.defaultConfig();
cfg.channels = espeak_info.channels; // 1
cfg.sample_rate = espeak_info.sample_rate; // 22050
cfg.bits_per_sample = espeak_info.bits_per_sample; // 16
i2s.begin(cfg);
}
/// Arduino loop: speaks the greeting and then pauses for 5 seconds.
void loop() {
  espeak.say("Hello world!");

  delay(5000);
}

View File

@@ -0,0 +1,19 @@
# Using FLITE Text to Speech
I am providing a simple sketch which generates sound data with the Flite text to speech engine.
You need to install https://github.com/pschatzmann/arduino-flite
The output goes to an AI Thinker Audio Kit.
<img src="https://pschatzmann.github.io/Resources/img/audio-toolkit.png" alt="Audio Kit" />
### Dependencies
You need to install the following libraries:
- [Arduino Audio Tools](https://github.com/pschatzmann/arduino-audio-tools)
- [Audio Driver](https://github.com/pschatzmann/arduino-audio-driver)
- [FLITE](https://github.com/pschatzmann/arduino-flite)
FLITE is quite big: you need to use the custom Partition Scheme or RainMaker 4MB no OTA

View File

@@ -0,0 +1,3 @@
# Name, Type, SubType, Offset, Size, Flags
nvs, data, nvs, 36K, 20K,
factory, app, factory, 64K, 4000K,
1 # Name Type SubType Offset Size Flags
2 nvs data nvs 36K 20K
3 factory app factory 64K 4000K

View File

@@ -0,0 +1,32 @@
/**
@file streams-flite-audiokit.ino
* You need to install https://github.com/pschatzmann/arduino-flite
Read the README.md!
@author Phil Schatzmann
@copyright GPLv3
*/
#include "flite_arduino.h"
#include "AudioTools.h"
#include "AudioTools/AudioLibs/AudioBoardStream.h"
// Audio sink: audio board selected with AudioKitEs8388V1
AudioBoardStream kit(AudioKitEs8388V1);
// Text to speech engine which writes the generated audio to kit
Flite flite(kit);
// The text which is spoken once in setup()
const char* alice = "Hallo my name is FLITE";
/**
 * Arduino setup: opens the audio kit with 8 kHz, 16 bit, mono (SD support
 * switched off) and speaks the text once.
 */
void setup() {
  Serial.begin(115200);

  // open the audio kit
  auto kitCfg = kit.defaultConfig();
  kitCfg.sd_active = false;
  kitCfg.sample_rate = 8000;
  kitCfg.channels = 1;
  kitCfg.bits_per_sample = 16;
  kit.begin(kitCfg);

  // speak
  flite.say(alice);
}
/// Arduino loop: nothing to do, the text has been spoken in setup().
void loop() {}

View File

@@ -0,0 +1,12 @@
# Using FLITE Text to Speech
I am providing a simple sketch which generates sound data with the Flite text to speech engine.
You need to install https://github.com/pschatzmann/arduino-flite
In this demo we provide the result as I2SStream but you can easily replace it with any other output stream.
FLITE is quite big: you need to use the custom Partition Scheme or RainMaker 4MB no OTA
## External DAC
For details see the [Wiki](https://github.com/pschatzmann/arduino-audio-tools/wiki/External-DAC)

View File

@@ -0,0 +1,3 @@
# Name, Type, SubType, Offset, Size, Flags
nvs, data, nvs, 36K, 20K,
factory, app, factory, 64K, 4000K,
1 # Name Type SubType Offset Size Flags
2 nvs data nvs 36K 20K
3 factory app factory 64K 4000K

View File

@@ -0,0 +1,35 @@
/**
* @file streams-flite-i2s.ino
* You need to install https://github.com/pschatzmann/arduino-flite
*
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
#include "flite_arduino.h"
#include "AudioTools.h"
//#include "AudioTools/AudioLibs/AudioBoardStream.h"
// Audio sink
I2SStream out; // Replace with desired class e.g. AudioBoardStream, AnalogAudioStream etc.
// Text to speech engine which writes the generated audio to out
Flite flite(out);
/// Arduino setup: starts the logging and opens the output with 8 kHz, 16 bit, mono.
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);

  // open the data sink
  auto sinkCfg = out.defaultConfig();
  sinkCfg.bits_per_sample = 16;
  sinkCfg.channels = 1;
  sinkCfg.sample_rate = 8000;
  out.begin(sinkCfg);
}
/// Arduino loop: speaks the sentence and then pauses for 5 seconds.
void loop() {
  Serial.println("providing data...");
  flite.say("Hallo, my name is Alice");

  delay(5000);
}

View File

@@ -0,0 +1,58 @@
/**
* @file streams-google-audiokit.ino
* @author Phil Schatzmann
* @brief decode MP3 stream from url and output it on I2S on audiokit.
* We are using the free google translate service to generate the mp3
* @version 0.1
* @date 2021-06-25
*
* @copyright Copyright (c) 2021
*/
// install https://github.com/pschatzmann/arduino-libhelix.git
#include "AudioTools.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/AudioLibs/AudioBoardStream.h"
#include "AudioTools/Communication/AudioHttp.h"
// Stream which executes the http request; "ssid" and "password" are placeholders: replace them with your WiFi credentials
URLStream url("ssid","password");
AudioBoardStream i2s(AudioKitEs8388V1); // final output of decoded stream
EncodedAudioStream dec(&i2s, new MP3DecoderHelix()); // Decoding stream: decodes MP3 and writes the result to i2s
StreamCopy copier(dec, url); // copy url to decoder
// url template with the placeholders %1 (language), %2 (speed) and %3 (url encoded text) which are filled in by tts()
Str query("http://translate.google.com/translate_tts?ie=UTF-8&tl=%1&client=tw-ob&ttsspeed=%2&q=%3");
const char* tts(const char* text, const char* lang="en", const char* speed="1"){
query.replace("%1",lang);
query.replace("%2",speed);
Str encoded(text);
encoded.urlEncode();
query.replace("%3", encoded.c_str());
return query.c_str();
}
/**
 * Arduino setup: starts the logging, the audio output and the MP3 decoder
 * and requests the MP3 for the text from google translate.
 */
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);

  // open the audio output with the default settings
  auto outCfg = i2s.defaultConfig(TX_MODE);
  i2s.begin(outCfg);

  // start the decoder
  dec.begin();

  // build and print the request url
  const char* requestUrl = tts("this is an english text");
  Serial.println(requestUrl);

  // generate mp3 with the help of google translate
  url.begin(requestUrl, "audio/mp3");
}
/// Arduino loop: copies the next chunk of the MP3 response to the decoder.
void loop() { copier.copy(); }

View File

@@ -0,0 +1,19 @@
# Using SAM Text to Speech
I am providing a simple sketch which generates sound data with the SAM text to speech engine.
You need to install https://github.com/pschatzmann/arduino-SAM
The output goes to an AI Thinker Audio Kit.
<img src="https://pschatzmann.github.io/Resources/img/audio-toolkit.png" alt="Audio Kit" />
### Dependencies
You need to install the following libraries:
- [Arduino Audio Tools](https://github.com/pschatzmann/arduino-audio-tools)
- [Audio Driver](https://github.com/pschatzmann/arduino-audio-driver)
- [SAM](https://github.com/pschatzmann/arduino-SAM)

View File

@@ -0,0 +1,34 @@
/**
@file streams-sam-audiokit.ino
@author Phil Schatzmann
@copyright GPLv3
*/
#include "AudioTools.h"
#include "AudioTools/AudioLibs/AudioBoardStream.h"
#include "sam_arduino.h"
// Audio sink: audio board selected with AudioKitEs8388V1
AudioBoardStream kit(AudioKitEs8388V1);
// Text to speech engine which writes the generated audio to kit
SAM sam(kit);
// The text which is spoken once in setup()
const char* text = "Hallo my name is SAM";
/**
 * Arduino setup: opens the audio kit with the audio format reported by SAM
 * (SD support switched off) and speaks the text once.
 */
void setup() {
  Serial.begin(115200);

  // open the audio kit with the format provided by SAM
  auto kitCfg = kit.defaultConfig();
  kitCfg.sd_active = false;
  kitCfg.sample_rate = sam.sampleRate();
  kitCfg.channels = sam.channels();
  kitCfg.bits_per_sample = sam.bitsPerSample();
  kit.begin(kitCfg);

  // speak
  sam.say(text);
}
/// Arduino loop: nothing left to do; we just wait in order to feed the watchdog.
void loop() {
  delay(100);
}

View File

@@ -0,0 +1,10 @@
# Using SAM Text to Speech
I am providing a simple sketch which generates sound data with the SAM text to speech engine.
You need to install https://github.com/pschatzmann/arduino-SAM
In this demo we provide the result as I2SStream but you can easily replace it with any other output stream.
## External DAC
For details see the [Wiki](https://github.com/pschatzmann/arduino-audio-tools/wiki/External-DAC)

View File

@@ -0,0 +1,39 @@
/**
* @file streams-sam-i2s.ino
* You need to install https://github.com/pschatzmann/arduino-SAM
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
#include "AudioTools.h"
#include "sam_arduino.h"
//#include "AudioTools/AudioLibs/AudioBoardStream.h"
// Audio sink
I2SStream out; // Replace with desired class e.g. AudioBoardStream, AnalogAudioStream etc.
// Text to speech engine which writes the generated audio to out
// NOTE(review): the meaning of the 2nd argument (false) is defined by the arduino-SAM library - confirm there
SAM sam(out, false);
/**
 * Callback which provides the audio data.
 * The implementation is empty and the sketch never calls or registers it.
 * @param out the requested output (unused)
 */
void outputData(Print *out) {
  (void)out;  // intentionally unused
}
/// Arduino setup: starts the logging and opens the output with the SAM sample rate, 16 bit, mono.
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);

  // open the data sink
  auto sinkCfg = out.defaultConfig();
  sinkCfg.bits_per_sample = 16;
  sinkCfg.channels = 1;
  sinkCfg.sample_rate = SAM::sampleRate();
  out.begin(sinkCfg);
}
/// Arduino loop: speaks the sentence and then pauses for 5 seconds.
void loop() {
  Serial.println("providing data...");
  sam.say("Hallo, my name is Alice");

  delay(5000);
}

View File

@@ -0,0 +1,17 @@
# Simple TTS to A2DP
This is just an adaptation of the [streams-simple_tts-i2s example](https://github.com/pschatzmann/arduino-audio-tools/tree/main/examples/examples-tts/streams-simple_tts-i2s) where the output stream has been replaced with an A2DPStream.
So the output goes to a Bluetooth Speaker!
More examples can be found at https://github.com/pschatzmann/arduino-simple-tts/tree/main/examples
Because we need a lot of progmem, do not forget to set the partition scheme to Huge APP!
### Dependencies
- https://github.com/pschatzmann/ESP32-A2DP.git
- https://github.com/pschatzmann/arduino-simple-tts
A word of warning: The A2DP output has its challenges, so I do not recommend this as your first sketch.

View File

@@ -0,0 +1,57 @@
/**
* @file streams-tts-a2dp.ino
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
#include "AudioTools.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/Communication/A2DPStream.h"
#include "SimpleTTS.h"
// Name of the Bluetooth device; it is assigned to the A2DP configuration in setup()
const char* name = "LEXON MINO L"; // Replace with your device name
// Audio formats for the conversion which is set up in setup() with out.begin(from, to)
AudioInfo from(24000, 1, 16); // TTS
AudioInfo to(44100, 2, 16); // A2DP
// Text source: loop() calls ntt.say() with the next number
NumberToText ntt;
// Bluetooth output
A2DPStream a2dp;
// Format conversion which writes the converted audio to a2dp
FormatConverterStream out(a2dp);
MP3DecoderHelix mp3;
AudioDictionary dictionary(ExampleAudioDictionaryValues);
// Text to speech engine which combines the text source, the output, the decoder and the dictionary
TextToSpeech tts(ntt, out, mp3, dictionary);
// The number to be spoken next: incremented by loop()
int64_t number = 1;
/**
 * Arduino setup: starts the logging, the format conversion and the
 * A2DP (Bluetooth) output.
 */
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);
  Serial.println("Starting...");

  // convert the TTS audio to stereo at 44100 Hz
  out.begin(from, to);

  // start A2DP
  auto btCfg = a2dp.defaultConfig(TX_MODE);
  btCfg.silence_on_nodata = true;  // allow delays with silence
  btCfg.name = name;
  a2dp.begin(btCfg);
  a2dp.setVolume(0.3);

  Serial.println("A2DP Started");
}
/// Arduino loop: speaks the current number, increments it and waits for 1 second.
void loop() {
  // report what we are about to say
  Serial.print("Providing data: ");
  Serial.println(number);

  // speech output
  ntt.say(number);
  number += 1;

  delay(1000);
}

View File

@@ -0,0 +1,14 @@
# Simple TTS
I am providing a simple sketch which generates sound data with my Simple TTS text to speech engine that
uses a configurable library of prerecorded words.
You need to install https://github.com/pschatzmann/arduino-simple-tts
In this demo we provide the result as I2SStream but you can easily replace it with any other output stream.
More examples can be found at https://github.com/pschatzmann/arduino-simple-tts/tree/main/examples
## External DAC
For details see the [Wiki](https://github.com/pschatzmann/arduino-audio-tools/wiki/External-DAC)

View File

@@ -0,0 +1,41 @@
/**
* @file streams-tts-i2s.ino
* You need to install https://github.com/pschatzmann/arduino-simple-tts
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
#include "SimpleTTS.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
//#include "AudioTools/AudioLibs/AudioBoardStream.h"
// Text source: loop() calls ntt.say() with the next number
NumberToText ntt;
// Audio sink
I2SStream out; // Replace with desired class e.g. AudioBoardStream, AnalogAudioStream etc.
MP3DecoderHelix mp3;
AudioDictionary dictionary(ExampleAudioDictionaryValues);
// Text to speech engine which combines the text source, the output, the decoder and the dictionary
TextToSpeech tts(ntt, out, mp3, dictionary);
// The number to be spoken next: incremented by loop()
int64_t number = 1;
/// Arduino setup: starts the logging and opens the output with 24 kHz, mono.
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Warning);

  // open the output
  auto sinkCfg = out.defaultConfig();
  sinkCfg.channels = 1;
  sinkCfg.sample_rate = 24000;
  out.begin(sinkCfg);
}
/// Arduino loop: speaks the current number, increments it and waits for 1 second.
void loop() {
  Serial.println("providing data...");

  // speech output
  ntt.say(number);
  number += 1;

  delay(1000);
}

View File

@@ -0,0 +1,54 @@
/**
* @file streams-talkie-a2dp.ino
* @author Phil Schatzmann
* @copyright GPLv3
* Using TalkiePCM to generate audio to be sent to a Bluetooth Speaker
*/
#include "AudioTools.h"
#include "AudioTools/AudioCodecs/CodecMP3Helix.h"
#include "AudioTools/Communication/A2DPStream.h"
#include "TalkiePCM.h" // https://github.com/pschatzmann/TalkiePCM
#include "Vocab_US_Large.h"
// Name of the Bluetooth device; it is assigned to the A2DP configuration in setup()
const char* name = "LEXON MINO L"; // Replace with your device name
// Audio formats for the conversion which is set up in setup() with out.begin(from, to)
AudioInfo from(8000, 2, 16); // TTS
AudioInfo to(44100, 2, 16); // A2DP
// Bluetooth output
A2DPStream a2dp;
// Format conversion which writes the converted audio to a2dp
FormatConverterStream out(a2dp);
// talkie is submitting too many individual samples, so we buffer them
BufferedStream bs(1024, out);
// Speech generator which writes to the buffered stream, using the channel count of the TTS format
TalkiePCM voice(bs, from.channels);
/**
 * Arduino setup: starts the logging, the format conversion and the
 * A2DP (Bluetooth) output.
 */
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);
  Serial.println("Starting...");

  // convert the talkie audio to stereo at 44100 Hz
  out.begin(from, to);

  // start A2DP
  auto btCfg = a2dp.defaultConfig(TX_MODE);
  btCfg.silence_on_nodata = true;  // allow delays with silence
  btCfg.name = name;
  a2dp.begin(btCfg);
  a2dp.setVolume(0.3);

  Serial.println("A2DP Started");
}
/**
 * Arduino loop: speaks the alert word by word, flushes the buffered stream
 * and then generates 1000 (presumably milliseconds - TODO confirm) of silence.
 */
void loop() {
  // "danger danger"
  voice.say(sp2_DANGER);
  voice.say(sp2_DANGER);

  // "red alert"
  voice.say(sp2_RED);
  voice.say(sp2_ALERT);

  // "motor is on fire"
  voice.say(sp2_MOTOR);
  voice.say(sp2_IS);
  voice.say(sp2_ON);
  voice.say(sp2_FIRE);

  // write out what is still buffered
  bs.flush();

  voice.silence(1000);
}

View File

@@ -0,0 +1,40 @@
/**
* @file streams-talkie-audiokit.ino
* We use the TalkiePCM TTS library to generate the audio
* You need to install https://github.com/pschatzmann/TalkiePCM
* @author Phil Schatzmann
* @copyright GPLv3
*/
#include "AudioTools.h"
#include "AudioTools/AudioLibs/AudioBoardStream.h" //https://github.com/pschatzmann/arduino-audio-driver
#include "TalkiePCM.h" // https://github.com/pschatzmann/TalkiePCM
#include "Vocab_US_Large.h"
// Audio format which is copied into the output configuration in setup()
const AudioInfo info(8000, 2, 16);
AudioBoardStream out(AudioKitEs8388V1); // Audio sink
// Alternative sink: activate this line instead of the AudioBoardStream
//CsvOutput<int16_t> out(Serial);
// Speech generator which writes to out, using the channel count of info
TalkiePCM voice(out, info.channels);
/// Arduino setup: starts the logging and opens the AudioKit with the format defined in info.
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);

  // open the AudioKit
  auto kitCfg = out.defaultConfig();
  kitCfg.copyFrom(info);
  out.begin(kitCfg);

  Serial.println("Talking...");
}
/**
 * Arduino loop: speaks the alert word by word and then generates
 * 1000 (presumably milliseconds - TODO confirm) of silence.
 */
void loop() {
  // "danger danger"
  voice.say(sp2_DANGER);
  voice.say(sp2_DANGER);

  // "red alert"
  voice.say(sp2_RED);
  voice.say(sp2_ALERT);

  // "motor is on fire"
  voice.say(sp2_MOTOR);
  voice.say(sp2_IS);
  voice.say(sp2_ON);
  voice.say(sp2_FIRE);

  voice.silence(1000);
}

View File

@@ -0,0 +1,10 @@
# Using TTS Text to Speech
I am providing a simple sketch which generates sound data with the TTS text to speech engine.
You need to install https://github.com/pschatzmann/tts
In this demo we provide the result as I2SStream but you can easily replace it with any other output stream.
## External DAC
For details see the [Wiki](https://github.com/pschatzmann/arduino-audio-tools/wiki/External-DAC)

View File

@@ -0,0 +1,36 @@
/**
* @file streams-tts-i2s.ino
* You need to install https://github.com/pschatzmann/tts
* @author Phil Schatzmann
* @copyright GPLv3
*
*/
#include "AudioTools.h"
#include "TTS.h"
//#include "AudioTools/AudioLibs/AudioBoardStream.h"
// Audio sink
I2SStream out; // Replace with desired class e.g. AudioBoardStream, AnalogAudioStream etc.
// Text to speech engine which writes the generated audio to out
TTS tts = TTS(out);
void setup(){
Serial.begin(115200);
AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);
// start data sink
TTSInfo info = TTS::getInfo();
auto cfg = out.defaultConfig();
cfg.sample_rate = info.sample_rate;
cfg.channels = info.channels;
cfg.bits_per_sample = info.bits_per_sample;
out.begin(cfg);
}
/// Arduino loop: speaks the sentence and then pauses for 5 seconds.
void loop() {
  Serial.println("providing data...");
  tts.sayText("Hallo, my name is Alice");

  delay(5000);
}

View File

@@ -0,0 +1,5 @@
We generate a WAV from some text with the help of [rhasspy](https://rhasspy.readthedocs.io/en/latest/), which is running e.g. on a Raspberry Pi,
and output the result via I2S.
Further details can be found in https://www.pschatzmann.ch/home/2021/06/23/text-to-speach-in-arduino-conclusions/

View File

@@ -0,0 +1,44 @@
/**
* @file streams-url_wav-serial.ino
* @author Phil Schatzmann
* @brief decode WAV stream from rhasspy url and output it on I2S
* @version 0.1
* @date 2021-06-25
*
* @copyright Copyright (c) 2021
*/
#include "AudioTools.h"
#include "AudioTools/Communication/AudioHttp.h"
// Processing chain: UrlStream -copy-> EncodedAudioStream -> I2S
// "ssid" and "password" are placeholders: replace them with your WiFi credentials
URLStream url("ssid","password");
I2SStream i2s; // I2S stream
WAVDecoder decoder; // decode wav to pcm and send it to I2S
EncodedAudioStream out(&i2s, &decoder); // Decoder stream: writes the decoded audio to i2s
StreamCopy copier(out, url); // copies the data from url to out
/**
 * Arduino setup: starts the logging, opens the I2S output (16 kHz, 32 bit,
 * mono) and posts the text to the rhasspy text to speech service.
 */
void setup() {
  Serial.begin(115200);
  AudioToolsLogger.begin(Serial, AudioToolsLogLevel::Info);

  // open i2s
  auto i2sCfg = i2s.defaultConfig(TX_MODE);
  i2sCfg.channels = 1;
  i2sCfg.bits_per_sample = 32;
  i2sCfg.sample_rate = 16000;
  i2s.begin(i2sCfg);

  // rhasspy: post the text as plain text and accept a WAV as result
  const char* endpoint = "http://192.168.1.37:12101/api/text-to-speech?play=false";
  const char* sentence = "Hallo, my name is Alice";
  url.begin(endpoint, "audio/wav", POST, "text/plain", sentence);
}
/**
 * Arduino loop: copies the next chunk of the response to the decoder as long
 * as the decoder evaluates to true; otherwise stop() is called.
 */
void loop() {
  if (!decoder) {
    stop();
    return;
  }
  copier.copy();
}