diff --git a/src/main/java/example/ExampleBot.java b/src/main/java/example/ExampleBot.java index 56414fd..f0bfd05 100644 --- a/src/main/java/example/ExampleBot.java +++ b/src/main/java/example/ExampleBot.java @@ -32,7 +32,7 @@ public ExampleBot() { // Windows (with closed captioning instead of wake detection) cord = VocalCord.newConfig(this).withClosedCaptioning().withTTS(SsmlVoiceGender.MALE, - true).build(); + false).build(); // Linux (using WSL) // cord = VocalCord.newConfig(this).withWakeDetection("/mnt/c/Users/wdavi/IdeaProjects/VocalCord/native/linux/libjni_porcupine.so", diff --git a/src/main/java/vocalcord/TTSEngine.java b/src/main/java/vocalcord/TTSEngine.java index e1b8553..9dee9af 100644 --- a/src/main/java/vocalcord/TTSEngine.java +++ b/src/main/java/vocalcord/TTSEngine.java @@ -48,8 +48,10 @@ byte[] tts(String text) throws Exception { byte[] pcm = audioContents.toByteArray(); // Three things need to happen - big endian, stereo, pad to a multiple of 3840 - byte[] converted = new byte[pcm.length * 2 + (AUDIO_FRAME - pcm.length * 2 % AUDIO_FRAME)]; // ensures converted is a multiple of AUDIO_FRAME - for(int i = 0; i < pcm.length; i += 2) { + // Add a frame of silence at the beginning so that the sound doesn't clip weirdly + byte[] converted = new byte[AUDIO_FRAME + pcm.length * 2 + (AUDIO_FRAME - pcm.length * 2 % AUDIO_FRAME)]; + // ensures converted is a multiple of AUDIO_FRAME + for(int i = AUDIO_FRAME; i < pcm.length; i += 2) { short reversed = Short.reverseBytes((short) ((pcm[i] << 8) | (pcm[i + 1] & 0xFF))); byte low = (byte) (reversed >> 8); byte high = (byte) (reversed & 0x00FF); @@ -66,6 +68,8 @@ byte[] tts(String text) throws Exception { } void say(String phrase) throws Exception { + this.index = Integer.MAX_VALUE; + if(ttsCache != null) { TTSCache.CacheResponse response = ttsCache.checkCache(phrase);