diff --git a/README.md b/README.md
index ec207b1a..3acb4ceb 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,10 @@ Currently Dicio answers questions about:
## Speech to text
Dicio uses [Vosk](https://github.com/alphacep/vosk-api/) as its speech to text (`STT`) engine. In order to be able to run on every phone small models are employed, weighing `~50MB`. The download from [here](https://alphacephei.com/vosk/models) starts automatically whenever needed, so the app language can be changed seamlessly.
+Dicio also exports Vosk as a speech-to-text service to the Android system. Other apps can use it in different ways:
+- [Via an intent](https://developer.android.com/reference/android/speech/RecognizerIntent), which opens a Dicio UI for speech input. The result is then provided to the requesting app (either automatically or after the user confirms, depending on the Dicio settings). See the example below.
+- [From the background](https://developer.android.com/reference/android/speech/SpeechRecognizer), if the requesting app has the record-audio permission and Dicio is set as the speech input provider under Settings -> Apps -> Default apps -> Assistant (the exact path may vary depending on the Android version).
+- If you want to use it as a "speech keyboard" (IME), you currently still need an app which uses the Android speech-to-text service and provides an IME (e.g. [this one](https://github.com/Kaljurand/K6nele)).
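+For example, a third-party app could request one speech input through the intent API roughly like this (a minimal sketch, assuming Dicio is the app chosen to handle the speech recognition intent; the class name and request code are just illustrative):
+```java
+import android.app.Activity;
+import android.content.Intent;
+import android.os.Bundle;
+import android.speech.RecognizerIntent;
+
+import java.util.List;
+
+public class SpeechDemoActivity extends Activity {
+    private static final int SPEECH_REQUEST = 1;
+
+    @Override
+    protected void onCreate(final Bundle savedInstanceState) {
+        super.onCreate(savedInstanceState);
+        // ask the speech recognition app (e.g. Dicio) for a single speech input
+        final Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
+        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
+                RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
+        startActivityForResult(intent, SPEECH_REQUEST);
+    }
+
+    @Override
+    protected void onActivityResult(final int requestCode, final int resultCode,
+                                    final Intent data) {
+        super.onActivityResult(requestCode, resultCode, data);
+        if (requestCode == SPEECH_REQUEST && resultCode == RESULT_OK && data != null) {
+            // the recognized alternatives, most likely transcription first
+            final List<String> results =
+                    data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
+            // use results.get(0) here
+        }
+    }
+}
+```
+Recording speech in the background via `SpeechRecognizer` works similarly, but the requesting app needs the `RECORD_AUDIO` permission and implements a `RecognitionListener` instead of receiving an activity result.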
## Contributing
@@ -57,7 +61,6 @@ When contributing keep in mind that other people may have **needs** and **views
If you want to translate Dicio to a new language you have to follow these **steps**:
-
Translate the strings used inside the app via Weblate. If your language isn't already there, add it with tool -> start new translation.
-
diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index 4f8cafe7..82fc06f3 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -1,46 +1,51 @@
+ android:installLocation="auto" >
+
-
-
-
+
+
+
+
+
+
+ tools:ignore="GoogleAppIndexingWarning" >
+ android:windowSoftInputMode="stateUnspecified|adjustResize" >
-
@@ -50,28 +55,44 @@
android:name="com.android.systemui.action_assist_icon"
android:resource="@mipmap/ic_launcher" />
-
-
-
-
+ android:windowSoftInputMode="adjustResize" >
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/app/src/main/java/org/stypox/dicio/MainActivity.java b/app/src/main/java/org/stypox/dicio/MainActivity.java
index 0c1fe170..1f1b40b3 100644
--- a/app/src/main/java/org/stypox/dicio/MainActivity.java
+++ b/app/src/main/java/org/stypox/dicio/MainActivity.java
@@ -1,8 +1,5 @@
package org.stypox.dicio;
-import static android.Manifest.permission.RECORD_AUDIO;
-import static android.content.pm.PackageManager.PERMISSION_GRANTED;
-
import android.content.Intent;
import android.content.SharedPreferences;
import android.os.Bundle;
@@ -13,23 +10,16 @@
import android.widget.ProgressBar;
import android.widget.ScrollView;
-import androidx.annotation.NonNull;
-import androidx.annotation.Nullable;
-import androidx.appcompat.app.ActionBarDrawerToggle;
-import androidx.appcompat.widget.SearchView;
-import androidx.appcompat.widget.Toolbar;
-import androidx.core.app.ActivityCompat;
-import androidx.core.view.GravityCompat;
-import androidx.drawerlayout.widget.DrawerLayout;
-import androidx.preference.PreferenceManager;
-
import com.google.android.material.floatingactionbutton.ExtendedFloatingActionButton;
import com.google.android.material.navigation.NavigationView;
+import org.dicio.skill.output.GraphicalOutputDevice;
+import org.dicio.skill.output.SpeechOutputDevice;
import org.stypox.dicio.eval.SkillEvaluator;
import org.stypox.dicio.eval.SkillRanker;
import org.stypox.dicio.input.InputDevice;
import org.stypox.dicio.input.SpeechInputDevice;
+import org.stypox.dicio.input.AndroidSttServiceInputDevice;
import org.stypox.dicio.input.ToolbarInputDevice;
import org.stypox.dicio.input.VoskInputDevice;
import org.stypox.dicio.input.stt_service.SttServiceActivity;
@@ -42,8 +32,19 @@
import org.stypox.dicio.skills.SkillHandler;
import org.stypox.dicio.util.BaseActivity;
import org.stypox.dicio.util.PermissionUtils;
-import org.dicio.skill.output.GraphicalOutputDevice;
-import org.dicio.skill.output.SpeechOutputDevice;
+
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import androidx.appcompat.app.ActionBarDrawerToggle;
+import androidx.appcompat.widget.SearchView;
+import androidx.appcompat.widget.Toolbar;
+import androidx.core.app.ActivityCompat;
+import androidx.core.view.GravityCompat;
+import androidx.drawerlayout.widget.DrawerLayout;
+import androidx.preference.PreferenceManager;
+
+import static android.Manifest.permission.RECORD_AUDIO;
+import static android.content.pm.PackageManager.PERMISSION_GRANTED;
public class MainActivity extends BaseActivity
implements NavigationView.OnNavigationItemSelectedListener {
@@ -304,6 +305,11 @@ private InputDevice buildPrimaryInputDevice() {
.getString(getString(R.string.pref_key_input_method), "");
if (preference.equals(getString(R.string.pref_val_input_method_text))) {
return new ToolbarInputDevice();
+ } else if (preference.equals(getString(R.string.pref_val_input_method_systemStt))) {
+            //TODO show a hint / data privacy warning in the preference when this option is
+            // chosen, explaining that the speech Dicio records is passed on to a third-party
+            // app according to the system settings
+ return new AndroidSttServiceInputDevice(this);
} else { // default
return new VoskInputDevice(this);
}
diff --git a/app/src/main/java/org/stypox/dicio/input/AndroidSttServiceInputDevice.java b/app/src/main/java/org/stypox/dicio/input/AndroidSttServiceInputDevice.java
new file mode 100644
index 00000000..8a15a55b
--- /dev/null
+++ b/app/src/main/java/org/stypox/dicio/input/AndroidSttServiceInputDevice.java
@@ -0,0 +1,248 @@
+package org.stypox.dicio.input;
+
+import android.app.Activity;
+import android.content.Intent;
+import android.os.Bundle;
+import android.speech.RecognizerIntent;
+import android.speech.SpeechRecognizer;
+import android.util.Log;
+import android.widget.Toast;
+
+import org.stypox.dicio.R;
+
+import java.util.ArrayList;
+
+import androidx.annotation.StringRes;
+import androidx.preference.PreferenceManager;
+
+import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
+
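+/**
+ * A {@link SpeechInputDevice} which obtains voice input through the standard Android
+ * {@link SpeechRecognizer} API. By default the speech recognition service configured in the
+ * system is used; subclasses can override {@link #getRecognizer()} to pick a specific service.
+ */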
+public class AndroidSttServiceInputDevice extends SpeechInputDevice
+ implements android.speech.RecognitionListener {
+
+ public static final String TAG = AndroidSttServiceInputDevice.class.getSimpleName();
+ private Activity activity;
+
+ private boolean startListeningOnLoaded = false;
+
+ private SpeechRecognizer speechRecognizer;
+ private boolean currentlyListening = false;
+
+
+ /////////////////////
+ // Exposed methods //
+ /////////////////////
+
+ public AndroidSttServiceInputDevice(final Activity activity) {
+ this.activity = activity;
+ }
+
+ @Override
+ public void load() {
+ load(false); // the user did not press on a button, so manual=false
+ }
+
+    /**
+     * @param manual whether the user manually triggered the input device; it is just forwarded
+     *               to {@link #tryToGetInput(boolean)}, see there for details.
+     */
+ protected void load(final boolean manual) {
+ if (speechRecognizer == null) {
+ onLoading();
+ speechRecognizer = getRecognizer();
+ speechRecognizer.setRecognitionListener(this);
+
+ if (startListeningOnLoaded) {
+ startListeningOnLoaded = false;
+ tryToGetInput(manual);
+ } else {
+ onInactive();
+ }
+ }
+ }
+
+    /**
+     * Initializes the recognizer by calling the appropriate
+     * {@link SpeechRecognizer}.createSpeechRecognizer(). By default the system-provided
+     * recognizer is used; override this in case you want to specify a different one.
+     * @return the {@link SpeechRecognizer}
+     */
+ protected SpeechRecognizer getRecognizer() {
+ return SpeechRecognizer.createSpeechRecognizer(activity);
+ }
+
+ /**
+ * Override this to specify which Intent shall be used in
+ * {@link SpeechRecognizer}.startListening()
+ * @return the {@link Intent} according to {@link RecognizerIntent}
+ */
+ protected Intent getRecognizerIntent() {
+ final Intent i = new Intent();
+ i.putExtra(RecognizerIntent.EXTRA_LANGUAGE, PreferenceManager
+ .getDefaultSharedPreferences(activity)
+ .getString(activity.getString(R.string.pref_key_language), "en"));
+ return i;
+ }
+
+ @Override
+ public void cleanup() {
+ super.cleanup();
+ cancelGettingInput();
+
+ activity = null;
+ }
+
+ @Override
+ public synchronized void tryToGetInput(final boolean manual) {
+ if (speechRecognizer == null) {
+ startListeningOnLoaded = true;
+ load(manual); // not loaded before, retry
+ return; // recognizer not ready
+ }
+
+ super.tryToGetInput(manual);
+
+ Log.d(TAG, "starting recognizer");
+
+ onLoading();
+ speechRecognizer.startListening(getRecognizerIntent());
+ currentlyListening = true;
+ }
+
+ @Override
+ public void cancelGettingInput() {
+ if (speechRecognizer != null && currentlyListening) {
+            //only cancel the recognizer if it is actually running! Otherwise ERROR_CLIENT will be reported
+ speechRecognizer.cancel();
+ }
+ startListeningOnLoaded = false;
+ }
+
+ /////////////////////
+ // Other utilities //
+ /////////////////////
+
+ protected void asyncMakeToast(@StringRes final int message) {
+ activity.runOnUiThread(() ->
+ Toast.makeText(activity, activity.getString(message), Toast.LENGTH_SHORT).show());
+ }
+
+
+ ///////////////////////////
+ // Recognition Callbacks //
+ ///////////////////////////
+
+ @Override
+ public void onReadyForSpeech(final Bundle bundle) {
+ Log.d(TAG, "onReadyForSpeech");
+ onListening();
+ currentlyListening = true;
+ }
+
+ @Override
+ public void onBeginningOfSpeech() {
+        //no use case for Dicio
+ Log.d(TAG, "onBeginningOfSpeech");
+ }
+
+ @Override
+ public void onRmsChanged(final float v) {
+        //no use case for Dicio
+ Log.d(TAG, "onRmsChanged");
+ }
+
+ @Override
+ public void onBufferReceived(final byte[] bytes) {
+        //no use case for Dicio
+ Log.d(TAG, "onBufferReceived");
+ }
+
+ @Override
+ public void onEndOfSpeech() {
+ Log.d(TAG, "onEndOfSpeech");
+ currentlyListening = false;
+ onInactive();
+ }
+
+ @Override
+ public void onError(final int i) {
+ Log.d(TAG, "onError called with error code = " + i);
+ switch (i) {
+ case SpeechRecognizer.ERROR_AUDIO:
+ notifyError(new Throwable("ERROR_AUDIO"));
+ break;
+ case SpeechRecognizer.ERROR_CLIENT:
+ notifyError(new Throwable("ERROR_CLIENT"));
+ break;
+ case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
+ notifyError(new Throwable("ERROR_INSUFFICIENT_PERMISSIONS"));
+ break;
+ case SpeechRecognizer.ERROR_LANGUAGE_NOT_SUPPORTED:
+ notifyError(new Throwable("ERROR_LANGUAGE_NOT_SUPPORTED"));
+ break;
+ case SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE:
+ notifyError(new Throwable("ERROR_LANGUAGE_UNAVAILABLE"));
+ break;
+ case SpeechRecognizer.ERROR_NETWORK:
+ notifyError(new Throwable("ERROR_NETWORK"));
+ break;
+ case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
+ notifyError(new Throwable("ERROR_NETWORK_TIMEOUT"));
+ break;
+ case SpeechRecognizer.ERROR_NO_MATCH:
+ Log.d(TAG, "ERROR_NO_MATCH");
+ notifyNoInputReceived();
+ break;
+ case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
+ notifyError(new Throwable("ERROR_RECOGNIZER_BUSY"));
+ break;
+ case SpeechRecognizer.ERROR_SERVER:
+ notifyError(new Throwable("ERROR_SERVER"));
+ break;
+ case SpeechRecognizer.ERROR_SERVER_DISCONNECTED:
+ notifyError(new Throwable("ERROR_SERVER_DISCONNECTED"));
+ break;
+ case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
+ notifyError(new Throwable("ERROR_SPEECH_TIMEOUT"));
+ break;
+ case SpeechRecognizer.ERROR_TOO_MANY_REQUESTS:
+ notifyError(new Throwable("ERROR_TOO_MANY_REQUESTS"));
+ break;
+ default:
+ Log.w(TAG, "onError called with unexpected error code = " + i);
+ notifyError(new Throwable("Unexpected error code = " + i));
+ }
+        //reset views; e.g. Google's recognizer does not send onEndOfSpeech after errors
+        // like ERROR_NO_MATCH
+        onEndOfSpeech();
+    }
+
+ @Override
+ public void onResults(final Bundle bundle) {
+        final ArrayList<String> results = bundle.getStringArrayList(
+                SpeechRecognizer.RESULTS_RECOGNITION);
+        Log.d(TAG, "onResults called with results = " + results);
+ notifyInputReceived(results);
+ }
+
+ @Override
+ public void onPartialResults(final Bundle bundle) {
+        final ArrayList<String> results = bundle.getStringArrayList(
+                SpeechRecognizer.RESULTS_RECOGNITION);
+        Log.d(TAG, "onPartialResults called with results = " + results);
+ final String partialInput = results.get(0);
+ if (!isNullOrEmpty(partialInput)) {
+ notifyPartialInputReceived(partialInput);
+ }
+ }
+
+ @Override
+ public void onEvent(final int i, final Bundle bundle) {
+ //android docs: "Reserved for adding future events"
+ Log.d(TAG, "onEvent");
+ }
+
+
+
+}
diff --git a/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java b/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
index eb5011fb..877a1648 100644
--- a/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
+++ b/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
@@ -1,60 +1,48 @@
package org.stypox.dicio.input;
-import static org.stypox.dicio.util.LocaleUtils.LocaleResolutionResult;
-import static org.stypox.dicio.util.LocaleUtils.UnsupportedLocaleException;
-import static org.stypox.dicio.util.LocaleUtils.resolveSupportedLocale;
-import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
-
import android.app.Activity;
import android.app.DownloadManager;
import android.content.BroadcastReceiver;
+import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.content.IntentFilter;
import android.content.SharedPreferences;
import android.net.Uri;
+import android.speech.SpeechRecognizer;
import android.util.Log;
-import android.widget.Toast;
-
-import androidx.annotation.Nullable;
-import androidx.annotation.StringRes;
-import androidx.core.os.LocaleListCompat;
-import androidx.preference.PreferenceManager;
-import org.stypox.dicio.BuildConfig;
import org.stypox.dicio.R;
import org.stypox.dicio.Sections;
-import org.json.JSONException;
-import org.json.JSONObject;
-import org.vosk.LibVosk;
-import org.vosk.LogLevel;
-import org.vosk.Model;
-import org.vosk.Recognizer;
-import org.vosk.android.RecognitionListener;
-import org.vosk.android.SpeechService;
+import org.stypox.dicio.input.stt_service.SttService;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
+import androidx.annotation.Nullable;
+import androidx.core.os.LocaleListCompat;
+import androidx.preference.PreferenceManager;
import io.reactivex.rxjava3.android.schedulers.AndroidSchedulers;
import io.reactivex.rxjava3.core.Completable;
import io.reactivex.rxjava3.disposables.CompositeDisposable;
import io.reactivex.rxjava3.schedulers.Schedulers;
-public class VoskInputDevice extends SpeechInputDevice {
+import static org.stypox.dicio.util.LocaleUtils.LocaleResolutionResult;
+import static org.stypox.dicio.util.LocaleUtils.UnsupportedLocaleException;
+import static org.stypox.dicio.util.LocaleUtils.resolveSupportedLocale;
+
+public class VoskInputDevice extends AndroidSttServiceInputDevice {
public static final String TAG = VoskInputDevice.class.getSimpleName();
public static final String MODEL_PATH = "/vosk-model";
public static final String MODEL_ZIP_FILENAME = "model.zip";
- public static final float SAMPLE_RATE = 44100.0f;
/**
* All small models from Vosk
@@ -92,18 +80,13 @@ public class VoskInputDevice extends SpeechInputDevice {
private final CompositeDisposable disposables = new CompositeDisposable();
@Nullable private BroadcastReceiver downloadingBroadcastReceiver = null;
private Long currentModelDownloadId = null;
- @Nullable private SpeechService speechService = null;
-
- private boolean currentlyInitializingRecognizer = false;
- private boolean startListeningOnLoaded = false;
- private boolean currentlyListening = false;
-
/////////////////////
// Exposed methods //
/////////////////////
public VoskInputDevice(final Activity activity) {
+ super(activity);
this.activity = activity;
}
@@ -116,84 +99,71 @@ public void load() {
* @param manual if this is true and the model is not already downloaded, do not start
* downloading it. See {@link #tryToGetInput(boolean)}.
*/
- private void load(final boolean manual) {
- if (speechService == null && !currentlyInitializingRecognizer) {
- if (new File(getModelDirectory(), "ivector").exists()) {
- // one directory is in the correct place, so everything should be ok
- Log.d(TAG, "Vosk model in place");
-
- currentlyInitializingRecognizer = true;
- onLoading();
-
- disposables.add(Completable.fromAction(this::initializeRecognizer)
- .subscribeOn(Schedulers.io())
- .observeOn(AndroidSchedulers.mainThread())
- .subscribe(() -> {
- currentlyInitializingRecognizer = false;
- if (startListeningOnLoaded) {
- startListeningOnLoaded = false;
- tryToGetInput(manual);
- } else {
- onInactive();
- }
- }, throwable -> {
- currentlyInitializingRecognizer = false;
- if ("Failed to initialize recorder. Microphone might be already in use."
- .equals(throwable.getMessage())) {
- notifyError(new UnableToAccessMicrophoneException());
- } else {
- notifyError(throwable);
- }
- onInactive();
- }));
-
- } else {
- Log.d(TAG, "Vosk model not in place");
- final DownloadManager downloadManager =
- (DownloadManager) activity.getSystemService(Context.DOWNLOAD_SERVICE);
-
- if (currentModelDownloadId == null) {
- Log.d(TAG, "Vosk model is not already being downloaded");
-
- if (manual) {
- // the model needs to be downloaded and no download has already started;
- // the user manually triggered the input device, so he surely wants the
- // model to be downloaded, so we can proceed
- onLoading();
- try {
- final LocaleResolutionResult result = resolveSupportedLocale(
- LocaleListCompat.create(Sections.getCurrentLocale()),
- MODEL_URLS.keySet());
- startDownloadingModel(downloadManager, result.supportedLocaleString);
- } catch (final UnsupportedLocaleException e) {
- asyncMakeToast(R.string.vosk_model_unsupported_language);
- e.printStackTrace();
- onRequiresDownload();
- }
+    @Override
+    protected void load(final boolean manual) {
+ if (new File(getModelDirectory(), "ivector").exists()) {
+ // one directory is in the correct place, so everything should be ok
+ Log.d(TAG, "Vosk model in place");
+ super.load(manual);
+ } else {
+ Log.d(TAG, "Vosk model not in place");
+ final DownloadManager downloadManager =
+ (DownloadManager) activity.getSystemService(Context.DOWNLOAD_SERVICE);
- } else {
- // loading the model would require downloading it, but the user didn't
- // explicitly tell the voice recognizer to download files, so notify them
- // that a download is required
+ if (currentModelDownloadId == null) {
+ Log.d(TAG, "Vosk model is not already being downloaded");
+
+ if (manual) {
+ // the model needs to be downloaded and no download has already started;
+ // the user manually triggered the input device, so he surely wants the
+ // model to be downloaded, so we can proceed
+ onLoading();
+ try {
+ final LocaleResolutionResult result = resolveSupportedLocale(
+ LocaleListCompat.create(Sections.getCurrentLocale()),
+ MODEL_URLS.keySet());
+ startDownloadingModel(downloadManager, result.supportedLocaleString);
+ } catch (final UnsupportedLocaleException e) {
+ asyncMakeToast(R.string.vosk_model_unsupported_language);
+ e.printStackTrace();
onRequiresDownload();
}
} else {
- Log.d(TAG, "Vosk model already being downloaded: " + currentModelDownloadId);
+ // loading the model would require downloading it, but the user didn't
+ // explicitly tell the voice recognizer to download files, so notify them
+ // that a download is required
+ onRequiresDownload();
}
+
+ } else {
+ Log.d(TAG, "Vosk model already being downloaded: " + currentModelDownloadId);
}
}
}
+ @Override
+ protected SpeechRecognizer getRecognizer() {
+        final SpeechRecognizer sr = SpeechRecognizer.createSpeechRecognizer(activity,
+                new ComponentName(activity, SttService.class));
+        // Additionally call startService so that the service is not destroyed right after the
+        // speech recognizer is unbound (especially important if SttServiceActivity is only
+        // called from other apps). If the Dicio app is closed the service is destroyed anyway.
+        // Avoid destroying it in order to avoid re-initialization of the SpeechService
+        // (observed when manually closed - check whether this also happens when closed by the
+        // system due to inactivity). This also works when battery optimization is enabled.
+        // TODO check long term behaviour with and without battery optimization
+        // TODO check how to call startService if neither the Dicio main app nor Dicio's
+        //  SttServiceActivity is used, but SpeechRecognizer.createSpeechRecognizer is called
+        //  directly by a 3rd party app
+        activity.startService(new Intent(activity, SttService.class));
+        return sr;
+ }
+
@Override
public void cleanup() {
super.cleanup();
disposables.clear();
- if (speechService != null) {
- speechService.stop();
- speechService.shutdown();
- speechService = null;
- }
if (currentModelDownloadId != null) {
final DownloadManager downloadManager =
@@ -209,117 +179,6 @@ public void cleanup() {
activity = null;
}
- @Override
- public synchronized void tryToGetInput(final boolean manual) {
- if (currentlyInitializingRecognizer) {
- startListeningOnLoaded = true;
- return;
- } else if (speechService == null) {
- startListeningOnLoaded = true;
- load(manual); // not loaded before, retry
- return; // recognizer not ready
- }
-
- if (currentlyListening) {
- return;
- }
- currentlyListening = true;
- super.tryToGetInput(manual);
-
- Log.d(TAG, "starting recognizer");
-
- speechService.startListening(new RecognitionListener() {
-
- @Override
- public void onPartialResult(final String s) {
- Log.d(TAG, "onPartialResult called with s = " + s);
- if (!currentlyListening) {
- return;
- }
-
- String partialInput = null;
- try {
- partialInput = new JSONObject(s).getString("partial");
- } catch (final JSONException e) {
- e.printStackTrace();
- }
-
- if (!isNullOrEmpty(partialInput)) {
- notifyPartialInputReceived(partialInput);
- }
- }
-
- @Override
- public void onResult(final String s) {
- Log.d(TAG, "onResult called with s = " + s);
- if (!currentlyListening) {
- return;
- }
-
- stopRecognizer();
-
-                final ArrayList<String> inputs = new ArrayList<>();
- try {
- final JSONObject jsonResult = new JSONObject(s);
- final int size = jsonResult.getJSONArray("alternatives").length();
- for (int i = 0; i < size; i++) {
- final String text = jsonResult.getJSONArray("alternatives")
- .getJSONObject(i).getString("text");
- if (!isNullOrEmpty(text)) {
- inputs.add(text);
- }
- }
- } catch (final JSONException e) {
- e.printStackTrace();
- }
-
- if (inputs.isEmpty()) {
- notifyNoInputReceived();
- } else {
- notifyInputReceived(inputs);
- }
- }
-
- @Override
- public void onFinalResult(final String s) {
- Log.d(TAG, "onFinalResult called with s = " + s);
- // TODO
- }
-
- @Override
- public void onError(final Exception e) {
- Log.d(TAG, "onError called");
- stopRecognizer();
- notifyError(e);
- }
-
- @Override
- public void onTimeout() {
- Log.d(TAG, "onTimeout called");
- stopRecognizer();
- notifyNoInputReceived();
- }
- });
- onListening();
- }
-
- @Override
- public void cancelGettingInput() {
- if (currentlyListening) {
- if (speechService != null) {
- speechService.stop();
- }
- notifyNoInputReceived();
-
- // call onInactive() only if we really were listening, so that the SpeechInputDevice
- // state icon is preserved if something different from "microphone on" was being shown
- onInactive();
- }
-
- startListeningOnLoaded = false;
- currentlyListening = false;
- }
-
/**
* Deletes the Vosk model downloaded in the {@link Context#getFilesDir()} if it exists. It also
* stops any Vosk model download currently in progress based on the id stored in settings.
@@ -337,31 +196,6 @@ public static void deleteCurrentModel(final Context context) {
}
- ////////////////////
- // Initialization //
- ////////////////////
-
- private synchronized void initializeRecognizer() throws IOException {
- Log.d(TAG, "initializing recognizer");
-
- LibVosk.setLogLevel(BuildConfig.DEBUG ? LogLevel.DEBUG : LogLevel.WARNINGS);
- final Model model = new Model(getModelDirectory().getAbsolutePath());
- final Recognizer recognizer = new Recognizer(model, SAMPLE_RATE);
- recognizer.setMaxAlternatives(5);
- this.speechService = new SpeechService(recognizer, SAMPLE_RATE);
- }
-
- private void stopRecognizer() {
- currentlyListening = false;
-
- if (speechService != null) {
- speechService.stop();
- }
-
- onInactive();
- }
-
-
////////////////////
// Model download //
////////////////////
@@ -560,13 +394,4 @@ private void updateCurrentDownloadId(final Context context, final Long id) {
}
}
-
- /////////////////////
- // Other utilities //
- /////////////////////
-
- private void asyncMakeToast(@StringRes final int message) {
- activity.runOnUiThread(() ->
- Toast.makeText(activity, activity.getString(message), Toast.LENGTH_SHORT).show());
- }
}
diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java b/app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java
new file mode 100644
index 00000000..49799a1c
--- /dev/null
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java
@@ -0,0 +1,94 @@
+package org.stypox.dicio.input.stt_service;
+
+import android.content.Context;
+import android.content.SharedPreferences;
+import android.preference.PreferenceManager;
+import android.util.AttributeSet;
+
+import org.stypox.dicio.R;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import androidx.preference.MultiSelectListPreference;
+
+/**
+ * A {@link MultiSelectListPreference} which uses the package names stored under
+ * R.string.pref_key_stt_onlisten_sound_entries as both its entries and its entry values
+ */
+public class MakeSoundPreference extends MultiSelectListPreference {
+ final SharedPreferences preferences;
+ final String helperPrefKey;
+ final String[] ownPackageName = new String[1];
+
+ public MakeSoundPreference(@NonNull final Context context, @Nullable final AttributeSet attrs,
+ final int defStyleAttr, final int defStyleRes) {
+ super(context, attrs, defStyleAttr, defStyleRes);
+ preferences = PreferenceManager.getDefaultSharedPreferences(context);
+ helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+ ownPackageName[0] = context.getPackageName();
+ }
+
+ public MakeSoundPreference(@NonNull final Context context, @Nullable final AttributeSet attrs,
+ final int defStyleAttr) {
+ super(context, attrs, defStyleAttr);
+ preferences = PreferenceManager.getDefaultSharedPreferences(context);
+ helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+ ownPackageName[0] = context.getPackageName();
+ }
+
+ public MakeSoundPreference(@NonNull final Context context, @Nullable final AttributeSet attrs) {
+ super(context, attrs);
+ preferences = PreferenceManager.getDefaultSharedPreferences(context);
+ helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+ ownPackageName[0] = context.getPackageName();
+ }
+
+ public MakeSoundPreference(@NonNull final Context context) {
+ super(context);
+ preferences = PreferenceManager.getDefaultSharedPreferences(context);
+ helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+ ownPackageName[0] = context.getPackageName();
+ }
+
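+    /**
+     * @return the package names of the apps which have requested speech input so far (they are
+     *         collected by {@link SttService}), so that the user can choose for which of them
+     *         the sound notification should be disabled
+     */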
+ @Override
+ public CharSequence[] getEntries() {
+        final Set<String> entries = preferences.getStringSet(helperPrefKey,
+                new HashSet<>(Arrays.asList(ownPackageName)));
+ final String[] back = new String[entries.size()];
+ int i = 0;
+ for (final String e: entries) {
+ back[i] = e;
+ i++;
+ }
+ return back;
+ }
+
+ @Override
+ public CharSequence[] getEntryValues() {
+ return getEntries();
+ }
+ //
+// protected void runtimePopulateEntries(Context context){
+// final SharedPreferences settings = PreferenceManager.getDefaultSharedPreferences(context);
+// settings.getStringSet()
+// final List entries = new ArrayList<>(Arrays.asList(getEntries()));
+// final List entriesValues = new ArrayList<>(Arrays.asList(getEntries()));
+// setEntries(entries.toArray(new CharSequence[]{}));
+// setEntryValues(entriesValues.toArray(new CharSequence[]{}));
+// }
+//
+// public void addEntry(CharSequence newEntry) {
+// final Set entries = new HashSet<>(Arrays.asList(getEntries()));
+// entries.add(newEntry);
+// setEntries(entries.toArray(new CharSequence[]{}));
+// }
+// public void addEntryValue(CharSequence newEntry) {
+// final List entryValues = new ArrayList<>(Arrays.asList(getEntries()));
+// entryValues.add(newEntry);
+// setEntryValues(entryValues.toArray(new CharSequence[]{}));
+// }
+}
diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
new file mode 100644
index 00000000..60a65f6b
--- /dev/null
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
@@ -0,0 +1,532 @@
+package org.stypox.dicio.input.stt_service;
+
+import android.content.Intent;
+import android.content.SharedPreferences;
+import android.media.Ringtone;
+import android.media.RingtoneManager;
+import android.net.Uri;
+import android.os.Build;
+import android.os.Bundle;
+import android.os.RemoteException;
+import android.speech.RecognitionService;
+import android.speech.RecognizerIntent;
+import android.speech.SpeechRecognizer;
+import android.util.Log;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.stypox.dicio.BuildConfig;
+import org.stypox.dicio.R;
+import org.stypox.dicio.error.ErrorInfo;
+import org.stypox.dicio.error.ErrorUtils;
+import org.stypox.dicio.error.UserAction;
+import org.vosk.LibVosk;
+import org.vosk.LogLevel;
+import org.vosk.Model;
+import org.vosk.Recognizer;
+import org.vosk.android.SpeechService;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import androidx.annotation.Nullable;
+import androidx.preference.PreferenceManager;
+import io.reactivex.rxjava3.android.schedulers.AndroidSchedulers;
+import io.reactivex.rxjava3.core.Completable;
+import io.reactivex.rxjava3.disposables.CompositeDisposable;
+import io.reactivex.rxjava3.schedulers.Schedulers;
+
+import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
+
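+/**
+ * A {@link RecognitionService} backed by the locally downloaded Vosk model. It is exposed to the
+ * Android system so that other apps (and Dicio itself, through
+ * {@link android.speech.SpeechRecognizer}) can use Dicio as their speech-to-text provider.
+ */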
+public class SttService extends RecognitionService {
+ protected class RecognitionListener implements org.vosk.android.RecognitionListener {
+ private boolean firstPartialResultReceived = false;
+
+ @Override
+ public void onPartialResult(final String s) {
+ Log.d(TAG, "onPartialResult called with s = " + s);
+
+ String partialInput = null;
+ try {
+ partialInput = new JSONObject(s).getString("partial");
+ } catch (final JSONException e) {
+ e.printStackTrace();
+ }
+
+ if (!isNullOrEmpty(partialInput)) {
+ if (!firstPartialResultReceived) {
+ firstPartialResultReceived = true;
+ try {
+ callback.beginningOfSpeech();
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ }
+ }
+ final String[] partialInputArray = {partialInput};
+ final Bundle partResult = new Bundle();
+ partResult.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION,
+ new ArrayList<>(Arrays.asList(partialInputArray)));
+ try {
+ callback.partialResults(partResult);
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ }
+ }
+ }
+
+ @Override
+ public void onResult(final String s) {
+ Log.d(TAG, "onResult called with s = " + s);
+
+ stopRecognizer();
+
+            final ArrayList<String> inputs = new ArrayList<>();
+            float[] confidences = null;
+            try {
+                final JSONObject jsonResult = new JSONObject(s);
+                final JSONArray alternatives = jsonResult.getJSONArray("alternatives");
+                final int size = alternatives.length();
+                final float[] allConfidences = new float[size];
+                int kept = 0;
+                for (int i = 0; i < size; i++) {
+                    final String text = alternatives.getJSONObject(i).getString("text");
+                    if (!isNullOrEmpty(text)) {
+                        inputs.add(text);
+                        // keep the confidence of the same alternative, skipping empty entries
+                        allConfidences[kept] = (float) alternatives.getJSONObject(i)
+                                .getDouble("confidence");
+                        kept++;
+                    }
+                }
+                confidences = Arrays.copyOf(allConfidences, kept);
+            } catch (final JSONException e) {
+                e.printStackTrace();
+            }
+
+ if (inputs.isEmpty()) {
+ callbackErrorReport(SpeechRecognizer.ERROR_NO_MATCH);
+ } else {
+ final Bundle results = new Bundle();
+ results.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, inputs);
+ results.putFloatArray(SpeechRecognizer.CONFIDENCE_SCORES, confidences);
+ try {
+ callback.results(results);
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ }
+ }
+ }
+
+ @Override
+ public void onFinalResult(final String s) {
+ Log.d(TAG, "onFinalResult called with s = " + s);
+ firstPartialResultReceived = false; //reset for next input
+ try {
+ //only notify endOfSpeech because s is currently always empty - even if onResult
+ // was not empty before
+ callback.endOfSpeech();
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ }
+ }
+
+ @Override
+ public void onError(final Exception e) {
+ Log.e(TAG, "onError", e);
+ showErrorNotification(e);
+ stopRecognizer();
+ callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
+ }
+
+ @Override
+ public void onTimeout() {
+ Log.d(TAG, "onTimeout called");
+ stopRecognizer();
+ callbackErrorReport(SpeechRecognizer.ERROR_SPEECH_TIMEOUT);
+ }
+ }
+
+    /**
+     * The Vosk {@link SpeechService} which records audio and feeds it to the recognizer; see
+     * the SpeechService documentation for details. Null while not loaded.
+     */
+ @Nullable
+ private SpeechService speechService = null;
+ private Model model;
+ private long modelDownloadDate;
+ private boolean currentlyInitializingRecognizer = false;
+ public static final String MODEL_PATH = "/vosk-model";
+ public static final String TAG = SttService.class.getSimpleName();
+ private final CompositeDisposable disposables = new CompositeDisposable();
+ public static final float SAMPLE_RATE = 44100.0f;
+ private boolean currentlyListening = false;
+ private boolean startListeningOnLoaded = false;
+ private boolean onStartCommandCalled = false;
+ private Intent lastRequestedIntent = null;
+ Callback callback;
+
+ @Override
+ public void onCreate() {
+ super.onCreate();
+ LibVosk.setLogLevel(BuildConfig.DEBUG ? LogLevel.DEBUG : LogLevel.WARNINGS);
+ initialize();
+ Log.d(TAG, "onCreate");
+ }
+
+ @Override
+ public int onStartCommand(final Intent intent, final int flags, final int startId) {
+ Log.d(TAG, "onStartCommand");
+ onStartCommandCalled = true;
+ return super.onStartCommand(intent, flags, startId);
+ }
+
+ @Override
+ public boolean onUnbind(final Intent intent) {
+ Log.d(TAG, "onUnbind");
+ return super.onUnbind(intent);
+ }
+
+
+ @Override
+ public void onRebind(final Intent intent) {
+ Log.d(TAG, "onRebind");
+ super.onRebind(intent);
+ }
+
+
+ @Override
+ public void onDestroy() {
+ Log.d(TAG, "onDestroy");
+ disposables.clear();
+ shutdownSpeechService();
+ super.onDestroy();
+ }
+
+ @Override
+ protected void onStartListening(final Intent intent, final Callback newCallback) {
+ Log.d(TAG, "onStartListening");
+ Log.d(TAG, "onStartCommand called is " + onStartCommandCalled);
+ this.callback = newCallback;
+        //Regarding permission checks: it seems this is already done by the system interface
+        // (it reports SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS), but checking is
+        // explicitly recommended in the SpeechRecognizer documentation. However, the way shown
+        // in the docs does not work here, due to the API level of the required calls and since
+        // the audio recorder is not implemented here directly but by the Vosk library:
+        // https://developer.android.com/reference/android/speech/RecognitionService
+        // Even if an app without the permission found a way in, it would not be a security
+        // issue, since the STT service notifies the user when speech input is started.
+ final SharedPreferences preferences = PreferenceManager.getDefaultSharedPreferences(this);
+ final boolean makeSound = preferences.getBoolean(
+ getString(R.string.pref_key_stt_onlisten_sound), true);
+ if (makeSound && android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.M) {
+ final String callingPackageName = getPackageManager().getPackagesForUid(
+ newCallback.getCallingUid())[0];
+            final Set<String> exceptedPackages = preferences.getStringSet(
+                    getString(R.string.pref_key_stt_sound_onlisten), new HashSet<>());
+ if (exceptedPackages.contains(callingPackageName)) {
+ Log.i(TAG, "Suppressed stt onbegin sound for package " + callingPackageName);
+ } else {
+ final Uri notification = RingtoneManager.getDefaultUri(
+ RingtoneManager.TYPE_NOTIFICATION);
+ final Ringtone r = RingtoneManager.getRingtone(this, notification);
+ r.play();
+                final Set<String> knownPackages = preferences.getStringSet(
+                        getString(R.string.pref_key_stt_onlisten_sound_entries),
+                        new HashSet<>());
+ if (!knownPackages.contains(callingPackageName)) {
+ //add to preference entries to offer to user whether it shall be excepted
+                    final HashSet<String> extendedKnownPackages = new HashSet<>(knownPackages);
+ extendedKnownPackages.add(callingPackageName);
+ preferences.edit().putStringSet(
+ getString(R.string.pref_key_stt_onlisten_sound_entries),
+ extendedKnownPackages)
+ .apply();
+ }
+ }
+ }
+ if (speechService != null && !recogIntentExtrasEquals(lastRequestedIntent, intent)) {
+ shutdownSpeechService();
+ if (intent.hasExtra(RecognizerIntent.EXTRA_LANGUAGE)) {
+ //check if language change is the reason
+ Log.d(TAG, "requested language = "
+ + intent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE));
+ if (!lastRequestedIntent.hasExtra(RecognizerIntent.EXTRA_LANGUAGE)
+ || !lastRequestedIntent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE)
+ .equals(intent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE))) {
+ //Since at the moment only one language at the time is supported, just check
+ // whether the downloaded model has changed. Otherwise use the language which
+ // is installed anyway
+ if (getModelDirectory().lastModified() != modelDownloadDate) {
+ Log.d(TAG, "model last modified " + getModelDirectory().lastModified());
+ Log.d(TAG, "model_download_date " + modelDownloadDate);
+ model = null; //forces reloading
+ shutdownSpeechService(); //forces reloading
+ }
+ }
+ }
+ }
+ lastRequestedIntent = intent;
+
+ tryToGetInput();
+
+ }
+
+    /**
+     * Compares the extras of two recognizer intents, in order to identify whether a new
+     * recognizer has to be loaded or not.
+     * @return true if all extras which are supported by this STT service are equal
+     */
+ protected boolean recogIntentExtrasEquals(final Intent i1, final Intent i2) {
+ final Bundle ie1 = i1.getExtras();
+ final Bundle ie2 = i2.getExtras();
+ final String[] supportedExtras = {RecognizerIntent.EXTRA_LANGUAGE,
+ RecognizerIntent.EXTRA_MAX_RESULTS};
+ for (final String key: supportedExtras) {
+ final Object extra1 = ie1.get(key);
+ final Object extra2 = ie2.get(key);
+            //return false if they are not equal or one (but not both) is null
+ if (extra1 != null) {
+ if (!extra1.equals(extra2)) {
+ return false;
+ }
+ } else if (extra2 != null) {
+ return false;
+ }
+ }
+ return true;
+
+ //TODO support Intent Extras if possible with vosk
+ // EXTRA_LANGUAGE / EXTRA_LANGUAGE_PREFERENCE / EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE
+ // Further Extras which may be interesting
+ // EXTRA_LANGUAGE_MODEL / LANGUAGE_MODEL_FREE_FORM / LANGUAGE_MODEL_WEB_SEARCH
+ // EXTRA_SEGMENTED_SESSION
+ // EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS /
+ // EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS
+ // EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS
+ // EXTRA_AUDIO_SOURCE / EXTRA_AUDIO_SOURCE_CHANNEL_COUNT /
+ // EXTRA_AUDIO_SOURCE_ENCODING / EXTRA_AUDIO_SOURCE_SAMPLING_RATE
+ // EXTRA_BIASING_STRINGS
+ // EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
+ }
+
+
+ @Override
+ protected void onCancel(final Callback newCallback) {
+ Log.d(TAG, "onCancel");
+ stopRecognizer();
+ }
+
+ @Override
+ protected void onStopListening(final Callback newCallback) {
+ Log.d(TAG, "onStopListening");
+ if (currentlyListening) {
+ stopRecognizer();
+ }
+ }
+
+
+
+
+
+ private void initialize() {
+ if (speechService == null && !currentlyInitializingRecognizer) {
+ if (new File(getModelDirectory(), "ivector").exists()) {
+ // one directory is in the correct place, so everything should be ok
+ Log.d(TAG, "Vosk model in place");
+
+ currentlyInitializingRecognizer = true;
+
+ disposables.add(Completable.fromAction(this::loadModel)
+ .subscribeOn(Schedulers.io())
+ .observeOn(AndroidSchedulers.mainThread())
+ .subscribe(() -> {
+ currentlyInitializingRecognizer = false;
+ if (startListeningOnLoaded) {
+ startListeningOnLoaded = false;
+ tryToGetInput();
+ }
+ }, throwable -> {
+ currentlyInitializingRecognizer = false;
+ showErrorNotification(throwable);
+ }));
+
+ } else {
+ if (callback != null) {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+ callbackErrorReport(SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE);
+ } else {
+ callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
+ }
+ }
+ showErrorNotification(
+ new Throwable(getString(R.string.vosk_model_unsupported_language)));
+ }
+ }
+ }
+ public synchronized void tryToGetInput() {
+ if (currentlyInitializingRecognizer) {
+ startListeningOnLoaded = true;
+ return;
+ } else if (model == null) {
+ Log.w(TAG, "tryToGetInput model==null");
+ initialize(); //try to load anew
+ startListeningOnLoaded = true;
+ return; // recognizer not ready
+ } else if (getModelDirectory().lastModified() != modelDownloadDate) {
+ //if model has changed / updated / etc...
+ Log.i(TAG, "model directory modified date changed - load it anew");
+ Log.d(TAG, "model last modified " + getModelDirectory().lastModified());
+ Log.d(TAG, "model_download_date " + modelDownloadDate);
+ model = null; //reset
+ shutdownSpeechService();
+ initialize(); //load new one
+ startListeningOnLoaded = true;
+ return; // recognizer not ready
+ } else if (speechService == null) {
+ try {
+ loadSpeechService();
+ } catch (final IOException e) {
+ if ("Failed to initialize recorder. Microphone might be already in use."
+ .equals(e.getMessage())) {
+ callbackErrorReport(SpeechRecognizer.ERROR_AUDIO);
+ } else {
+ Log.e(TAG, "load()->initializeRecognizer", e);
+ showErrorNotification(e);
+ callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
+ }
+ return;
+ }
+ }
+        //only one client can be connected to the speech recognizer via the system at a time
+        // (otherwise ERROR_RECOGNIZER_BUSY seems to be reported) - check whether the
+        // "currently listening" checks are necessary at all; on the other hand they do not harm
+ if (currentlyListening) {
+ if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+ callbackErrorReport(SpeechRecognizer.ERROR_TOO_MANY_REQUESTS);
+ } else {
+ //more generic
+ callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
+ }
+ return;
+ }
+
+ currentlyListening = true;
+ Log.d(TAG, "starting recognizer");
+
+ speechService.startListening(new RecognitionListener());
+
+ try {
+ callback.readyForSpeech(null);
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ }
+ }
+
+ private void logRemoteException(final RemoteException e) {
+ Log.e(TAG, "Remote exception on callback information", e);
+ showErrorNotification(e);
+ }
+
+ /**
+     * Wrapper for calling {@link RecognitionService.Callback#error(int)} which catches the
+     * remote exception.
+ * @param errorType see {@link RecognitionService.Callback#error(int)}
+ */
+ protected void callbackErrorReport(final int errorType) {
+ try {
+ callback.error(errorType);
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ } catch (final NullPointerException e) {
+ showErrorNotification(e);
+ }
+ }
+
+ private File getModelDirectory() {
+ return new File(this.getFilesDir(), MODEL_PATH);
+ }
+
+ protected void showErrorNotification(final Throwable t) {
+ final ErrorInfo ei = new ErrorInfo(t, UserAction.STT_SERVICE_SPEECH_TO_TEXT);
+ ErrorUtils.createNotification(this, ei);
+ }
+
+
+    /////////////////////////
+    // Vosk Initialization //
+    /////////////////////////
+
+    /**
+     * Loads the Vosk model. This is the most time consuming part of recognizer initialization.
+     */
+ private synchronized void loadModel() {
+ Log.d(TAG, "load Model");
+ final long t0 = System.currentTimeMillis();
+ model = new Model(getModelDirectory().getAbsolutePath());
+ modelDownloadDate = getModelDirectory().lastModified();
+ final long t1 = (System.currentTimeMillis() - t0);
+ Log.i(TAG, "Loading Model takes " + t1 + " ms");
+ }
+
+    /**
+     * Loads the recognizer. Call this if an intent with new parameters (compared to the last
+     * one) is received.
+     */
+ private void loadSpeechService() throws IOException {
+ if (speechService != null) {
+ //first shutdown the old one, if a new one is requested
+ shutdownSpeechService();
+ }
+
+ final long t0 = System.currentTimeMillis();
+ final Recognizer recognizer = new Recognizer(model, SAMPLE_RATE);
+ if (lastRequestedIntent != null) {
+ recognizer.setMaxAlternatives(
+ lastRequestedIntent.getIntExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 5));
+
+ }
+ this.speechService = new SpeechService(recognizer, SAMPLE_RATE);
+ Log.i(TAG, "Loading SpeechService takes " + (System.currentTimeMillis() - t0) + " ms");
+ }
+
+    /**
+     * Only shuts down the speech service; the language model is kept in cache so that the
+     * speech service can be started again faster.
+     */
+ protected void shutdownSpeechService() {
+ if (speechService != null) {
+ stopRecognizer();
+ speechService.shutdown();
+ speechService = null;
+ }
+ }
+
+    /**
+     * Safe to call even if the recognizer is not currently listening.
+     */
+ private void stopRecognizer() {
+ if (speechService != null) {
+            speechService.stop(); //does nothing if recognition is not active.
+            //TODO test whether some devices need a shutdown call every time, and if so whether
+            // that would conflict with performance
+//            speechService.shutdown();
+//            speechService = null;
+ } else if (currentlyListening) {
+            //(actually currentlyListening should never be true at this point - however it does
+            // not harm)
+            //this means SpeechRecognizer.startListening was called, but endOfSpeech not yet;
+            // make sure to free resources so that the speech recognizer is not considered busy
+ try {
+ callback.endOfSpeech();
+ } catch (final RemoteException e) {
+ logRemoteException(e);
+ }
+ }
+ currentlyListening = false;
+    }
+}
diff --git a/app/src/main/java/org/stypox/dicio/settings/IOFragment.java b/app/src/main/java/org/stypox/dicio/settings/IOFragment.java
index b0c5e19b..2f50cc16 100644
--- a/app/src/main/java/org/stypox/dicio/settings/IOFragment.java
+++ b/app/src/main/java/org/stypox/dicio/settings/IOFragment.java
@@ -2,11 +2,11 @@
import android.os.Bundle;
-import androidx.preference.PreferenceFragmentCompat;
-
import org.stypox.dicio.R;
import org.stypox.dicio.input.VoskInputDevice;
+import androidx.preference.PreferenceFragmentCompat;
+
public class IOFragment extends PreferenceFragmentCompat {
@Override
public void onCreatePreferences(final Bundle savedInstanceState, final String rootKey) {
@@ -20,11 +20,12 @@ public void onCreatePreferences(final Bundle savedInstanceState, final String ro
}
return true;
});
- findPreference(getString(R.string.pref_key_input_method))
- .setOnPreferenceChangeListener((preference, newValue) -> {
- VoskInputDevice.deleteCurrentModel(requireContext());
- return true;
- });
+//TODO discuss whether this is needed; commented out for now, at least to ease debugging
+// findPreference(getString(R.string.pref_key_input_method))
+// .setOnPreferenceChangeListener((preference, newValue) -> {
+// VoskInputDevice.deleteCurrentModel(requireContext());
+// return true;
+// });
}
@Override
diff --git a/app/src/main/res/values/arrays.xml b/app/src/main/res/values/arrays.xml
index 3fc982af..c220794a 100644
--- a/app/src/main/res/values/arrays.xml
+++ b/app/src/main/res/values/arrays.xml
@@ -39,10 +39,12 @@
         <item>@string/pref_input_method_vosk</item>
         <item>@string/pref_input_method_text</item>
+        <item>@string/pref_input_method_systemStt</item>
         <item>@string/pref_val_input_method_vosk</item>
         <item>@string/pref_val_input_method_text</item>
+        <item>@string/pref_val_input_method_systemStt</item>
diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml
index a25938f2..2557f429 100644
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -53,6 +53,7 @@
Input method
Choose the service to use to talk to Dicio - %1$s
Text box
+    System provided speech-to-text service (speech recognition is handled outside of Dicio)
Vosk offline speech recognition
Speech output method
Choose the service Dicio should use to talk to you - %1$s
@@ -68,9 +69,17 @@
DuckDuckGo
Default city
Set the city to use for weather when you do not explicitly say one. The current behaviour is to get the location from IP info.
- Directly send result of speech to text service
+    Directly send result (for STT with Dicio UI)
Automatically send speech result to requesting app when listening finishes
Wait for manual confirmation before sending speech result to requesting app
+ Sound
+ Play a sound when speech input starts
+ Sound on speech input start is disabled
+ Exceptions from sound notification
+ Disable sound per known application
+ Choose which applications can request speech input without sound notifications. Requires at least Android 6 (Marshmallow).
+
+
The skill \"%1$s\" needs these permissions to work: %2$s
Could not evaluate your request
Network error
diff --git a/app/src/main/res/values/strings_keys.xml b/app/src/main/res/values/strings_keys.xml
index dcbab320..b18008f2 100644
--- a/app/src/main/res/values/strings_keys.xml
+++ b/app/src/main/res/values/strings_keys.xml
@@ -9,6 +9,7 @@
input_method
text
vosk
+    systemStt
speech_output_method
android
@@ -25,4 +26,9 @@
weather_default_city
stt_auto_finish
+
+ pref_key_stt_onlisten_sound
+ pref_key_stt_sound_onlisten
+ pref_key_stt_onlisten_sound_entries
+
\ No newline at end of file
diff --git a/app/src/main/res/xml/pref_io.xml b/app/src/main/res/xml/pref_io.xml
index 66ea4efc..55a32408 100644
--- a/app/src/main/res/xml/pref_io.xml
+++ b/app/src/main/res/xml/pref_io.xml
@@ -27,13 +27,32 @@
android:key="@string/pref_key_speech_output_method"
android:summary="@string/pref_speech_output_method_summary"
android:title="@string/pref_speech_output_method" />
+
+
+
-
+
+
\ No newline at end of file
diff --git a/app/src/main/res/xml/stt_service_metadata.xml b/app/src/main/res/xml/stt_service_metadata.xml
new file mode 100644
index 00000000..776b6335
--- /dev/null
+++ b/app/src/main/res/xml/stt_service_metadata.xml
@@ -0,0 +1,5 @@
+
+
+
\ No newline at end of file