Mirror of https://github.com/andreytkachenko/LocalSTT.git · synced 2024-11-22 09:26:23 +04:00

Commit 920f3468b2 (parent ea2d551bed)
Added DeepSpeech RecognitionService

.gitignore (vendored) · 2 additions
@@ -9,3 +9,5 @@ build
 
 # JDT-specific (Eclipse Java Development Tools)
 .classpath
+
+.vscode
app/assets.xml · 31 additions (new file)
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project name="assets">
+    <property name="assets.list.name" value="assets.lst"/>
+    <property name="assets.dir" value="src/main/assets/sync"/>
+    <property name="assets.hash.type" value="md5"/>
+    <property name="assets.ctl.files"
+              value="**/*.${assets.hash.type},${assets.list.name}"/>
+
+    <fileset id="assets" dir="${assets.dir}" excludes="${assets.ctl.files}"/>
+
+    <target name="clean_assets">
+        <delete>
+            <fileset dir="${assets.dir}" includes="${assets.ctl.files}"/>
+        </delete>
+    </target>
+
+    <target name="list">
+        <pathconvert
+            dirsep="/" pathsep="${line.separator}"
+            refid="assets" property="asset.list">
+            <map from="${basedir}/${assets.dir}/" to=""/>
+        </pathconvert>
+        <echo message="${asset.list}" file="${assets.dir}/${assets.list.name}"/>
+    </target>
+
+    <target name="checksum">
+        <checksum algorithm="${assets.hash.type}">
+            <fileset refid="assets"/>
+        </checksum>
+    </target>
+</project>
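Editor's note: the Ant targets above emit assets.lst (one relative path per asset) plus one .md5 file per asset; org.kaldi.Assets.syncAssets(), used by the new service further down, presumably compares those checksums to decide which assets to re-extract on the device. A minimal sketch of that idea using only the JDK — the helper names are hypothetical, not the actual Assets implementation:

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.MessageDigest;

final class AssetSyncSketch {
    // Hex-encoded MD5 of a stream, matching Ant's <checksum algorithm="md5"> output.
    static String md5(InputStream in) throws Exception {
        MessageDigest md = MessageDigest.getInstance("MD5");
        byte[] buf = new byte[8192];
        for (int n; (n = in.read(buf)) > 0; ) {
            md.update(buf, 0, n);
        }
        StringBuilder sb = new StringBuilder();
        for (byte b : md.digest()) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }

    // An asset listed in assets.lst needs re-extraction if it is missing on
    // disk or its checksum no longer matches the recorded .md5 value.
    static boolean needsSync(Path extracted, String expectedMd5) throws Exception {
        if (!Files.exists(extracted)) {
            return true;
        }
        try (InputStream in = Files.newInputStream(extracted)) {
            return !md5(in).equals(expectedMd5);
        }
    }
}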
app/build.gradle

@@ -5,6 +5,9 @@ repositories {
     maven {
         url "https://dl.bintray.com/alphacep/vosk"
    }
+    maven {
+        url "https://jitpack.io"
+    }
 }

 android {
@@ -31,4 +34,10 @@ dependencies {
     implementation 'com.alphacep:vosk-android:0.3.15'
     implementation 'androidx.appcompat:appcompat:1.2.0'
     implementation 'com.google.code.gson:gson:2.8.6'
+    implementation 'org.mozilla.deepspeech:libdeepspeech:0.8.2'
+    implementation 'com.github.gkonovalov:android-vad:1.0.0'
 }
+
+ant.importBuild 'assets.xml'
+preBuild.dependsOn(list, checksum)
+clean.dependsOn(clean_assets)
app/src/main/AndroidManifest.xml

@@ -27,5 +27,23 @@
                 android:name="android.speech"
                 android:resource="@xml/recognition_service" />
         </service>
+
+        <service
+            android:name=".DeepSpeechRecognitionService"
+            android:icon="@drawable/ic_service_trigger"
+            android:label="@string/deepspeech_recognition_service"
+            android:permission="android.permission.RECORD_AUDIO">
+            <intent-filter>
+
+                <!-- The constant value is defined at RecognitionService.SERVICE_INTERFACE. -->
+                <action android:name="android.speech.RecognitionService" />
+
+                <category android:name="android.intent.category.DEFAULT" />
+            </intent-filter>
+
+            <meta-data
+                android:name="android.speech"
+                android:resource="@xml/recognition_service" />
+        </service>
     </application>
 </manifest>
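With the service declared above, other apps can target it through the standard android.speech client API. A rough client-side sketch — the package name is assumed from the Java sources below (cat.oreilly.localstt), and the calling app still needs the RECORD_AUDIO permission itself:

import android.content.ComponentName;
import android.content.Context;
import android.content.Intent;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;

final class ClientSketch {
    // Bind to the DeepSpeech recognizer explicitly rather than the system default.
    static SpeechRecognizer create(Context ctx) {
        return SpeechRecognizer.createSpeechRecognizer(ctx,
                new ComponentName("cat.oreilly.localstt",
                        "cat.oreilly.localstt.DeepSpeechRecognitionService"));
    }

    static void start(SpeechRecognizer recognizer) {
        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        // Ask for partial hypotheses; the VoskRecognitionService changes at the
        // end of this commit route them through as well.
        intent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
        recognizer.startListening(intent);
    }
}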
app/src/main/assets/sync/assets.lst · 26 additions (new file)

@@ -0,0 +1,26 @@
+deepspeech-catala/kenlm.scorer
+deepspeech-catala/model.tflite
+vosk-catala/README
+vosk-catala/am/final.mdl
+vosk-catala/am/tree
+vosk-catala/conf/mfcc.conf
+vosk-catala/conf/model.conf
+vosk-catala/graph/Gr.fst
+vosk-catala/graph/HCLr.fst
+vosk-catala/graph/disambig_tid.int
+vosk-catala/graph/phones/align_lexicon.int
+vosk-catala/graph/phones/align_lexicon.txt
+vosk-catala/graph/phones/disambig.int
+vosk-catala/graph/phones/disambig.txt
+vosk-catala/graph/phones/optional_silence.csl
+vosk-catala/graph/phones/optional_silence.int
+vosk-catala/graph/phones/optional_silence.txt
+vosk-catala/graph/phones/silence.csl
+vosk-catala/graph/phones/word_boundary.int
+vosk-catala/graph/phones/word_boundary.txt
+vosk-catala/ivector/final.dubm
+vosk-catala/ivector/final.ie
+vosk-catala/ivector/final.mat
+vosk-catala/ivector/global_cmvn.stats
+vosk-catala/ivector/online_cmvn.conf
+vosk-catala/ivector/splice.conf
app/src/main/assets/sync/deepspeech-catala/kenlm.scorer · BIN (new file)
Binary file not shown.

app/src/main/assets/sync/deepspeech-catala/kenlm.scorer.md5 · 1 addition (new file)
@@ -0,0 +1 @@
+d562825f02f2ba36cbd0a75a17e84e8d

app/src/main/assets/sync/deepspeech-catala/model.tflite · BIN (new file)
Binary file not shown.

app/src/main/assets/sync/deepspeech-catala/model.tflite.md5 · 1 addition (new file)
@@ -0,0 +1 @@
+735b1327dc3c00af256af64be33cbed3
app/src/main/assets/sync/vosk-catala/README.md5 · 1 addition (new file)

@@ -0,0 +1 @@
+f49442fa8c9e15bfbb6379c788b3104f
app/src/main/java/cat/oreilly/localstt/DeepSpeechRecognitionService.java · 176 additions (new file)

@@ -0,0 +1,176 @@
+package cat.oreilly.localstt;
+
+import android.content.Intent;
+import android.os.AsyncTask;
+import android.os.Bundle;
+import android.os.RemoteException;
+import android.speech.RecognitionService;
+import android.util.Log;
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder;
+
+import org.kaldi.Assets;
+import org.kaldi.RecognitionListener;
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
+
+import java.io.File;
+import java.util.Map;
+import java.util.ArrayList;
+import java.io.IOException;
+
+public class DeepSpeechRecognitionService extends RecognitionService implements RecognitionListener {
+    private final static String TAG = DeepSpeechRecognitionService.class.getSimpleName();
+    private DeepSpeechModel model;
+    private DeepSpeechService speechService;
+
+    private RecognitionService.Callback mCallback;
+
+    @Override
+    protected void onStartListening(Intent intent, Callback callback) {
+        mCallback = callback;
+        Log.i(TAG, "onStartListening");
+        runRecognizerSetup(intent);
+    }
+
+    @Override
+    protected void onCancel(Callback callback) {
+        Log.i(TAG, "onCancel");
+        results(new Bundle(), true);
+    }
+
+    @Override
+    protected void onStopListening(Callback callback) {
+        Log.i(TAG, "onStopListening");
+        results(new Bundle(), true);
+    }
+
+    private void runRecognizerSetup(final Intent intent) {
+        new AsyncTask<Void, Void, Exception>() {
+            @Override
+            protected Exception doInBackground(Void... params) {
+                try {
+                    Assets assets = new Assets(DeepSpeechRecognitionService.this);
+                    File assetDir = assets.syncAssets();
+
+                    model = new DeepSpeechModel(assetDir.toString() + "/deepspeech-catala/model.tflite");
+                    model.enableExternalScorer(assetDir.toString() + "/deepspeech-catala/kenlm.scorer");
+
+                    setupRecognizer();
+                } catch (IOException e) {
+                    return e;
+                }
+                return null;
+            }
+
+            @Override
+            protected void onPostExecute(Exception result) {
+                if (result != null) {
+                    Log.e(TAG, "Failed to init recognizer " + result);
+                    error(android.speech.SpeechRecognizer.ERROR_CLIENT);
+                } else {
+                    readyForSpeech(new Bundle());
+                    beginningOfSpeech();
+                }
+            }
+        }.execute();
+    }
+
+    @Override
+    public void onDestroy() {
+        super.onDestroy();
+
+        if (speechService != null) {
+            speechService.cancel();
+            speechService.shutdown();
+        }
+    }
+
+    private void setupRecognizer() throws IOException {
+        try {
+            Log.i(TAG, "Setting up recognizer");
+            speechService = new DeepSpeechService(this.model, 16000.0f); // assign the field (a shadowing local here would leave it null for onDestroy/results/error)
+            speechService.addListener(this);
+            speechService.startListening();
+        } catch (IOException e) {
+            Log.e(TAG, e.getMessage());
+        }
+    }
+
+    private void readyForSpeech(Bundle bundle) {
+        try {
+            mCallback.readyForSpeech(bundle);
+        } catch (RemoteException e) {
+            // empty
+        }
+    }
+
+    private void results(Bundle bundle, boolean isFinal) {
+        if (speechService != null) {
+            speechService.cancel();
+        }
+        try {
+            if (isFinal) {
+                mCallback.results(bundle);
+            } else {
+                mCallback.partialResults(bundle);
+            }
+        } catch (RemoteException e) {
+            // empty
+        }
+    }
+
+    private Bundle createResultsBundle(String hypothesis) {
+        ArrayList<String> hypotheses = new ArrayList<>();
+        hypotheses.add(hypothesis);
+        Bundle bundle = new Bundle();
+        bundle.putStringArrayList(android.speech.SpeechRecognizer.RESULTS_RECOGNITION, hypotheses);
+        return bundle;
+    }
+
+    private void beginningOfSpeech() {
+        try {
+            mCallback.beginningOfSpeech();
+        } catch (RemoteException e) {
+            // empty
+        }
+    }
+
+    private void error(int errorCode) {
+        speechService.cancel();
+        try {
+            mCallback.error(errorCode);
+        } catch (RemoteException e) {
+            // empty
+        }
+    }
+
+    @Override
+    public void onResult(String hypothesis) {
+        if (hypothesis != null) {
+            Log.i(TAG, hypothesis);
+            results(createResultsBundle(hypothesis), true);
+        }
+    }
+
+    @Override
+    public void onPartialResult(String hypothesis) {
+        if (hypothesis != null) {
+            Log.i(TAG, hypothesis);
+            results(createResultsBundle(hypothesis), false);
+        }
+    }
+
+    @Override
+    public void onError(Exception e) {
+        Log.e(TAG, e.getMessage());
+        error(android.speech.SpeechRecognizer.ERROR_CLIENT);
+    }
+
+    @Override
+    public void onTimeout() {
+        speechService.cancel();
+        speechService.startListening();
+    }
+}
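A side note on the init path above: AsyncTask is deprecated on current Android releases, and the same background-init-then-callback shape can be had with a plain executor. A minimal sketch, with error handling elided and names hypothetical:

import android.os.Handler;
import android.os.Looper;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

final class InitSketch {
    private final ExecutorService executor = Executors.newSingleThreadExecutor();
    private final Handler mainHandler = new Handler(Looper.getMainLooper());

    // Mirrors the AsyncTask above: heavy setup off the main thread,
    // then the ready callbacks posted back onto it.
    void runSetup(Runnable backgroundInit, Runnable onReady) {
        executor.execute(() -> {
            backgroundInit.run();        // e.g. syncAssets() + model loading
            mainHandler.post(onReady);   // e.g. readyForSpeech + beginningOfSpeech
        });
    }
}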
app/src/main/java/cat/oreilly/localstt/DeepSpeechService.java · 313 additions (new file)

@@ -0,0 +1,313 @@
+// Copyright 2019 Alpha Cephei Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cat.oreilly.localstt;
+
+import static java.lang.String.format;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+
+import android.media.AudioFormat;
+import android.media.AudioRecord;
+import android.media.MediaRecorder.AudioSource;
+import android.os.Handler;
+import android.os.Looper;
+import android.util.Log;
+
+import org.kaldi.RecognitionListener;
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
+import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState;
+
+import com.konovalov.vad.Vad;
+import com.konovalov.vad.VadConfig;
+
+/**
+ * Service that records audio in a thread, passes it to a recognizer and emits
+ * recognition results. Recognition events are passed to a client using
+ * {@link RecognitionListener}.
+ */
+public class DeepSpeechService {
+
+    protected static final String TAG = DeepSpeechService.class.getSimpleName();
+
+    private final DeepSpeechModel model;
+    private final DeepSpeechStreamingState streamContext;
+    private final Vad vad;
+
+    private final int sampleRate;
+    private final static float BUFFER_SIZE_SECONDS = 0.4f;
+    private int bufferSize;
+    private final AudioRecord recorder;
+
+    private Thread recognizerThread;
+
+    private final Handler mainHandler = new Handler(Looper.getMainLooper());
+
+    private final Collection<RecognitionListener> listeners = new HashSet<RecognitionListener>();
+
+    /**
+     * Creates the speech service. The service holds the AudioRecord object, so
+     * you need to call {@link #shutdown()} in order to properly finalize it.
+     *
+     * @throws IOException thrown if the audio recorder cannot be created for
+     *                     some reason.
+     */
+    public DeepSpeechService(DeepSpeechModel model, float sampleRate) throws IOException {
+        this.model = model;
+        this.sampleRate = (int) sampleRate;
+        this.streamContext = model.createStream();
+
+        vad = new Vad(VadConfig.newBuilder().setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
+                .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480).setMode(VadConfig.Mode.NORMAL).build());
+
+        bufferSize = Math.round(this.sampleRate * BUFFER_SIZE_SECONDS);
+        recorder = new AudioRecord(AudioSource.VOICE_RECOGNITION, this.sampleRate, AudioFormat.CHANNEL_IN_MONO,
+                AudioFormat.ENCODING_PCM_16BIT, bufferSize * 2);
+
+        if (recorder.getState() == AudioRecord.STATE_UNINITIALIZED) {
+            recorder.release();
+            throw new IOException("Failed to initialize recorder. Microphone might be already in use.");
+        }
+        Log.i(TAG, "DeepSpeechService initialized");
+    }
+
+    /**
+     * Adds listener.
+     */
+    public void addListener(RecognitionListener listener) {
+        synchronized (listeners) {
+            listeners.add(listener);
+        }
+    }
+
+    /**
+     * Removes listener.
+     */
+    public void removeListener(RecognitionListener listener) {
+        synchronized (listeners) {
+            listeners.remove(listener);
+        }
+    }
+
+    /**
+     * Starts recognition. Does nothing if recognition is active.
+     *
+     * @return true if recognition was actually started
+     */
+    public boolean startListening() {
+        if (null != recognizerThread)
+            return false;
+
+        recognizerThread = new RecognizerThread();
+        recognizerThread.start();
+        return true;
+    }
+
+    /**
+     * Starts recognition. After the specified timeout listening stops and
+     * endOfSpeech signals that. Does nothing if recognition is active.
+     *
+     * @param timeout timeout in milliseconds to listen.
+     *
+     * @return true if recognition was actually started
+     */
+    public boolean startListening(int timeout) {
+        if (null != recognizerThread)
+            return false;
+
+        recognizerThread = new RecognizerThread(timeout);
+        recognizerThread.start();
+        return true;
+    }
+
+    private boolean stopRecognizerThread() {
+        if (null == recognizerThread)
+            return false;
+
+        try {
+            recognizerThread.interrupt();
+            recognizerThread.join();
+        } catch (InterruptedException e) {
+            // Restore the interrupted status.
+            Thread.currentThread().interrupt();
+        }
+
+        recognizerThread = null;
+        return true;
+    }
+
+    /**
+     * Stops recognition. All listeners should receive a final result if there
+     * is any. Does nothing if recognition is not active.
+     *
+     * @return true if recognition was actually stopped
+     */
+    public boolean stop() {
+        boolean result = stopRecognizerThread();
+        if (result) {
+            mainHandler.post(new ResultEvent(model.finishStream(streamContext), true));
+        }
+        return result;
+    }
+
+    /**
+     * Cancels recognition. Listeners do not receive a final result. Does
+     * nothing if recognition is not active.
+     *
+     * @return true if recognition was actually canceled
+     */
+    public boolean cancel() {
+        Log.d(TAG, "#cancel");
+        boolean result = stopRecognizerThread();
+        this.model.freeModel(); // Frees the native model; the service cannot be restarted afterwards
+        return result;
+    }
+
+    /**
+     * Shuts down the recognizer and releases the recorder.
+     */
+    public void shutdown() {
+        Log.d(TAG, "#shutdown");
+        this.model.freeModel();
+        recorder.release();
+    }
+
+    private final class RecognizerThread extends Thread {
+
+        private int remainingSamples;
+        private int timeoutSamples;
+        private final static int NO_TIMEOUT = -1;
+
+        public RecognizerThread(int timeout) {
+            if (timeout != NO_TIMEOUT)
+                this.timeoutSamples = timeout * sampleRate / 1000;
+            else
+                this.timeoutSamples = NO_TIMEOUT;
+            this.remainingSamples = this.timeoutSamples;
+        }
+
+        public RecognizerThread() {
+            this(NO_TIMEOUT);
+        }
+
+        @Override
+        public void run() {
+            Log.i(TAG, "Start Recording...");
+
+            vad.start();
+            recorder.startRecording();
+            if (recorder.getRecordingState() == AudioRecord.RECORDSTATE_STOPPED) {
+                recorder.stop();
+                IOException ioe = new IOException("Failed to start recording. Microphone might be already in use.");
+                mainHandler.post(new OnErrorEvent(ioe));
+                return;
+            }
+
+            short[] buffer = new short[bufferSize];
+            int nread = recorder.read(buffer, 0, buffer.length);
+            boolean speechDetected = false;
+            boolean feedAudio = true;
+
+            while (!interrupted() && ((timeoutSamples == NO_TIMEOUT) || (remainingSamples > 0)) && feedAudio) {
+
+                if (nread < 0) {
+                    throw new RuntimeException("error reading audio buffer");
+                } else {
+                    Log.i(TAG, "Feeding audio");
+                    model.feedAudioContent(streamContext, buffer, nread);
+                    boolean isSpeech = vad.isSpeech(buffer);
+                    if (isSpeech) {
+                        Log.d(TAG, "Speech detected");
+                        speechDetected = true;
+                    }
+                    if (speechDetected && !isSpeech) {
+                        Log.d(TAG, "Silence detected");
+                        feedAudio = false;
+                    }
+
+                }
+
+                if (timeoutSamples != NO_TIMEOUT) {
+                    remainingSamples = remainingSamples - nread;
+                }
+                nread = recorder.read(buffer, 0, buffer.length);
+            }
+
+            mainHandler.post(new ResultEvent(model.finishStream(streamContext), true));
+
+            recorder.stop();
+            vad.stop();
+
+            // Remove all pending notifications.
+            mainHandler.removeCallbacksAndMessages(null);
+
+            // If we hit the timeout, signal that speech ended.
+            if (timeoutSamples != NO_TIMEOUT && remainingSamples <= 0) {
+                mainHandler.post(new TimeoutEvent());
+            }
+        }
+    }
+
+    private abstract class RecognitionEvent implements Runnable {
+        public void run() {
+            RecognitionListener[] emptyArray = new RecognitionListener[0];
+            for (RecognitionListener listener : listeners.toArray(emptyArray))
+                execute(listener);
+        }
+
+        protected abstract void execute(RecognitionListener listener);
+    }
+
+    private class ResultEvent extends RecognitionEvent {
+        protected final String hypothesis;
+        private final boolean finalResult;
+
+        ResultEvent(String hypothesis, boolean finalResult) {
+            this.hypothesis = hypothesis;
+            this.finalResult = finalResult;
+        }
+
+        @Override
+        protected void execute(RecognitionListener listener) {
+            if (finalResult)
+                listener.onResult(hypothesis);
+            else
+                listener.onPartialResult(hypothesis);
+        }
+    }
+
+    private class OnErrorEvent extends RecognitionEvent {
+        private final Exception exception;
+
+        OnErrorEvent(Exception exception) {
+            this.exception = exception;
+        }
+
+        @Override
+        protected void execute(RecognitionListener listener) {
+            listener.onError(exception);
+        }
+    }
+
+    private class TimeoutEvent extends RecognitionEvent {
+        @Override
+        protected void execute(RecognitionListener listener) {
+            listener.onTimeout();
+        }
+    }
+}
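Taken together, a direct (non-RecognitionService) use of the class above would look roughly like this; the model paths mirror the assets synced earlier in the commit, and error handling is elided:

import java.io.IOException;
import org.kaldi.RecognitionListener;
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;

final class DirectUseSketch {
    // assetDir is wherever org.kaldi.Assets.syncAssets() unpacked the models.
    static DeepSpeechService start(String assetDir, RecognitionListener listener) throws IOException {
        DeepSpeechModel model = new DeepSpeechModel(assetDir + "/deepspeech-catala/model.tflite");
        model.enableExternalScorer(assetDir + "/deepspeech-catala/kenlm.scorer");

        DeepSpeechService service = new DeepSpeechService(model, 16000.0f);
        service.addListener(listener);  // receives onPartialResult/onResult on the main thread
        service.startListening();       // records until the VAD hears speech followed by silence
        return service;
    }
}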
app/src/main/java/cat/oreilly/localstt/VoskRecognitionService.java

@@ -21,7 +21,7 @@ import java.util.ArrayList;
 import java.io.IOException;
 
 public class VoskRecognitionService extends RecognitionService implements RecognitionListener {
-    private final static String TAG = VoskRecognitionService.class.getName();
+    private final static String TAG = VoskRecognitionService.class.getSimpleName();
     private KaldiRecognizer recognizer;
     private SpeechService speechService;
     private Model model;
@@ -38,13 +38,13 @@ public class VoskRecognitionService extends RecognitionService implements RecognitionListener {
     @Override
     protected void onCancel(Callback callback) {
         Log.i(TAG, "onCancel");
-        results(new Bundle());
+        results(new Bundle(), true);
     }
 
     @Override
     protected void onStopListening(Callback callback) {
         Log.i(TAG, "onStopListening");
-        results(new Bundle());
+        results(new Bundle(), true);
     }
 
     private void runRecognizerSetup(final Intent intent) {
@@ -108,10 +108,14 @@ public class VoskRecognitionService extends RecognitionService implements RecognitionListener {
         }
     }
 
-    private void results(Bundle bundle) {
-        speechService.cancel();
+    private void results(Bundle bundle, boolean isFinal) {
         try {
+            if (isFinal) {
+                speechService.cancel();
             mCallback.results(bundle);
+            } else {
+                mCallback.partialResults(bundle);
+            }
         } catch (RemoteException e) {
             // empty
         }
@@ -149,7 +153,7 @@ public class VoskRecognitionService extends RecognitionService implements RecognitionListener {
             Gson gson = new Gson();
             Map<String, String> map = gson.fromJson(hypothesis, Map.class);
             String text = map.get("text");
-            results(createResultsBundle(text));
+            results(createResultsBundle(text), true);
         }
     }
 
@@ -160,7 +164,7 @@ public class VoskRecognitionService extends RecognitionService implements RecognitionListener {
             Gson gson = new Gson();
             Map<String, String> map = gson.fromJson(hypothesis, Map.class);
             String text = map.get("partial");
-            results(createResultsBundle(text));
+            results(createResultsBundle(text), false);
         }
     }
 
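The net effect of the isFinal plumbing above: partial hypotheses now reach clients through partialResults instead of terminating recognition. On the client side the difference is observed roughly like this — a sketch against the standard android.speech API, not code from this repo:

import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.SpeechRecognizer;
import java.util.ArrayList;

// Only the two result callbacks matter for this change; the rest are stubs.
public class ResultsListenerSketch implements RecognitionListener {
    @Override
    public void onPartialResults(Bundle bundle) {
        // Interim hypotheses; recognition keeps running.
        ArrayList<String> texts =
                bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
        // e.g. update the UI with texts.get(0)
    }

    @Override
    public void onResults(Bundle bundle) {
        // Final hypothesis; the service has stopped listening.
        ArrayList<String> texts =
                bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
    }

    // Remaining RecognitionListener methods left empty for brevity.
    @Override public void onReadyForSpeech(Bundle params) {}
    @Override public void onBeginningOfSpeech() {}
    @Override public void onRmsChanged(float rmsdB) {}
    @Override public void onBufferReceived(byte[] buffer) {}
    @Override public void onEndOfSpeech() {}
    @Override public void onError(int error) {}
    @Override public void onEvent(int eventType, Bundle params) {}
}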