Remove DeepSpeechService and replace the Catalan model with a Russian one

This commit is contained in:
Andrey Tkachenko 2021-11-22 23:41:46 +04:00
parent e90a92e366
commit f3dadb94d6
83 changed files with 133 additions and 189656 deletions

View File

@ -12,6 +12,9 @@ repositories {
}
android {
lintOptions {
abortOnError false
}
compileSdkVersion 30
defaultConfig {
applicationId "cat.oreilly.localstt"
@ -36,10 +39,9 @@ dependencies {
implementation 'androidx.appcompat:appcompat:1.2.0'
implementation 'net.java.dev.jna:jna:5.8.0@aar'
implementation 'com.google.code.gson:gson:2.8.7'
implementation 'org.mozilla.deepspeech:libdeepspeech:0.8.2'
implementation 'com.github.gkonovalov:android-vad:1.0.0'
}
ant.importBuild 'assets.xml'
preBuild.dependsOn(list, checksum)
clean.dependsOn(clean_assets)
clean.dependsOn(clean_assets)

View File

@ -102,23 +102,5 @@
android:name="android.speech"
android:resource="@xml/recognition_service" />
</service>
<service
android:name=".DeepSpeechRecognitionService"
android:icon="@drawable/ic_service_trigger"
android:label="@string/deepspeech_recognition_service"
android:permission="android.permission.RECORD_AUDIO">
<intent-filter>
<!-- The constant value is defined at RecognitionService.SERVICE_INTERFACE. -->
<action android:name="android.speech.RecognitionService" />
<category android:name="android.intent.category.DEFAULT" />
</intent-filter>
<meta-data
android:name="android.speech"
android:resource="@xml/recognition_service" />
</service>
</application>
</manifest>
</manifest>

View File

@ -1,26 +1,14 @@
deepspeech-catala/kenlm.scorer
deepspeech-catala/model.tflite
vosk-catala/README
vosk-catala/am/final.mdl
vosk-catala/am/tree
vosk-catala/conf/mfcc.conf
vosk-catala/conf/model.conf
vosk-catala/graph/Gr.fst
vosk-catala/graph/HCLr.fst
vosk-catala/graph/disambig_tid.int
vosk-catala/graph/phones/align_lexicon.int
vosk-catala/graph/phones/align_lexicon.txt
vosk-catala/graph/phones/disambig.int
vosk-catala/graph/phones/disambig.txt
vosk-catala/graph/phones/optional_silence.csl
vosk-catala/graph/phones/optional_silence.int
vosk-catala/graph/phones/optional_silence.txt
vosk-catala/graph/phones/silence.csl
vosk-catala/graph/phones/word_boundary.int
vosk-catala/graph/phones/word_boundary.txt
vosk-catala/ivector/final.dubm
vosk-catala/ivector/final.ie
vosk-catala/ivector/final.mat
vosk-catala/ivector/global_cmvn.stats
vosk-catala/ivector/online_cmvn.conf
vosk-catala/ivector/splice.conf
vosk-model-small-ru-0.22/README
vosk-model-small-ru-0.22/am/final.mdl
vosk-model-small-ru-0.22/conf/mfcc.conf
vosk-model-small-ru-0.22/conf/model.conf
vosk-model-small-ru-0.22/graph/Gr.fst
vosk-model-small-ru-0.22/graph/HCLr.fst
vosk-model-small-ru-0.22/graph/disambig_tid.int
vosk-model-small-ru-0.22/graph/phones/word_boundary.int
vosk-model-small-ru-0.22/ivector/final.dubm
vosk-model-small-ru-0.22/ivector/final.ie
vosk-model-small-ru-0.22/ivector/final.mat
vosk-model-small-ru-0.22/ivector/global_cmvn.stats
vosk-model-small-ru-0.22/ivector/online_cmvn.conf
vosk-model-small-ru-0.22/ivector/splice.conf

View File

@ -1 +0,0 @@
d562825f02f2ba36cbd0a75a17e84e8d

View File

@ -1 +0,0 @@
735b1327dc3c00af256af64be33cbed3

Binary file not shown.

View File

@ -1 +0,0 @@
Catalan model for android

View File

@ -1 +0,0 @@
f49442fa8c9e15bfbb6379c788b3104f

View File

@ -1 +0,0 @@
0b98b3c582e789693996799f0f434008

View File

@ -1 +0,0 @@
baeff4e70c13f9a61e1987f4abc2f827

View File

@ -1,5 +0,0 @@
--use-energy=false
--num-mel-bins=20
--num-ceps=20
--low-freq=20
--high-freq=7600

View File

@ -1 +0,0 @@
e44d88fe84f60e0926bba88e46e556fd

View File

@ -1 +0,0 @@
702de9c65b1d27a709fb185046afc07f

View File

@ -1 +0,0 @@
494db51b272c42cb251a9831de70b4ee

View File

@ -1,13 +0,0 @@
7935
7936
7937
7938
7939
7940
7941
7942
7943
7944
7945
7946
7947

View File

@ -1 +0,0 @@
974869565e76c84c27f43558398e9531

View File

@ -1 +0,0 @@
d0cb5df30f5d30aa107468f200e793fc

View File

@ -1 +0,0 @@
0aeef7cac1b1be79377d4dc853ad0ad4

View File

@ -1,13 +0,0 @@
163
164
165
166
167
168
169
170
171
172
173
174
175

View File

@ -1 +0,0 @@
011b6746d416fdfb6e1e39732c7e174c

View File

@ -1,13 +0,0 @@
#0
#1
#2
#3
#4
#5
#6
#7
#8
#9
#10
#11
#12

View File

@ -1 +0,0 @@
b7494f81971a69678d4bab9994b72bc6

View File

@ -1 +0,0 @@
b026324c6904b2a9cb4b88d6d61c81d1

View File

@ -1 +0,0 @@
b026324c6904b2a9cb4b88d6d61c81d1

View File

@ -1 +0,0 @@
afb1c6c9240586b23a567660a6a3e0b3

View File

@ -1 +0,0 @@
1:2:3:4:5:6:7:8:9:10

View File

@ -1 +0,0 @@
2b78ed8a7acaa3f55a698ae07520bd7d

View File

@ -1 +0,0 @@
e4cd9b8790e4aa28441b87668c9ec025

View File

@ -1,162 +0,0 @@
SIL nonword
SIL_B begin
SIL_E end
SIL_I internal
SIL_S singleton
GBG nonword
GBG_B begin
GBG_E end
GBG_I internal
GBG_S singleton
a_B begin
a_E end
a_I internal
a_S singleton
ae_B begin
ae_E end
ae_I internal
ae_S singleton
ao_B begin
ao_E end
ao_I internal
ao_S singleton
b_B begin
b_E end
b_I internal
b_S singleton
bv_B begin
bv_E end
bv_I internal
bv_S singleton
c_B begin
c_E end
c_I internal
c_S singleton
ch_B begin
ch_E end
ch_I internal
ch_S singleton
d_B begin
d_E end
d_I internal
d_S singleton
dh_B begin
dh_E end
dh_I internal
dh_S singleton
e_B begin
e_E end
e_I internal
e_S singleton
ea_B begin
ea_E end
ea_I internal
ea_S singleton
ee_B begin
ee_E end
ee_I internal
ee_S singleton
f_B begin
f_E end
f_I internal
f_S singleton
g_B begin
g_E end
g_I internal
g_S singleton
gh_B begin
gh_E end
gh_I internal
gh_S singleton
i_B begin
i_E end
i_I internal
i_S singleton
j_B begin
j_E end
j_I internal
j_S singleton
k_B begin
k_E end
k_I internal
k_S singleton
l_B begin
l_E end
l_I internal
l_S singleton
ly_B begin
ly_E end
ly_I internal
ly_S singleton
m_B begin
m_E end
m_I internal
m_S singleton
n_B begin
n_E end
n_I internal
n_S singleton
ng_B begin
ng_E end
ng_I internal
ng_S singleton
ny_B begin
ny_E end
ny_I internal
ny_S singleton
o_B begin
o_E end
o_I internal
o_S singleton
p_B begin
p_E end
p_I internal
p_S singleton
r_B begin
r_E end
r_I internal
r_S singleton
rr_B begin
rr_E end
rr_I internal
rr_S singleton
s_B begin
s_E end
s_I internal
s_S singleton
sh_B begin
sh_E end
sh_I internal
sh_S singleton
t_B begin
t_E end
t_I internal
t_S singleton
ts_B begin
ts_E end
ts_I internal
ts_S singleton
u_B begin
u_E end
u_I internal
u_S singleton
uo_B begin
uo_E end
uo_I internal
uo_S singleton
v_B begin
v_E end
v_I internal
v_S singleton
w_B begin
w_E end
w_I internal
w_S singleton
y_B begin
y_E end
y_I internal
y_S singleton
z_B begin
z_E end
z_I internal
z_S singleton

View File

@ -1 +0,0 @@
9cdad8b3dbf2b6a314606de415aa2675

View File

@ -1 +0,0 @@
ec500c106381011668d6cd54bcb7188d

View File

@ -1 +0,0 @@
f200fec86080b21ae28eaac316217121

View File

@ -1 +0,0 @@
56f0cffbb086ad6e1d6d113b45699e1c

View File

@ -1,3 +0,0 @@
[
2.474308e+10 -1.161169e+09 -1.528423e+09 6.994948e+08 -2.601561e+09 -2.680039e+09 -3.566892e+09 -1.76595e+09 -1.415956e+09 1.758065e+08 -6.921373e+08 -1.202024e+08 -9.632083e+08 -9112779 -7.154146e+08 -3.029829e+08 -3.775555e+08 -7.510038e+07 -1.596595e+08 -2.124697e+07 3.384748e+08
1.910946e+12 1.188498e+11 8.577389e+10 1.048029e+11 1.063933e+11 1.20909e+11 1.355464e+11 9.016697e+10 9.036823e+10 7.144695e+10 6.582649e+10 5.484431e+10 4.500184e+10 3.308144e+10 2.427049e+10 1.593414e+10 1.093878e+10 6.789256e+09 4.383741e+09 2.393284e+09 0 ]

View File

@ -1 +0,0 @@
d85f5d6b22be9ec2cdce71ab88f2b048

View File

@ -0,0 +1,8 @@
Small Russian model for Vosk (Android, RPi, other small devices)
%WER 22.71 [ 9092 / 40042, 1124 ins, 1536 del, 6432 sub ] exp/chain_a/tdnn/decode_test_audiobooks_look_fast/wer_10_0.0
%WER 11.79 [ 5940 / 50394, 894 ins, 832 del, 4214 sub ] exp/chain_a/tdnn/decode_test_golos_crowd_look_fast/wer_11_0.0
%WER 21.34 [ 1789 / 8382, 173 ins, 440 del, 1176 sub ] exp/chain_a/tdnn/decode_test_golos_farfield_look_fast/wer_10_0.0
%WER 29.89 [ 5579 / 18666, 476 ins, 1550 del, 3553 sub ] exp/chain_a/tdnn/decode_test_sova_devices_look_fast/wer_10_0.0
%WER 31.97 [ 13588 / 42496, 1013 ins, 3640 del, 8935 sub ] exp/chain_a/tdnn/decode_test_youtube_look_fast/wer_9_0.0

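For context, each %WER line above reports word error rate as (insertions + deletions + substitutions) divided by the number of reference words. For the audiobooks test set that is (1124 + 1536 + 6432) / 40042 = 9092 / 40042 ≈ 22.71%.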
View File

@ -0,0 +1 @@
e9be39aa30bfad71b8323b5b2fa91318

View File

@ -0,0 +1 @@
f8f35f33ca26e6315d7006a174593346

View File

@ -0,0 +1,7 @@
--sample-frequency=16000
--use-energy=false
--num-mel-bins=40
--num-ceps=40
--low-freq=20
--high-freq=7600
--allow-downsample=true

View File

@ -0,0 +1 @@
84a568eda381f44519975996aa86d8fe

View File

@ -0,0 +1 @@
cd7ae127a696ec4b1ac133702be98430

View File

@ -0,0 +1 @@
889b2452891887f0629f1828b23da682

View File

@ -0,0 +1,5 @@
9855
9856
9857
9858
9859

View File

@ -0,0 +1 @@
bb3be49f6b0acf9eac46238616a34837

View File

@ -160,3 +160,43 @@
160 end
161 internal
162 singleton
163 begin
164 end
165 internal
166 singleton
167 begin
168 end
169 internal
170 singleton
171 begin
172 end
173 internal
174 singleton
175 begin
176 end
177 internal
178 singleton
179 begin
180 end
181 internal
182 singleton
183 begin
184 end
185 internal
186 singleton
187 begin
188 end
189 internal
190 singleton
191 begin
192 end
193 internal
194 singleton
195 begin
196 end
197 internal
198 singleton
199 begin
200 end
201 internal
202 singleton

View File

@ -0,0 +1 @@
4472ca1d33a2efde57c8460501aba308

View File

@ -0,0 +1 @@
1ef0f4a8dbae2bcdecf064573f758f22

View File

@ -0,0 +1 @@
543d1b8f67dfed41de3def910fb384cc

View File

@ -0,0 +1 @@
10372e791b3eda1966f35fc615e81408

View File

@ -0,0 +1,3 @@
[
8.330133e+10 -4.600894e+09 -2.394861e+09 2.127165e+09 -9.355799e+09 -9.378007e+09 -1.302309e+10 -9.460417e+09 -9.260028e+09 -4.58608e+09 -5.287111e+09 -1.972033e+09 -6.090821e+09 -1.336419e+09 -5.214569e+09 -2.321841e+09 -3.889789e+09 -1.060202e+09 -2.065653e+09 -2.684904e+08 -7.4007e+08 -4587485 -1.315853e+08 -8597548 2.599227e+08 7.408538e+07 5.505751e+08 -1.161846e+07 5.138103e+08 -1.828159e+08 4.251498e+08 -2.901496e+07 6.469246e+08 2.489644e+08 6.289868e+08 2.490337e+08 3.38884e+08 -1.788837e+08 -2.536016e+08 -1.591728e+08 8.388078e+08
8.660994e+12 4.637783e+11 3.366465e+11 4.467952e+11 5.094759e+11 5.179353e+11 6.145244e+11 4.970492e+11 5.014889e+11 4.027981e+11 3.937422e+11 3.602942e+11 3.162307e+11 2.40687e+11 2.267307e+11 1.563018e+11 1.341105e+11 8.535779e+10 6.12398e+10 3.207774e+10 1.737325e+10 5.704115e+09 7.980573e+08 2.168777e+08 2.763352e+09 6.859176e+09 1.214891e+10 1.604714e+10 2.005353e+10 2.240119e+10 2.366007e+10 2.300222e+10 2.406182e+10 2.354406e+10 2.098983e+10 1.619869e+10 1.491578e+10 1.224871e+10 9.502735e+09 6.517532e+09 0 ]

View File

@ -0,0 +1 @@
df436bf906c4b0dc3716d2b5142a5c77

View File

@ -1,194 +0,0 @@
// Copyright 2020 Ciaran O'Reilly
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package cat.oreilly.localstt;
import android.content.Intent;
import android.os.Bundle;
import android.os.RemoteException;
import android.os.Handler;
import android.os.Looper;
import android.speech.RecognitionService;
import android.util.Log;
import org.vosk.android.RecognitionListener;
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
import java.io.File;
import java.util.ArrayList;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.io.IOException;
public class DeepSpeechRecognitionService extends RecognitionService implements RecognitionListener {
private final static String TAG = DeepSpeechRecognitionService.class.getSimpleName();
private final Handler handler = new Handler(Looper.getMainLooper());
private final Executor executor = Executors.newSingleThreadExecutor();
private DeepSpeechModel model;
private DeepSpeechService speechService;
private RecognitionService.Callback mCallback;
@Override
protected void onStartListening(Intent intent, Callback callback) {
mCallback = callback;
Log.i(TAG, "onStartListening");
runRecognizerSetup();
}
@Override
protected void onCancel(Callback callback) {
Log.i(TAG, "onCancel");
results(new Bundle(), true);
}
@Override
protected void onStopListening(Callback callback) {
Log.i(TAG, "onStopListening");
results(new Bundle(), true);
}
private void runRecognizerSetup() {
executor.execute(new Runnable() {
@Override
public void run() {
try {
Assets assets = new Assets(DeepSpeechRecognitionService.this);
File assetDir = assets.syncAssets();
model = new DeepSpeechModel(assetDir.toString() + "/deepspeech-catala/model.tflite");
model.enableExternalScorer(assetDir.toString() + "/deepspeech-catala/kenlm.scorer");
setupRecognizer();
} catch (Exception e) {
Log.e(TAG, "Failed to init recognizer ");
error(android.speech.SpeechRecognizer.ERROR_CLIENT);
}
handler.post(new Runnable() {
@Override
public void run() {
readyForSpeech(new Bundle());
beginningOfSpeech();
}
});
}
});
}
@Override
public void onDestroy() {
super.onDestroy();
if (speechService != null) {
speechService.cancel();
speechService.shutdown();
}
}
private void setupRecognizer() throws IOException {
try {
Log.i(TAG, "Setting up recognizer");
DeepSpeechService speechService = new DeepSpeechService(this.model, 16000.0f);
speechService.addListener(this);
speechService.startListening();
} catch (IOException e) {
Log.e(TAG, e.getMessage());
}
}
private void readyForSpeech(Bundle bundle) {
try {
mCallback.readyForSpeech(bundle);
} catch (RemoteException e) {
// empty
}
}
private void results(Bundle bundle, boolean isFinal) {
if (speechService != null) {
speechService.cancel();
}
try {
if (isFinal) {
mCallback.results(bundle);
} else {
mCallback.partialResults(bundle);
}
} catch (RemoteException e) {
// empty
}
}
private Bundle createResultsBundle(String hypothesis) {
ArrayList<String> hypotheses = new ArrayList<>();
hypotheses.add(hypothesis);
Bundle bundle = new Bundle();
bundle.putStringArrayList(android.speech.SpeechRecognizer.RESULTS_RECOGNITION, hypotheses);
return bundle;
}
private void beginningOfSpeech() {
try {
mCallback.beginningOfSpeech();
} catch (RemoteException e) {
// empty
}
}
private void error(int errorCode) {
speechService.cancel();
try {
mCallback.error(errorCode);
} catch (RemoteException e) {
// empty
}
}
@Override
public void onResult(String hypothesis) {
if (hypothesis != null) {
Log.i(TAG, hypothesis);
results(createResultsBundle(hypothesis), true);
}
}
@Override
public void onFinalResult(String hypothesis) {
if (hypothesis != null) {
Log.i(TAG, hypothesis);
results(createResultsBundle(hypothesis), true);
}
}
@Override
public void onPartialResult(String hypothesis) {
if (hypothesis != null) {
Log.i(TAG, hypothesis);
results(createResultsBundle(hypothesis), false);
}
}
@Override
public void onError(Exception e) {
Log.e(TAG, e.getMessage());
error(android.speech.SpeechRecognizer.ERROR_CLIENT);
}
@Override
public void onTimeout() {
speechService.cancel();
speechService.startListening();
}
}

View File

@ -1,312 +0,0 @@
// Copyright 2020 Ciaran O'Reilly
// Copyright 2019 Alpha Cephei Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package cat.oreilly.localstt;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder.AudioSource;
import android.os.Handler;
import android.os.Looper;
import android.util.Log;
import org.vosk.android.RecognitionListener;
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState;
import com.konovalov.vad.Vad;
import com.konovalov.vad.VadConfig;
/**
* Service that records audio in a thread, passes it to a recognizer and emits
* recognition results. Recognition events are passed to a client using
* {@link RecognitionListener}
*
*/
public class DeepSpeechService {
protected static final String TAG = DeepSpeechService.class.getSimpleName();
private final DeepSpeechModel model;
private final DeepSpeechStreamingState streamContext;
private final Vad vad;
private final int sampleRate;
private final static float BUFFER_SIZE_SECONDS = 0.4f;
private int bufferSize;
private final AudioRecord recorder;
private Thread recognizerThread;
private final Handler mainHandler = new Handler(Looper.getMainLooper());
private final Collection<RecognitionListener> listeners = new HashSet<RecognitionListener>();
/**
* Creates speech service. Service holds the AudioRecord object, so you need to
* call {@link release} in order to properly finalize it.
*
* @throws IOException thrown if audio recorder can not be created for some
* reason.
*/
public DeepSpeechService(DeepSpeechModel model, float sampleRate) throws IOException {
this.model = model;
this.sampleRate = (int) sampleRate;
this.streamContext = model.createStream();
vad = new Vad(VadConfig.newBuilder().setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
.setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480).setMode(VadConfig.Mode.NORMAL).build());
bufferSize = Math.round(this.sampleRate * BUFFER_SIZE_SECONDS);
recorder = new AudioRecord(AudioSource.VOICE_RECOGNITION, this.sampleRate, AudioFormat.CHANNEL_IN_MONO,
AudioFormat.ENCODING_PCM_16BIT, bufferSize * 2);
if (recorder.getState() == AudioRecord.STATE_UNINITIALIZED) {
recorder.release();
throw new IOException("Failed to initialize recorder. Microphone might be already in use.");
}
Log.i(TAG, "DeepSpeechService initialized");
}
/**
* Adds listener.
*/
public void addListener(RecognitionListener listener) {
synchronized (listeners) {
listeners.add(listener);
}
}
/**
* Removes listener.
*/
public void removeListener(RecognitionListener listener) {
synchronized (listeners) {
listeners.remove(listener);
}
}
/**
* Starts recognition. Does nothing if recognition is active.
*
* @return true if recognition was actually started
*/
public boolean startListening() {
if (null != recognizerThread)
return false;
recognizerThread = new RecognizerThread();
recognizerThread.start();
return true;
}
/**
* Starts recognition. After specified timeout listening stops and the
* endOfSpeech signals about that. Does nothing if recognition is active.
*
* @timeout - timeout in milliseconds to listen.
*
* @return true if recognition was actually started
*/
public boolean startListening(int timeout) {
if (null != recognizerThread)
return false;
recognizerThread = new RecognizerThread(timeout);
recognizerThread.start();
return true;
}
private boolean stopRecognizerThread() {
if (null == recognizerThread)
return false;
try {
recognizerThread.interrupt();
recognizerThread.join();
} catch (InterruptedException e) {
// Restore the interrupted status.
Thread.currentThread().interrupt();
}
recognizerThread = null;
return true;
}
/**
* Stops recognition. All listeners should receive final result if there is any.
* Does nothing if recognition is not active.
*
* @return true if recognition was actually stopped
*/
public boolean stop() {
boolean result = stopRecognizerThread();
if (result) {
mainHandler.post(new ResultEvent(model.finishStream(streamContext), true));
}
return result;
}
/**
* Cancels recognition. Listeners do not receive final result. Does nothing if
* recognition is not active.
*
* @return true if recognition was actually canceled
*/
public boolean cancel() {
Log.d(TAG, "#cancel");
boolean result = stopRecognizerThread();
this.model.freeModel(); // Reset recognizer state
return result;
}
/**
* Shutdown the recognizer and release the recorder
*/
public void shutdown() {
Log.d(TAG, "#shutdown");
this.model.freeModel();
recorder.release();
}
private final class RecognizerThread extends Thread {
private int remainingSamples;
private int timeoutSamples;
private final static int NO_TIMEOUT = -1;
public RecognizerThread(int timeout) {
if (timeout != NO_TIMEOUT)
this.timeoutSamples = timeout * sampleRate / 1000;
else
this.timeoutSamples = NO_TIMEOUT;
this.remainingSamples = this.timeoutSamples;
}
public RecognizerThread() {
this(NO_TIMEOUT);
}
@Override
public void run() {
Log.i(TAG, "Start Recording...");
vad.start();
recorder.startRecording();
if (recorder.getRecordingState() == AudioRecord.RECORDSTATE_STOPPED) {
recorder.stop();
IOException ioe = new IOException("Failed to start recording. Microphone might be already in use.");
mainHandler.post(new OnErrorEvent(ioe));
return;
}
short[] buffer = new short[bufferSize];
int nread = recorder.read(buffer, 0, buffer.length);
boolean speechDetected = false;
boolean feedAudio = true;
while (!interrupted() && ((timeoutSamples == NO_TIMEOUT) || (remainingSamples > 0)) && feedAudio) {
if (nread < 0) {
throw new RuntimeException("error reading audio buffer");
} else {
Log.i(TAG, "Feeding audio");
model.feedAudioContent(streamContext, buffer, nread);
boolean isSpeech = vad.isSpeech(buffer);
if (isSpeech) {
Log.d(TAG, "Speech detected");
speechDetected = true;
}
if (speechDetected && !isSpeech) {
Log.d(TAG, "Silence detected");
feedAudio = false;
}
}
if (timeoutSamples != NO_TIMEOUT) {
remainingSamples = remainingSamples - nread;
}
nread = recorder.read(buffer, 0, buffer.length);
}
mainHandler.post(new ResultEvent(model.finishStream(streamContext), true));
recorder.stop();
vad.stop();
// Remove all pending notifications.
mainHandler.removeCallbacksAndMessages(null);
// If we met timeout signal that speech ended
if (timeoutSamples != NO_TIMEOUT && remainingSamples <= 0) {
mainHandler.post(new TimeoutEvent());
}
}
}
private abstract class RecognitionEvent implements Runnable {
public void run() {
RecognitionListener[] emptyArray = new RecognitionListener[0];
for (RecognitionListener listener : listeners.toArray(emptyArray))
execute(listener);
}
protected abstract void execute(RecognitionListener listener);
}
private class ResultEvent extends RecognitionEvent {
protected final String hypothesis;
private final boolean finalResult;
ResultEvent(String hypothesis, boolean finalResult) {
this.hypothesis = hypothesis;
this.finalResult = finalResult;
}
@Override
protected void execute(RecognitionListener listener) {
if (finalResult)
listener.onResult(hypothesis);
else
listener.onPartialResult(hypothesis);
}
}
private class OnErrorEvent extends RecognitionEvent {
private final Exception exception;
OnErrorEvent(Exception exception) {
this.exception = exception;
}
@Override
protected void execute(RecognitionListener listener) {
listener.onError(exception);
}
}
private class TimeoutEvent extends RecognitionEvent {
@Override
protected void execute(RecognitionListener listener) {
listener.onTimeout();
}
}
}

View File

@ -79,7 +79,7 @@ public class VoskRecognitionService extends RecognitionService implements Recogn
LibVosk.setLogLevel(LogLevel.INFO);
Log.i(TAG, "Loading model");
model = new Model(assetDir.toString() + "/vosk-catala");
model = new Model(assetDir.toString() + "/vosk-model-small-ru-0.22");
}
setupRecognizer();

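Aside from the asset list, this path is the only code change needed for the language swap: the recognizer simply points at a different unpacked model directory. Below is a minimal sketch of that flow, assuming the standard vosk-android API (org.vosk.Model, org.vosk.Recognizer, org.vosk.android.SpeechService) and the 16 kHz sample rate used elsewhere in this repo; the wrapper class and method names are hypothetical and not part of this commit.

import org.vosk.Model;
import org.vosk.Recognizer;
import org.vosk.android.RecognitionListener;
import org.vosk.android.SpeechService;

import java.io.IOException;

public class VoskSketch {
    // Load the unpacked model and start streaming microphone recognition.
    public static SpeechService startRecognition(String assetDir, RecognitionListener listener)
            throws IOException {
        // The only change in this commit on the Vosk side: the model directory name.
        Model model = new Model(assetDir + "/vosk-model-small-ru-0.22");
        Recognizer recognizer = new Recognizer(model, 16000.0f);
        SpeechService speechService = new SpeechService(recognizer, 16000.0f);
        speechService.startListening(listener);
        return speechService;
    }
}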
View File

@ -32,8 +32,10 @@
android:layout_marginBottom="20dp"
android:layout_marginLeft="16dp"
android:hint="@string/loading"
android:textColorHint="@color/colorPrimaryDark"
android:textColor="@color/colorPrimary"
android:id="@+id/text"
android:layout_centerInParent="true"
/>
</LinearLayout>
</RelativeLayout>
</RelativeLayout>

View File

@ -1,11 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<string name="app_name">LocalSTT</string>
<string name="vosk_recognition_service">Reconeixement Kaldi/Vosk</string>
<string name="deepspeech_recognition_service">Reconeixement DeepSpeech</string>
<string name="recognized">Reconegut: %1$s</string>
<string name="loading">Carregant...</string>
<string name="speaknow">Comença a parlar!</string>
</resources>

View File

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<color name="colorPrimary">#000000</color>
<color name="colorPrimaryDark">#3700B3</color>
<color name="colorAccent">#03DAC5</color>
<color name="colorBackground">#EEEEEE</color>
</resources>

View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<resources>
<string name="app_name">LocalSTT</string>
<string name="vosk_recognition_service">Сервис Kaldi/Vosk</string>
<string name="recognized">Распознано: %1$s</string>
<string name="loading">Загрузка...</string>
<string name="speaknow">Говорите!</string>
</resources>

View File

@ -0,0 +1,20 @@
<resources>
<!-- Base application theme. -->
<style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
<!-- Customize your theme here. -->
<item name="colorPrimary">@color/colorPrimary</item>
<item name="colorPrimaryDark">@color/colorPrimaryDark</item>
<item name="colorAccent">@color/colorAccent</item>
</style>
<style name="Theme.LocalSTT.Translucent" parent="Theme.AppCompat.DayNight.NoActionBar">
<item name="android:windowNoTitle">true</item>
<item name="windowActionBar">false</item>
<item name="android:windowBackground">@android:color/transparent</item>
<item name="android:colorBackgroundCacheHint">@null</item>
<item name="android:windowIsTranslucent">true</item>
<item name="android:windowAnimationStyle">@android:style/Animation</item>
</style>
</resources>

View File

@ -3,7 +3,6 @@
<string name="app_name">LocalSTT</string>
<string name="vosk_recognition_service">Kaldi/Vosk Recognizer</string>
<string name="deepspeech_recognition_service">Deepspeech Recognizer</string>
<string name="recognized">Recognized: %1$s</string>
<string name="loading">Loading...</string>
<string name="speaknow">Start speaking now!</string>

View File

@ -1,2 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<recognition-service xmlns:android="http://schemas.android.com/apk/res/android" />
<recognition-service xmlns:android="http://schemas.android.com/apk/res/android" />