mirror of
https://github.com/andreytkachenko/LocalSTT.git
synced 2024-11-24 02:16:21 +04:00
Remove DeepSpeechService and replace the Catalan model with a Russian one
This commit is contained in:
parent
e90a92e366
commit
f3dadb94d6
@ -12,6 +12,9 @@ repositories {
|
||||
}
|
||||
|
||||
android {
|
||||
lintOptions {
|
||||
abortOnError false
|
||||
}
|
||||
compileSdkVersion 30
|
||||
defaultConfig {
|
||||
applicationId "cat.oreilly.localstt"
|
||||
@ -36,10 +39,9 @@ dependencies {
|
||||
implementation 'androidx.appcompat:appcompat:1.2.0'
|
||||
implementation 'net.java.dev.jna:jna:5.8.0@aar'
|
||||
implementation 'com.google.code.gson:gson:2.8.7'
|
||||
implementation 'org.mozilla.deepspeech:libdeepspeech:0.8.2'
|
||||
implementation 'com.github.gkonovalov:android-vad:1.0.0'
|
||||
}
|
||||
|
||||
ant.importBuild 'assets.xml'
|
||||
preBuild.dependsOn(list, checksum)
|
||||
clean.dependsOn(clean_assets)
|
||||
clean.dependsOn(clean_assets)
|
||||
|
@ -102,23 +102,5 @@
|
||||
android:name="android.speech"
|
||||
android:resource="@xml/recognition_service" />
|
||||
</service>
|
||||
|
||||
<service
|
||||
android:name=".DeepSpeechRecognitionService"
|
||||
android:icon="@drawable/ic_service_trigger"
|
||||
android:label="@string/deepspeech_recognition_service"
|
||||
android:permission="android.permission.RECORD_AUDIO">
|
||||
<intent-filter>
|
||||
|
||||
<!-- The constant value is defined at RecognitionService.SERVICE_INTERFACE. -->
|
||||
<action android:name="android.speech.RecognitionService" />
|
||||
|
||||
<category android:name="android.intent.category.DEFAULT" />
|
||||
</intent-filter>
|
||||
|
||||
<meta-data
|
||||
android:name="android.speech"
|
||||
android:resource="@xml/recognition_service" />
|
||||
</service>
|
||||
</application>
|
||||
</manifest>
|
||||
</manifest>
|
||||
|
@ -1,26 +1,14 @@
|
||||
deepspeech-catala/kenlm.scorer
|
||||
deepspeech-catala/model.tflite
|
||||
vosk-catala/README
|
||||
vosk-catala/am/final.mdl
|
||||
vosk-catala/am/tree
|
||||
vosk-catala/conf/mfcc.conf
|
||||
vosk-catala/conf/model.conf
|
||||
vosk-catala/graph/Gr.fst
|
||||
vosk-catala/graph/HCLr.fst
|
||||
vosk-catala/graph/disambig_tid.int
|
||||
vosk-catala/graph/phones/align_lexicon.int
|
||||
vosk-catala/graph/phones/align_lexicon.txt
|
||||
vosk-catala/graph/phones/disambig.int
|
||||
vosk-catala/graph/phones/disambig.txt
|
||||
vosk-catala/graph/phones/optional_silence.csl
|
||||
vosk-catala/graph/phones/optional_silence.int
|
||||
vosk-catala/graph/phones/optional_silence.txt
|
||||
vosk-catala/graph/phones/silence.csl
|
||||
vosk-catala/graph/phones/word_boundary.int
|
||||
vosk-catala/graph/phones/word_boundary.txt
|
||||
vosk-catala/ivector/final.dubm
|
||||
vosk-catala/ivector/final.ie
|
||||
vosk-catala/ivector/final.mat
|
||||
vosk-catala/ivector/global_cmvn.stats
|
||||
vosk-catala/ivector/online_cmvn.conf
|
||||
vosk-catala/ivector/splice.conf
|
||||
vosk-model-small-ru-0.22/README
|
||||
vosk-model-small-ru-0.22/am/final.mdl
|
||||
vosk-model-small-ru-0.22/conf/mfcc.conf
|
||||
vosk-model-small-ru-0.22/conf/model.conf
|
||||
vosk-model-small-ru-0.22/graph/Gr.fst
|
||||
vosk-model-small-ru-0.22/graph/HCLr.fst
|
||||
vosk-model-small-ru-0.22/graph/disambig_tid.int
|
||||
vosk-model-small-ru-0.22/graph/phones/word_boundary.int
|
||||
vosk-model-small-ru-0.22/ivector/final.dubm
|
||||
vosk-model-small-ru-0.22/ivector/final.ie
|
||||
vosk-model-small-ru-0.22/ivector/final.mat
|
||||
vosk-model-small-ru-0.22/ivector/global_cmvn.stats
|
||||
vosk-model-small-ru-0.22/ivector/online_cmvn.conf
|
||||
vosk-model-small-ru-0.22/ivector/splice.conf
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
d562825f02f2ba36cbd0a75a17e84e8d
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
735b1327dc3c00af256af64be33cbed3
|
BIN
app/src/main/assets/sync/vosk-catala/.DS_Store
vendored
BIN
app/src/main/assets/sync/vosk-catala/.DS_Store
vendored
Binary file not shown.
@ -1 +0,0 @@
|
||||
Catalan model for android
|
@ -1 +0,0 @@
|
||||
f49442fa8c9e15bfbb6379c788b3104f
|
@ -1 +0,0 @@
|
||||
0b98b3c582e789693996799f0f434008
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
baeff4e70c13f9a61e1987f4abc2f827
|
@ -1,5 +0,0 @@
|
||||
--use-energy=false
|
||||
--num-mel-bins=20
|
||||
--num-ceps=20
|
||||
--low-freq=20
|
||||
--high-freq=7600
|
@ -1 +0,0 @@
|
||||
e44d88fe84f60e0926bba88e46e556fd
|
@ -1 +0,0 @@
|
||||
702de9c65b1d27a709fb185046afc07f
|
@ -1 +0,0 @@
|
||||
494db51b272c42cb251a9831de70b4ee
|
@ -1,13 +0,0 @@
|
||||
7935
|
||||
7936
|
||||
7937
|
||||
7938
|
||||
7939
|
||||
7940
|
||||
7941
|
||||
7942
|
||||
7943
|
||||
7944
|
||||
7945
|
||||
7946
|
||||
7947
|
@ -1 +0,0 @@
|
||||
974869565e76c84c27f43558398e9531
|
File diff suppressed because it is too large
Load Diff
@ -1 +0,0 @@
|
||||
d0cb5df30f5d30aa107468f200e793fc
|
@ -1 +0,0 @@
|
||||
0aeef7cac1b1be79377d4dc853ad0ad4
|
@ -1,13 +0,0 @@
|
||||
163
|
||||
164
|
||||
165
|
||||
166
|
||||
167
|
||||
168
|
||||
169
|
||||
170
|
||||
171
|
||||
172
|
||||
173
|
||||
174
|
||||
175
|
@ -1 +0,0 @@
|
||||
011b6746d416fdfb6e1e39732c7e174c
|
@ -1,13 +0,0 @@
|
||||
#0
|
||||
#1
|
||||
#2
|
||||
#3
|
||||
#4
|
||||
#5
|
||||
#6
|
||||
#7
|
||||
#8
|
||||
#9
|
||||
#10
|
||||
#11
|
||||
#12
|
@ -1 +0,0 @@
|
||||
b7494f81971a69678d4bab9994b72bc6
|
@ -1 +0,0 @@
|
||||
b026324c6904b2a9cb4b88d6d61c81d1
|
@ -1 +0,0 @@
|
||||
b026324c6904b2a9cb4b88d6d61c81d1
|
@ -1 +0,0 @@
|
||||
SIL
|
@ -1 +0,0 @@
|
||||
afb1c6c9240586b23a567660a6a3e0b3
|
@ -1 +0,0 @@
|
||||
1:2:3:4:5:6:7:8:9:10
|
@ -1 +0,0 @@
|
||||
2b78ed8a7acaa3f55a698ae07520bd7d
|
@ -1 +0,0 @@
|
||||
e4cd9b8790e4aa28441b87668c9ec025
|
@ -1,162 +0,0 @@
|
||||
SIL nonword
|
||||
SIL_B begin
|
||||
SIL_E end
|
||||
SIL_I internal
|
||||
SIL_S singleton
|
||||
GBG nonword
|
||||
GBG_B begin
|
||||
GBG_E end
|
||||
GBG_I internal
|
||||
GBG_S singleton
|
||||
a_B begin
|
||||
a_E end
|
||||
a_I internal
|
||||
a_S singleton
|
||||
ae_B begin
|
||||
ae_E end
|
||||
ae_I internal
|
||||
ae_S singleton
|
||||
ao_B begin
|
||||
ao_E end
|
||||
ao_I internal
|
||||
ao_S singleton
|
||||
b_B begin
|
||||
b_E end
|
||||
b_I internal
|
||||
b_S singleton
|
||||
bv_B begin
|
||||
bv_E end
|
||||
bv_I internal
|
||||
bv_S singleton
|
||||
c_B begin
|
||||
c_E end
|
||||
c_I internal
|
||||
c_S singleton
|
||||
ch_B begin
|
||||
ch_E end
|
||||
ch_I internal
|
||||
ch_S singleton
|
||||
d_B begin
|
||||
d_E end
|
||||
d_I internal
|
||||
d_S singleton
|
||||
dh_B begin
|
||||
dh_E end
|
||||
dh_I internal
|
||||
dh_S singleton
|
||||
e_B begin
|
||||
e_E end
|
||||
e_I internal
|
||||
e_S singleton
|
||||
ea_B begin
|
||||
ea_E end
|
||||
ea_I internal
|
||||
ea_S singleton
|
||||
ee_B begin
|
||||
ee_E end
|
||||
ee_I internal
|
||||
ee_S singleton
|
||||
f_B begin
|
||||
f_E end
|
||||
f_I internal
|
||||
f_S singleton
|
||||
g_B begin
|
||||
g_E end
|
||||
g_I internal
|
||||
g_S singleton
|
||||
gh_B begin
|
||||
gh_E end
|
||||
gh_I internal
|
||||
gh_S singleton
|
||||
i_B begin
|
||||
i_E end
|
||||
i_I internal
|
||||
i_S singleton
|
||||
j_B begin
|
||||
j_E end
|
||||
j_I internal
|
||||
j_S singleton
|
||||
k_B begin
|
||||
k_E end
|
||||
k_I internal
|
||||
k_S singleton
|
||||
l_B begin
|
||||
l_E end
|
||||
l_I internal
|
||||
l_S singleton
|
||||
ly_B begin
|
||||
ly_E end
|
||||
ly_I internal
|
||||
ly_S singleton
|
||||
m_B begin
|
||||
m_E end
|
||||
m_I internal
|
||||
m_S singleton
|
||||
n_B begin
|
||||
n_E end
|
||||
n_I internal
|
||||
n_S singleton
|
||||
ng_B begin
|
||||
ng_E end
|
||||
ng_I internal
|
||||
ng_S singleton
|
||||
ny_B begin
|
||||
ny_E end
|
||||
ny_I internal
|
||||
ny_S singleton
|
||||
o_B begin
|
||||
o_E end
|
||||
o_I internal
|
||||
o_S singleton
|
||||
p_B begin
|
||||
p_E end
|
||||
p_I internal
|
||||
p_S singleton
|
||||
r_B begin
|
||||
r_E end
|
||||
r_I internal
|
||||
r_S singleton
|
||||
rr_B begin
|
||||
rr_E end
|
||||
rr_I internal
|
||||
rr_S singleton
|
||||
s_B begin
|
||||
s_E end
|
||||
s_I internal
|
||||
s_S singleton
|
||||
sh_B begin
|
||||
sh_E end
|
||||
sh_I internal
|
||||
sh_S singleton
|
||||
t_B begin
|
||||
t_E end
|
||||
t_I internal
|
||||
t_S singleton
|
||||
ts_B begin
|
||||
ts_E end
|
||||
ts_I internal
|
||||
ts_S singleton
|
||||
u_B begin
|
||||
u_E end
|
||||
u_I internal
|
||||
u_S singleton
|
||||
uo_B begin
|
||||
uo_E end
|
||||
uo_I internal
|
||||
uo_S singleton
|
||||
v_B begin
|
||||
v_E end
|
||||
v_I internal
|
||||
v_S singleton
|
||||
w_B begin
|
||||
w_E end
|
||||
w_I internal
|
||||
w_S singleton
|
||||
y_B begin
|
||||
y_E end
|
||||
y_I internal
|
||||
y_S singleton
|
||||
z_B begin
|
||||
z_E end
|
||||
z_I internal
|
||||
z_S singleton
|
@ -1 +0,0 @@
|
||||
9cdad8b3dbf2b6a314606de415aa2675
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
ec500c106381011668d6cd54bcb7188d
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
f200fec86080b21ae28eaac316217121
|
Binary file not shown.
@ -1 +0,0 @@
|
||||
56f0cffbb086ad6e1d6d113b45699e1c
|
@ -1,3 +0,0 @@
|
||||
[
|
||||
2.474308e+10 -1.161169e+09 -1.528423e+09 6.994948e+08 -2.601561e+09 -2.680039e+09 -3.566892e+09 -1.76595e+09 -1.415956e+09 1.758065e+08 -6.921373e+08 -1.202024e+08 -9.632083e+08 -9112779 -7.154146e+08 -3.029829e+08 -3.775555e+08 -7.510038e+07 -1.596595e+08 -2.124697e+07 3.384748e+08
|
||||
1.910946e+12 1.188498e+11 8.577389e+10 1.048029e+11 1.063933e+11 1.20909e+11 1.355464e+11 9.016697e+10 9.036823e+10 7.144695e+10 6.582649e+10 5.484431e+10 4.500184e+10 3.308144e+10 2.427049e+10 1.593414e+10 1.093878e+10 6.789256e+09 4.383741e+09 2.393284e+09 0 ]
|
@ -1 +0,0 @@
|
||||
d85f5d6b22be9ec2cdce71ab88f2b048
|
8
app/src/main/assets/sync/vosk-model-small-ru-0.22/README
Normal file
8
app/src/main/assets/sync/vosk-model-small-ru-0.22/README
Normal file
@ -0,0 +1,8 @@
|
||||
Small Russian model for Vosk (Android, RPi, other small devices)
|
||||
|
||||
%WER 22.71 [ 9092 / 40042, 1124 ins, 1536 del, 6432 sub ] exp/chain_a/tdnn/decode_test_audiobooks_look_fast/wer_10_0.0
|
||||
%WER 11.79 [ 5940 / 50394, 894 ins, 832 del, 4214 sub ] exp/chain_a/tdnn/decode_test_golos_crowd_look_fast/wer_11_0.0
|
||||
%WER 21.34 [ 1789 / 8382, 173 ins, 440 del, 1176 sub ] exp/chain_a/tdnn/decode_test_golos_farfield_look_fast/wer_10_0.0
|
||||
%WER 29.89 [ 5579 / 18666, 476 ins, 1550 del, 3553 sub ] exp/chain_a/tdnn/decode_test_sova_devices_look_fast/wer_10_0.0
|
||||
%WER 31.97 [ 13588 / 42496, 1013 ins, 3640 del, 8935 sub ] exp/chain_a/tdnn/decode_test_youtube_look_fast/wer_9_0.0
|
||||
|
@ -0,0 +1 @@
|
||||
e9be39aa30bfad71b8323b5b2fa91318
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
f8f35f33ca26e6315d7006a174593346
|
@ -0,0 +1,7 @@
|
||||
--sample-frequency=16000
|
||||
--use-energy=false
|
||||
--num-mel-bins=40
|
||||
--num-ceps=40
|
||||
--low-freq=20
|
||||
--high-freq=7600
|
||||
--allow-downsample=true
|
@ -0,0 +1 @@
|
||||
84a568eda381f44519975996aa86d8fe
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
cd7ae127a696ec4b1ac133702be98430
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
889b2452891887f0629f1828b23da682
|
@ -0,0 +1,5 @@
|
||||
9855
|
||||
9856
|
||||
9857
|
||||
9858
|
||||
9859
|
@ -0,0 +1 @@
|
||||
bb3be49f6b0acf9eac46238616a34837
|
@ -160,3 +160,43 @@
|
||||
160 end
|
||||
161 internal
|
||||
162 singleton
|
||||
163 begin
|
||||
164 end
|
||||
165 internal
|
||||
166 singleton
|
||||
167 begin
|
||||
168 end
|
||||
169 internal
|
||||
170 singleton
|
||||
171 begin
|
||||
172 end
|
||||
173 internal
|
||||
174 singleton
|
||||
175 begin
|
||||
176 end
|
||||
177 internal
|
||||
178 singleton
|
||||
179 begin
|
||||
180 end
|
||||
181 internal
|
||||
182 singleton
|
||||
183 begin
|
||||
184 end
|
||||
185 internal
|
||||
186 singleton
|
||||
187 begin
|
||||
188 end
|
||||
189 internal
|
||||
190 singleton
|
||||
191 begin
|
||||
192 end
|
||||
193 internal
|
||||
194 singleton
|
||||
195 begin
|
||||
196 end
|
||||
197 internal
|
||||
198 singleton
|
||||
199 begin
|
||||
200 end
|
||||
201 internal
|
||||
202 singleton
|
@ -0,0 +1 @@
|
||||
4472ca1d33a2efde57c8460501aba308
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
1ef0f4a8dbae2bcdecf064573f758f22
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
543d1b8f67dfed41de3def910fb384cc
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
10372e791b3eda1966f35fc615e81408
|
@ -0,0 +1,3 @@
|
||||
[
|
||||
8.330133e+10 -4.600894e+09 -2.394861e+09 2.127165e+09 -9.355799e+09 -9.378007e+09 -1.302309e+10 -9.460417e+09 -9.260028e+09 -4.58608e+09 -5.287111e+09 -1.972033e+09 -6.090821e+09 -1.336419e+09 -5.214569e+09 -2.321841e+09 -3.889789e+09 -1.060202e+09 -2.065653e+09 -2.684904e+08 -7.4007e+08 -4587485 -1.315853e+08 -8597548 2.599227e+08 7.408538e+07 5.505751e+08 -1.161846e+07 5.138103e+08 -1.828159e+08 4.251498e+08 -2.901496e+07 6.469246e+08 2.489644e+08 6.289868e+08 2.490337e+08 3.38884e+08 -1.788837e+08 -2.536016e+08 -1.591728e+08 8.388078e+08
|
||||
8.660994e+12 4.637783e+11 3.366465e+11 4.467952e+11 5.094759e+11 5.179353e+11 6.145244e+11 4.970492e+11 5.014889e+11 4.027981e+11 3.937422e+11 3.602942e+11 3.162307e+11 2.40687e+11 2.267307e+11 1.563018e+11 1.341105e+11 8.535779e+10 6.12398e+10 3.207774e+10 1.737325e+10 5.704115e+09 7.980573e+08 2.168777e+08 2.763352e+09 6.859176e+09 1.214891e+10 1.604714e+10 2.005353e+10 2.240119e+10 2.366007e+10 2.300222e+10 2.406182e+10 2.354406e+10 2.098983e+10 1.619869e+10 1.491578e+10 1.224871e+10 9.502735e+09 6.517532e+09 0 ]
|
@ -0,0 +1 @@
|
||||
df436bf906c4b0dc3716d2b5142a5c77
|
@ -1,194 +0,0 @@
|
||||
// Copyright 2020 Ciaran O'Reilly
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
package cat.oreilly.localstt;
|
||||
|
||||
import android.content.Intent;
|
||||
import android.os.Bundle;
|
||||
import android.os.RemoteException;
|
||||
import android.os.Handler;
|
||||
import android.os.Looper;
|
||||
import android.speech.RecognitionService;
|
||||
import android.util.Log;
|
||||
|
||||
import org.vosk.android.RecognitionListener;
|
||||
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.Executor;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.io.IOException;
|
||||
|
||||
public class DeepSpeechRecognitionService extends RecognitionService implements RecognitionListener {
|
||||
private final static String TAG = DeepSpeechRecognitionService.class.getSimpleName();
|
||||
private final Handler handler = new Handler(Looper.getMainLooper());
|
||||
private final Executor executor = Executors.newSingleThreadExecutor();
|
||||
private DeepSpeechModel model;
|
||||
private DeepSpeechService speechService;
|
||||
|
||||
private RecognitionService.Callback mCallback;
|
||||
|
||||
@Override
|
||||
protected void onStartListening(Intent intent, Callback callback) {
|
||||
mCallback = callback;
|
||||
Log.i(TAG, "onStartListening");
|
||||
runRecognizerSetup();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onCancel(Callback callback) {
|
||||
Log.i(TAG, "onCancel");
|
||||
results(new Bundle(), true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void onStopListening(Callback callback) {
|
||||
Log.i(TAG, "onStopListening");
|
||||
results(new Bundle(), true);
|
||||
}
|
||||
|
||||
private void runRecognizerSetup() {
|
||||
executor.execute(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
Assets assets = new Assets(DeepSpeechRecognitionService.this);
|
||||
File assetDir = assets.syncAssets();
|
||||
|
||||
model = new DeepSpeechModel(assetDir.toString() + "/deepspeech-catala/model.tflite");
|
||||
model.enableExternalScorer(assetDir.toString() + "/deepspeech-catala/kenlm.scorer");
|
||||
|
||||
setupRecognizer();
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Failed to init recognizer ");
|
||||
error(android.speech.SpeechRecognizer.ERROR_CLIENT);
|
||||
}
|
||||
|
||||
handler.post(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
readyForSpeech(new Bundle());
|
||||
beginningOfSpeech();
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onDestroy() {
|
||||
super.onDestroy();
|
||||
|
||||
if (speechService != null) {
|
||||
speechService.cancel();
|
||||
speechService.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
private void setupRecognizer() throws IOException {
|
||||
try {
|
||||
Log.i(TAG, "Setting up recognizer");
|
||||
DeepSpeechService speechService = new DeepSpeechService(this.model, 16000.0f);
|
||||
speechService.addListener(this);
|
||||
speechService.startListening();
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void readyForSpeech(Bundle bundle) {
|
||||
try {
|
||||
mCallback.readyForSpeech(bundle);
|
||||
} catch (RemoteException e) {
|
||||
// empty
|
||||
}
|
||||
}
|
||||
|
||||
private void results(Bundle bundle, boolean isFinal) {
|
||||
if (speechService != null) {
|
||||
speechService.cancel();
|
||||
}
|
||||
try {
|
||||
if (isFinal) {
|
||||
mCallback.results(bundle);
|
||||
} else {
|
||||
mCallback.partialResults(bundle);
|
||||
}
|
||||
} catch (RemoteException e) {
|
||||
// empty
|
||||
}
|
||||
}
|
||||
|
||||
private Bundle createResultsBundle(String hypothesis) {
|
||||
ArrayList<String> hypotheses = new ArrayList<>();
|
||||
hypotheses.add(hypothesis);
|
||||
Bundle bundle = new Bundle();
|
||||
bundle.putStringArrayList(android.speech.SpeechRecognizer.RESULTS_RECOGNITION, hypotheses);
|
||||
return bundle;
|
||||
}
|
||||
|
||||
private void beginningOfSpeech() {
|
||||
try {
|
||||
mCallback.beginningOfSpeech();
|
||||
} catch (RemoteException e) {
|
||||
// empty
|
||||
}
|
||||
}
|
||||
|
||||
private void error(int errorCode) {
|
||||
speechService.cancel();
|
||||
try {
|
||||
mCallback.error(errorCode);
|
||||
} catch (RemoteException e) {
|
||||
// empty
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onResult(String hypothesis) {
|
||||
if (hypothesis != null) {
|
||||
Log.i(TAG, hypothesis);
|
||||
results(createResultsBundle(hypothesis), true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFinalResult(String hypothesis) {
|
||||
if (hypothesis != null) {
|
||||
Log.i(TAG, hypothesis);
|
||||
results(createResultsBundle(hypothesis), true);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onPartialResult(String hypothesis) {
|
||||
if (hypothesis != null) {
|
||||
Log.i(TAG, hypothesis);
|
||||
results(createResultsBundle(hypothesis), false);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Exception e) {
|
||||
Log.e(TAG, e.getMessage());
|
||||
error(android.speech.SpeechRecognizer.ERROR_CLIENT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTimeout() {
|
||||
speechService.cancel();
|
||||
speechService.startListening();
|
||||
}
|
||||
}
|
@ -1,312 +0,0 @@
|
||||
// Copyright 2020 Ciaran O'Reilly
|
||||
// Copyright 2019 Alpha Cephei Inc.
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
package cat.oreilly.localstt;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
|
||||
import android.media.AudioFormat;
|
||||
import android.media.AudioRecord;
|
||||
import android.media.MediaRecorder.AudioSource;
|
||||
import android.os.Handler;
|
||||
import android.os.Looper;
|
||||
import android.util.Log;
|
||||
|
||||
import org.vosk.android.RecognitionListener;
|
||||
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechModel;
|
||||
import org.mozilla.deepspeech.libdeepspeech.DeepSpeechStreamingState;
|
||||
|
||||
import com.konovalov.vad.Vad;
|
||||
import com.konovalov.vad.VadConfig;
|
||||
|
||||
/**
|
||||
* Service that records audio in a thread, passes it to a recognizer and emits
|
||||
* recognition results. Recognition events are passed to a client using
|
||||
* {@link RecognitionListener}
|
||||
*
|
||||
*/
|
||||
public class DeepSpeechService {
|
||||
|
||||
protected static final String TAG = DeepSpeechService.class.getSimpleName();
|
||||
|
||||
private final DeepSpeechModel model;
|
||||
private final DeepSpeechStreamingState streamContext;
|
||||
private final Vad vad;
|
||||
|
||||
private final int sampleRate;
|
||||
private final static float BUFFER_SIZE_SECONDS = 0.4f;
|
||||
private int bufferSize;
|
||||
private final AudioRecord recorder;
|
||||
|
||||
private Thread recognizerThread;
|
||||
|
||||
private final Handler mainHandler = new Handler(Looper.getMainLooper());
|
||||
|
||||
private final Collection<RecognitionListener> listeners = new HashSet<RecognitionListener>();
|
||||
|
||||
/**
|
||||
* Creates speech service. Service holds the AudioRecord object, so you need to
|
||||
* call {@link release} in order to properly finalize it.
|
||||
*
|
||||
* @throws IOException thrown if audio recorder can not be created for some
|
||||
* reason.
|
||||
*/
|
||||
public DeepSpeechService(DeepSpeechModel model, float sampleRate) throws IOException {
|
||||
this.model = model;
|
||||
this.sampleRate = (int) sampleRate;
|
||||
this.streamContext = model.createStream();
|
||||
|
||||
vad = new Vad(VadConfig.newBuilder().setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
|
||||
.setFrameSize(VadConfig.FrameSize.FRAME_SIZE_480).setMode(VadConfig.Mode.NORMAL).build());
|
||||
|
||||
bufferSize = Math.round(this.sampleRate * BUFFER_SIZE_SECONDS);
|
||||
recorder = new AudioRecord(AudioSource.VOICE_RECOGNITION, this.sampleRate, AudioFormat.CHANNEL_IN_MONO,
|
||||
AudioFormat.ENCODING_PCM_16BIT, bufferSize * 2);
|
||||
|
||||
if (recorder.getState() == AudioRecord.STATE_UNINITIALIZED) {
|
||||
recorder.release();
|
||||
throw new IOException("Failed to initialize recorder. Microphone might be already in use.");
|
||||
}
|
||||
Log.i(TAG, "DeepSpeechService initialized");
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds listener.
|
||||
*/
|
||||
public void addListener(RecognitionListener listener) {
|
||||
synchronized (listeners) {
|
||||
listeners.add(listener);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes listener.
|
||||
*/
|
||||
public void removeListener(RecognitionListener listener) {
|
||||
synchronized (listeners) {
|
||||
listeners.remove(listener);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts recognition. Does nothing if recognition is active.
|
||||
*
|
||||
* @return true if recognition was actually started
|
||||
*/
|
||||
public boolean startListening() {
|
||||
if (null != recognizerThread)
|
||||
return false;
|
||||
|
||||
recognizerThread = new RecognizerThread();
|
||||
recognizerThread.start();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Starts recognition. After specified timeout listening stops and the
|
||||
* endOfSpeech signals about that. Does nothing if recognition is active.
|
||||
*
|
||||
* @timeout - timeout in milliseconds to listen.
|
||||
*
|
||||
* @return true if recognition was actually started
|
||||
*/
|
||||
public boolean startListening(int timeout) {
|
||||
if (null != recognizerThread)
|
||||
return false;
|
||||
|
||||
recognizerThread = new RecognizerThread(timeout);
|
||||
recognizerThread.start();
|
||||
return true;
|
||||
}
|
||||
|
||||
private boolean stopRecognizerThread() {
|
||||
if (null == recognizerThread)
|
||||
return false;
|
||||
|
||||
try {
|
||||
recognizerThread.interrupt();
|
||||
recognizerThread.join();
|
||||
} catch (InterruptedException e) {
|
||||
// Restore the interrupted status.
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
|
||||
recognizerThread = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops recognition. All listeners should receive final result if there is any.
|
||||
* Does nothing if recognition is not active.
|
||||
*
|
||||
* @return true if recognition was actually stopped
|
||||
*/
|
||||
public boolean stop() {
|
||||
boolean result = stopRecognizerThread();
|
||||
if (result) {
|
||||
mainHandler.post(new ResultEvent(model.finishStream(streamContext), true));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cancels recognition. Listeners do not receive final result. Does nothing if
|
||||
* recognition is not active.
|
||||
*
|
||||
* @return true if recognition was actually canceled
|
||||
*/
|
||||
public boolean cancel() {
|
||||
Log.d(TAG, "#cancel");
|
||||
boolean result = stopRecognizerThread();
|
||||
this.model.freeModel(); // Reset recognizer state
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shutdown the recognizer and release the recorder
|
||||
*/
|
||||
public void shutdown() {
|
||||
Log.d(TAG, "#shutdown");
|
||||
this.model.freeModel();
|
||||
recorder.release();
|
||||
}
|
||||
|
||||
private final class RecognizerThread extends Thread {
|
||||
|
||||
private int remainingSamples;
|
||||
private int timeoutSamples;
|
||||
private final static int NO_TIMEOUT = -1;
|
||||
|
||||
public RecognizerThread(int timeout) {
|
||||
if (timeout != NO_TIMEOUT)
|
||||
this.timeoutSamples = timeout * sampleRate / 1000;
|
||||
else
|
||||
this.timeoutSamples = NO_TIMEOUT;
|
||||
this.remainingSamples = this.timeoutSamples;
|
||||
}
|
||||
|
||||
public RecognizerThread() {
|
||||
this(NO_TIMEOUT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
Log.i(TAG, "Start Recording...");
|
||||
|
||||
vad.start();
|
||||
recorder.startRecording();
|
||||
if (recorder.getRecordingState() == AudioRecord.RECORDSTATE_STOPPED) {
|
||||
recorder.stop();
|
||||
IOException ioe = new IOException("Failed to start recording. Microphone might be already in use.");
|
||||
mainHandler.post(new OnErrorEvent(ioe));
|
||||
return;
|
||||
}
|
||||
|
||||
short[] buffer = new short[bufferSize];
|
||||
int nread = recorder.read(buffer, 0, buffer.length);
|
||||
boolean speechDetected = false;
|
||||
boolean feedAudio = true;
|
||||
|
||||
while (!interrupted() && ((timeoutSamples == NO_TIMEOUT) || (remainingSamples > 0)) && feedAudio) {
|
||||
|
||||
if (nread < 0) {
|
||||
throw new RuntimeException("error reading audio buffer");
|
||||
} else {
|
||||
Log.i(TAG, "Feeding audio");
|
||||
model.feedAudioContent(streamContext, buffer, nread);
|
||||
boolean isSpeech = vad.isSpeech(buffer);
|
||||
if (isSpeech) {
|
||||
Log.d(TAG, "Speech detected");
|
||||
speechDetected = true;
|
||||
}
|
||||
if (speechDetected && !isSpeech) {
|
||||
Log.d(TAG, "Silence detected");
|
||||
feedAudio = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (timeoutSamples != NO_TIMEOUT) {
|
||||
remainingSamples = remainingSamples - nread;
|
||||
}
|
||||
nread = recorder.read(buffer, 0, buffer.length);
|
||||
}
|
||||
|
||||
mainHandler.post(new ResultEvent(model.finishStream(streamContext), true));
|
||||
|
||||
recorder.stop();
|
||||
vad.stop();
|
||||
|
||||
// Remove all pending notifications.
|
||||
mainHandler.removeCallbacksAndMessages(null);
|
||||
|
||||
// If we met timeout signal that speech ended
|
||||
if (timeoutSamples != NO_TIMEOUT && remainingSamples <= 0) {
|
||||
mainHandler.post(new TimeoutEvent());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private abstract class RecognitionEvent implements Runnable {
|
||||
public void run() {
|
||||
RecognitionListener[] emptyArray = new RecognitionListener[0];
|
||||
for (RecognitionListener listener : listeners.toArray(emptyArray))
|
||||
execute(listener);
|
||||
}
|
||||
|
||||
protected abstract void execute(RecognitionListener listener);
|
||||
}
|
||||
|
||||
private class ResultEvent extends RecognitionEvent {
|
||||
protected final String hypothesis;
|
||||
private final boolean finalResult;
|
||||
|
||||
ResultEvent(String hypothesis, boolean finalResult) {
|
||||
this.hypothesis = hypothesis;
|
||||
this.finalResult = finalResult;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void execute(RecognitionListener listener) {
|
||||
if (finalResult)
|
||||
listener.onResult(hypothesis);
|
||||
else
|
||||
listener.onPartialResult(hypothesis);
|
||||
}
|
||||
}
|
||||
|
||||
private class OnErrorEvent extends RecognitionEvent {
|
||||
private final Exception exception;
|
||||
|
||||
OnErrorEvent(Exception exception) {
|
||||
this.exception = exception;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void execute(RecognitionListener listener) {
|
||||
listener.onError(exception);
|
||||
}
|
||||
}
|
||||
|
||||
private class TimeoutEvent extends RecognitionEvent {
|
||||
@Override
|
||||
protected void execute(RecognitionListener listener) {
|
||||
listener.onTimeout();
|
||||
}
|
||||
}
|
||||
}
|
@ -79,7 +79,7 @@ public class VoskRecognitionService extends RecognitionService implements Recogn
|
||||
LibVosk.setLogLevel(LogLevel.INFO);
|
||||
|
||||
Log.i(TAG, "Loading model");
|
||||
model = new Model(assetDir.toString() + "/vosk-catala");
|
||||
model = new Model(assetDir.toString() + "/vosk-model-small-ru-0.22");
|
||||
}
|
||||
|
||||
setupRecognizer();
|
||||
|
@ -32,8 +32,10 @@
|
||||
android:layout_marginBottom="20dp"
|
||||
android:layout_marginLeft="16dp"
|
||||
android:hint="@string/loading"
|
||||
android:textColorHint="@color/colorPrimaryDark"
|
||||
android:textColor="@color/colorPrimary"
|
||||
android:id="@+id/text"
|
||||
android:layout_centerInParent="true"
|
||||
/>
|
||||
</LinearLayout>
|
||||
</RelativeLayout>
|
||||
</RelativeLayout>
|
||||
|
@ -1,11 +0,0 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- Catalan user-facing strings for LocalSTT. -->
<resources>
  <!-- Application name. -->
  <string name="app_name">LocalSTT</string>

  <!-- Labels of the two recognition services. -->
  <string name="vosk_recognition_service">Reconeixement Kaldi/Vosk</string>
  <string name="deepspeech_recognition_service">Reconeixement DeepSpeech</string>

  <!-- %1$s is the first format argument, inserted as a string. -->
  <string name="recognized">Reconegut: %1$s</string>

  <!-- Status texts. -->
  <string name="loading">Carregant...</string>
  <string name="speaknow">Comença a parlar!</string>
</resources>
|
7
app/src/main/res/values-ru/colors.xml
Normal file
7
app/src/main/res/values-ru/colors.xml
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- Colour palette; the names are referenced as @color/... by styles and layouts. -->
<resources>
  <!-- Theme colours. -->
  <color name="colorPrimary">#000000</color>
  <color name="colorPrimaryDark">#3700B3</color>
  <color name="colorAccent">#03DAC5</color>

  <!-- Background colour. -->
  <color name="colorBackground">#EEEEEE</color>
</resources>
|
8
app/src/main/res/values-ru/strings.xml
Normal file
8
app/src/main/res/values-ru/strings.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<!-- Russian user-facing strings for LocalSTT. -->
<resources>
  <!-- Application name. -->
  <string name="app_name">LocalSTT</string>

  <!-- Label of the Kaldi/Vosk recognition service. -->
  <string name="vosk_recognition_service">Сервис Kaldi/Vosk</string>

  <!-- %1$s is the first format argument, inserted as a string. -->
  <string name="recognized">Распознано: %1$s</string>

  <!-- Status texts. -->
  <string name="loading">Загрузка...</string>
  <string name="speaknow">Говорите!</string>
</resources>
|
20
app/src/main/res/values-ru/styles.xml
Normal file
20
app/src/main/res/values-ru/styles.xml
Normal file
@ -0,0 +1,20 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
<resources>

    <!-- Base application theme. -->
    <style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
        <!-- Customize your theme here. -->
        <item name="colorPrimary">@color/colorPrimary</item>
        <item name="colorPrimaryDark">@color/colorPrimaryDark</item>
        <item name="colorAccent">@color/colorAccent</item>
    </style>

    <!-- Translucent theme: no title, no action bar, transparent window background. -->
    <style name="Theme.LocalSTT.Translucent" parent="Theme.AppCompat.DayNight.NoActionBar">
        <item name="android:windowNoTitle">true</item>
        <item name="windowActionBar">false</item>
        <item name="android:windowBackground">@android:color/transparent</item>
        <item name="android:colorBackgroundCacheHint">@null</item>
        <item name="android:windowIsTranslucent">true</item>
        <item name="android:windowAnimationStyle">@android:style/Animation</item>
    </style>

</resources>
|
@ -3,7 +3,6 @@
|
||||
|
||||
<string name="app_name">LocalSTT</string>
|
||||
<string name="vosk_recognition_service">Kaldi/Vosk Recognizer</string>
|
||||
<string name="deepspeech_recognition_service">Deepspeech Recognizer</string>
|
||||
<string name="recognized">Recognized: %1$s</string>
|
||||
<string name="loading">Loading...</string>
|
||||
<string name="speaknow">Start speaking now!</string>
|
||||
|
@ -1,2 +1,2 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<recognition-service xmlns:android="http://schemas.android.com/apk/res/android" />
|
||||
<recognition-service xmlns:android="http://schemas.android.com/apk/res/android" />
|
||||
|
Loading…
Reference in New Issue
Block a user