Fix CI #4
18
.drone.yml
Normal file
18
.drone.yml
Normal file
@ -0,0 +1,18 @@
|
||||
kind: pipeline
|
||||
name: default
|
||||
|
||||
steps:
|
||||
- name: submodules
|
||||
image: hub.aidev.ru/rust-blas
|
||||
commands:
|
||||
- git submodule update --init --recursive
|
||||
|
||||
- name: build
|
||||
image: hub.aidev.ru/rust-blas
|
||||
commands:
|
||||
- cargo build --verbose --all
|
||||
|
||||
- name: fmt-check
|
||||
image: hub.aidev.ru/rust-blas
|
||||
commands:
|
||||
- cargo fmt --all -- --check
|
@ -9,3 +9,4 @@
|
||||
- ssl (libssl-dev)
|
||||
- pkg-config (pkg-config)
|
||||
- *openfst (libfst-dev)
|
||||
|
||||
|
38
build.rs
38
build.rs
@ -1,14 +1,14 @@
|
||||
use std::{env, fs, io};
|
||||
use curl::easy::Easy;
|
||||
use flate2::read::GzDecoder;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use flate2::read::GzDecoder;
|
||||
use std::{env, fs, io};
|
||||
use tar::Archive;
|
||||
use curl::easy::Easy;
|
||||
use std::io::Write;
|
||||
|
||||
const OPENFST_SRC: &str = "https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.7.tar.gz";
|
||||
|
||||
fn download<P: AsRef<Path>>(source_url: &str, target_file: P) -> anyhow::Result<()> {
|
||||
fn download<P: AsRef<Path>>(source_url: &str, target_file: P) -> anyhow::Result<()> {
|
||||
let f = fs::File::create(&target_file)?;
|
||||
let mut writer = io::BufWriter::new(f);
|
||||
let mut easy = Easy::new();
|
||||
@ -34,7 +34,7 @@ fn extract<P1: AsRef<Path>, P2: AsRef<Path>>(filename: P1, outpath: P2) -> anyho
|
||||
let tar = GzDecoder::new(file);
|
||||
let mut archive = Archive::new(tar);
|
||||
archive.unpack(outpath.as_ref())?;
|
||||
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -59,7 +59,10 @@ fn main() {
|
||||
.header("cbits/vosk.h")
|
||||
.clang_arg("-I./resources/vosk-api/src/")
|
||||
.clang_arg("-I./resources/kaldi/src/")
|
||||
.clang_arg(format!("-I{}", openfst_dir.join("src/include").to_string_lossy()))
|
||||
.clang_arg(format!(
|
||||
"-I{}",
|
||||
openfst_dir.join("src/include").to_string_lossy()
|
||||
))
|
||||
.clang_arg("-std=c++14")
|
||||
.clang_arg("-x")
|
||||
.clang_arg("c++")
|
||||
@ -98,7 +101,7 @@ fn main() {
|
||||
.file(openfst_dir.join("src/extensions/ngram/bitmap-index.cc"))
|
||||
.file(openfst_dir.join("src/extensions/ngram/nthbit.cc"))
|
||||
.compile("libopenfst");
|
||||
|
||||
|
||||
cc::Build::new()
|
||||
.flag("-mno-avx")
|
||||
.warnings(false)
|
||||
@ -127,17 +130,14 @@ fn main() {
|
||||
.static_flag(true)
|
||||
.cpp(true)
|
||||
.define("HAVE_OPENBLAS", "true")
|
||||
|
||||
.include(openfst_dir.join("src/include"))
|
||||
.include("resources/kaldi/src")
|
||||
|
||||
// base
|
||||
.file("resources/kaldi/src/base/io-funcs.cc")
|
||||
.file("resources/kaldi/src/base/kaldi-error.cc")
|
||||
.file("resources/kaldi/src/base/kaldi-math.cc")
|
||||
.file("resources/kaldi/src/base/kaldi-utils.cc")
|
||||
// .file("resources/kaldi/src/base/timer.cc")
|
||||
|
||||
// matrix
|
||||
.file("resources/kaldi/src/matrix/kaldi-matrix.cc")
|
||||
.file("resources/kaldi/src/matrix/kaldi-vector.cc")
|
||||
@ -149,7 +149,6 @@ fn main() {
|
||||
.file("resources/kaldi/src/matrix/qr.cc")
|
||||
.file("resources/kaldi/src/matrix/srfft.cc")
|
||||
.file("resources/kaldi/src/matrix/compressed-matrix.cc")
|
||||
|
||||
// cuda
|
||||
.file("resources/kaldi/src/cudamatrix/cu-matrix.cc")
|
||||
.file("resources/kaldi/src/cudamatrix/cu-allocator.cc")
|
||||
@ -160,13 +159,11 @@ fn main() {
|
||||
.file("resources/kaldi/src/cudamatrix/cu-sp-matrix.cc")
|
||||
.file("resources/kaldi/src/cudamatrix/cu-rand.cc")
|
||||
.file("resources/kaldi/src/cudamatrix/cu-math.cc")
|
||||
|
||||
// fstext
|
||||
.file("resources/kaldi/src/fstext/context-fst.cc")
|
||||
.file("resources/kaldi/src/fstext/grammar-context-fst.cc")
|
||||
.file("resources/kaldi/src/fstext/kaldi-fst-io.cc")
|
||||
.file("resources/kaldi/src/fstext/push-special.cc")
|
||||
|
||||
.file("resources/kaldi/src/fstext/push-special.cc")
|
||||
// feat
|
||||
// .file("resources/kaldi/src/feat/feature-fbank.cc")
|
||||
.file("resources/kaldi/src/feat/feature-functions.cc")
|
||||
@ -182,17 +179,14 @@ fn main() {
|
||||
// .file("resources/kaldi/src/feat/wave-reader.cc")
|
||||
.file("resources/kaldi/src/feat/feature-plp.cc")
|
||||
.file("resources/kaldi/src/feat/feature-fbank.cc")
|
||||
|
||||
// transform
|
||||
.file("resources/kaldi/src/transform/cmvn.cc")
|
||||
|
||||
// lm
|
||||
// .file("resources/kaldi/src/lm/arpa-file-parser.cc")
|
||||
// .file("resources/kaldi/src/lm/arpa-lm-compiler.cc")
|
||||
.file("resources/kaldi/src/lm/const-arpa-lm.cc")
|
||||
// .file("resources/kaldi/src/lm/kaldi-rnnlm.cc")
|
||||
// .file("resources/kaldi/src/lm/mikolov-rnnlm-lib.cc")
|
||||
|
||||
// rnnlm
|
||||
// .file("resources/kaldi/src/rnnlm/rnnlm-compute-state.cc")
|
||||
// .file("resources/kaldi/src/rnnlm/rnnlm-core-compute.cc")
|
||||
@ -206,15 +200,12 @@ fn main() {
|
||||
// .file("resources/kaldi/src/rnnlm/sampler.cc")
|
||||
// .file("resources/kaldi/src/rnnlm/sampling-lm-estimate.cc")
|
||||
// .file("resources/kaldi/src/rnnlm/sampling-lm.cc")
|
||||
|
||||
// hmm
|
||||
.file("resources/kaldi/src/hmm/transition-model.cc")
|
||||
.file("resources/kaldi/src/hmm/hmm-topology.cc")
|
||||
.file("resources/kaldi/src/hmm/posterior.cc")
|
||||
|
||||
// gmm
|
||||
.file("resources/kaldi/src/gmm/diag-gmm.cc")
|
||||
|
||||
// decoder
|
||||
// .file("resources/kaldi/src/decoder/decodable-matrix.cc")
|
||||
// .file("resources/kaldi/src/decoder/decoder-wrappers.cc")
|
||||
@ -227,7 +218,6 @@ fn main() {
|
||||
// .file("resources/kaldi/src/decoder/lattice-simple-decoder.cc")
|
||||
// .file("resources/kaldi/src/decoder/simple-decoder.cc")
|
||||
// .file("resources/kaldi/src/decoder/training-graph-compiler.cc")
|
||||
|
||||
// nnet3
|
||||
.file("resources/kaldi/src/nnet3/am-nnet-simple.cc")
|
||||
.file("resources/kaldi/src/nnet3/attention.cc")
|
||||
@ -274,7 +264,6 @@ fn main() {
|
||||
.file("resources/kaldi/src/nnet3/nnet-tdnn-component.cc")
|
||||
// .file("resources/kaldi/src/nnet3/nnet-training.cc")
|
||||
.file("resources/kaldi/src/nnet3/nnet-utils.cc")
|
||||
|
||||
// lat
|
||||
// .file("resources/kaldi/src/lat/compose-lattice-pruned.cc")
|
||||
// .file("resources/kaldi/src/lat/confidence.cc")
|
||||
@ -287,7 +276,6 @@ fn main() {
|
||||
.file("resources/kaldi/src/lat/sausages.cc")
|
||||
.file("resources/kaldi/src/lat/word-align-lattice-lexicon.cc")
|
||||
.file("resources/kaldi/src/lat/word-align-lattice.cc")
|
||||
|
||||
// util
|
||||
.file("resources/kaldi/src/util/kaldi-holder.cc")
|
||||
.file("resources/kaldi/src/util/kaldi-io.cc")
|
||||
@ -298,10 +286,8 @@ fn main() {
|
||||
.file("resources/kaldi/src/util/simple-io-funcs.cc")
|
||||
.file("resources/kaldi/src/util/simple-options.cc")
|
||||
.file("resources/kaldi/src/util/text-utils.cc")
|
||||
|
||||
//ivector
|
||||
.file("resources/kaldi/src/ivector/ivector-extractor.cc")
|
||||
|
||||
// online2
|
||||
.file("resources/kaldi/src/online2/online-endpoint.cc")
|
||||
.file("resources/kaldi/src/online2/online-feature-pipeline.cc")
|
||||
|
@ -6,26 +6,30 @@ use audrey::sample::signal::{from_iter, Signal};
|
||||
|
||||
use std::fs::File;
|
||||
|
||||
const SAMPLE_RATE: u32 = 16000;
|
||||
const SAMPLE_RATE: u32 = 16000;
|
||||
|
||||
pub fn main() {
|
||||
let audio_file_path = std::env::args().nth(1)
|
||||
let audio_file_path = std::env::args()
|
||||
.nth(1)
|
||||
.expect("Please specify an audio file to run STT on");
|
||||
|
||||
let mut reader = audrey::open(audio_file_path).unwrap();
|
||||
let desc = reader.description();
|
||||
assert_eq!(1, desc.channel_count(),
|
||||
"The channel count is required to be one, at least for now");
|
||||
let desc = reader.description();
|
||||
assert_eq!(
|
||||
1,
|
||||
desc.channel_count(),
|
||||
"The channel count is required to be one, at least for now"
|
||||
);
|
||||
|
||||
let model = VoskModel::new("./models/en-small");
|
||||
let mut sess = model.create_session(Default::default());
|
||||
|
||||
|
||||
let mut buff: Vec<i16> = Vec::with_capacity(1600);
|
||||
let mut samples_reader = reader.samples();
|
||||
|
||||
loop {
|
||||
buff.clear();
|
||||
|
||||
|
||||
while let Some(s) = samples_reader.next() {
|
||||
buff.push(s.unwrap());
|
||||
if buff.len() >= 16000 {
|
||||
@ -62,12 +66,8 @@ pub fn main() {
|
||||
// conv.until_exhausted().map(|v| v[0]).collect()
|
||||
// };
|
||||
|
||||
|
||||
|
||||
|
||||
// audio_buf
|
||||
|
||||
|
||||
// FILE *wavin;
|
||||
// char buf[3200];
|
||||
// int nread, final;
|
||||
@ -91,5 +91,4 @@ pub fn main() {
|
||||
// vosk_recognizer_free(recognizer);
|
||||
// vosk_model_free(model);
|
||||
// return 0;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ mod ffi {
|
||||
#![allow(non_camel_case_types)]
|
||||
#![allow(non_snake_case)]
|
||||
#![allow(dead_code)]
|
||||
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
|
||||
}
|
||||
|
||||
@ -22,4 +22,4 @@ mod transcription;
|
||||
pub use model::VoskModel;
|
||||
pub use session::{VoskSession, VoskSessionConfig, VoskSessionConfigBuilder};
|
||||
pub use speaker::SpeakerModel;
|
||||
pub use transcription::{TranscriptionResult, TranscriptionPartialResult, TranscriptionWord};
|
||||
pub use transcription::{TranscriptionPartialResult, TranscriptionResult, TranscriptionWord};
|
||||
|
12
src/model.rs
12
src/model.rs
@ -1,5 +1,5 @@
|
||||
use std::path::Path;
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::ffi;
|
||||
use crate::session::{VoskSession, VoskSessionConfig};
|
||||
@ -10,7 +10,9 @@ pub struct VoskModel {
|
||||
|
||||
impl VoskModel {
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> Self {
|
||||
let root = unsafe { CString::from_vec_unchecked(root.as_ref().to_string_lossy().as_bytes().to_vec()) };
|
||||
let root = unsafe {
|
||||
CString::from_vec_unchecked(root.as_ref().to_string_lossy().as_bytes().to_vec())
|
||||
};
|
||||
|
||||
Self {
|
||||
inner: unsafe { ffi::vosk_model_new(root.as_c_str().as_ptr()) },
|
||||
@ -24,7 +26,9 @@ impl VoskModel {
|
||||
|
||||
#[inline]
|
||||
pub fn feed(&self, sess: &mut VoskSession, data: &[i16]) -> bool {
|
||||
unsafe { ffi::vosk_recognizer_accept_waveform_s(sess.inner, data.as_ptr(), data.len() as _) == 1 }
|
||||
unsafe {
|
||||
ffi::vosk_recognizer_accept_waveform_s(sess.inner, data.as_ptr(), data.len() as _) == 1
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -53,4 +57,4 @@ impl Drop for VoskModel {
|
||||
fn drop(&mut self) {
|
||||
unsafe { ffi::vosk_model_free(self.inner) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::ffi;
|
||||
use std::ffi::CString;
|
||||
use std::path::{PathBuf};
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub struct VoskSessionConfigBuilder {
|
||||
spk_root: Option<CString>,
|
||||
@ -18,7 +18,9 @@ impl VoskSessionConfigBuilder {
|
||||
}
|
||||
|
||||
pub fn spk_root<P: Into<PathBuf>>(&mut self, root: P) -> &mut Self {
|
||||
self.spk_root = Some(unsafe { CString::from_vec_unchecked(root.into().to_string_lossy().as_bytes().to_vec()) });
|
||||
self.spk_root = Some(unsafe {
|
||||
CString::from_vec_unchecked(root.into().to_string_lossy().as_bytes().to_vec())
|
||||
});
|
||||
self
|
||||
}
|
||||
|
||||
@ -52,7 +54,7 @@ impl Default for VoskSessionConfig {
|
||||
Self {
|
||||
spk_root: None,
|
||||
grammar: None,
|
||||
freq: 16000.0
|
||||
freq: 16000.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -79,7 +81,7 @@ impl VoskSessionConfig {
|
||||
}
|
||||
|
||||
pub struct VoskSession {
|
||||
pub(crate) inner: *mut ffi::VoskRecognizer
|
||||
pub(crate) inner: *mut ffi::VoskRecognizer,
|
||||
}
|
||||
|
||||
impl VoskSession {
|
||||
@ -96,7 +98,7 @@ impl VoskSession {
|
||||
// }
|
||||
} else {
|
||||
VoskSession {
|
||||
inner: unsafe { ffi::vosk_recognizer_new(model as *mut ffi::VoskModel, cfg.freq) }
|
||||
inner: unsafe { ffi::vosk_recognizer_new(model as *mut ffi::VoskModel, cfg.freq) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,18 @@
|
||||
use crate::ffi;
|
||||
use std::path::Path;
|
||||
use std::ffi::CString;
|
||||
use std::path::Path;
|
||||
|
||||
pub struct SpeakerModel {
|
||||
pub(crate) inner: *mut ffi::VoskSpkModel
|
||||
pub(crate) inner: *mut ffi::VoskSpkModel,
|
||||
}
|
||||
|
||||
impl SpeakerModel {
|
||||
pub fn new(root: &Path) -> Self {
|
||||
let root = unsafe { CString::from_vec_unchecked(root.to_string_lossy().as_bytes().to_vec()) };
|
||||
let root =
|
||||
unsafe { CString::from_vec_unchecked(root.to_string_lossy().as_bytes().to_vec()) };
|
||||
|
||||
Self {
|
||||
inner: unsafe { ffi::vosk_spk_model_new(root.as_c_str().as_ptr()) }
|
||||
inner: unsafe { ffi::vosk_spk_model_new(root.as_c_str().as_ptr()) },
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -20,4 +21,4 @@ impl Drop for SpeakerModel {
|
||||
fn drop(&mut self) {
|
||||
unsafe { ffi::vosk_spk_model_free(self.inner) }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
use serde::{Serialize, Deserialize};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct TranscriptionWord {
|
||||
@ -18,4 +18,4 @@ pub struct TranscriptionResult {
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct TranscriptionPartialResult {
|
||||
pub partial: String,
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user