diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..9f085bb --- /dev/null +++ b/.drone.yml @@ -0,0 +1,18 @@ +kind: pipeline +name: default + +steps: +- name: submodules + image: hub.aidev.ru/rust-blas + commands: + - git submodule update --init --recursive + +- name: build + image: hub.aidev.ru/rust-blas + commands: + - cargo build --verbose --all + +- name: fmt-check + image: hub.aidev.ru/rust-blas + commands: + - cargo fmt --all -- --check diff --git a/README.md b/README.md index 50664bd..c67b782 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,4 @@ - ssl (libssl-dev) - pkg-config (pkg-config) - *openfst (libfst-dev) + diff --git a/build.rs b/build.rs index 921cd27..4307930 100644 --- a/build.rs +++ b/build.rs @@ -1,14 +1,14 @@ -use std::{env, fs, io}; +use curl::easy::Easy; +use flate2::read::GzDecoder; +use std::io::Write; use std::path::{Path, PathBuf}; use std::process::Command; -use flate2::read::GzDecoder; +use std::{env, fs, io}; use tar::Archive; -use curl::easy::Easy; -use std::io::Write; const OPENFST_SRC: &str = "https://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.7.tar.gz"; -fn download>(source_url: &str, target_file: P) -> anyhow::Result<()> { +fn download>(source_url: &str, target_file: P) -> anyhow::Result<()> { let f = fs::File::create(&target_file)?; let mut writer = io::BufWriter::new(f); let mut easy = Easy::new(); @@ -34,7 +34,7 @@ fn extract, P2: AsRef>(filename: P1, outpath: P2) -> anyho let tar = GzDecoder::new(file); let mut archive = Archive::new(tar); archive.unpack(outpath.as_ref())?; - + Ok(()) } @@ -59,7 +59,10 @@ fn main() { .header("cbits/vosk.h") .clang_arg("-I./resources/vosk-api/src/") .clang_arg("-I./resources/kaldi/src/") - .clang_arg(format!("-I{}", openfst_dir.join("src/include").to_string_lossy())) + .clang_arg(format!( + "-I{}", + openfst_dir.join("src/include").to_string_lossy() + )) .clang_arg("-std=c++14") .clang_arg("-x") .clang_arg("c++") @@ -98,7 +101,7 @@ fn main() { .file(openfst_dir.join("src/extensions/ngram/bitmap-index.cc")) .file(openfst_dir.join("src/extensions/ngram/nthbit.cc")) .compile("libopenfst"); - + cc::Build::new() .flag("-mno-avx") .warnings(false) @@ -127,17 +130,14 @@ fn main() { .static_flag(true) .cpp(true) .define("HAVE_OPENBLAS", "true") - .include(openfst_dir.join("src/include")) .include("resources/kaldi/src") - // base .file("resources/kaldi/src/base/io-funcs.cc") .file("resources/kaldi/src/base/kaldi-error.cc") .file("resources/kaldi/src/base/kaldi-math.cc") .file("resources/kaldi/src/base/kaldi-utils.cc") // .file("resources/kaldi/src/base/timer.cc") - // matrix .file("resources/kaldi/src/matrix/kaldi-matrix.cc") .file("resources/kaldi/src/matrix/kaldi-vector.cc") @@ -149,7 +149,6 @@ fn main() { .file("resources/kaldi/src/matrix/qr.cc") .file("resources/kaldi/src/matrix/srfft.cc") .file("resources/kaldi/src/matrix/compressed-matrix.cc") - // cuda .file("resources/kaldi/src/cudamatrix/cu-matrix.cc") .file("resources/kaldi/src/cudamatrix/cu-allocator.cc") @@ -160,13 +159,11 @@ fn main() { .file("resources/kaldi/src/cudamatrix/cu-sp-matrix.cc") .file("resources/kaldi/src/cudamatrix/cu-rand.cc") .file("resources/kaldi/src/cudamatrix/cu-math.cc") - // fstext .file("resources/kaldi/src/fstext/context-fst.cc") .file("resources/kaldi/src/fstext/grammar-context-fst.cc") .file("resources/kaldi/src/fstext/kaldi-fst-io.cc") - .file("resources/kaldi/src/fstext/push-special.cc") - + .file("resources/kaldi/src/fstext/push-special.cc") // feat // .file("resources/kaldi/src/feat/feature-fbank.cc") .file("resources/kaldi/src/feat/feature-functions.cc") @@ -182,17 +179,14 @@ fn main() { // .file("resources/kaldi/src/feat/wave-reader.cc") .file("resources/kaldi/src/feat/feature-plp.cc") .file("resources/kaldi/src/feat/feature-fbank.cc") - // transform .file("resources/kaldi/src/transform/cmvn.cc") - // lm // .file("resources/kaldi/src/lm/arpa-file-parser.cc") // .file("resources/kaldi/src/lm/arpa-lm-compiler.cc") .file("resources/kaldi/src/lm/const-arpa-lm.cc") // .file("resources/kaldi/src/lm/kaldi-rnnlm.cc") // .file("resources/kaldi/src/lm/mikolov-rnnlm-lib.cc") - // rnnlm // .file("resources/kaldi/src/rnnlm/rnnlm-compute-state.cc") // .file("resources/kaldi/src/rnnlm/rnnlm-core-compute.cc") @@ -206,15 +200,12 @@ fn main() { // .file("resources/kaldi/src/rnnlm/sampler.cc") // .file("resources/kaldi/src/rnnlm/sampling-lm-estimate.cc") // .file("resources/kaldi/src/rnnlm/sampling-lm.cc") - // hmm .file("resources/kaldi/src/hmm/transition-model.cc") .file("resources/kaldi/src/hmm/hmm-topology.cc") .file("resources/kaldi/src/hmm/posterior.cc") - // gmm .file("resources/kaldi/src/gmm/diag-gmm.cc") - // decoder // .file("resources/kaldi/src/decoder/decodable-matrix.cc") // .file("resources/kaldi/src/decoder/decoder-wrappers.cc") @@ -227,7 +218,6 @@ fn main() { // .file("resources/kaldi/src/decoder/lattice-simple-decoder.cc") // .file("resources/kaldi/src/decoder/simple-decoder.cc") // .file("resources/kaldi/src/decoder/training-graph-compiler.cc") - // nnet3 .file("resources/kaldi/src/nnet3/am-nnet-simple.cc") .file("resources/kaldi/src/nnet3/attention.cc") @@ -274,7 +264,6 @@ fn main() { .file("resources/kaldi/src/nnet3/nnet-tdnn-component.cc") // .file("resources/kaldi/src/nnet3/nnet-training.cc") .file("resources/kaldi/src/nnet3/nnet-utils.cc") - // lat // .file("resources/kaldi/src/lat/compose-lattice-pruned.cc") // .file("resources/kaldi/src/lat/confidence.cc") @@ -287,7 +276,6 @@ fn main() { .file("resources/kaldi/src/lat/sausages.cc") .file("resources/kaldi/src/lat/word-align-lattice-lexicon.cc") .file("resources/kaldi/src/lat/word-align-lattice.cc") - // util .file("resources/kaldi/src/util/kaldi-holder.cc") .file("resources/kaldi/src/util/kaldi-io.cc") @@ -298,10 +286,8 @@ fn main() { .file("resources/kaldi/src/util/simple-io-funcs.cc") .file("resources/kaldi/src/util/simple-options.cc") .file("resources/kaldi/src/util/text-utils.cc") - //ivector .file("resources/kaldi/src/ivector/ivector-extractor.cc") - // online2 .file("resources/kaldi/src/online2/online-endpoint.cc") .file("resources/kaldi/src/online2/online-feature-pipeline.cc") diff --git a/examples/demo.rs b/examples/demo.rs index 1932312..fc1e706 100644 --- a/examples/demo.rs +++ b/examples/demo.rs @@ -6,26 +6,30 @@ use audrey::sample::signal::{from_iter, Signal}; use std::fs::File; -const SAMPLE_RATE: u32 = 16000; +const SAMPLE_RATE: u32 = 16000; pub fn main() { - let audio_file_path = std::env::args().nth(1) + let audio_file_path = std::env::args() + .nth(1) .expect("Please specify an audio file to run STT on"); let mut reader = audrey::open(audio_file_path).unwrap(); - let desc = reader.description(); - assert_eq!(1, desc.channel_count(), - "The channel count is required to be one, at least for now"); + let desc = reader.description(); + assert_eq!( + 1, + desc.channel_count(), + "The channel count is required to be one, at least for now" + ); let model = VoskModel::new("./models/en-small"); let mut sess = model.create_session(Default::default()); - + let mut buff: Vec = Vec::with_capacity(1600); let mut samples_reader = reader.samples(); loop { buff.clear(); - + while let Some(s) = samples_reader.next() { buff.push(s.unwrap()); if buff.len() >= 16000 { @@ -62,12 +66,8 @@ pub fn main() { // conv.until_exhausted().map(|v| v[0]).collect() // }; - - - // audio_buf - // FILE *wavin; // char buf[3200]; // int nread, final; @@ -91,5 +91,4 @@ pub fn main() { // vosk_recognizer_free(recognizer); // vosk_model_free(model); // return 0; - -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 6163db0..05c16ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ mod ffi { #![allow(non_camel_case_types)] #![allow(non_snake_case)] #![allow(dead_code)] - + include!(concat!(env!("OUT_DIR"), "/bindings.rs")); } @@ -22,4 +22,4 @@ mod transcription; pub use model::VoskModel; pub use session::{VoskSession, VoskSessionConfig, VoskSessionConfigBuilder}; pub use speaker::SpeakerModel; -pub use transcription::{TranscriptionResult, TranscriptionPartialResult, TranscriptionWord}; \ No newline at end of file +pub use transcription::{TranscriptionPartialResult, TranscriptionResult, TranscriptionWord}; diff --git a/src/model.rs b/src/model.rs index 0efd0a6..d0cf4c9 100644 --- a/src/model.rs +++ b/src/model.rs @@ -1,5 +1,5 @@ -use std::path::Path; use std::ffi::{CStr, CString}; +use std::path::Path; use crate::ffi; use crate::session::{VoskSession, VoskSessionConfig}; @@ -10,7 +10,9 @@ pub struct VoskModel { impl VoskModel { pub fn new>(root: P) -> Self { - let root = unsafe { CString::from_vec_unchecked(root.as_ref().to_string_lossy().as_bytes().to_vec()) }; + let root = unsafe { + CString::from_vec_unchecked(root.as_ref().to_string_lossy().as_bytes().to_vec()) + }; Self { inner: unsafe { ffi::vosk_model_new(root.as_c_str().as_ptr()) }, @@ -24,7 +26,9 @@ impl VoskModel { #[inline] pub fn feed(&self, sess: &mut VoskSession, data: &[i16]) -> bool { - unsafe { ffi::vosk_recognizer_accept_waveform_s(sess.inner, data.as_ptr(), data.len() as _) == 1 } + unsafe { + ffi::vosk_recognizer_accept_waveform_s(sess.inner, data.as_ptr(), data.len() as _) == 1 + } } #[inline] @@ -53,4 +57,4 @@ impl Drop for VoskModel { fn drop(&mut self) { unsafe { ffi::vosk_model_free(self.inner) } } -} \ No newline at end of file +} diff --git a/src/session.rs b/src/session.rs index 81cf3d1..39137cc 100644 --- a/src/session.rs +++ b/src/session.rs @@ -1,6 +1,6 @@ use crate::ffi; use std::ffi::CString; -use std::path::{PathBuf}; +use std::path::PathBuf; pub struct VoskSessionConfigBuilder { spk_root: Option, @@ -18,7 +18,9 @@ impl VoskSessionConfigBuilder { } pub fn spk_root>(&mut self, root: P) -> &mut Self { - self.spk_root = Some(unsafe { CString::from_vec_unchecked(root.into().to_string_lossy().as_bytes().to_vec()) }); + self.spk_root = Some(unsafe { + CString::from_vec_unchecked(root.into().to_string_lossy().as_bytes().to_vec()) + }); self } @@ -52,7 +54,7 @@ impl Default for VoskSessionConfig { Self { spk_root: None, grammar: None, - freq: 16000.0 + freq: 16000.0, } } } @@ -79,7 +81,7 @@ impl VoskSessionConfig { } pub struct VoskSession { - pub(crate) inner: *mut ffi::VoskRecognizer + pub(crate) inner: *mut ffi::VoskRecognizer, } impl VoskSession { @@ -96,7 +98,7 @@ impl VoskSession { // } } else { VoskSession { - inner: unsafe { ffi::vosk_recognizer_new(model as *mut ffi::VoskModel, cfg.freq) } + inner: unsafe { ffi::vosk_recognizer_new(model as *mut ffi::VoskModel, cfg.freq) }, } } } diff --git a/src/speaker.rs b/src/speaker.rs index 5c9f3a0..48e26da 100644 --- a/src/speaker.rs +++ b/src/speaker.rs @@ -1,17 +1,18 @@ use crate::ffi; -use std::path::Path; use std::ffi::CString; +use std::path::Path; pub struct SpeakerModel { - pub(crate) inner: *mut ffi::VoskSpkModel + pub(crate) inner: *mut ffi::VoskSpkModel, } impl SpeakerModel { pub fn new(root: &Path) -> Self { - let root = unsafe { CString::from_vec_unchecked(root.to_string_lossy().as_bytes().to_vec()) }; + let root = + unsafe { CString::from_vec_unchecked(root.to_string_lossy().as_bytes().to_vec()) }; Self { - inner: unsafe { ffi::vosk_spk_model_new(root.as_c_str().as_ptr()) } + inner: unsafe { ffi::vosk_spk_model_new(root.as_c_str().as_ptr()) }, } } } @@ -20,4 +21,4 @@ impl Drop for SpeakerModel { fn drop(&mut self) { unsafe { ffi::vosk_spk_model_free(self.inner) } } -} \ No newline at end of file +} diff --git a/src/transcription.rs b/src/transcription.rs index 29e1c30..b342334 100644 --- a/src/transcription.rs +++ b/src/transcription.rs @@ -1,4 +1,4 @@ -use serde::{Serialize, Deserialize}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct TranscriptionWord { @@ -18,4 +18,4 @@ pub struct TranscriptionResult { #[derive(Serialize, Deserialize, Debug, Clone)] pub struct TranscriptionPartialResult { pub partial: String, -} \ No newline at end of file +}