From 765a1eccfa840d78ccf81b789e7f3e3840c50bd8 Mon Sep 17 00:00:00 2001 From: Andrey Tkachenko Date: Wed, 17 Jul 2019 18:08:06 +0400 Subject: [PATCH] NativeBlas --- Cargo.lock | 74 ++++- Cargo.toml | 2 +- yarnn-examples/mnist/Cargo.toml | 3 +- yarnn-examples/mnist/src/main.rs | 15 +- yarnn-examples/vgg16-demo/src/main.rs | 2 +- .../src}/img2col.rs | 0 yarnn-native-blas/src/lib.rs | 291 +++++++++++++++--- yarnn/src/lib.rs | 2 +- yarnn/src/native/conv2d/mod.rs | 2 - yarnn/src/native/mod.rs | 187 ++++++----- 10 files changed, 454 insertions(+), 124 deletions(-) rename {yarnn/src/native/conv2d => yarnn-native-blas/src}/img2col.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 17d52f4..b81e831 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,6 +25,24 @@ dependencies = [ "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "blas" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "blas-sys 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", + "num-complex 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "blas-sys" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "bumpalo" version = "2.5.0" @@ -44,6 +62,24 @@ dependencies = [ "ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "cblas" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", + "num-complex 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "cblas-sys" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.59 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "cc" version = "1.0.37" @@ -142,6 +178,28 @@ dependencies = [ "version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "num-complex" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-traits" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "openblas-src" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "ppv-lite86" version = "0.2.5" @@ -358,6 +416,7 @@ dependencies = [ "mnist 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "yarnn 0.1.0", "yarnn-model-mnist 0.1.0", + "yarnn-native-blas 0.1.0", ] [[package]] @@ -392,17 +451,27 @@ dependencies = [ "yarnn 0.1.0", ] -[[patch.unused]] +[[package]] name = "yarnn-native-blas" version = "0.1.0" +dependencies = [ + "blas 0.20.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cblas 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "openblas-src 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)", + "yarnn 0.1.0", +] [metadata] "checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf" "checksum backtrace 0.3.32 (registry+https://github.com/rust-lang/crates.io-index)" = "18b50f5258d1a9ad8396d2d345827875de4261b158124d4c819d9b351454fae5" "checksum backtrace-sys 0.1.30 (registry+https://github.com/rust-lang/crates.io-index)" = "5b3a000b9c543553af61bc01cbfc403b04b5caa9e421033866f2e98061eb3e61" +"checksum blas 0.20.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e4b3b6399f2fe6e70f780b06e278a64770aae273e0005c3860f94fc2f5f01ba7" +"checksum blas-sys 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "13b1b279ceb25d7c4faaea95a5f7addbe7d8c34f9462044bd8e630cebcfc2440" "checksum bumpalo 2.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2cd43d82f27d68911e6ee11ee791fb248f138f5d69424dc02e098d4f152b0b05" "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101" +"checksum cblas 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d82f331add33eceb4c41cb28d878049b96f56577016daf190831e94e4aece5db" +"checksum cblas-sys 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b6feecd82cce51b0204cf063f0041d69f24ce83f680d87514b004248e7b0fa65" "checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d" "checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" "checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2" @@ -416,6 +485,9 @@ version = "0.1.0" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" "checksum mnist 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "25f19bfda80095b4294000bbb50506f028149ed0ddb7fabf46ebb673b91626bc" "checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +"checksum num-complex 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fcb0cf31fb3ff77e6d2a6ebd6800df7fdcd106f2ad89113c9130bcd07f93dffc" +"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" +"checksum openblas-src 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3533e568814bee9620fcc529158408384404bae5b277c73c73d66ca03fceb7" "checksum ppv-lite86 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "e3cbf9f658cdb5000fcf6f362b8ea2ba154b9f146a61c7a20d647034c6b6561b" "checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" "checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" diff --git a/Cargo.toml b/Cargo.toml index 11884b0..b984418 100644 --- a/Cargo.toml +++ b/Cargo.toml 
@@ -2,7 +2,7 @@ members = [ "yarnn", - # "yarnn-native-blas", + "yarnn-native-blas", "yarnn-models/mnist", "yarnn-models/vgg16", diff --git a/yarnn-examples/mnist/Cargo.toml b/yarnn-examples/mnist/Cargo.toml index 6f3cd42..1fc8c17 100644 --- a/yarnn-examples/mnist/Cargo.toml +++ b/yarnn-examples/mnist/Cargo.toml @@ -7,4 +7,5 @@ edition = "2018" [dependencies] mnist = "0.4.0" yarnn = "0.1.0" -yarnn-model-mnist = "0.1.0" \ No newline at end of file +yarnn-model-mnist = "0.1.0" +yarnn-native-blas = "0.1.0" \ No newline at end of file diff --git a/yarnn-examples/mnist/src/main.rs b/yarnn-examples/mnist/src/main.rs index 6bf9fe7..363eb70 100644 --- a/yarnn-examples/mnist/src/main.rs +++ b/yarnn-examples/mnist/src/main.rs @@ -1,8 +1,9 @@ use yarnn::prelude::*; -use yarnn::native::{Native, NativeTensorF32}; +use yarnn::native::{Native, NativeTensor}; use yarnn_model_mnist::*; use yarnn::losses::CrossEntropyLoss; use yarnn::optimizers::Adam; +use yarnn_native_blas::NativeBlas; use mnist::{Mnist, MnistBuilder}; fn calc_accuracy>(back: &B, pred: &B::Tensor, targets: &[u8]) -> f32 { @@ -38,7 +39,7 @@ fn calc_accuracy>(back: &B, pred: &B::Tensor, targets: &[u8]) - fn main() { const BATCH_SIZE: usize = 64; - let backend = Native; + let backend: NativeBlas> = Default::default(); let optimizer = Adam::default(); // let mut model = MnistDenseModel::new(28, 28, 1); @@ -57,14 +58,14 @@ fn main() { .label_format_digit() .finalize(); - let mut inputs = NativeTensorF32::new((BATCH_SIZE as u32, 1, 28, 28)); - let mut targets = NativeTensorF32::new((BATCH_SIZE as u32, 10)); - let mut deltas = NativeTensorF32::new((BATCH_SIZE as u32, 10)); + let mut inputs = NativeTensor::new((BATCH_SIZE as u32, 1, 28, 28)); + let mut targets = NativeTensor::new((BATCH_SIZE as u32, 10)); + let mut deltas = NativeTensor::new((BATCH_SIZE as u32, 10)); let test_count = 1000; - let mut inputs0 = NativeTensorF32::new((test_count as u32, 1, 28, 28)); - let mut targets0 = NativeTensorF32::new((test_count as u32, 10)); + let mut inputs0 = NativeTensor::new((test_count as u32, 1, 28, 28)); + let mut targets0 = NativeTensor::new((test_count as u32, 10)); let mut tmp = vec![0u8; 10 * test_count]; diff --git a/yarnn-examples/vgg16-demo/src/main.rs b/yarnn-examples/vgg16-demo/src/main.rs index a0100c5..ee22b7b 100644 --- a/yarnn-examples/vgg16-demo/src/main.rs +++ b/yarnn-examples/vgg16-demo/src/main.rs @@ -4,7 +4,7 @@ use yarnn_model_vgg16::Vgg16Model; fn main() { - let vgg16: Vgg16Model> = Vgg16Model::new(224, 224, 3); + let vgg16: Vgg16Model, Adam<_, _>> = Vgg16Model::new(224, 224, 3); println!("{}", vgg16); } diff --git a/yarnn/src/native/conv2d/img2col.rs b/yarnn-native-blas/src/img2col.rs similarity index 100% rename from yarnn/src/native/conv2d/img2col.rs rename to yarnn-native-blas/src/img2col.rs diff --git a/yarnn-native-blas/src/lib.rs b/yarnn-native-blas/src/lib.rs index 862c06c..c38faba 100644 --- a/yarnn-native-blas/src/lib.rs +++ b/yarnn-native-blas/src/lib.rs @@ -1,56 +1,105 @@ +mod img2col; + use yarnn::backend::*; +use yarnn::native::*; +use yarnn::tensor::*; +use std::marker::PhantomData; extern crate openblas_src; -pub struct NativeBlas { - inner: B, -} - -impl Native for NativeBlas {} - -impl NativeBlas - where N: NativeTensor, +pub struct NativeBlas + where N: NativeNumber, B: NativeBackend { - pub fn new(native: B) -> Self { + inner: B, + _m: PhantomData +} + +impl Default for NativeBlas + where N: NativeNumber, + B: NativeBackend +{ + fn default() -> Self { Self { - inner: native + inner: Default::default(), + _m: 
Default::default(), + } + } +} + +impl NativeBackend for NativeBlas + where N: NativeNumber, + B: NativeBackend +{ + #[inline] + fn read_tensor<'a>(&self, t: &'a Self::Tensor) -> &'a [N] { + self.inner.read_tensor(t) + } + + #[inline] + fn write_tensor<'a>(&self, t: &'a mut Self::Tensor) -> &'a mut [N] { + self.inner.write_tensor(t) + } +} + +impl NativeBlas + where N: NativeNumber, + B: NativeBackend +{ + pub fn new(inner: B) -> Self { + Self { + inner, + _m: Default::default() } } } impl Backend for NativeBlas - where N: NativeTensor, + where N: NativeNumber, B: NativeBackend { type Tensor = B::Tensor; + #[inline] fn store_tensor_f32(&self, t: &Self::Tensor, data: &mut [f32]) { - + self.inner.store_tensor_f32(t, data) } + + #[inline] fn load_tensor_u8(&self, t: &mut Self::Tensor, data: &[u8]) { - + self.inner.load_tensor_u8(t, data) } + + #[inline] fn load_tensor_f32(&self, t: &mut Self::Tensor, data: &[f32]) { - + self.inner.load_tensor_f32(t, data) } + + #[inline] fn scalar_f32(&self, val: f32) -> N { - + N::from_f32(val) } + + #[inline] fn fill_scalar(&self, t: &mut Self::Tensor, scalar: N) { - + self.inner.fill_scalar(t, scalar) } + + #[inline] fn fill_random(&self, t: &mut Self::Tensor, from: N, to: N) { - + self.inner.fill_random(t, from, to) } - fn print_tensor(&self, t: &Self::Tensor) { + #[inline] + fn print_tensor(&self, t: &Self::Tensor) { + self.inner.print_tensor(t) } } impl BackendGemm for NativeBlas where B: NativeBackend { + #[inline] fn matmul(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) { let a_shape = a.shape(); let b_shape = b.shape(); @@ -62,21 +111,22 @@ impl BackendGemm for NativeBlas assert_eq!(a_shape.dims, 2); assert_eq!(b_shape.dims, 2); - let m = a_shape.get(0) as usize; - let n = b_shape.get(1) as usize; - let k = b_shape.get(0) as usize; + let m = a_shape.get(0) as i32; + let n = b_shape.get(1) as i32; + let k = b_shape.get(0) as i32; unsafe { blas::sgemm('N' as u8, 'N' as u8, n, m, k, 1.0, - b.read(), n, - a.read(), k, + self.read_tensor(b), n, + self.read_tensor(a), k, 0.0, - &mut dst.write(), n); + self.write_tensor(dst), n); } } + #[inline] fn matmul_nt(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) { let a_shape = a.shape(); let b_shape = b.shape(); @@ -88,21 +138,22 @@ impl BackendGemm for NativeBlas assert_eq!(a_shape.dims, 2); assert_eq!(b_shape.dims, 2); - let m = a_shape.get(0) as usize; - let n = b_shape.get(0) as usize; - let k = b_shape.get(1) as usize; + let m = a_shape.get(0) as i32; + let n = b_shape.get(0) as i32; + let k = b_shape.get(1) as i32; unsafe { blas::sgemm('T' as u8, 'N' as u8, n, m, k, 1.0, - b.read(), k, - a.read(), k, + self.read_tensor(b), k, + self.read_tensor(a), k, 0.0, - &mut dst.write(), n); + self.write_tensor(dst), n); } } + #[inline] fn matmul_tn(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) { let a_shape = a.shape(); let b_shape = b.shape(); @@ -114,21 +165,22 @@ impl BackendGemm for NativeBlas assert_eq!(a_shape.dims, 2); assert_eq!(b_shape.dims, 2); - let m = a_shape.get(1) as usize; - let n = b_shape.get(1) as usize; - let k = b_shape.get(0) as usize; + let m = a_shape.get(1) as i32; + let n = b_shape.get(1) as i32; + let k = b_shape.get(0) as i32; unsafe { blas::sgemm('N' as u8, 'T' as u8, n, m, k, 1.0, - b.read(), n, - a.read(), m, + self.read_tensor(b), n, + self.read_tensor(a), m, 0.0, - &mut dst.write(), n); + self.write_tensor(dst), n); } } + #[inline] fn matmul_tt(&self, _dst: &mut Self::Tensor, _a: &Self::Tensor, _b: &Self::Tensor) { 
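+        // Note: the sgemm calls above handle the row-major tensor layout with a
+        // column-major Fortran BLAS by swapping the operand order, computing
+        // row-major C = op(A) * op(B) as column-major C^T = op(B)^T * op(A)^T,
+        // so no explicit transposition or copying is needed.
+        // The doubly-transposed case is left unimplemented here.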
unimplemented!(); } @@ -137,6 +189,7 @@ impl BackendGemm for NativeBlas impl BackendAxpy for NativeBlas where B: NativeBackend { + #[inline] fn axpy(&self, dst: &mut Self::Tensor, scale: f32, x: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -146,9 +199,9 @@ impl BackendAxpy for NativeBlas blas::saxpy( dst_size as i32, scale, - x.read(), + self.read_tensor(x), 1, - dst.write(), + self.write_tensor(dst), 1 ); } @@ -158,6 +211,7 @@ impl BackendAxpy for NativeBlas impl BackendScale for NativeBlas where B: NativeBackend { + #[inline] fn scale(&self, dst: &mut Self::Tensor, scale: f32) { let dst_size = dst.shape().size(); @@ -165,9 +219,166 @@ impl BackendScale for NativeBlas blas::sscal( dst_size as i32, scale, - dst.write(), + self.write_tensor(dst), 1 ); } } -} \ No newline at end of file +} + + +impl + BackendSigmoid> BackendSigmoid for NativeBlas { + #[inline] + fn sigmoid(&self, dst: &mut Self::Tensor, data: &Self::Tensor) { + self.inner.sigmoid(dst, data) + } + + #[inline] + fn sigmoid_grad(&self, dst: &mut Self::Tensor, z: &Self::Tensor, d: &Self::Tensor) { + self.inner.sigmoid_grad(dst, z, d) + } +} + +impl + BackendReLu> BackendReLu for NativeBlas { + #[inline] + fn relu(&self, dst: &mut Self::Tensor, data: &Self::Tensor) { + self.inner.relu(dst, data) + } + + #[inline] + fn relu_grad(&self, dst: &mut Self::Tensor, z: &Self::Tensor, d: &Self::Tensor) { + self.inner.relu_grad(dst, z, d) + } +} + +impl + BackendBias> BackendBias for NativeBlas { + #[inline] + fn bias_add(&self, dst: &mut Self::Tensor, biases: &Self::Tensor) { + self.inner.bias_add(dst, biases) + } + + #[inline] + fn bias_grad(&self, dbiases: &mut Self::Tensor, deltas: &Self::Tensor) { + self.inner.bias_grad(dbiases, deltas) + } +} + +impl + BackendMse> BackendMse for NativeBlas { + #[inline] + fn scaled_square_diff(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor, scale: f32) { + self.inner.scaled_square_diff(dst, a, b, scale) + } + + #[inline] + fn scaled_diff(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor, scale: f32) { + self.inner.scaled_diff(dst, a, b, scale) + } +} + +impl + BackendAxpys> BackendAxpys for NativeBlas { + #[inline] + fn axpys(&self, dst: &mut Self::Tensor, scale: f32, a: &Self::Tensor) { + self.inner.axpys(dst, scale, a) + } +} + +impl + BackendAdd> BackendAdd for NativeBlas { + #[inline] + fn add(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { + self.inner.add(dst, a) + } +} + +impl + BackendSub> BackendSub for NativeBlas { + #[inline] + fn sub(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) { + self.inner.sub(dst, a, b) + } +} + +impl + BackendMul> BackendMul for NativeBlas { + #[inline] + fn mul(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { + self.inner.mul(dst, a) + } +} + + +impl + BackendCopy> BackendCopy for NativeBlas { + #[inline] + fn copy(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { + self.inner.copy(dst, a) + } +} + +impl + BackendMaximum> BackendMaximum for NativeBlas { + #[inline] + fn maximum(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { + self.inner.maximum(dst, a) + } +} + + +impl + BackendAdam> BackendAdam for NativeBlas { + #[inline] + fn adam_p(&self, dst: &mut Self::Tensor, lr: f32, moms: &Self::Tensor, vels: &Self::Tensor, eps: f32) { + self.inner.adam_p(dst, lr, moms, vels, eps) + } +} + +impl + BackendSoftmax> BackendSoftmax for NativeBlas { + #[inline] + fn softmax(&self, y: &mut Self::Tensor, x: &Self::Tensor) { + self.inner.softmax(y, x) + } +} + +impl + BackendConv2d> BackendConv2d for 
NativeBlas { + type Context = (); + + #[inline] + fn conv2d_forward(&self, y: &mut Self::Tensor, x: &Self::Tensor, w: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.conv2d_forward(y, x, w, conv_info) + } + + #[inline] + fn conv2d_backward_input(&self, dx: &mut Self::Tensor, dy: &Self::Tensor, w: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.conv2d_backward_input(dx, dy, w, conv_info) + } + + #[inline] + fn conv2d_backward_filter(&self, dw: &mut Self::Tensor, x: &Self::Tensor, dy: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.conv2d_backward_filter(dw, x, dy, conv_info) + } +} + +impl + BackendMaxPool2d> BackendMaxPool2d for NativeBlas { + #[inline] + fn max_pool2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.max_pool2d(y, x, conv_info) + } + + #[inline] + fn max_pool2d_backprop(&self, dx: &mut Self::Tensor, dy: &Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.max_pool2d_backprop(dx, dy, x, conv_info) + } +} + +impl + BackendAvgPool2d> BackendAvgPool2d for NativeBlas { + #[inline] + fn avg_pool2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.avg_pool2d(y, x, conv_info) + } + + #[inline] + fn avg_pool2d_backprop(&self, dx: &mut Self::Tensor, dy: &Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) { + self.inner.avg_pool2d_backprop(dx, dy, x, conv_info) + } +} + +impl + BackendPaddingCopy2d> BackendPaddingCopy2d for NativeBlas { + #[inline] + fn copy_with_padding2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, y_paddings: (u32, u32), x_paddings: (u32, u32)) { + self.inner.copy_with_padding2d(y, x, y_paddings, x_paddings) + } +} diff --git a/yarnn/src/lib.rs b/yarnn/src/lib.rs index 26977ab..7bebf45 100644 --- a/yarnn/src/lib.rs +++ b/yarnn/src/lib.rs @@ -24,4 +24,4 @@ pub mod prelude { pub use super::loss::*; pub use super::tensor::*; pub use super::layer::*; -} +} \ No newline at end of file diff --git a/yarnn/src/native/conv2d/mod.rs b/yarnn/src/native/conv2d/mod.rs index e7cd6fd..c17552c 100644 --- a/yarnn/src/native/conv2d/mod.rs +++ b/yarnn/src/native/conv2d/mod.rs @@ -1,10 +1,8 @@ mod kernel_3x3; mod kernel_5x5; -mod img2col; pub use self::kernel_3x3::*; pub use self::kernel_5x5::*; -pub use self::img2col::*; #[allow(dead_code)] pub fn valid_conv2d(y: &mut [f32], x: &[f32], w: &[f32], alpha: f32, diff --git a/yarnn/src/native/mod.rs b/yarnn/src/native/mod.rs index c0b1b03..c83f0f5 100644 --- a/yarnn/src/native/mod.rs +++ b/yarnn/src/native/mod.rs @@ -14,28 +14,72 @@ use core::fmt::Write; use rand_distr::{Normal, Distribution}; -pub struct NativeTensorF32 { - shape: TensorShape, - ptr: Option> +pub trait NativeNumber: Copy { + fn from_f32(val: f32) -> Self; + fn from_f64(val: f64) -> Self; + fn from_i64(val: i64) -> Self; + fn from_i32(val: i32) -> Self; + fn from_i16(val: i16) -> Self; + fn from_i8(val: i8) -> Self; + fn from_u64(val: u64) -> Self; + fn from_u32(val: u32) -> Self; + fn from_u16(val: u16) -> Self; + fn from_u8(val: u8) -> Self; } -impl NativeTensorF32 { - pub fn read(&self) -> &[f32] { +impl NativeNumber for f32 { + fn from_f32(val: f32) -> Self { val as f32 } + fn from_f64(val: f64) -> Self { val as f32 } + fn from_i64(val: i64) -> Self { val as f32 } + fn from_i32(val: i32) -> Self { val as f32 } + fn from_i16(val: i16) -> Self { val as f32 } + fn from_i8(val: i8) -> Self { val as f32 } + fn from_u64(val: u64) -> Self { val as f32 } + fn from_u32(val: u32) -> Self { val as f32 } + fn from_u16(val: u16) -> Self { val as f32 } + fn 
from_u8(val: u8) -> Self { val as f32 } +} + +impl NativeNumber for f64 { + fn from_f32(val: f32) -> Self { val as f64 } + fn from_f64(val: f64) -> Self { val as f64 } + fn from_i64(val: i64) -> Self { val as f64 } + fn from_i32(val: i32) -> Self { val as f64 } + fn from_i16(val: i16) -> Self { val as f64 } + fn from_i8(val: i8) -> Self { val as f64 } + fn from_u64(val: u64) -> Self { val as f64 } + fn from_u32(val: u32) -> Self { val as f64 } + fn from_u16(val: u16) -> Self { val as f64 } + fn from_u8(val: u8) -> Self { val as f64 } +} + +pub trait NativeBackend: Backend + Default { + fn read_tensor<'a>(&self, t: &'a Self::Tensor) -> &'a [N]; + fn write_tensor<'a>(&self, t: &'a mut Self::Tensor) -> &'a mut [N]; +} + +pub struct NativeTensor { + shape: TensorShape, + ptr: Option> +} + +impl NativeTensor { + pub fn read(&self) -> &[N] { self.ptr.as_ref().unwrap() } - pub fn write(&mut self) -> &mut [f32] { + pub fn write(&mut self) -> &mut [N] { if self.ptr.is_none() { - self.ptr = Some(vec![0.0; self.shape.size()].into_boxed_slice()); + self.ptr = Some(vec![N::from_f32(0.0); self.shape.size()].into_boxed_slice()); } return self.ptr.as_mut().unwrap() } } -impl Tensor for NativeTensorF32 { +impl Tensor for NativeTensor { fn new>(shape: S) -> Self { - NativeTensorF32 { + NativeTensor { shape: shape.into(), ptr: None, } @@ -46,24 +90,16 @@ impl Tensor for NativeTensorF32 { } fn resize(&mut self, shape: TensorShape) { - self.ptr = if let Some(ptr) = self.ptr.take() { - let size = self.shape.size(); - let raw = Box::into_raw(ptr) as *mut f32; - let mut data = unsafe {Vec::from_raw_parts(raw, size, size)}; - data.resize(shape.size(), 0.0); - - Some(data.into_boxed_slice()) - } else { - None - }; + self.ptr = None; self.shape = shape; } } -pub struct Native; +#[derive(Default)] +pub struct Native(core::marker::PhantomData); -impl Native { - fn fmt_tensor(&self, t: &NativeTensorF32, f: &mut String) -> fmt::Result { +impl Native { + fn fmt_tensor(&self, t: &NativeTensor, f: &mut String) -> fmt::Result { let strides = t.shape.default_strides(); let last_idx = strides.dims - 1; writeln!(f, "default stridses {} {}", t.shape.default_strides(), last_idx)?; @@ -97,8 +133,8 @@ impl Native { } } -impl Backend for Native { - type Tensor = NativeTensorF32; +impl Backend for Native { + type Tensor = NativeTensor; fn store_tensor_f32(&self, t: &Self::Tensor, data: &mut [f32]) { let size = t.shape().size(); @@ -170,8 +206,19 @@ impl Backend for Native { } } +impl NativeBackend for Native { + #[inline] + fn read_tensor<'a>(&self, t: &'a Self::Tensor) -> &'a [f32] { + t.read() + } -impl BackendGemm for Native { + #[inline] + fn write_tensor<'a>(&self, t: &'a mut Self::Tensor) -> &'a mut [f32] { + t.write() + } +} + +impl BackendGemm for Native { fn matmul(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) { let a_shape = a.shape(); let b_shape = b.shape(); @@ -251,7 +298,7 @@ impl BackendGemm for Native { } } -impl BackendSigmoid for Native { +impl BackendSigmoid for Native { fn sigmoid(&self, dst: &mut Self::Tensor, data: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -281,7 +328,7 @@ impl BackendSigmoid for Native { } } -impl BackendReLu for Native { +impl BackendReLu for Native { fn relu(&self, dst: &mut Self::Tensor, data: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -321,7 +368,7 @@ impl BackendReLu for Native { } } -impl BackendBias for Native { +impl BackendBias for Native { fn bias_add(&self, dst: &mut Self::Tensor, biases: &Self::Tensor) { let biases_shape = 
biases.shape(); let dst_shape = dst.shape().clone(); @@ -391,7 +438,7 @@ impl BackendBias for Native { } } -impl BackendScale for Native { +impl BackendScale for Native { fn scale(&self, dst: &mut Self::Tensor, scale: f32) { let dst_size = dst.shape().size(); let dst_s = &mut dst.write()[0 .. dst_size]; @@ -402,7 +449,7 @@ impl BackendScale for Native { } } -impl BackendMse for Native { +impl BackendMse for Native { fn scaled_square_diff(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor, scale: f32) { let a_size = a.shape().size(); let b_size = b.shape().size(); @@ -440,7 +487,7 @@ impl BackendMse for Native { } } -impl BackendAxpy for Native { +impl BackendAxpy for Native { default fn axpy(&self, dst: &mut Self::Tensor, scale: f32, a: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -455,7 +502,7 @@ impl BackendAxpy for Native { } } -impl BackendAxpys for Native { +impl BackendAxpys for Native { fn axpys(&self, dst: &mut Self::Tensor, scale: f32, a: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -470,7 +517,7 @@ impl BackendAxpys for Native { } } -impl BackendAdd for Native { +impl BackendAdd for Native { fn add(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -485,7 +532,7 @@ impl BackendAdd for Native { } } -impl BackendSub for Native { +impl BackendSub for Native { fn sub(&self, dst: &mut Self::Tensor, a: &Self::Tensor, b: &Self::Tensor) { let a_size = a.shape().size(); let b_size = b.shape().size(); @@ -505,7 +552,7 @@ impl BackendSub for Native { } -impl BackendMul for Native { +impl BackendMul for Native { fn mul(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -521,7 +568,7 @@ impl BackendMul for Native { } -impl BackendCopy for Native { +impl BackendCopy for Native { fn copy(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { let size = dst.shape().size(); @@ -536,7 +583,7 @@ impl BackendCopy for Native { } } -impl BackendMaximum for Native { +impl BackendMaximum for Native { fn maximum(&self, dst: &mut Self::Tensor, a: &Self::Tensor) { let dst_size = dst.shape().size(); @@ -552,7 +599,7 @@ impl BackendMaximum for Native { } -impl BackendAdam for Native { +impl BackendAdam for Native { fn adam_p(&self, dst: &mut Self::Tensor, lr: f32, moms: &Self::Tensor, vels: &Self::Tensor, eps: f32) { let dst_size = dst.shape().size(); @@ -569,7 +616,7 @@ impl BackendAdam for Native { } } -impl BackendSoftmax for Native { +impl BackendSoftmax for Native { fn softmax(&self, y: &mut Self::Tensor, x: &Self::Tensor) { let y_shape = y.shape(); let x_shape = x.shape(); @@ -619,7 +666,7 @@ impl BackendSoftmax for Native { } } -impl BackendConv2d for Native { +impl BackendConv2d for Native { type Context = (); fn conv2d_forward(&self, y: &mut Self::Tensor, x: &Self::Tensor, w: &Self::Tensor, conv_info: &Conv2dInfo) { @@ -742,7 +789,7 @@ impl BackendConv2d for Native { } } -impl BackendMaxPool2d for Native { +impl BackendMaxPool2d for Native { fn max_pool2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) { let x_shape = &x.shape().as_slice()[0..4]; let y_shape = &y.shape().as_slice()[0..4]; @@ -845,7 +892,7 @@ impl BackendMaxPool2d for Native { } } -impl BackendAvgPool2d for Native { +impl BackendAvgPool2d for Native { fn avg_pool2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, conv_info: &Conv2dInfo) { let x_shape = &x.shape().as_slice()[0..4]; let y_shape = &y.shape().as_slice()[0..4]; @@ -899,7 +946,7 @@ impl BackendAvgPool2d for Native { } } -impl BackendPaddingCopy2d 
for Native { +impl BackendPaddingCopy2d for Native { fn copy_with_padding2d(&self, y: &mut Self::Tensor, x: &Self::Tensor, y_paddings: (u32, u32), x_paddings: (u32, u32)) { let y_shape = &y.shape().as_slice()[0..4]; let x_shape = &x.shape().as_slice()[0..4]; @@ -964,16 +1011,16 @@ impl BackendPaddingCopy2d for Native { #[cfg(test)] mod tests { use crate::backend::*; - use super::{Native, NativeTensorF32}; + use super::{Native, NativeTensor}; use crate::tensor::Tensor; #[test] fn test_copy_with_padding2d() { - let bac = Native; - let mut a1 = NativeTensorF32::new((1, 1, 3, 3)); - let mut b1 = NativeTensorF32::new((1, 1, 5, 5)); - let mut a2 = NativeTensorF32::new((1, 1, 5, 5)); - let mut b2 = NativeTensorF32::new((1, 1, 3, 3)); + let bac: Native = Default::default(); + let mut a1 = NativeTensor::new((1, 1, 3, 3)); + let mut b1 = NativeTensor::new((1, 1, 5, 5)); + let mut a2 = NativeTensor::new((1, 1, 5, 5)); + let mut b2 = NativeTensor::new((1, 1, 3, 3)); bac.load_tensor_u8(&mut a1, &[ 1, 2, 3, @@ -1014,9 +1061,9 @@ mod tests { #[test] fn test_softmax() { - let bac = Native; - let mut a = NativeTensorF32::new((3, 3)); - let mut b = NativeTensorF32::new((3, 3)); + let bac: Native = Default::default(); + let mut a = NativeTensor::new((3, 3)); + let mut b = NativeTensor::new((3, 3)); bac.load_tensor_u8(&mut a, &[ 1,2,3, @@ -1037,10 +1084,10 @@ mod tests { #[test] fn test_matmul() { - let bac = Native; - let mut a = NativeTensorF32::new((2, 3)); - let mut b = NativeTensorF32::new((3, 4)); - let mut c = NativeTensorF32::new((2, 4)); + let bac: Native = Default::default(); + let mut a = NativeTensor::new((2, 3)); + let mut b = NativeTensor::new((3, 4)); + let mut c = NativeTensor::new((2, 4)); bac.load_tensor_u8(&mut a, &[ 1,2,3, @@ -1065,10 +1112,10 @@ mod tests { #[test] fn test_matmul_nt() { - let bac = Native; - let mut a = NativeTensorF32::new((2, 3)); - let mut b = NativeTensorF32::new((4, 3)); - let mut c = NativeTensorF32::new((2, 4)); + let bac: Native = Default::default(); + let mut a = NativeTensor::new((2, 3)); + let mut b = NativeTensor::new((4, 3)); + let mut c = NativeTensor::new((2, 4)); bac.load_tensor_u8(&mut a, &[ 1,2,3, @@ -1095,10 +1142,10 @@ mod tests { #[test] fn test_matmul_tn() { - let bac = Native; - let mut a = NativeTensorF32::new((8, 5)); - let mut b = NativeTensorF32::new((8, 3)); - let mut c = NativeTensorF32::new((5, 3)); + let bac: Native = Default::default(); + let mut a = NativeTensor::new((8, 5)); + let mut b = NativeTensor::new((8, 3)); + let mut c = NativeTensor::new((5, 3)); bac.load_tensor_u8(&mut a, &[ 0, 1, 2, 3, 4, @@ -1138,10 +1185,10 @@ mod tests { #[test] fn test_axpy() { - let bac = Native; + let bac: Native = Default::default(); - let mut a = NativeTensorF32::new((2, 2)); - let mut b = NativeTensorF32::new((2, 2)); + let mut a = NativeTensor::new((2, 2)); + let mut b = NativeTensor::new((2, 2)); bac.load_tensor_u8(&mut a, &[1, 2, 3, 4]); bac.load_tensor_u8(&mut b, &[1, 2, 3, 4]); @@ -1155,10 +1202,10 @@ mod tests { #[test] fn test_add() { - let bac = Native; + let bac: Native = Default::default(); - let mut a = NativeTensorF32::new((2, 2)); - let mut b = NativeTensorF32::new((2, 2)); + let mut a = NativeTensor::new((2, 2)); + let mut b = NativeTensor::new((2, 2)); bac.load_tensor_u8(&mut a, &[1, 2, 3, 4]); bac.load_tensor_u8(&mut b, &[1, 2, 3, 4]);
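
Usage sketch (illustrative; assumes the API introduced above and a BLAS implementation linked in via openblas-src): the wrapper is constructed the same way as in the updated mnist example, with matmul/axpy/scale dispatched to BLAS and everything else delegated to the inner Native backend.

    use yarnn::backend::*;
    use yarnn::native::{Native, NativeTensor};
    use yarnn::tensor::*;
    use yarnn_native_blas::NativeBlas;

    fn main() {
        // BLAS-accelerated wrapper around the pure-Rust Native<f32> backend
        let backend: NativeBlas<f32, Native<f32>> = Default::default();

        let mut a = NativeTensor::new((2, 3));
        let mut b = NativeTensor::new((3, 4));
        let mut c = NativeTensor::new((2, 4));

        backend.fill_random(&mut a, 0.0, 1.0);
        backend.fill_random(&mut b, 0.0, 1.0);

        // Goes through BackendGemm for NativeBlas, i.e. blas::sgemm
        backend.matmul(&mut c, &a, &b);

        backend.print_tensor(&c);
    }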