From 68ddf3b1bc9eaa875496bd3013611df4ca899e21 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Wed, 29 Apr 2020 17:14:59 +0000 Subject: [PATCH] rust: Add function that directly hashes 32-bit units This is almost two times as fast, apparently; however, it requires the input to be aligned to 32 bits and to be a multiple of 32 bits in length. This should be special-cased. --- rust/cryptest/src/main.rs | 22 ++++++++++++++++++-- rust/k210-shared/src/soc/sha256.rs | 33 +++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/rust/cryptest/src/main.rs b/rust/cryptest/src/main.rs index fb4e382..9df6a3e 100644 --- a/rust/cryptest/src/main.rs +++ b/rust/cryptest/src/main.rs @@ -551,12 +551,14 @@ fn main() -> ! { // (this is shorter than the given test vector as it is the maximum that the SHA256 engine // supports, 65536 SHA blocks) { - let time_start = clock(); let expected = hex!("929156a9422e05b71655509e8e9e7292d65d540a7342c94df3e121cedd407dfe"); let s = b"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno"; + let s_u32 = [0x64636261, 0x68676665, 0x65646362, 0x69686766, 0x66656463, 0x6a696867, 0x67666564, 0x6b6a6968, 0x68676665, 0x6c6b6a69, 0x69686766, 0x6d6c6b6a, 0x6a696867, 0x6e6d6c6b, 0x6b6a6968, 0x6f6e6d6c]; // let size = 16_777_216 * s.len(); let size = 65_535 * s.len(); - write!(stdout, "SHA256 ({} bytes): ", size).unwrap(); + + write!(stdout, "SHA256 hw ({} bytes): ", size).unwrap(); + let time_start = clock(); let mut sha = SHA256Ctx::new(sha256, size); sha.update(s.iter().cycle().take(size)); let sha_out = sha.finish(); @@ -569,6 +571,22 @@ fn main() -> ! 
{ write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap(); writeln!(stdout).unwrap(); + write!(stdout, "SHA256 hw, 32bit ({} bytes): ", size).unwrap(); + let time_start = clock(); + let mut sha = SHA256Ctx::new(sha256, size); + sha.update32(s_u32.iter().cycle().take(size / 4)); + let sha_out = sha.finish(); + let time_end = clock(); + if sha_out == expected { + write!(stdout, "MATCH").unwrap(); + } else { + write!(stdout, "MISMATCH").unwrap(); + } + write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap(); + writeln!(stdout).unwrap(); + + // Yet another thing to try would be DMA? + // Software write!(stdout, "SHA256 sw ({} bytes): ", size).unwrap(); let time_start = clock(); diff --git a/rust/k210-shared/src/soc/sha256.rs b/rust/k210-shared/src/soc/sha256.rs index 45f2d30..6f97abf 100644 --- a/rust/k210-shared/src/soc/sha256.rs +++ b/rust/k210-shared/src/soc/sha256.rs @@ -39,21 +39,44 @@ impl <'a> SHA256Ctx<'a> { /** Update SHA256 computation with new data. */ pub fn update<'b, X>(&mut self, data: X) where X: IntoIterator<Item=&'b u8> { + let mut block = self.block; + let mut ptr = self.ptr; for &v in data { - let copy_ofs = self.ptr % 4; - self.block |= (v as u32) << (copy_ofs * 8); - self.ptr += 1; + let copy_ofs = ptr % 4; + block |= (v as u32) << (copy_ofs * 8); + ptr += 1; if copy_ofs == 3 { unsafe { while self.sha.function_reg_1.read().fifo_in_full().bit() { atomic::compiler_fence(Ordering::SeqCst) } - self.sha.data_in.write(|w| w.bits(self.block)); + self.sha.data_in.write(|w| w.bits(block)); } - self.block = 0; + block = 0; } } + self.block = block; + self.ptr = ptr; + } + + /** Update SHA256 computation with new data (32 bit little-endian, must be four-aligned in + * the data stream). This is roughly two times faster than byte by byte using `update`. 
+ */ + pub fn update32<'b, X>(&mut self, data: X) + where X: IntoIterator<Item=&'b u32> { + assert!((self.ptr & 3) == 0); + let mut ptr = self.ptr; + for &v in data { + unsafe { + while self.sha.function_reg_1.read().fifo_in_full().bit() { + atomic::compiler_fence(Ordering::SeqCst) + } + self.sha.data_in.write(|w| w.bits(v)); + } + ptr += 4; + } + self.ptr = ptr; } /** Finish up SHA256 computation. */