rust: Add function that directly hashes 32-bit units

This is apparently almost twice as fast; however, it requires the input
to be 32-bit aligned and its length to be a multiple of 32 bits.
Callers should special-case inputs that meet these requirements.
This commit is contained in:
Wladimir J. van der Laan 2020-04-29 17:14:59 +00:00
parent 9d9f67692b
commit 68ddf3b1bc
2 changed files with 48 additions and 7 deletions

View File

@ -551,12 +551,14 @@ fn main() -> ! {
// (this is shorter than the given test vector as it is the maximum that the SHA256 engine
// supports, 65536 SHA blocks)
{
let time_start = clock();
let expected = hex!("929156a9422e05b71655509e8e9e7292d65d540a7342c94df3e121cedd407dfe");
let s = b"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno";
let s_u32 = [0x64636261, 0x68676665, 0x65646362, 0x69686766, 0x66656463, 0x6a696867, 0x67666564, 0x6b6a6968, 0x68676665, 0x6c6b6a69, 0x69686766, 0x6d6c6b6a, 0x6a696867, 0x6e6d6c6b, 0x6b6a6968, 0x6f6e6d6c];
// let size = 16_777_216 * s.len();
let size = 65_535 * s.len();
write!(stdout, "SHA256 ({} bytes): ", size).unwrap();
write!(stdout, "SHA256 hw ({} bytes): ", size).unwrap();
let time_start = clock();
let mut sha = SHA256Ctx::new(sha256, size);
sha.update(s.iter().cycle().take(size));
let sha_out = sha.finish();
@ -569,6 +571,22 @@ fn main() -> ! {
write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap();
writeln!(stdout).unwrap();
write!(stdout, "SHA256 hw, 32bit ({} bytes): ", size).unwrap();
let time_start = clock();
let mut sha = SHA256Ctx::new(sha256, size);
sha.update32(s_u32.iter().cycle().take(size / 4));
let sha_out = sha.finish();
let time_end = clock();
if sha_out == expected {
write!(stdout, "MATCH").unwrap();
} else {
write!(stdout, "MISMATCH").unwrap();
}
write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap();
writeln!(stdout).unwrap();
// Yet another thing to try would be DMA?
// Software
write!(stdout, "SHA256 sw ({} bytes): ", size).unwrap();
let time_start = clock();

View File

@ -39,21 +39,44 @@ impl <'a> SHA256Ctx<'a> {
/** Update SHA256 computation with new data.
 *
 * Bytes from `data` are packed four at a time into a 32-bit word: the byte at position
 * `ptr % 4` is OR-ed in at bit offset `(ptr % 4) * 8`. When the fourth byte of a word has
 * been added, the word is written to the `data_in` register and the accumulator is cleared.
 * A partially filled word and the running byte count are kept in `self.block` / `self.ptr`.
 *
 * NOTE(review): this span appears to show the removed and the added lines of a diff side by
 * side (statements on `self.block` / `self.ptr` directly followed by the same statements on
 * the locals `block` / `ptr`). Presumably only the local-variable versions belong in the
 * real file -- confirm against the repository before relying on this text.
 */
pub fn update<'b, X>(&mut self, data: X)
where X: IntoIterator<Item = &'b u8> {
// Work on local copies of the accumulator word and byte count; they are stored back into
// `self` once, after the loop.
let mut block = self.block;
let mut ptr = self.ptr;
for &v in data {
// NOTE(review): the next three statements look like the pre-change version of the three
// statements that follow them (operating on `self` instead of the locals).
let copy_ofs = self.ptr % 4;
self.block |= (v as u32) << (copy_ofs * 8);
self.ptr += 1;
// Position of this byte within the current 32-bit word (0..=3).
let copy_ofs = ptr % 4;
// Place the byte at bit offset `copy_ofs * 8`, so the first byte ends up in the low bits.
block |= (v as u32) << (copy_ofs * 8);
ptr += 1;
// The word is complete once its fourth byte has been added.
if copy_ofs == 3 {
unsafe {
// Busy-wait while the peripheral reports `fifo_in_full`.
while self.sha.function_reg_1.read().fifo_in_full().bit() {
atomic::compiler_fence(Ordering::SeqCst)
}
// NOTE(review): the first write below looks like the pre-change line, the second like its
// replacement; as rendered here, two words would be written per completed block.
self.sha.data_in.write(|w| w.bits(self.block));
self.sha.data_in.write(|w| w.bits(block));
}
// Start accumulating the next word from zero.
self.block = 0;
block = 0;
}
}
// Persist the (possibly partial) word and the byte count for the next call.
self.block = block;
self.ptr = ptr;
}
/** Feed the SHA256 computation with data that is already packed into 32-bit words
 * (little-endian).
 *
 * Every word yielded by `data` is written unchanged to the `data_in` register, after waiting
 * for the `fifo_in_full` flag to clear; the byte position advances by four per word. The
 * original author notes this is roughly two times faster than going byte by byte through
 * `update`.
 *
 * Panics if the current byte position in the data stream is not a multiple of four.
 */
pub fn update32<'b, X>(&mut self, data: X)
where
    X: IntoIterator<Item = &'b u32>,
{
    // Whole words can only be appended on a word boundary of the stream.
    assert!((self.ptr & 3) == 0);

    // Track the byte position locally and store it back once at the end.
    let mut position = self.ptr;
    for word in data.into_iter().copied() {
        unsafe {
            // Busy-wait while the peripheral reports `fifo_in_full`.
            while self.sha.function_reg_1.read().fifo_in_full().bit() {
                atomic::compiler_fence(Ordering::SeqCst);
            }
            self.sha.data_in.write(|reg| reg.bits(word));
        }
        // One word is four bytes of input.
        position += 4;
    }
    self.ptr = position;
}
/** Finish up SHA256 computation. */