rust: Add function that directly hashes 32-bit units

This is apparently almost twice as fast; however, it requires the input to be 32-bit aligned and its length to be a multiple of 32 bits. Input that meets these requirements should be special-cased.
2024-11-22 01:16:20 +04:00 · 2020-04-29 17:14:59 +00:00 · 2020-04-29 17:14:59 +00:00 · 68ddf3b1bc
commit 68ddf3b1bc
parent 9d9f67692b
2 changed files with 48 additions and 7 deletions
--- a/rust/cryptest/src/main.rs
+++ b/rust/cryptest/src/main.rs
@ -551,12 +551,14 @@ fn main() -> ! {
    // (this is shorter than the given test vector as it is the maximum that the SHA256 engine
    // supports, 65536 SHA blocks)
    {
-        let time_start = clock();
        let expected = hex!("929156a9422e05b71655509e8e9e7292d65d540a7342c94df3e121cedd407dfe");
        let s = b"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno";
+        let s_u32 = [0x64636261, 0x68676665, 0x65646362, 0x69686766, 0x66656463, 0x6a696867, 0x67666564, 0x6b6a6968, 0x68676665, 0x6c6b6a69, 0x69686766, 0x6d6c6b6a, 0x6a696867, 0x6e6d6c6b, 0x6b6a6968, 0x6f6e6d6c];
        // let size = 16_777_216 * s.len();
        let size = 65_535 * s.len();
-        write!(stdout, "SHA256 ({} bytes): ", size).unwrap();
+
+        write!(stdout, "SHA256 hw ({} bytes): ", size).unwrap();
+        let time_start = clock();
        let mut sha = SHA256Ctx::new(sha256, size);
        sha.update(s.iter().cycle().take(size));
        let sha_out = sha.finish();
@ -569,6 +571,22 @@ fn main() -> ! {
        write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap();
        writeln!(stdout).unwrap();

+        write!(stdout, "SHA256 hw, 32bit ({} bytes): ", size).unwrap();
+        let time_start = clock();
+        let mut sha = SHA256Ctx::new(sha256, size);
+        sha.update32(s_u32.iter().cycle().take(size / 4));
+        let sha_out = sha.finish();
+        let time_end = clock();
+        if sha_out == expected {
+            write!(stdout, "MATCH").unwrap();
+        } else {
+            write!(stdout, "MISMATCH").unwrap();
+        }
+        write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap();
+        writeln!(stdout).unwrap();
+
+        // Yet another thing to try would be DMA?
+
        // Software
        write!(stdout, "SHA256 sw ({} bytes): ", size).unwrap();
        let time_start = clock();
--- a/rust/k210-shared/src/soc/sha256.rs
+++ b/rust/k210-shared/src/soc/sha256.rs
@ -39,21 +39,44 @@ impl <'a> SHA256Ctx<'a> {
    /** Update SHA256 computation with new data. */
    pub fn update<'b, X>(&mut self, data: X)
        where X: IntoIterator<Item = &'b u8> {
+        let mut block = self.block;
+        let mut ptr = self.ptr;
        for &v in data {
-            let copy_ofs = self.ptr % 4;
-            self.block |= (v as u32) << (copy_ofs * 8);
-            self.ptr += 1;
+            let copy_ofs = ptr % 4;
+            block |= (v as u32) << (copy_ofs * 8);
+            ptr += 1;

            if copy_ofs == 3 {
                unsafe {
                    while self.sha.function_reg_1.read().fifo_in_full().bit() {
                        atomic::compiler_fence(Ordering::SeqCst)
                    }
-                    self.sha.data_in.write(|w| w.bits(self.block));
+                    self.sha.data_in.write(|w| w.bits(block));
                }
-                self.block = 0;
+                block = 0;
            }
        }
+        self.block = block;
+        self.ptr = ptr;
+    }
+
+    /** Update SHA256 computation with new data given as 32-bit little-endian words; the current
+     * position in the data stream must be a multiple of four bytes. This is roughly twice as fast as feeding the data byte by byte using `update`.
+     */
+    pub fn update32<'b, X>(&mut self, data: X)
+        where X: IntoIterator<Item = &'b u32> {
+        assert!((self.ptr & 3) == 0);
+        let mut ptr = self.ptr;
+        for &v in data {
+            unsafe {
+                while self.sha.function_reg_1.read().fifo_in_full().bit() {
+                    atomic::compiler_fence(Ordering::SeqCst)
+                }
+                self.sha.data_in.write(|w| w.bits(v));
+            }
+            ptr += 4;
+        }
+        self.ptr = ptr;
    }

    /** Finish up SHA256 computation. */