From 68ddf3b1bc9eaa875496bd3013611df4ca899e21 Mon Sep 17 00:00:00 2001
From: "Wladimir J. van der Laan" <laanwj@protonmail.com>
Date: Wed, 29 Apr 2020 17:14:59 +0000
Subject: [PATCH] rust: Add function that directly hashes 32-bit units

This is apparently almost twice as fast; however, it requires the
input to be aligned to 32 bits and its length to be a multiple of
32 bits. The aligned case should be special-cased.
---
 rust/cryptest/src/main.rs          | 22 ++++++++++++++++++--
 rust/k210-shared/src/soc/sha256.rs | 33 +++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 7 deletions(-)
diff --git a/rust/cryptest/src/main.rs b/rust/cryptest/src/main.rs
index fb4e382..9df6a3e 100644
--- a/rust/cryptest/src/main.rs
+++ b/rust/cryptest/src/main.rs
@@ -551,12 +551,14 @@ fn main() -> ! {
     // (this is shorter than the given test vector as it is the maximum that the SHA256 engine
     // supports, 65536 SHA blocks)
     {
-        let time_start = clock();
         let expected = hex!("929156a9422e05b71655509e8e9e7292d65d540a7342c94df3e121cedd407dfe");
         let s = b"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhijklmno";
+        let s_u32 = [0x64636261, 0x68676665, 0x65646362, 0x69686766, 0x66656463, 0x6a696867, 0x67666564, 0x6b6a6968, 0x68676665, 0x6c6b6a69, 0x69686766, 0x6d6c6b6a, 0x6a696867, 0x6e6d6c6b, 0x6b6a6968, 0x6f6e6d6c];
         // let size = 16_777_216 * s.len();
         let size = 65_535 * s.len();
-        write!(stdout, "SHA256 ({} bytes): ", size).unwrap();
+
+        write!(stdout, "SHA256 hw ({} bytes): ", size).unwrap();
+        let time_start = clock();
         let mut sha = SHA256Ctx::new(sha256, size);
         sha.update(s.iter().cycle().take(size));
         let sha_out = sha.finish();
@@ -569,6 +571,22 @@ fn main() -> ! {
         write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap();
         writeln!(stdout).unwrap();
 
+        write!(stdout, "SHA256 hw, 32bit ({} bytes): ", size).unwrap();
+        let time_start = clock();
+        let mut sha = SHA256Ctx::new(sha256, size);
+        sha.update32(s_u32.iter().cycle().take(size / 4));
+        let sha_out = sha.finish();
+        let time_end = clock();
+        if sha_out == expected {
+            write!(stdout, "MATCH").unwrap();
+        } else {
+            write!(stdout, "MISMATCH").unwrap();
+        }
+        write!(stdout, " ({} kB/s)", (size as u64) * 1_000 / (time_end - time_start)).unwrap();
+        writeln!(stdout).unwrap();
+
+        // Yet another thing to try would be DMA?
+
         // Software
         write!(stdout, "SHA256 sw ({} bytes): ", size).unwrap();
         let time_start = clock();
diff --git a/rust/k210-shared/src/soc/sha256.rs b/rust/k210-shared/src/soc/sha256.rs
index 45f2d30..6f97abf 100644
--- a/rust/k210-shared/src/soc/sha256.rs
+++ b/rust/k210-shared/src/soc/sha256.rs
@@ -39,21 +39,44 @@ impl <'a> SHA256Ctx<'a> {
     /** Update SHA256 computation with new data. */
     pub fn update<'b, X>(&mut self, data: X)
         where X: IntoIterator<Item = &'b u8> {
+        let mut block = self.block;
+        let mut ptr = self.ptr;
         for &v in data {
-            let copy_ofs = self.ptr % 4;
-            self.block |= (v as u32) << (copy_ofs * 8);
-            self.ptr += 1;
+            let copy_ofs = ptr % 4;
+            block |= (v as u32) << (copy_ofs * 8);
+            ptr += 1;
 
             if copy_ofs == 3 {
                 unsafe {
                     while self.sha.function_reg_1.read().fifo_in_full().bit() {
                         atomic::compiler_fence(Ordering::SeqCst)
                     }
-                    self.sha.data_in.write(|w| w.bits(self.block));
+                    self.sha.data_in.write(|w| w.bits(block));
                 }
-                self.block = 0;
+                block = 0;
             }
         }
+        self.block = block;
+        self.ptr = ptr;
+    }
+
+    /** Update SHA256 computation with whole 32-bit words. Each word is written to the engine
+     * unchanged; bytes are packed little-endian, matching `update`. Panics if the byte count
+     * `self.ptr` is not a multiple of four. Roughly two times faster than bytewise `update`. */
+    pub fn update32<'b, X>(&mut self, data: X)
+        where X: IntoIterator<Item = &'b u32> {
+        assert!((self.ptr & 3) == 0); // self.block is bypassed: no partial word may be pending
+        let mut ptr = self.ptr;
+        for &v in data {
+            unsafe {
+                while self.sha.function_reg_1.read().fifo_in_full().bit() { // spin while flag set
+                    atomic::compiler_fence(Ordering::SeqCst)
+                }
+                self.sha.data_in.write(|w| w.bits(v));
+            }
+            ptr += 4; // one word accounts for four bytes of the stream
+        }
+        self.ptr = ptr;
     }
 
     /** Finish up SHA256 computation. */