From e79a8329ea0aca0da932559e8683e4acd784886a Mon Sep 17 00:00:00 2001 From: Andrey Tkachenko Date: Fri, 1 Nov 2024 14:25:58 +0400 Subject: [PATCH] initial --- .gitignore | 1 + Cargo.lock | 1781 ++++++++++++++++++++++++++ Cargo.toml | 7 + examples/align.rs | 1 + src/alignment.rs | 20 + src/alignment/block_matching.rs | 96 ++ src/alignment/gauss_pyramid.rs | 71 + src/alignment/local_search.rs | 114 ++ src/alignment/upsample_alignments.rs | 263 ++++ src/gat.rs | 69 + src/image.rs | 133 ++ src/image/convolution.rs | 61 + src/image/downsample.rs | 65 + src/image/padding.rs | 20 + src/kernels.rs | 385 ++++++ src/lib.rs | 6 + src/merge.rs | 411 ++++++ src/misc.rs | 5 + 18 files changed, 3509 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 examples/align.rs create mode 100644 src/alignment.rs create mode 100644 src/alignment/block_matching.rs create mode 100644 src/alignment/gauss_pyramid.rs create mode 100644 src/alignment/local_search.rs create mode 100644 src/alignment/upsample_alignments.rs create mode 100644 src/gat.rs create mode 100644 src/image.rs create mode 100644 src/image/convolution.rs create mode 100644 src/image/downsample.rs create mode 100644 src/image/padding.rs create mode 100644 src/kernels.rs create mode 100644 src/lib.rs create mode 100644 src/merge.rs create mode 100644 src/misc.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..3e5f62d --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1781 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "ash" +version = "0.38.0+1.3.281" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb44936d800fea8f016d7f2311c6a4f97aebd5dc86f09906139ec848cf3a46f" +dependencies = [ + "libloading", +] + +[[package]] +name = "async-channel" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89b47800b0be77592da0afd425cc03468052844aff33b84e33cc696f64e77b6a" +dependencies = [ + "concurrent-queue", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener", + "event-listener-strategy", + 
"pin-project-lite", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bit-set" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0481a0e032742109b1133a095184ee93d88f3dc9e0d28a5d033dc77a073f44f" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c54ff287cfc0a34f38a6b832ea1bd8e448a330b3e40a50859e6488bee07f22" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytemuck" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcfcc3cd946cb52f0bbfdbbcfa2f4e24f75ebb6c0e1002f7c25904fada18b9ec" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "byteorder" +version = 
"1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "com" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e17887fd17353b65b1b2ef1c526c83e26cd72e74f598a8dc1bee13a48f3d9f6" +dependencies = [ + "com_macros", +] + +[[package]] +name = "com_macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d375883580a668c7481ea6631fc1a8863e33cc335bf56bfad8d7e6d4b04b13a5" +dependencies = [ + "com_macros_support", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "com_macros_support" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad899a1087a9296d5644792d7cb72b8e34c1bec8e7d4fbc002230169a6e8710c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "libc", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "cubecl" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "cubecl-core", + "cubecl-cuda", + "cubecl-hip", + "cubecl-linalg", + "cubecl-runtime", + "cubecl-wgpu", +] + +[[package]] +name = "cubecl-common" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "derive-new", + "embassy-futures", + "futures-lite", + "getrandom", + "log", + "portable-atomic", + "rand", + "serde", + "spin", + "web-time", +] + +[[package]] +name = "cubecl-core" +version = "0.4.0" +source = 
"git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-macros", + "cubecl-runtime", + "derive-new", + "half", + "log", + "num-traits", + "paste", + "serde", +] + +[[package]] +name = "cubecl-cpp" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-runtime", + "derive-new", + "half", + "log", +] + +[[package]] +name = "cubecl-cuda" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-cpp", + "cubecl-runtime", + "cudarc", + "derive-new", + "half", + "log", +] + +[[package]] +name = "cubecl-hip" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "bytemuck", + "cubecl-common", + "cubecl-core", + "cubecl-cpp", + "cubecl-hip-sys", + "cubecl-runtime", + "derive-new", + "half", + "log", +] + +[[package]] +name = "cubecl-hip-sys" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2553766b483a28dd7db67cc4be9c61a7aa8cc7f02b3b8059ffdaeea1d8c8590e" +dependencies = [ + "libc", +] + +[[package]] +name = "cubecl-linalg" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "bytemuck", + "cubecl-core", + "cubecl-runtime", + "half", +] + +[[package]] +name = "cubecl-macros" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "cubecl-common", + "darling", + "derive-new", + "ident_case", + "prettyplease", + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "cubecl-runtime" +version = "0.4.0" +source = 
"git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "async-channel", + "async-lock", + "cfg_aliases 0.2.1", + "cubecl-common", + "derive-new", + "dirs", + "hashbrown 0.14.5", + "log", + "md5", + "sanitize-filename", + "serde", + "serde_json", + "spin", + "wasm-bindgen-futures", +] + +[[package]] +name = "cubecl-wgpu" +version = "0.4.0" +source = "git+https://github.com/tracel-ai/cubecl.git#99404b1e29946832a42b72a5c26d4cf42c67692e" +dependencies = [ + "async-channel", + "bytemuck", + "cfg_aliases 0.2.1", + "cubecl-common", + "cubecl-core", + "cubecl-runtime", + "derive-new", + "hashbrown 0.14.5", + "log", + "web-time", + "wgpu", +] + +[[package]] +name = "cudarc" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38cd60a9a42ec83a2ed7effb0b1f073270264ea99da7acfc44f7e8d74dee0384" +dependencies = [ + "libloading", +] + +[[package]] +name = "d3d12" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdbd1f579714e3c809ebd822c81ef148b1ceaeb3d535352afc73fd0c4c6a0017" +dependencies = [ + "bitflags 2.6.0", + "libloading", + "winapi", +] + +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.85", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + 
"quote", + "syn 2.0.85", +] + +[[package]] +name = "derive-new" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + +[[package]] +name = "document-features" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6969eaabd2421f8a2775cfd2471a2b634372b4a25d41e3bd647b79912850a0" +dependencies = [ + "litrs", +] + +[[package]] +name = "embassy-futures" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f878075b9794c1e4ac788c95b728f26aa6366d32eeb10c7051389f898f7d067" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "event-listener" +version = "5.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +dependencies = [ + "event-listener", + 
"pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-lite" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f1fa2f9765705486b33fd2acf1577f8ec449c2ba1f318ae5447697b7c08d210" +dependencies = [ + "fastrand", + "futures-core", + "futures-io", + "parking", + "pin-project-lite", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi", + "wasm-bindgen", +] + +[[package]] +name = "gl_generator" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" +dependencies = [ + "khronos_api", + "log", + "xml-rs", +] + +[[package]] +name = "glow" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" +dependencies = [ + "js-sys", + "slotmap", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "glutin_wgl_sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4e1951bbd9434a81aa496fe59ccc2235af3820d27b85f9314e279609211e2c" +dependencies = [ + "gl_generator", +] + +[[package]] +name = "gpu-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" +dependencies = [ + "bitflags 2.6.0", + "gpu-alloc-types", +] + +[[package]] +name = "gpu-alloc-types" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "gpu-allocator" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd4240fc91d3433d5e5b0fc5b67672d771850dc19bbee03c1381e19322803d7" +dependencies = [ + "log", + "presser", + "thiserror", + "winapi", + "windows", +] + +[[package]] +name = "gpu-descriptor" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c08c1f623a8d0b722b8b99f821eb0ba672a1618f0d3b16ddbee1cedd2dd8557" 
+dependencies = [ + "bitflags 2.6.0", + "gpu-descriptor-types", + "hashbrown 0.14.5", +] + +[[package]] +name = "gpu-descriptor-types" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "bytemuck", + "cfg-if", + "crunchy", + "num-traits", + "serde", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + +[[package]] +name = "hassle-rs" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af2a7e73e1f34c48da31fb668a907f250794837e08faa144fd24f0b8b741e890" +dependencies = [ + "bitflags 2.6.0", + "com", + "libc", + "libloading", + "thiserror", + "widestring", + "winapi", +] + +[[package]] +name = "hexf-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" 
+dependencies = [ + "equivalent", + "hashbrown 0.15.0", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jni-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "khronos-egl" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" +dependencies = [ + "libc", + "libloading", + "pkg-config", +] + +[[package]] +name = "khronos_api" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.161" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" + +[[package]] +name = "libloading" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +dependencies = [ + "cfg-if", + "windows-targets 0.52.6", +] + +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.6.0", + "libc", +] + +[[package]] +name = "litrs" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "metal" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" +dependencies = [ + "bitflags 2.6.0", + "block", + "core-graphics-types", + "foreign-types", + "log", + "objc", + "paste", +] + +[[package]] +name = "naga" +version = "22.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bd5a652b6faf21496f2cfd88fc49989c8db0825d1f6746b1a71a6ede24a63ad" +dependencies = [ + "arrayvec", + "bit-set", + "bitflags 2.6.0", + "cfg_aliases 0.1.1", + "codespan-reporting", + "hexf-parse", + "indexmap", + "log", + "rustc-hash", + "spirv", + "termcolor", + "thiserror", + "unicode-xid", +] + +[[package]] +name = "ndk-sys" +version = "0.5.0+25.2.9519653" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691" +dependencies = [ + "jni-sys", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = 
"0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pin-project-lite" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "portable-atomic" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "presser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" + +[[package]] +name = "prettyplease" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +dependencies = [ + "proc-macro2", + "syn 2.0.85", +] + +[[package]] +name = "proc-macro2" +version = "1.0.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +dependencies = [ + "unicode-ident", +] 
+ +[[package]] +name = "profiling" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afbdc74edc00b6f6a218ca6a5364d6226a259d4b8ea1af4a0ea063f27e179f4d" + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "range-alloc" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8a99fddc9f0ba0a85884b8d14e3592853e787d581ca1816c91349b10e4eeab" + +[[package]] +name = "raw-window-handle" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" + +[[package]] +name = "rax" +version = "0.1.0" +dependencies = [ + "cubecl", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "renderdoc-sys" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "sanitize-filename" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" +dependencies = [ + "lazy_static", + "regex", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "serde" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.214" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "slotmap" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" +dependencies = [ + "version_check", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", + "portable-atomic", +] + +[[package]] +name = "spirv" +version = "0.3.0+sdk-1.3.268.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] 
+name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.85", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + 
+[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "web-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wgpu" +version = "22.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d1c4ba43f80542cf63a0a6ed3134629ae73e8ab51e4b765a67f3aa062eb433" +dependencies = [ + "arrayvec", + "cfg_aliases 0.1.1", + "document-features", + "js-sys", + "log", + "naga", + "parking_lot", + "profiling", + "raw-window-handle", + "smallvec", + "static_assertions", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "wgpu-core", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-core" +version = "22.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0348c840d1051b8e86c3bcd31206080c5e71e5933dabd79be1ce732b0b2f089a" +dependencies = [ + "arrayvec", + "bit-vec", + "bitflags 2.6.0", + "cfg_aliases 0.1.1", + "document-features", + "indexmap", + "log", + "naga", + "once_cell", + "parking_lot", + "profiling", + "raw-window-handle", + "rustc-hash", + "smallvec", + "thiserror", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-hal" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6bbf4b4de8b2a83c0401d9e5ae0080a2792055f25859a02bf9be97952bbed4f" +dependencies = [ + "android_system_properties", + "arrayvec", + "ash", + "bit-set", + "bitflags 
2.6.0", + "block", + "cfg_aliases 0.1.1", + "core-graphics-types", + "d3d12", + "glow", + "glutin_wgl_sys", + "gpu-alloc", + "gpu-allocator", + "gpu-descriptor", + "hassle-rs", + "js-sys", + "khronos-egl", + "libc", + "libloading", + "log", + "metal", + "naga", + "ndk-sys", + "objc", + "once_cell", + "parking_lot", + "profiling", + "range-alloc", + "raw-window-handle", + "renderdoc-sys", + "rustc-hash", + "smallvec", + "thiserror", + "wasm-bindgen", + "web-sys", + "wgpu-types", + "winapi", +] + +[[package]] +name = "wgpu-types" +version = "22.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc9d91f0e2c4b51434dfa6db77846f2793149d8e73f800fa2e41f52b8eac3c5d" +dependencies = [ + "bitflags 2.6.0", + "js-sys", + "web-sys", +] + +[[package]] +name = "widestring" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xml-rs" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af4e2e2f7cba5a093896c1e150fbfe177d1883e7448200efb81d40b9d339ef26" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.85", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..11dcc81 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "rax" 
+version = "0.1.0" +edition = "2021" + +[dependencies] +cubecl = { git = "https://github.com/tracel-ai/cubecl.git", features = ["wgpu", "cuda"] } diff --git a/examples/align.rs b/examples/align.rs new file mode 100644 index 0000000..f328e4d --- /dev/null +++ b/examples/align.rs @@ -0,0 +1 @@ +fn main() {} diff --git a/src/alignment.rs b/src/alignment.rs new file mode 100644 index 0000000..d48505b --- /dev/null +++ b/src/alignment.rs @@ -0,0 +1,20 @@ +use crate::misc::DistanceKind; + +pub mod block_matching; +pub mod gauss_pyramid; +pub mod local_search; +pub mod upsample_alignments; + +pub struct AlingLevelConfig { + pub factor: u32, + pub upsampling_factor: u32, + pub tile_size: u32, + pub search_radius: u32, + pub distance: DistanceKind, +} + +pub struct AlignmentConfig { + pub levels: Vec, +} + +pub struct Alignments {} diff --git a/src/alignment/block_matching.rs b/src/alignment/block_matching.rs new file mode 100644 index 0000000..2a1a02c --- /dev/null +++ b/src/alignment/block_matching.rs @@ -0,0 +1,96 @@ +use cubecl::{client::ComputeClient, Runtime}; + +use crate::image::{Alignments, GrayImage}; + +use super::{ + gauss_pyramid::build_pyramid, local_search::local_search, + upsample_alignments::upsample_alignments, AlingLevelConfig, +}; + +/// +/// Align the reference image with the img : returns a patchwise flow such that +/// for patches py, px : +/// img[py, px] ~= ref_img[py + alignments[py, px, 1], +/// px + alignments[py, px, 0]] +/// +/// Parameters +/// ---------- +/// ref_img : Img [imshape_y, imshape_x] +/// Image to be compared +/// +/// ref_pyramid : [Img] +/// Pyramid representation of the ref image J_1 +/// +/// Returns +/// ------- +/// alignments : Alignments +/// a device array[n_patchs_y, n_patchs_x, 2] +/// Patchwise flow : V_n(p) for each patch (p) +/// +pub fn align_image_block_matching( + cube: &ComputeClient, + img: &GrayImage, + ref_pyramid: &[GrayImage], + level_configs: &[AlingLevelConfig], +) -> Alignments { + let alt_pyramid = 
build_pyramid::(cube, img, level_configs.iter().map(|x| x.factor)); + + // Align alternate image to the reference image + let mut alignments = None; + let cfg_iter = level_configs.iter().rev(); + let mut prev_tile_size = 64; + + for (lv, cfg) in cfg_iter.enumerate() { + alignments = Some(align_on_a_level::( + cube, + &ref_pyramid[lv], + &alt_pyramid[lv], + cfg, + alignments.as_ref(), + prev_tile_size, + )); + + prev_tile_size = cfg.tile_size; + } + + alignments.unwrap() +} + +/// +/// Alignment will always be an integer with this function, however it is +/// set to DEFAULT_FLOAT_TYPE. This enables to directly use the outputed +/// alignment for ICA without any casting from int to float +/// +pub(crate) fn align_on_a_level( + cube: &ComputeClient, + ref_pyramid_lvl: &GrayImage, + alt_pyramid_lvl: &GrayImage, + cfg: &AlingLevelConfig, + prev: Option<&Alignments>, + prev_tile_size: u32, +) -> Alignments { + let (ref_h, ref_w) = ref_pyramid_lvl.shape(); + + // Number of patches that can fit on this level + let h = ref_h / cfg.tile_size; + let w = ref_w / cfg.tile_size; + + let mut alignments = Alignments::new_empty::(cube, w, h); + + if let Some(prev) = prev { + // use the upsampled previous alignments as initial guesses + upsample_alignments::( + cube, + &mut alignments, + ref_pyramid_lvl, + alt_pyramid_lvl, + prev, + prev_tile_size, + cfg, + ) + } + + local_search::(cube, &mut alignments, ref_pyramid_lvl, alt_pyramid_lvl, cfg); + + alignments +} diff --git a/src/alignment/gauss_pyramid.rs b/src/alignment/gauss_pyramid.rs new file mode 100644 index 0000000..81e575b --- /dev/null +++ b/src/alignment/gauss_pyramid.rs @@ -0,0 +1,71 @@ +use cubecl::{client::ComputeClient, Runtime}; + +use crate::image::{downsample::downsample, padding::pad, GrayImage}; + +/// +/// Construct N-level coarse-to-fine gaussian pyramid +/// +/// Args: +/// image: input image (expected to be a grayscale image downsampled from a Bayer raw image) +/// factors: [int], dowsampling factors 
(fine-to-coarse) +/// +pub(crate) fn build_pyramid( + cube: &ComputeClient, + img: &GrayImage, + factors: impl Iterator, +) -> Vec { + // Subsequent pyramid levels are successively created + // with convolution by a kernel followed by downsampling + let init = Vec::with_capacity(factors.size_hint().1.unwrap_or(factors.size_hint().0)); + let mut levels = factors.fold(init, |mut acc, factor| { + acc.push(downsample::(cube, acc.last().unwrap_or(img), factor)); + acc + }); + + // Reverse the pyramid to get it coarse-to-fine + levels.reverse(); + + levels +} + +/// +/// Returns the pyramid representation of `img`, that will be used for +/// future block matching +/// +/// Parameters +/// ---------- +/// img : Img grayscale input image +/// factors : [int] pyramid scaling factors +/// tile_size : int size of the tile +/// +/// Returns +/// ------- +/// pyramid : [Img] +/// pyramid representation of the image +/// +pub fn build_gauss_pyramid( + cube: &ComputeClient, + img: &GrayImage, + factors: impl Iterator, + tile_size: u32, +) -> Vec { + let (h, w) = img.shape(); + + // pad with zeros up to the next multiple of tile_size (0 when already aligned) so that the tiles cover all image pixels + let padding_patches_height = (tile_size - (h % tile_size)) % tile_size; + let padding_patches_width = (tile_size - (w % tile_size)) % tile_size; + + if padding_patches_width != 0 || padding_patches_height != 0 { + let img = pad::( + cube, + img, + 0, + padding_patches_width, + 0, + padding_patches_height, + ); + build_pyramid::(cube, &img, factors) + } else { + build_pyramid::(cube, img, factors) + } +} diff --git a/src/alignment/local_search.rs b/src/alignment/local_search.rs new file mode 100644 index 0000000..a030161 --- /dev/null +++ b/src/alignment/local_search.rs @@ -0,0 +1,114 @@ +use cubecl::{ + client::ComputeClient, + prelude::{Float, Line, ScalarArg, Tensor, ABSOLUTE_POS_X, ABSOLUTE_POS_Y}, + CubeCount, CubeDim, Runtime, +}; + +use crate::{ + alignment::upsample_alignments::cube_compute_dist, + image::{Alignments, GrayImage}, +
misc::DistanceKind, +}; + +use super::AlingLevelConfig; + +pub fn local_search( + cube: &ComputeClient, + upsampled_alignments: &mut Alignments, + ref_pyramid_lvl: &GrayImage, + alt_pyramid_lvl: &GrayImage, + cfg: &AlingLevelConfig, +) { + let threadsperblock = CubeDim::default(); + + let h = upsampled_alignments.shape()[0] as u32; // tiles along y: axis 0, as read by the kernel + let w = upsampled_alignments.shape()[1] as u32; // tiles along x: axis 1, as read by the kernel + + let blockspergrid_x = (w + threadsperblock.x - 1) / threadsperblock.x; + let blockspergrid_y = (h + threadsperblock.y - 1) / threadsperblock.y; + + unsafe { + cube_local_search::launch_unchecked::( + cube, + CubeCount::Static(blockspergrid_x, blockspergrid_y, 1), + threadsperblock, + ref_pyramid_lvl.as_tensor_arg().as_tensor_arg(1), + alt_pyramid_lvl.as_tensor_arg().as_tensor_arg(1), + upsampled_alignments.as_tensor_arg().as_tensor_arg(1), + ScalarArg::new(cfg.tile_size), + ScalarArg::new(cfg.search_radius as _), + cfg.distance, + ) + } +} + +#[cubecl::cube(launch_unchecked)] +#[allow(clippy::identity_op)] +fn cube_local_search( + ref_pyramid_lvl: &Tensor, + alt_pyramid_lvl: &Tensor, + upsampled_alignments: &mut Tensor, + tile_size: u32, + search_radius: i32, + #[comptime] distance: DistanceKind, +) { + let n_patchs_y = upsampled_alignments.shape(0); + let n_patchs_x = upsampled_alignments.shape(1); + + let tile_x = ABSOLUTE_POS_X; + let tile_y = ABSOLUTE_POS_Y; + + if tile_y >= n_patchs_y || tile_x >= n_patchs_x { + return; + } + + let offset = tile_y * upsampled_alignments.stride(0) + tile_x * upsampled_alignments.stride(1); + + let mut local_flow = Line::empty(2); + local_flow[0] = upsampled_alignments[offset + 0]; + local_flow[1] = upsampled_alignments[offset + 1]; + + // position of the pixel in the top left corner of the patch + let patch_pos_x = tile_x * tile_size; + let patch_pos_y = tile_y * tile_size; + + // this should be rewritten to allow patches bigger than 32 + let mut local_ref = Line::empty(1024); + for i in 0..tile_size { + for j in 0..tile_size { + let idx =
patch_pos_x + j; + let idy = patch_pos_y + i; + + local_ref[i * 32 + j] = ref_pyramid_lvl[idy * ref_pyramid_lvl.stride(0) + idx]; + } + } + + let mut min_dist = f32::new(f32::INFINITY); // init as infty + let mut min_shift_y: i32 = 0; + let mut min_shift_x: i32 = 0; + + // window search + for search_shift_y in -search_radius..=search_radius { + for search_shift_x in -search_radius..=search_radius { + // computing dist + let dist = cube_compute_dist( + &local_ref, + alt_pyramid_lvl, + &local_flow, + patch_pos_x as i32 + search_shift_x, + patch_pos_y as i32 + search_shift_y, + tile_size, + distance, + ); + + if dist < min_dist { + min_dist = dist; + min_shift_y = search_shift_y; + min_shift_x = search_shift_x; + } + } + } + + upsampled_alignments[offset + 0] = local_flow[0] + min_shift_x as f32; + upsampled_alignments[offset + 1] = local_flow[1] + min_shift_y as f32; +} diff --git a/src/alignment/upsample_alignments.rs b/src/alignment/upsample_alignments.rs new file mode 100644 index 0000000..3eee9a6 --- /dev/null +++ b/src/alignment/upsample_alignments.rs @@ -0,0 +1,263 @@ +use cubecl::{ + client::ComputeClient, + prelude::{Abs, Float, Line, Max, Min, ScalarArg, Tensor, ABSOLUTE_POS_X, ABSOLUTE_POS_Y}, + CubeCount, CubeDim, Runtime, +}; + +use crate::{ + image::{Alignments, GrayImage}, + misc::DistanceKind, +}; + +use super::AlingLevelConfig; + +/// +/// Upsample alignements to adapt them to the next pyramid level (Section 3.2 of the IPOL article). 
+/// +pub(crate) fn upsample_alignments( + cube: &ComputeClient, + dst: &mut Alignments, + ref_pyramid_lvl: &GrayImage, + alt_pyramid_lvl: &GrayImage, + prev_alignments: &Alignments, + prev_tile_size: u32, + cfg: &AlingLevelConfig, +) { + let threadsperblock = CubeDim::default(); + + let n_tiles_y_new = dst.shape()[0] as u32; + let n_tiles_x_new = dst.shape()[1] as u32; + + let blockspergrid_x = (n_tiles_x_new + threadsperblock.x - 1) / threadsperblock.x; + let blockspergrid_y = (n_tiles_y_new + threadsperblock.y - 1) / threadsperblock.y; + + unsafe { + cube_upsample_alignments::launch_unchecked::( + cube, + CubeCount::Static(blockspergrid_x, blockspergrid_y, 1), + threadsperblock, + ref_pyramid_lvl.as_tensor_arg().as_tensor_arg(1), + alt_pyramid_lvl.as_tensor_arg().as_tensor_arg(1), + prev_alignments.as_tensor_arg().as_tensor_arg(1), + dst.as_tensor_arg().as_tensor_arg(1), + ScalarArg::new(cfg.upsampling_factor), + ScalarArg::new(cfg.tile_size), + ScalarArg::new(prev_tile_size), + ) + } +} + +#[allow(clippy::identity_op)] +#[cubecl::cube(launch_unchecked)] +fn cube_upsample_alignments( + ref_pyramid_lvl: &Tensor, + alt_pyramid_lvl: &Tensor, + previous_alignments: &Tensor, + upsampled_alignments: &mut Tensor, + upsampling_factor: u32, + tile_size: u32, + prev_tile_size: u32, +) { + let subtile_x = ABSOLUTE_POS_X; + let subtile_y = ABSOLUTE_POS_Y; + + let n_tiles_y_prev = previous_alignments.shape(0); + let n_tiles_x_prev = previous_alignments.shape(1); + + let n_tiles_y_new = upsampled_alignments.shape(0); + let n_tiles_x_new = upsampled_alignments.shape(1); + + let w = ref_pyramid_lvl.shape(1); + + let repeat_factor = upsampling_factor / (tile_size / prev_tile_size); + + if subtile_x >= n_tiles_x_new || subtile_y >= n_tiles_y_new { + return; + } + + let des_offset = + subtile_y * upsampled_alignments.stride(0) + subtile_x * upsampled_alignments.stride(1); + + // the new subtile is on the side of the image, and is not contained within a bigger old tile + if 
subtile_x >= repeat_factor * n_tiles_x_prev || subtile_y >= repeat_factor * n_tiles_y_prev { + upsampled_alignments[des_offset + 0] = 0.; + upsampled_alignments[des_offset + 1] = 0.; + return; + } + + let prev_tile_x = subtile_x / repeat_factor; + let prev_tile_y = subtile_y / repeat_factor; + + // position of the top left pixel in the subtile + let subtile_pos_y = subtile_y * tile_size; + let subtile_pos_x = subtile_x * tile_size; + + // copying ref patch into local memory, because it needs to be read 3 times + // this should be rewritten to allow patchs bigger than 32 + let mut local_ref = Line::empty(1024); + for i in 0..tile_size { + for j in 0..tile_size { + let idx = subtile_pos_x + j; + let idy = subtile_pos_y + i; + + local_ref[i * 32 + j] = ref_pyramid_lvl[idy * w + idx]; + } + } + + // position of the new tile within the old tile + let ups_subtile_x = subtile_x % repeat_factor; + let ups_subtile_y = subtile_y % repeat_factor; + + // computing id for the 3 closest patchs + let x_shift = if 2 * ups_subtile_x + 1 > repeat_factor { + 1i32 + } else { + -(1i32) + }; + + let y_shift = if 2 * ups_subtile_y + 1 > repeat_factor { + 1i32 + } else { + -(1i32) + }; + + // Choosing the best of the 3 alignments by minimising L1 dist + let mut dist = 1.0f32 / 0.0f32; + let mut optimal_flow_x = 0.0f32; + let mut optimal_flow_y = 0.0f32; + + // 3 Candidates alignments are fetched (by fetching them as early as possible, we may received + // them from global memory before we even require them, as calculations are performed during this delay) + let candidate_alignment_0_shift = cube_alignment_flow( + previous_alignments, + prev_tile_x, + prev_tile_y, + upsampling_factor as f32, + ); + + let candidate_alignment_vert_shift = cube_alignment_flow( + previous_alignments, + prev_tile_x, + clamp(prev_tile_y as i32 + y_shift, 0, n_tiles_y_prev as i32 - 1) as u32, + upsampling_factor as f32, + ); + + let candidate_alignment_horizontal_shift = cube_alignment_flow( + 
previous_alignments, + clamp(prev_tile_x as i32 + x_shift, 0, n_tiles_x_prev as i32 - 1) as u32, + prev_tile_y, + upsampling_factor as f32, + ); + + // 0 shift + let dist_ = cube_compute_dist( + &local_ref, + alt_pyramid_lvl, + &candidate_alignment_0_shift, + subtile_pos_x as i32, + subtile_pos_y as i32, + tile_size, + DistanceKind::L1, + ); + + if dist_ < dist { + dist = dist_; + optimal_flow_x = candidate_alignment_0_shift[0]; + optimal_flow_y = candidate_alignment_0_shift[1]; + } + + // vertical shift + let dist_ = cube_compute_dist( + &local_ref, + alt_pyramid_lvl, + &candidate_alignment_vert_shift, + subtile_pos_x as i32, + subtile_pos_y as i32, + tile_size, + DistanceKind::L1, + ); + + if dist_ < dist { + dist = dist_; + optimal_flow_x = candidate_alignment_vert_shift[0]; + optimal_flow_y = candidate_alignment_vert_shift[1]; + } + + // horizontal shift + let dist_ = cube_compute_dist( + &local_ref, + alt_pyramid_lvl, + &candidate_alignment_horizontal_shift, + subtile_pos_x as i32, + subtile_pos_y as i32, + tile_size, + DistanceKind::L1, + ); + + if dist_ < dist { + optimal_flow_x = candidate_alignment_horizontal_shift[0]; + optimal_flow_y = candidate_alignment_horizontal_shift[1]; + } + + // applying best flow + upsampled_alignments[des_offset + 0] = optimal_flow_x; + upsampled_alignments[des_offset + 1] = optimal_flow_y; +} + +#[cubecl::cube] +fn clamp(a: i32, min: i32, max: i32) -> i32 { + // Clamp::clamp(a, min, max) + Min::min(Max::max(a, min), max) +} + +#[cubecl::cube] +#[allow(clippy::identity_op)] +fn cube_alignment_flow( + previous_alignments: &Tensor, + x: u32, + y: u32, + factor: f32, +) -> Line { + let offset = y * previous_alignments.stride(0) + x * previous_alignments.stride(1); + + let mut candidate_alignment: Line = Line::empty(2); + candidate_alignment[0] = previous_alignments[offset + 0] * factor; + candidate_alignment[1] = previous_alignments[offset + 1] * factor; + candidate_alignment +} + +#[cubecl::cube] +pub(crate) fn cube_compute_dist( 
+ local_ref: &Line, + alt_pyramid_lvl: &Tensor, + candidate_alignment: &Line, + pos_x: i32, + pos_y: i32, + tile_size: u32, + #[comptime] distance: DistanceKind, +) -> f32 { + let h = alt_pyramid_lvl.shape(0); + let w = alt_pyramid_lvl.shape(1); + + let mut dist = 0.0f32; + for i in 0..tile_size { + for j in 0..tile_size { + let new_idx = pos_x + j as i32 + candidate_alignment[0] as i32; + let new_idy = pos_y + i as i32 + candidate_alignment[1] as i32; + + if (0 <= new_idx && new_idx < w as i32) && (0 <= new_idy && new_idy < h as i32) { + let alt_offset = new_idy as u32 * w + new_idx as u32; + let diff = local_ref[i * 32 + j] - alt_pyramid_lvl[alt_offset]; + + dist += match distance { + DistanceKind::L1 => Abs::abs(diff), + DistanceKind::L2 => diff * diff, + }; + } else { + dist = f32::new(f32::INFINITY) + } + } + } + + dist +} diff --git a/src/gat.rs b/src/gat.rs new file mode 100644 index 0000000..045f8f8 --- /dev/null +++ b/src/gat.rs @@ -0,0 +1,69 @@ +use cubecl::prelude::{Max, Sqrt, Tensor, ABSOLUTE_POS_X, ABSOLUTE_POS_Y}; + +use crate::{image::GrayImage, kernels::NoiseConfig}; + +/// +/// Generalized Anscombe Transform +/// noise model : std² = alpha * I + beta +/// Where alpha and beta are ISO dependent. +/// +/// Parameters +/// ---------- +/// image : TYPE +/// DESCRIPTION.
+/// alpha : float +/// value of alpha for the given iso +/// iso : float +/// ISO value +/// beta : float +/// Value of beta for the given iso +/// +/// Returns +/// ------- +/// VST_image : TYPE +/// input image with stabilized variance +/// +pub fn generalized_ascombe_transform(image: &GrayImage, nc: NoiseConfig) -> GrayImage { + let _ = nc; + let _ = image; + // assert len(image.shape) == 2 + // imshape_y, imshape_x = image.shape + + // VST_image = cuda.device_array(image.shape, DEFAULT_NUMPY_FLOAT_TYPE) + + // threadsperblock = (DEFAULT_THREADS, DEFAULT_THREADS) + // blockspergrid_x = math.ceil(imshape_x/threadsperblock[1]) + // blockspergrid_y = math.ceil(imshape_y/threadsperblock[0]) + // blockspergrid = (blockspergrid_x, blockspergrid_y) + + // cuda_GAT[blockspergrid, threadsperblock](image, VST_image, + // alpha, beta) + + // return VST_image + todo!() +} + +#[cubecl::cube] +fn cuda_gat(image: &Tensor, vst_image: &mut Tensor, alpha: f32, beta: f32) { + let x = ABSOLUTE_POS_X; + let y = ABSOLUTE_POS_Y; + + let imshape_y = image.shape(0); + let imshape_x = image.shape(1); + + if y >= imshape_y || x >= imshape_x { + return; + } + + let offset = y * image.stride(0) + x; + + // ISO should not appear here, since alpha and beta are + // already iso dependant. 
+ + let vst = Max::max( + 0.0, + alpha * image[offset] + 3.0 / 8.0 * alpha * alpha + beta, + ); + + vst_image[offset] = 2.0 / alpha * Sqrt::sqrt(vst); +} diff --git a/src/image.rs b/src/image.rs new file mode 100644 index 0000000..a003b54 --- /dev/null +++ b/src/image.rs @@ -0,0 +1,133 @@ +use std::marker::PhantomData; + +use cubecl::{ + client::ComputeClient, + prelude::{CubePrimitive, TensorHandleRef}, + server::Handle, + Runtime, +}; + +pub mod convolution; +pub mod downsample; +pub mod padding; + +#[derive(Debug)] +pub struct Tensor { + pub(crate) handle: Handle, + pub(crate) shape: [usize; R], + pub(crate) stride: [usize; R], + _m: PhantomData, +} + +impl Tensor { + pub fn new_empty( + cube: &ComputeClient, + width: u32, + height: u32, + ) -> Self { + let _ = height; + let _ = width; + let _ = cube; + todo!() + } + + #[inline] + pub fn shape(&self) -> [usize; R] { + self.shape + } + + #[inline] + pub fn stride(&self) -> [usize; R] { + self.stride + } + + #[inline] + pub fn as_tensor_arg(&self) -> TensorHandleRef<'_, Rt> { + TensorHandleRef { + handle: &self.handle, + strides: &self.stride, + shape: &self.shape, + runtime: PhantomData, + } + } + + #[inline] + pub fn from_slice( + cube: &ComputeClient, + data: &[T], + ) -> Self { + let _ = data; + let _ = cube; + todo!() + } +} + +pub struct ImageBuffer { + data: Tensor, + width: u32, + height: u32, +} + +impl std::ops::Deref for ImageBuffer { + type Target = Tensor; + + fn deref(&self) -> &Self::Target { + &self.data + } +} + +impl std::ops::DerefMut for ImageBuffer { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.data + } +} + +impl ImageBuffer { + pub fn from_tensor(data: Tensor, width: u32, height: u32) -> Self { + assert!(height as usize <= data.shape[0]); + assert!(width as usize <= data.shape[0] * S); + + Self { + data, + width, + height, + } + } + + #[inline] + pub fn shape(&self) -> (u32, u32) { + (self.height, self.width) + } + + #[inline] + pub fn height(&self) -> u32 { + self.height + } 
+ + #[inline] + pub fn width(&self) -> u32 { + self.width + } + + pub fn clone(&self, cube: &ComputeClient) -> Self { + let _ = cube; + todo!() + } +} + +// Bayer Raw Images +pub type Raw16Image = ImageBuffer; +pub type Raw10Image = ImageBuffer; + +// Gray Images +pub type GrayImage = ImageBuffer; +pub type Gray10Image = ImageBuffer; +pub type Gray16Image = ImageBuffer; + +// Rgb Images +pub type RgbImage = ImageBuffer; +pub type Rgba8Image = ImageBuffer; +pub type Rgb10Image = ImageBuffer; + +// Alingments +pub type Alignments = Tensor; diff --git a/src/image/convolution.rs b/src/image/convolution.rs new file mode 100644 index 0000000..5c4c639 --- /dev/null +++ b/src/image/convolution.rs @@ -0,0 +1,61 @@ +use cubecl::{ + client::ComputeClient, + prelude::{Abs, Float, ABSOLUTE_POS_X, ABSOLUTE_POS_Y}, + Runtime, +}; + +use super::Tensor; + +pub fn conv1d( + cube: &ComputeClient, + img: &Tensor, + kern: &Tensor, +) -> Tensor { + let _ = kern; + let _ = img; + let _ = cube; + todo!() +} + +pub fn conv1d_t( + cube: &ComputeClient, + img: &Tensor, + kern: &Tensor, +) -> Tensor { + let _ = kern; + let _ = img; + let _ = cube; + todo!() +} + +#[cubecl::cube(launch_unchecked)] +fn cube_conv1d( + input: &cubecl::prelude::Tensor, + kern: &cubecl::prelude::Tensor, + output: &mut cubecl::prelude::Tensor, + #[comptime] trans: bool, +) { + let kern_half = kern.len() as i32 / 2; + let h = output.shape(0) as i32 - 1; + let w = output.shape(1) as i32 - 1; + + let px = ABSOLUTE_POS_X as i32 - kern_half; + let py = ABSOLUTE_POS_Y as i32 - kern_half; + + for i in 0..kern.len() as i32 { + let ox = if trans { + Abs::abs(w - Abs::abs(px + i - w)) + } else { + px + }; + + let oy = if trans { + Abs::abs(h - Abs::abs(py + i - h)) + } else { + py + }; + + output[oy as u32 * output.stride(0) + ox as u32] = + input[oy as u32 * input.stride(0) + ox as u32] * kern[i as u32]; + } +} diff --git a/src/image/downsample.rs b/src/image/downsample.rs new file mode 100644 index 0000000..4ff1348 --- 
/dev/null +++ b/src/image/downsample.rs @@ -0,0 +1,65 @@ +use cubecl::{client::ComputeClient, Runtime}; + +use super::{convolution::conv1d, GrayImage, Tensor}; + +/// +/// Apply a convolution by a kernel if required, then downsample an image. +/// Args: +/// image: Device Array the input image (WARNING: single channel only!) +/// factor: downsampling factor +/// +pub fn downsample( + cube: &ComputeClient, + img: &GrayImage, + factor: u32, +) -> GrayImage { + // Special case + if factor == 1 { + return img.clone::(cube); + } + + // gaussian kernel std is proportional to downsampling factor + // filteredImage = gaussian_filter(image, sigma=factor * 0.5, order=0, output=None, mode='reflect') + let gaussian_kernel = create_gaussian_kernel1d(factor as f32 * 0.5, 4 * ((factor + 1) / 2)); + let gaussian_kernel = Tensor::from_slice::(cube, &gaussian_kernel); + + let temp = conv1d::(cube, &img.data, &gaussian_kernel); // convolve y + let filtered_image = conv1d::(cube, &temp, &gaussian_kernel); // convolve x + + // Shape of the downsampled image + let [h2, w2] = filtered_image.shape(); + let (h2, w2) = (h2 as f32 / factor as f32, w2 as f32 / factor as f32); + + let tensor = subsample::(cube, &filtered_image, factor, factor); + + GrayImage::from_tensor(tensor, w2 as _, h2 as _) +} + +/// +/// Computes a 1-D Gaussian convolution kernel. 
+/// The weight at integer offset x in -radius..=radius is exp(-x^2 / (2 * sigma^2)); weights are normalized to sum to 1. +pub fn create_gaussian_kernel1d(sigma: f32, radius: u32) -> Vec<f32> { + let sigma2 = sigma * sigma; + + let mut phi_x = (-(radius as i32)..radius as i32 + 1) + .map(|x| f32::exp(-0.5 * (x * x) as f32 / sigma2)) + .collect::<Vec<_>>(); + + let sum: f32 = phi_x.iter().cloned().sum(); + + phi_x.iter_mut().for_each(|x| *x /= sum); + phi_x +} + +pub fn subsample( + cube: &ComputeClient, + img: &Tensor, + y_step: u32, + x_step: u32, +) -> Tensor { + let _ = x_step; + let _ = y_step; + let _ = img; + let _ = cube; + todo!() +} diff --git a/src/image/padding.rs b/src/image/padding.rs new file mode 100644 index 0000000..c8bf785 --- /dev/null +++ b/src/image/padding.rs @@ -0,0 +1,20 @@ +use cubecl::{client::ComputeClient, Runtime}; + +use super::GrayImage; + +pub fn pad( + cube: &ComputeClient, + img: &GrayImage, + left: u32, + right: u32, + top: u32, + bottom: u32, +) -> GrayImage { + let _ = bottom; + let _ = top; + let _ = right; + let _ = left; + let _ = img; + let _ = cube; + todo!() +} diff --git a/src/kernels.rs b/src/kernels.rs new file mode 100644 index 0000000..00e7f83 --- /dev/null +++ b/src/kernels.rs @@ -0,0 +1,385 @@ +use cubecl::prelude::{Abs, Float, Line, Max, Min, Sqrt, Tensor, ABSOLUTE_POS_X, ABSOLUTE_POS_Y}; + +use crate::{ + gat::generalized_ascombe_transform, + image::{self, GrayImage}, +}; + +pub struct EstimateKernelsConfig { + k_detail: f32, + k_denoise: f32, + d_th: f32, + d_tr: f32, + k_stretch: f32, + k_shrink: f32, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct NoiseConfig { + pub alpha: f32, + pub beta: f32, +} + +/// +/// Implementation of Alg. 5: ComputeKernelCovariance +/// +/// Returns the kernels covariance matrices for the frame J_n, sampled at the +/// center of every bayer quad (or at the center of every grey pixel in grey +/// mode). 
+/// +/// Parameters +/// ---------- +/// img : device Array[imshape_y, imshape_x] +/// Raw image J_n +/// +/// Returns +/// ------- +/// covs : device Array[imshape_y/2, imshape_x/2, 2, 2] +/// Covariance matrices Omega_n, sampled at the center of each bayer quad. +/// +/// +pub fn estimate_kernels( + img: &GrayImage, + cfg: EstimateKernelsConfig, + noise_cfg: NoiseConfig, +) -> image::Tensor { + // Performing Variance Stabilization Transform + let img = generalized_ascombe_transform(img, noise_cfg); + + // Decimate to grey + // if bayer_mode : + // img_grey = compute_grey_images_(img, method="decimating") + + // else : + // img_grey = img # no need to copy now, they will be copied to gpu later. + + let img_gray = img; + let (grey_imshape_y, grey_imshape_x) = img_gray.shape(); + + // Computing grads + // let th_grey_img = th.as_tensor(img_grey, dtype = DEFAULT_TORCH_FLOAT_TYPE, device = "cuda"); + + // Horizontal filters + // let grad_kernel1 = np.array([[[[-0.5, 0.5]]], [[[0.5, 0.5]]]]); + + // Vertical filters + // let grad_kernel2 = np.array([[[[0.5], [0.5]]], [[[-0.5], [0.5]]]]); + + // let tmp = conv1d(th_grey_img, grad_kernel1); + // let th_full_grad = conv1d(tmp, grad_kernel2, groups = 2); + + // The default padding mode reduces the shape of grey_img of 1 pixel in each + // direction, as expected + + // let cuda_full_grads = + // cuda.as_cuda_array(th_full_grad.squeeze().transpose(0, 1).transpose(1, 2)); + + // shape [y, x, 2] + // let covs = cuda.device_array(grey_imshape + (2, 2), DEFAULT_NUMPY_FLOAT_TYPE); + + // threadsperblock = (DEFAULT_THREADS, DEFAULT_THREADS) + // blockspergrid_x = math.ceil(grey_imshape_x/threadsperblock[1]) + // blockspergrid_y = math.ceil(grey_imshape_y/threadsperblock[0]) + // blockspergrid = (blockspergrid_x, blockspergrid_y) + + // cuda_estimate_kernel[blockspergrid, threadsperblock](cuda_full_grads, + // k_detail, k_denoise, D_th, D_tr, k_stretch, k_shrink, + // covs) + // covs + todo!() +} + +#[allow(clippy::identity_op)] 
+#[cubecl::cube] +fn cuda_estimate_kernel( + full_grads: &Tensor, + covs: &mut Tensor, + k_detail: f32, + k_denoise: f32, + d_th: f32, + d_tr: f32, + k_stretch: f32, + k_shrink: f32, +) { + let pixel_idx = ABSOLUTE_POS_X; + let pixel_idy = ABSOLUTE_POS_Y; + + let imshape_y = covs.shape(0); + let imshape_x = covs.shape(1); + + if pixel_idy >= imshape_y || pixel_idx >= imshape_x { + return; + } + // Offsets are built from the tensor strides: each pixel owns several trailing elements (accessed at +0.. below), so a plain `y * width + x` would make neighbouring pixels overlap. + let covs_offset = pixel_idy * covs.stride(0) + pixel_idx * covs.stride(1); + + let mut structure_tensor: Line = Line::empty(4); + structure_tensor[0] = 0.0; + structure_tensor[1] = 0.0; + structure_tensor[2] = 0.0; + structure_tensor[3] = 0.0; + + for i in 0..2 { + for j in 0..2 { + let x = (pixel_idx + j) as i32 - 1; + let y = (pixel_idy + i) as i32 - 1; + + if (0 <= y && y < full_grads.shape(0) as i32) + && (0 <= x && x < full_grads.shape(1) as i32) + { + let full_grad_offset = y as u32 * full_grads.stride(0) + x as u32 * full_grads.stride(1); + let full_grad_x = full_grads[full_grad_offset + 0]; + let full_grad_y = full_grads[full_grad_offset + 1]; + + structure_tensor[0] += full_grad_x * full_grad_x; + structure_tensor[1] += full_grad_x * full_grad_y; + structure_tensor[2] += full_grad_x * full_grad_y; + structure_tensor[3] += full_grad_y * full_grad_y; + } + } + } + + let mut l = Line::empty(2); + let mut e1 = Line::empty(2); + let mut e2 = Line::empty(2); + let mut k = Line::empty(2); + + get_eigen_elmts_2x2(&structure_tensor, &mut l, &mut e1, &mut e2); + compute_k( + &mut k, l[0], l[1], k_detail, k_denoise, d_th, d_tr, k_stretch, k_shrink, + ); + + let k_1_sq = k[0] * k[0]; + let k_2_sq = k[1] * k[1]; + + covs[covs_offset + 0] = k_1_sq * e1[0] * e1[0] + k_2_sq * e2[0] * e2[0]; + covs[covs_offset + 1] = k_1_sq * e1[0] * e1[1] + k_2_sq * e2[0] * e2[1]; + covs[covs_offset + 2] = k_1_sq * e1[0] * e1[1] + k_2_sq * e2[0] * e2[1]; + covs[covs_offset + 3] = k_1_sq * e1[1] * e1[1] + k_2_sq * e2[1] * e2[1]; +} + +/// +/// Cuda function for resolving the 2x2 system A*X = B +/// by using the analytical formula +/// +/// 
Parameters +/// ---------- +/// A : Array[2,2] +/// +/// B : Array[2] +/// +/// Returns +/// ------- +/// None +/// +/// + +#[cubecl::cube] +fn solve_2x2(a: &Line, b: &Line, x: &mut Line) { + let det_a = a[0] * a[3] - a[1] * a[2]; + + x[0] = (a[3] * b[0] - a[1] * b[1]) / det_a; + x[1] = (a[0] * b[1] - a[2] * b[0]) / det_a; +} + +/// +/// inverts the 2x2 M array +/// +/// Parameters +/// ---------- +/// M : Array[2, 2] +/// Array to invert +/// M_i : Array[2, 2] +/// +/// Returns +/// ------- +/// None. +/// +/// +#[cubecl::cube] +fn invert_2x2(m: &Line, m_i: &mut Line) { + let det = m[0] * m[3] - m[1] * m[2]; + + if Abs::abs(det) > f32::new(f32::EPSILON) { + let det_i = 1.0 / det; + + m_i[0] = m[3] * det_i; + m_i[1] = -m[1] * det_i; + m_i[2] = -m[2] * det_i; + m_i[3] = m[0] * det_i; + } else { + m_i[0] = 1.0; + m_i[1] = 0.0; + m_i[2] = 0.0; + m_i[3] = 1.0; + } +} + +/// +/// Returns the two roots of the polynom a*X^2 + b*X + c = 0 for a, b and c +/// real numbers. The function only returns real roots : make sure they exist +/// before calling the function. l[0] contains the root with the biggest module +/// and l[1] the smallest +/// +/// +/// Parameters +/// ---------- +/// a : float +/// +/// b : float +/// +/// c : float +/// +/// roots : Array[2] +/// +/// Returns +/// ------- +/// None +/// +#[cubecl::cube] +fn get_real_polyroots_2(a: f32, b: f32, c: f32, roots: &mut Line) { + // numerical instabilities can cause delta to be slightly negative despite + // the equation admitting 2 real roots. 
+ let delta_root = Sqrt::sqrt(Max::max(b * b - 4.0 * a * c, 0.0)); + let r1 = (-b + delta_root) / (2.0 * a); + let r2 = (-b - delta_root) / (2.0 * a); + + let r1_abs: f32 = Abs::abs(r1); + if r1_abs >= Abs::abs(r2) { + roots[0] = r1; + roots[1] = r2; + } else { + roots[0] = r2; + roots[1] = r1; + } +} + +#[cubecl::cube] +fn get_eigen_val_2x2(m: &Line, l: &mut Line) { + let a = 1.0; + let b = -(m[0] + m[3]); + let c = m[0] * m[3] - m[1] * m[2]; + + get_real_polyroots_2(a, b, c, l) +} + +/// +/// return the eigen vectors with norm 1 for the eigen values l +/// M.e1 = l1.e1 ; M.e2 = l2.e2 +/// +/// Parameters +/// ---------- +/// M : Array[2,2] +/// Real Symmetric array for which eigen values are to be determined +/// l : Array[2] +/// e1, e2 : Array[2] +/// sorted Eigenvalues +/// e1, e2 : Array[2, 2] +/// Computed orthogonal and normalized eigen vectors +/// +/// Returns +/// ------- +/// None. +/// +#[cubecl::cube] +fn get_eigen_vect_2x2(m: &Line, l: &Line, e1: &mut Line, e2: &mut Line) { + // 2x2 algorithm : https://en.wikipedia.org/wiki/Eigenvalue_algorithm + if m[1] == 0.0 && m[0] == m[3] { + // m is multiple of identity, picking 2 ortogonal eigen vectors. + e1[0] = 1.0; + e1[1] = 0.0; + e2[0] = 0.0; + e2[1] = 1.0; + } else { + // averaging 2 for increased reliability + e1[0] = m[0] + m[1] - l[1]; + e1[1] = m[2] + m[3] - l[1]; + + if e1[0] == 0.0 { + e1[1] = 1.0; + e2[0] = 1.0; + e2[1] = 0.0; + } else if e1[1] == 0.0 { + e1[0] = 1.0; + e2[0] = 0.0; + e2[1] = 1.0; + } else { + let norm_ = Sqrt::sqrt(e1[0] * e1[0] + e1[1] * e1[1]); + e1[0] /= norm_; + e1[1] /= norm_; + + let sign = copysign(e1[0]); + e2[1] = Abs::abs(e1[0]); + e2[0] = -e1[1] * sign + } + } +} + +#[cubecl::cube] +fn get_eigen_elmts_2x2(m: &Line, l: &mut Line, e1: &mut Line, e2: &mut Line) { + get_eigen_val_2x2(m, l); + get_eigen_vect_2x2(m, l, e1, e2); +} + +/// +/// Computes k_1 and k_2 based on lambda1, lambda2 and the constants. 
+/// +/// Parameters +/// ---------- +/// l1 : float +/// lambda1 (dominant eigen value) +/// l2 : float +/// lambda2 : second eigenvalue +/// k : Array[2] +/// empty vector where k_1 and k_2 will be stored +/// k_detail : float +/// k_denoise : float +/// D_th : float +/// D_tr : float +/// k_stretch : float +/// k_shrink : float +/// Parameters to compute k_1 and k_2, all detailed in the article. +/// +#[cubecl::cube] +fn compute_k( + k: &mut Line, + l1: f32, + l2: f32, + k_detail: f32, + k_denoise: f32, + d_th: f32, + d_tr: f32, + k_stretch: f32, + k_shrink: f32, +) { + // When A is Nan, we fall back to this condition + let mut k1 = 1.0f32; + let mut k2 = 1.0f32; + + let a: f32 = 1.0 + Sqrt::sqrt((l1 - l2) / (l1 + l2)); + let d: f32 = clamp(1.0 - Sqrt::sqrt(l1) / d_tr + d_th, 0.0, 1.0); + + // This is a very aggressive way of driving anisotropy, but it works well so far. + if a > 1.95 { + k1 = 1.0 / k_shrink; + k2 = k_stretch; + } + + k[0] = k_detail * ((1.0 - d) * k1 + d * k_denoise); + k[1] = k_detail * ((1.0 - d) * k2 + d * k_denoise); +} + +#[cubecl::cube] +fn copysign(val: f32) -> f32 { + if val < 0.0f32 { + -(1.0f32) + } else { + 1.0f32 + } +} + +#[cubecl::cube] +fn clamp(a: f32, min: f32, max: f32) -> f32 { + // Clamp::clamp(a, min, max) + Min::min(Max::max(a, min), max) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c482ee4 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,6 @@ +pub mod alignment; +pub mod gat; +pub mod image; +pub mod kernels; +// pub mod merge; +pub mod misc; diff --git a/src/merge.rs b/src/merge.rs new file mode 100644 index 0000000..81eb94e --- /dev/null +++ b/src/merge.rs @@ -0,0 +1,411 @@ +use cubecl::prelude::Tensor; + +use crate::image::{Alignments, GrayImage}; + +/// +/// Implementation of Alg. 11: AccumulationReference +/// Accumulates the reference frame into num and den, while considering +/// (if enabled) the accumulated robustness mask to enforce single frame SR if +/// necessary. 
+/// +/// Parameters +/// ---------- +/// ref_img : device Array[imshape_y, imshape_x] +/// Reference image J_1 +/// kernels : device Array[imshape_y//2, imshape_x//2, 2, 2] +/// Covariance Matrices Omega_1 +/// num : device Array[s*imshape_y, s*imshape_x, c] +/// Numerator of the accumulator +/// den : device Array[s*imshape_y, s*imshape_x, c] +/// Denominator of the accumulator +/// options : dict +/// verbose options. +/// params : dict +/// parameters (containing the zoom s). +/// acc_rob : [imshape_y, imshape_x], optional +/// accumulated robustness mask. The default is None. +/// +/// Returns +/// ------- +/// None. +/// +pub fn merge_ref() {} + +#[cubecl::cube] +fn accumulate_ref(ref_img: &Tensor, covs: &Tensor, bayer_mode: bool, iso_kernel: bool, scale: f32, cfa_pattern: &Array, + num: &Tensor, den: &Tensor, acc_rob: &Tensor, + robustness_denoise: bool, max_frame_count: u32, rad_max: u32, max_multiplier: u32) { + + let output_pixel_idx, output_pixel_idy = cuda.grid(2); + let output_size_y, output_size_x, _ = num.shape; + let input_size_y, input_size_x = ref_img.shape; + + if output_pixel_idx >= output_size_x || output_pixel_idy >= output_size_y) { + return; + } + + if bayer_mode { + n_channels = 3; + acc = cuda.local.array(3, dtype=DEFAULT_CUDA_FLOAT_TYPE); + val = cuda.local.array(3, dtype=DEFAULT_CUDA_FLOAT_TYPE); + } else { + n_channels = 1; + acc = cuda.local.array(1, dtype=DEFAULT_CUDA_FLOAT_TYPE); + val = cuda.local.array(1, dtype=DEFAULT_CUDA_FLOAT_TYPE); + } + + // Copying CFA locally. 
We will read that 9 times, so it's worth it + let local_cfa = cuda.local.array((2,2), uint8); + for i in 0..2 { + for j in 0..2 { + local_cfa[i,j] = uint8(cfa_pattern[i,j]); + } + } + + let coarse_ref_sub_pos = cuda.local.array(2, dtype=DEFAULT_CUDA_FLOAT_TYPE); // y, x + coarse_ref_sub_pos[0] = output_pixel_idy / scale; + coarse_ref_sub_pos[1] = output_pixel_idx / scale; + + for chan in 0..n_channels { + acc[chan] = 0.0; + val[chan] = 0.0; + } + + // computing kernel + // this is rather slow and could probably be sped up + if !iso_kernel { + let interpolated_cov = cuda.local.array((2, 2), dtype = DEFAULT_CUDA_FLOAT_TYPE); + let cov_i = cuda.local.array((2, 2), dtype=DEFAULT_CUDA_FLOAT_TYPE); + + + // fetching the 4 closest covs + let close_covs = cuda.local.array((2, 2, 2 ,2), DEFAULT_CUDA_FLOAT_TYPE); + let grey_pos = cuda.local.array(2, DEFAULT_CUDA_FLOAT_TYPE); + if bayer_mode { + grey_pos[0] = (coarse_ref_sub_pos[0]-0.5)/2; // grey grid is offseted and twice more sparse + grey_pos[1] = (coarse_ref_sub_pos[1]-0.5)/2; + } else { + grey_pos[0] = coarse_ref_sub_pos[0]; // grey grid is exactly the coarse grid + grey_pos[1] = coarse_ref_sub_pos[1]; + } + + // clipping the coordinates to stay in bound + let floor_x = int(max(math.floor(grey_pos[1]), 0)); + let floor_y = int(max(math.floor(grey_pos[0]), 0)); + + let ceil_x = min(floor_x + 1, covs.shape[1]-1); + let ceil_y = min(floor_y + 1, covs.shape[0]-1); + for i in 0..2 { + for j in 0..2 { + close_covs[0, 0, i, j] = covs[floor_y, floor_x, i, j]; + close_covs[0, 1, i, j] = covs[floor_y, ceil_x, i, j]; + close_covs[1, 0, i, j] = covs[ceil_y, floor_x, i, j]; + close_covs[1, 1, i, j] = covs[ceil_y, ceil_x, i, j]; + } + } + + // interpolating covs + interpolate_cov(close_covs, grey_pos, interpolated_cov); + invert_2x2(interpolated_cov, cov_i); + } + + // fetching acc robustness if required + // Acc robustness is known for each raw pixel. An implicit interpolation done + // from LR to HR using nearest neighbor. 
+ if robustness_denoise { + let local_acc_r = acc_rob[min(round(coarse_ref_sub_pos[0]), acc_rob.shape[0]-1), + min(round(coarse_ref_sub_pos[1]), acc_rob.shape[1]-1)]; + + additional_denoise_power = denoise_power_merge(local_acc_r, max_multiplier, max_frame_count); + rad = denoise_range_merge(local_acc_r, rad_max, max_frame_count); + } else { + additional_denoise_power = 1; + rad = 1; + } + + let center_x = round(coarse_ref_sub_pos[1]); + let center_y = round(coarse_ref_sub_pos[0]); + for i in -rad..=rad { + for j in -rad..=rad { + let pixel_idx = center_x + j; + let pixel_idy = center_y + i; + + // in bound condition + if (0 <= pixel_idx && pixel_idx < input_size_x) && + (0 <= pixel_idy && pixel_idy < input_size_y) { + + // checking if pixel is r, g or b + let channel if bayer_mode : + local_CFA[pixel_idy%2, pixel_idx%2] + else: + 0 + }; + + // By fetching the value now, we can compute the kernel weight + // while it is called from global memory + let c = ref_img[pixel_idy, pixel_idx]; + + // computing distance + let dist_x = pixel_idx - coarse_ref_sub_pos[1]; + let dist_y = pixel_idy - coarse_ref_sub_pos[0]; + + // Computing w + let mut y = if iso_kernel { + max(0, 2 * (dist_x * dist_x + dist_y * dist_y)) + } else { + max(0, quad_mat_prod(cov_i, dist_x, dist_y)) + }; + + // y can be slightly negative because of numerical precision. 
+ // I clamp it to not explode the error with exp + + // this is equivalent to multiplying the covariance, + // but at the cost of one scalar operation (instead of 4) + y /= additional_denoise_power; + + let w = if bayer_mode { + exp(-0.5 * y) + } else { + exp(-0.5 * 4.0 * y) // original kernel constants are designed for bayer distances, not greys, Hence x4 + }; + // ----- + val[channel] += c * w; + acc[channel] += w; + } + } + } + + if robustness_denoise and local_acc_r < max_frame_count { + // Overwritting values to enforce single frame demosaicing + + for chan in 0..n_channels { + num[output_pixel_idy, output_pixel_idx, chan] = val[chan]; + den[output_pixel_idy, output_pixel_idx, chan] = acc[chan]; + } + } else { + for chan in 0..n_channels { + num[output_pixel_idy, output_pixel_idx, chan] += val[chan]; + den[output_pixel_idy, output_pixel_idx, chan] += acc[chan]; + } + } +} + +pub struct MergeConfig { + pub scale: f32, +} + +/// +/// Implementation of Alg. 4: Accumulation +/// Accumulates comp_img (J_n, n>1) into num and den, based on the alignment +/// V_n, the covariance matrices Omega_n and the robustness mask estimated before. 
+/// The size of the merge_result is adjustable with params['scale'] +/// +/// +/// Parameters +/// ---------- +/// comp_imgs : device Array [imsize_y, imsize_x] +/// The non-reference image to merge (J_n) +/// alignments : device Array[n_tiles_y, n_tiles_x, 2] +/// The final estimation of the tiles' alignment V_n(p) +/// covs : device array[imsize_y//2, imsize_x//2, 2, 2] +/// covariance matrices Omega_n +/// r : Device_Array[imsize_y, imsize_x] +/// Robustness mask r_n +/// num : device Array[s*imshape_y, s*imshape_x, c] +/// Numerator of the accumulator +/// den : device Array[s*imshape_y, s*imshape_x, c] +/// Denominator of the accumulator +/// +/// options : Dict +/// Options to pass +/// params : Dict +/// parameters +/// +/// Returns +/// ------- +/// None +/// +fn merge(comp_img: &GrayImage, alignments: &Alignments, covs: image::Tensor, r, num, den, cfg: MergeConfig) { + // scale = params['scale'] + + // CFA_pattern = cuda.to_device(params['exif']['CFA Pattern']) + // bayer_mode = params['mode'] == 'bayer' + // iso_kernel = params['kernel'] == 'iso' + // tile_size = params['tuning']['tileSize'] + + // native_im_size = comp_img.shape + + // // casting to integer to account for floating scale + // output_size = (round(scale*native_im_size[0]), round(scale*native_im_size[1])) + + + // dispatching threads. 
1 thread for 1 output pixel + // threadsperblock = (DEFAULT_THREADS, DEFAULT_THREADS) # maximum, we may take less + // blockspergrid_x = math.ceil(output_size[1]/threadsperblock[1]) + // blockspergrid_y = math.ceil(output_size[0]/threadsperblock[0]) + // blockspergrid = (blockspergrid_x, blockspergrid_y) + + // accumulate[blockspergrid, threadsperblock]( + // comp_img, alignments, covs, r, + // bayer_mode, iso_kernel, scale, tile_size, CFA_pattern, + // num, den) +} + + +#[cubecl::cube] +fn accumulate(comp_img, alignments, covs, r, + bayer_mode, iso_kernel, scale, tile_size, CFA_pattern, + num, den) { + + let output_pixel_idx, output_pixel_idy = cuda.grid(2); + + let output_size_y, output_size_x, _ = num.shape; + let input_size_y, input_size_x = comp_img.shape; + + if output_pixel_idx >= output_size_x || output_pixel_idy >= output_size_y { + return; + } + + if bayer_mode { + n_channels = 3; + acc = cuda.local.array(3, dtype=DEFAULT_CUDA_FLOAT_TYPE); + val = cuda.local.array(3, dtype=DEFAULT_CUDA_FLOAT_TYPE); + } else { + n_channels = 1; + acc = cuda.local.array(1, dtype=DEFAULT_CUDA_FLOAT_TYPE); + val = cuda.local.array(1, dtype=DEFAULT_CUDA_FLOAT_TYPE); + } + + // Copying CFA locally. 
We will read that 9 times, so it's worth it + let local_cfa = cuda.local.array((2,2), uint8); + for i in 0..2 { + for j in 0..2 { + local_CFA[i,j] = uint8(cfa_pattern[i,j]); + } + } + + let coarse_ref_sub_pos = cuda.local.array(2, dtype=DEFAULT_CUDA_FLOAT_TYPE); // y, x + + coarse_ref_sub_pos[0] = output_pixel_idy / scale; + coarse_ref_sub_pos[1] = output_pixel_idx / scale; + + // fetch of the flow, as early as possible + let local_optical_flow = cuda.local.array(2, dtype=DEFAULT_CUDA_FLOAT_TYPE); + let patch_idy = int(coarse_ref_sub_pos[0] / tile_size); + let patch_idx = int(coarse_ref_sub_pos[1] /tile_size); + + local_optical_flow[0] = alignments[patch_idy, patch_idx, 0]; + local_optical_flow[1] = alignments[patch_idy, patch_idx, 1]; + + for chan in 0..n_channels { + acc[chan] = 0.0; + val[chan] = 0.0; + } + + let patch_center_pos = cuda.local.array(2, DEFAULT_CUDA_FLOAT_TYPE); // y, x + + + // fetching robustness + // The robustness coefficient is known for every raw pixel, and implicitely + // interpolated to HR using nearest neighboor interpolations. 
+ + let y_r = clamp(round(coarse_ref_sub_pos[0]), 0, r.shape[0]-1); + let x_r = clamp(round(coarse_ref_sub_pos[1]), 0, r.shape[1]-1); + let local_r = r[y_r, x_r]; + + patch_center_pos[1] = coarse_ref_sub_pos[1] + local_optical_flow[0]; + patch_center_pos[0] = coarse_ref_sub_pos[0] + local_optical_flow[1]; + + // updating inbound condition + if patch_center_pos[1] >= input_size_x || patch_center_pos[0] >= input_size_y { + return; + } + + // computing kernel + if !iso_kernel { + let interpolated_cov = cuda.local.array((2, 2), dtype = DEFAULT_CUDA_FLOAT_TYPE); + let cov_i = cuda.local.array((2, 2), dtype=DEFAULT_CUDA_FLOAT_TYPE); + // fetching the 4 closest covs + let close_covs = cuda.local.array((2, 2, 2 ,2), DEFAULT_CUDA_FLOAT_TYPE); + let grey_pos = cuda.local.array(2, DEFAULT_CUDA_FLOAT_TYPE); + + if bayer_mode { + grey_pos[0] = (patch_center_pos[0] - 0.5) /2; // grey grid is offseted and twice more sparse + grey_pos[1] = (patch_center_pos[1] - 0.5) /2; + } else { + grey_pos[0] = patch_center_pos[0]; // grey grid is exactly the coarse grid + grey_pos[1] = patch_center_pos[1]; + } + + // clipping the coordinates to stay in bound + let floor_x = int(max(math.floor(grey_pos[1]), 0)); + let floor_y = int(max(math.floor(grey_pos[0]), 0)); + + let ceil_x = min(floor_x + 1, covs.shape[1]-1); + let ceil_y = min(floor_y + 1, covs.shape[0]-1); + for i in 0..2 { + for j in range(0, 2) { + close_covs[0, 0, i, j] = covs[floor_y, floor_x, i, j]; + close_covs[0, 1, i, j] = covs[floor_y, ceil_x, i, j]; + close_covs[1, 0, i, j] = covs[ceil_y, floor_x, i, j]; + close_covs[1, 1, i, j] = covs[ceil_y, ceil_x, i, j]; + } + } + + // interpolating covs at the desired spot + interpolate_cov(close_covs, grey_pos, interpolated_cov); + invert_2x2(interpolated_cov, cov_i); + } + + let center_x = round(patch_center_pos[1]); + let center_y = round(patch_center_pos[0]); + + for i in -1..=1 { + for j in -1..=1 { + let pixel_idx = center_x + j; + let pixel_idy = center_y + i; + + // in bound 
condition + if (0 <= pixel_idx && pixel_idx< input_size_x) && + (0 <= pixel_idy && pixel_idy< input_size_y) { + + // checking if pixel is r, g or b + let channel = if bayer_mode { + local_CFA[pixel_idy%2, pixel_idx%2] + } else { + 0 + }; + // By fetching the value now, we can compute the kernel weight + // while it is called from global memory + let c = comp_img[pixel_idy, pixel_idx]; + + // computing distance + let dist_x = pixel_idx - patch_center_pos[1]; + let dist_y = pixel_idy - patch_center_pos[0]; + + // Computing w + let y = if iso_kernel { + max(0, 2 * (dist_x * dist_x + dist_y * dist_y)) + } else { + max(0, quad_mat_prod(cov_i, dist_x, dist_y)) + // y can be slightly negative because of numerical precision. + // I clamp it to not explode the error with exp + }; + + w = math.exp(-0.5*y) + + //--- + + val[channel] += c * w * local_r; + acc[channel] += w * local_r; + } + } + } + + for chan in 0..n_channels { + num[output_pixel_idy, output_pixel_idx, chan] += val[chan]; + den[output_pixel_idy, output_pixel_idx, chan] += acc[chan]; + } +} + + diff --git a/src/misc.rs b/src/misc.rs new file mode 100644 index 0000000..62e6c03 --- /dev/null +++ b/src/misc.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum DistanceKind { + L1, + L2, +}