This commit is contained in:
Андрей Ткаченко 2024-12-05 17:54:52 +04:00
commit 80a8d585e2
44 changed files with 10650 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

2485
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

18
Cargo.toml Normal file
View File

@ -0,0 +1,18 @@
[package]
name = "cub"
version = "0.1.0"
edition = "2021"
[dependencies]
arrayvec = "0.7.6"
rawloader = { git = "https://github.com/andreytkachenko/rawloader" }
cubecl = { git = "https://github.com/tracel-ai/cubecl.git", features = [
# "wgpu-spirv",
"wgpu",
# "cuda",
] }
smallvec = "1.13.2"
bytemuck = "1.19.0"
image = "0.25.5"
thiserror = "2.0.4"
log = "0.4.22"

28
readme.md Normal file
View File

@ -0,0 +1,28 @@
align_merge_spatial_domain ok
color_difference ok
estimate_color_noise ok
robust_merge ok
align_image ok
avg_pool -
build_pyramid ok
compute_tile_diff -
correct_upsampling_error -
find_best_tile_alignment -
warp_image partial
add_image ok
add_image_highlights ok
add_image_exposure ok
add_image_weighted ok
blur partial
calculate_black_levels -
calculate_weight_highlights -
crop_image -
image_mean -
upsample_alignments -
find_hotpixels -
normalize_image -
prepare_image -
image_like -
calculate_temporal_average -
correct_exposure partial
image_max -

0
src/alignment.rs Normal file
View File

66
src/backend.rs Normal file
View File

@ -0,0 +1,66 @@
use std::ops;
use cubecl::{client::ComputeClient, prelude::CubePrimitive, Runtime};
use crate::tensor::{CpuTensor, GpuTensor, Tensor};
/// Wrapper around the CubeCL compute client of runtime `R`.
///
/// Adds typed tensor allocation, upload and read-back helpers on top of the raw client.
#[derive(Debug, Clone)]
pub struct Backend<R: Runtime> {
// Compute client obtained from the runtime for one device (see `Backend::new`).
client: ComputeClient<R::Server, R::Channel>,
}
impl<Rt: Runtime> Backend<Rt> {
    /// Reserves device storage for a tensor of the given `shape` without uploading any data.
    ///
    /// The buffer size is the product of all dimensions times `size_of::<T>()`.
    pub fn alloc_tensor<T: CubePrimitive, const R: usize>(
        &self,
        shape: [usize; R],
    ) -> GpuTensor<T, R> {
        Tensor::create(
            self.client
                .empty(shape.into_iter().product::<usize>() * size_of::<T>()),
            shape,
        )
    }

    /// Uploads `data` to the device as a tensor of the given `shape`.
    ///
    /// # Panics
    /// Panics if `data.len()` differs from the number of elements implied by `shape`.
    pub fn load_tensor<T: bytemuck::NoUninit + CubePrimitive, const R: usize>(
        &self,
        data: &[T],
        shape: [usize; R],
    ) -> GpuTensor<T, R> {
        let size = shape.into_iter().product::<usize>();
        assert_eq!(
            data.len(),
            size,
            "load_tensor: Data size mismatch shape size!"
        );
        Tensor::create(self.client.create(bytemuck::cast_slice(data)), shape)
    }

    /// Reads a device tensor back into host memory, consuming the device tensor.
    ///
    /// The bytes returned by the client are copied element by element into a freshly
    /// allocated `Vec<T>`. The byte buffer must not be reinterpreted in place: it was
    /// allocated with the alignment of `u8`, so handing it to `Vec::<T>::from_raw_parts`
    /// would violate that function's alignment and layout requirements.
    pub fn store_tensor<T: bytemuck::AnyBitPattern + CubePrimitive, const R: usize>(
        &self,
        tensor: GpuTensor<T, R>,
    ) -> CpuTensor<T, R> {
        let bytes = self.client.read_one(tensor.data.binding());
        // `pod_read_unaligned` tolerates any source alignment; trailing bytes that do not
        // form a whole element are ignored, matching the previous `len / size_of` rounding.
        let data: Vec<T> = bytes
            .chunks_exact(std::mem::size_of::<T>())
            .map(|chunk| bytemuck::pod_read_unaligned::<T>(chunk))
            .collect();
        CpuTensor::create(data, tensor.shape)
    }

    /// Creates a backend bound to the runtime client of `device`.
    pub fn new(device: &Rt::Device) -> Self {
        Self {
            client: Rt::client(device),
        }
    }
}
/// Exposes the wrapped compute client so a `&Backend` can be passed where a client is expected.
impl<R: Runtime> ops::Deref for Backend<R> {
    type Target = ComputeClient<R::Server, R::Channel>;

    fn deref(&self) -> &Self::Target {
        let Self { client } = self;
        client
    }
}

250
src/image.rs Normal file
View File

@ -0,0 +1,250 @@
use std::path::Path;
use crate::{backend::Backend, tensor::GpuTensor};
use cubecl::{prelude::*, CubeCount, CubeDim, Runtime, PLANE_DIM_APPROX};
use image::{GrayImage, RgbImage};
use rawloader::{RawImageData, Tag};
pub type GpuImage<T> = GpuTensor<T, 3>;
impl<T: bytemuck::AnyBitPattern + CubePrimitive> GpuImage<T> {
#[inline]
pub fn height(&self) -> u32 {
self.shape[0] as u32
}
#[inline]
pub fn width(&self) -> u32 {
self.shape[1] as u32
}
pub fn channels(&self) -> u32 {
self.shape[2] as u32
}
}
impl<T: bytemuck::AnyBitPattern + CubePrimitive> GpuImage<T> {
#[inline]
pub fn new<Rt: Runtime>(client: &Backend<Rt>, width: u32, height: u32, channels: u32) -> Self {
let dim = CubeDim::default();
let corr_width = width.next_multiple_of(dim.x);
let corr_height = height.next_multiple_of(dim.y);
client.alloc_tensor([corr_height as usize, corr_width as usize, channels as usize])
}
}
impl GpuImage<f32> {
    /// Reads the image back from the device and converts it to an 8-bit `DynamicImage`.
    ///
    /// Single-channel images become grayscale with a gain of 2; anything else is treated
    /// as RGB with per-channel gains of 4, 3 and 4. Samples are clamped to `[0, 1]` after
    /// the gain and then scaled to `0..=255`.
    pub fn to_image<R: Runtime>(self, client: &Backend<R>) -> image::DynamicImage {
        /// Applies `gain`, clamps to `[0, 1]` and scales to the `u8` range.
        fn quantize(sample: f32, gain: f32) -> u8 {
            ((sample * gain).clamp(0.0, 1.0) * 255.0) as u8
        }

        let (cols, rows) = (self.width(), self.height());
        let single_channel = self.channels() == 1;
        let pixels = client.store_tensor(self);

        if single_channel {
            let gray = GrayImage::from_par_fn(cols, rows, move |x, y| {
                image::Luma([quantize(pixels[[y as usize, x as usize, 0]], 2.0)])
            });
            gray.into()
        } else {
            let rgb = RgbImage::from_par_fn(cols, rows, move |x, y| {
                let (row, col) = (y as usize, x as usize);
                image::Rgb([
                    quantize(pixels[[row, col, 0]], 4.0),
                    quantize(pixels[[row, col, 1]], 3.0),
                    quantize(pixels[[row, col, 2]], 4.0),
                ])
            });
            rgb.into()
        }
    }
}
/// Capture metadata collected while decoding a raw file.
#[derive(Debug, Clone)]
pub struct ImageMetadata {
/// ISO value used to capture the image
pub iso_speed: f32,
/// Shutter Speed used to capture the image
pub shutter_speed: f32,
/// Exposure Time used to capture the image
pub exposure_time: f32,
/// Exposure Bias
pub exposure_bias: f32,
/// Aperture value
pub aperture_value: f32,
/// F-number
pub f_number: f32,
/// Focal length
pub focal_length: f32,
// Colour conversion matrix: xyz to rgb (field currently disabled; kept as a plain
// comment so that its description does not attach to `wb_coeffs` below).
// pub ccm: Mat4x3f,
/// Whitebalance coefficients encoded in the file in RGBE order
pub wb_coeffs: [f32; 4],
}
/// Decodes a raw file, uploads it to the device and normalises it to a single-channel
/// `f32` Bayer image.
///
/// The sensor crop is widened where necessary so that the output always starts on a red
/// photosite, which means the result is in RGGB order regardless of the file's CFA
/// layout. Each sample is mapped to `(pixel - black) / (white - black)` using the levels
/// of its colour channel.
///
/// # Errors
/// Returns the `rawloader` error when the file cannot be decoded.
///
/// # Panics
/// Panics when the raw data is floating point or when the CFA name is not one of
/// `RGGB`, `GRBG`, `GBRG` or `BGGR`.
pub fn load_raw<R: Runtime, P: AsRef<Path>>(
    client: &Backend<R>,
    path: P,
) -> Result<GpuImage<f32>, rawloader::RawLoaderError> {
    println!("image `{}`:", path.as_ref().display());
    let image = rawloader::decode_file(path)?;
    println!("  CFA {:?}", image.cfa.name);
    println!("  white_levels {:?}", image.whitelevels);
    println!("  black_levels {:?}", image.blacklevels);
    println!("  size {}x{}", image.width, image.height);
    println!("  crop {:?}", image.crops);
    println!();

    // The metadata is assembled but not returned yet.
    let _meta = if let Some(exif) = image.exif.as_ref() {
        let iso_speed = exif
            .get_rational(Tag::ISOSpeed)
            .or_else(|| exif.get_uint(Tag::ISOSpeed).map(|x| x as f32))
            .unwrap_or(0.0);
        let shutter_speed = exif.get_rational(Tag::ShutterSpeedValue).unwrap_or(0.0);
        let aperture_value = exif.get_rational(Tag::ApertureValue).unwrap_or(0.0);
        let exposure_time = exif.get_rational(Tag::ExposureTime).unwrap_or(0.0);
        let exposure_bias = exif
            .get_rational(Tag::ExposureBiasValue)
            .or_else(|| {
                exif.get_uint(Tag::ExposureBiasValue)
                    .map(|x| x as i32 as f32)
            })
            .unwrap_or(0.0);
        let f_number = exif.get_rational(Tag::FNumber).unwrap_or(0.0);
        let focal_length = exif
            .get_rational(Tag::FocalLength)
            .or_else(|| exif.get_uint(Tag::FocalLength).map(|x| x as f32))
            .unwrap_or(0.0);
        Some(ImageMetadata {
            iso_speed,
            shutter_speed,
            exposure_time,
            exposure_bias,
            aperture_value,
            f_number,
            focal_length,
            // ccm: image.cam_to_xyz().into(),
            wb_coeffs: image.wb_coeffs,
        })
    } else {
        None
    };

    let image_data = match image.data {
        RawImageData::Integer(vec) => vec,
        RawImageData::Float(_) => unimplemented!(),
    };

    let pad_top = image.crops[0];
    let pad_right = image.crops[1];
    let pad_bottom = image.crops[2];
    let pad_left = image.crops[3];

    // Row/column parity of the red photosite inside the 2x2 CFA cell.
    let (red_row, red_col): (usize, usize) = match image.cfa.name.as_str() {
        "RGGB" => (0, 0),
        "GRBG" => (0, 1),
        "GBRG" => (1, 0),
        "BGGR" => (1, 1),
        _ => panic!("not implemented {}", image.cfa.name),
    };
    // Grow the top/left crop by one pixel whenever its parity differs from the red
    // photosite's parity, so that the cropped image starts on red.
    let pad_top = pad_top + ((pad_top ^ red_row) & 1);
    let pad_left = pad_left + ((pad_left ^ red_col) & 1);

    let real_image_width = image.width - (pad_right + pad_left);
    let real_image_height = image.height - (pad_top + pad_bottom);
    let aligned_width = real_image_width.next_multiple_of(PLANE_DIM_APPROX);

    let src_img = client.load_tensor(&image_data, [image.height, image.width, 1]);
    let dst_img = GpuImage::new(client, real_image_width as u32, real_image_height as u32, 1);

    let mut white_levels = SequenceArg::new();
    for wl in image.whitelevels {
        white_levels.push(ScalarArg::new(wl as u32));
    }
    let mut black_levels = SequenceArg::new();
    for bl in image.blacklevels {
        black_levels.push(ScalarArg::new(bl as u32));
    }

    /// Unpacks one 16-bit sample per invocation and normalises it with the black and
    /// white level of its Bayer channel.
    #[cube(launch_unchecked)]
    fn normalize_image(
        input: &Tensor<u32>,
        output: &mut Tensor<f32>,
        white_levels: &Sequence<u32>,
        black_levels: &Sequence<u32>,
        pad_left: u32,
        pad_top: u32,
    ) {
        let input_x = ABSOLUTE_POS_X + pad_left;
        let input_y = ABSOLUTE_POS_Y + pad_top;
        let var_x = input_x & 1;
        let var_y = input_y & 1;
        // Every 32-bit word carries two 16-bit samples: even columns in the low half,
        // odd columns in the high half.
        let pix = input[input_y * input.stride(0) + input_x / 2];
        let pix = match var_x {
            0 => pix & 0xFFFF,
            _ => pix >> 16u32,
        };
        // 0 = red, 1 and 2 = green, 3 = blue (the crop above guarantees RGGB order).
        let bayer_offset = var_y + var_y + var_x;
        let wl = match bayer_offset {
            0 => *white_levels.index(0u32) as i32,
            3 => *white_levels.index(2u32) as i32,
            _ => *white_levels.index(1u32) as i32,
        };
        let bl = match bayer_offset {
            0 => *black_levels.index(0u32) as i32,
            3 => *black_levels.index(2u32) as i32,
            _ => *black_levels.index(1u32) as i32,
        };
        output[ABSOLUTE_POS] = (pix as i32 - bl) as f32 / (wl - bl) as f32;
    }

    let dim = CubeDim::default();
    // NOTE(review): the grid height is rounded down, so when the cropped height is not a
    // multiple of `dim.y` the last rows of `dst_img` are never written. Rounding up would
    // need a bounds guard in the kernel because it could read past the source image.
    // SAFETY: NOTE(review) — `launch_unchecked` performs no bounds checks; this relies on
    // the grid staying inside both tensors. Confirm for files with small bottom/right crops.
    unsafe {
        normalize_image::launch_unchecked::<R>(
            &client,
            CubeCount::Static(
                aligned_width as u32 / dim.x,
                real_image_height as u32 / dim.y,
                1,
            ),
            dim,
            src_img.as_word_aligned_tensor_ref().as_tensor_arg(1),
            dst_img.as_word_aligned_tensor_ref().as_tensor_arg(1),
            white_levels,
            black_levels,
            ScalarArg::new(pad_left as u32),
            ScalarArg::new(pad_top as u32),
        )
    };
    Ok(dst_img)
}

56
src/main.rs Normal file
View File

@ -0,0 +1,56 @@
pub mod alignment;
pub mod backend;
pub mod image;
pub mod ops;
pub mod tensor;
use backend::Backend;
use cubecl::{prelude::*, wgpu::WgpuDevice};
use image::load_raw;
use ops::{decimate_bayer::decimate_to_gray, downsample::downsample};
/// Runs the demo pipeline on `device`: loads `test_burst1/im_00.dng`, decimates it to
/// grayscale, downsamples it by 2 three times and saves the result as `test.png`.
///
/// # Panics
/// Panics if the raw file cannot be loaded or the PNG cannot be written.
pub fn launch<R: Runtime>(device: &R::Device) {
    println!("device: {:?}", device);
    let backend = Backend::<R>::new(device);

    let raw = load_raw(&backend, "test_burst1/im_00.dng").unwrap();

    // TODO: pad the image to a multiple of the tile size and build the alignment
    // pyramid here (this step was only sketched in earlier commented-out code).
    let mut level = decimate_to_gray(&backend, &raw);
    for _ in 0..3 {
        level = downsample(&backend, &level, 2);
    }

    let preview = level.to_image(&backend);
    println!("finish");
    preview.save("test.png").unwrap();
    println!("saved to test.png");
}
/// Entry point: runs `launch` with the wgpu runtime on integrated GPU number 0.
fn main() {
let device = WgpuDevice::IntegratedGpu(0);
launch::<cubecl::wgpu::WgpuRuntime>(&device);
// Alternative runtime, currently disabled:
// launch::<cubecl::cuda::CudaRuntime>(&Default::default());
}

58
src/ops/avg_pool.rs Normal file
View File

@ -0,0 +1,58 @@
use cubecl::prelude::*;
/// Averages `scale` x `scale` windows of `src` into `dst`, subtracting `black_level`
/// from every sample before accumulation.
///
/// Each invocation produces the output pixel at (`ABSOLUTE_POS_X`, `ABSOLUTE_POS_Y`).
// NOTE(review): no bounds check is performed here; presumably the launch grid is sized
// to `dst` — confirm at the call site.
#[cube(launch_unchecked)]
fn cube_avg_pool(src: &Tensor<f32>, dst: &mut Tensor<f32>, scale: u32, black_level: f32) {
let mut out_pixel = 0.0;
// Top-left corner of the source window belonging to this output pixel.
let x0 = ABSOLUTE_POS_X * scale;
let y0 = ABSOLUTE_POS_Y * scale;
for dx in 0..scale {
for dy in 0..scale {
let x = x0 + dx;
let y = y0 + dy;
out_pixel += src[y * src.stride(0) + x * src.stride(1)] - black_level;
}
}
// Divide the accumulated sum by the number of samples in the window.
out_pixel /= (scale * scale) as f32;
dst[ABSOLUTE_POS_Y * dst.stride(0) + ABSOLUTE_POS_X * dst.stride(1)] = out_pixel;
}
/// Averages `scale` x `scale` windows of `src` into `dst` while rescaling every sample
/// by `mean_factor / norm_factor`, where the norm factor is chosen by the sample's
/// position inside the window (red, green, green, blue).
///
/// Each invocation produces the output pixel at (`ABSOLUTE_POS_X`, `ABSOLUTE_POS_Y`).
// NOTE(review): `norm_factors` has 4 lanes and is indexed with `dy * scale + dx`, which
// only stays in range for `scale == 2` — confirm callers never pass another scale.
// NOTE(review): by operator precedence `black_level` is subtracted AFTER the sample is
// rescaled (`factor * sample - black_level`), unlike `cube_avg_pool`, which subtracts it
// from the raw sample — confirm which is intended.
#[cube(launch_unchecked)]
fn cube_avg_pool_normalization(
src: &Tensor<f32>,
dst: &mut Tensor<f32>,
scale: u32,
black_level: f32,
factor_red: f32,
factor_green: f32,
factor_blue: f32,
) {
let mut out_pixel = 0.0f32;
// Top-left corner of the source window belonging to this output pixel.
let x0 = ABSOLUTE_POS_X * scale;
let y0 = ABSOLUTE_POS_Y * scale;
// Per-position normalisation factors in the order red, green, green, blue.
let mut norm_factors: Line<f32> = Line::empty(4);
norm_factors[0] = factor_red;
norm_factors[1] = factor_green;
norm_factors[2] = factor_green;
norm_factors[3] = factor_blue;
// Mean of the four factors.
let mean_factor =
0.25 * (norm_factors[0] + norm_factors[1] + norm_factors[2] + norm_factors[3]);
for dx in 0..scale {
for dy in 0..scale {
let x = x0 + dx;
let y = y0 + dy;
out_pixel += mean_factor / norm_factors[dy * scale + dx]
* src[y * src.stride(0) + x * src.stride(1)]
- black_level;
}
}
out_pixel /= (scale * scale) as f32;
dst[ABSOLUTE_POS_Y * dst.stride(0) + ABSOLUTE_POS_X * dst.stride(1)] = out_pixel;
}

466
src/ops/block_matching.rs Normal file
View File

@ -0,0 +1,466 @@
use cubecl::{prelude::*, CubeCount, CubeDim, Runtime};
use crate::{backend::Backend, image::GpuImage, tensor::GpuTensor};
use super::gauss_pyramid::build_gauss_pyramid;
/// Per-pixel distance used when comparing a reference tile with a candidate tile.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DistanceKind {
/// Sum of absolute differences.
L1,
/// Sum of squared differences.
L2,
}
/// Block-matching settings for a single pyramid level.
pub struct AlingLevelConfig {
/// Factor passed to the pyramid builder for this level.
pub factor: u32,
/// Factor by which the previous level's alignments are multiplied when upsampled.
pub upsampling_factor: u32,
/// Side length of a tile, in pixels of this level.
pub tile_size: u32,
/// Half-width of the search window: shifts from `-search_radius` to `+search_radius`.
pub search_radius: u32,
/// Distance used by the local search.
pub distance: DistanceKind,
}
/// Block-matching configuration: one entry per pyramid level.
pub struct AlignmentConfig {
/// Per-level settings.
pub levels: Vec<AlingLevelConfig>,
}
///
/// Estimates a patchwise flow between `img` and the reference pyramid by block matching.
///
/// For patches `py, px` the result satisfies
///     img[py, px] ~= ref_img[py + alignments[py, px, 1],
///                            px + alignments[py, px, 0]]
///
/// Parameters
/// ----------
/// cube : compute backend
/// img : image to align, `[imshape_y, imshape_x]`
/// ref_pyramid : pyramid representation of the reference image
/// level_configs : per-level settings; they are visited in reverse order while the
///     pyramid index counts up from 0
///
/// Returns
/// -------
/// alignments : device tensor `[n_patchs_y, n_patchs_x, 2]` holding the flow per patch
///
/// Panics
/// ------
/// Panics when `level_configs` is empty, or when a pyramid has fewer levels than
/// `level_configs`.
///
pub fn align_image_block_matching<R: Runtime>(
    cube: &Backend<R>,
    img: &GpuImage<f32>,
    ref_pyramid: &[GpuImage<f32>],
    level_configs: &[AlingLevelConfig],
) -> GpuTensor<f32, 3> {
    let factors = level_configs.iter().map(|level| level.factor);
    let alt_pyramid = build_gauss_pyramid(cube, img, factors);

    // Flow of the previously processed level, used as the initial guess for the next one.
    let mut flow: Option<GpuTensor<f32, 3>> = None;
    let mut coarser_tile_size = 64;
    for (lv, cfg) in level_configs.iter().rev().enumerate() {
        let refined = align_on_a_level::<R>(
            cube,
            &ref_pyramid[lv],
            &alt_pyramid[lv],
            cfg,
            flow.as_ref(),
            coarser_tile_size,
        );
        flow = Some(refined);
        coarser_tile_size = cfg.tile_size;
    }
    flow.unwrap()
}
///
/// Aligns one pyramid level and returns the per-tile flow `[tiles_y, tiles_x, 2]`.
///
/// When `prev` is given, the previous level's flow is upsampled and used as the initial
/// guess; otherwise the search starts from a zero flow. The flow is always integer valued
/// but stored as `f32`, so it can be consumed by later stages without a cast.
///
pub(crate) fn align_on_a_level<R: Runtime>(
    cube: &Backend<R>,
    ref_pyramid_lvl: &GpuImage<f32>,
    alt_pyramid_lvl: &GpuImage<f32>,
    cfg: &AlingLevelConfig,
    prev: Option<&GpuTensor<f32, 3>>,
    prev_tile_size: u32,
) -> GpuTensor<f32, 3> {
    let [ref_h, ref_w, _] = ref_pyramid_lvl.shape();

    // Number of whole tiles that fit on this level.
    let tiles_y = (ref_h as u32 / cfg.tile_size) as usize;
    let tiles_x = (ref_w as u32 / cfg.tile_size) as usize;
    let shape = [tiles_y, tiles_x, 2];

    let mut alignments: GpuTensor<f32, 3> = match prev {
        Some(prev) => {
            // The upsampling kernel writes every tile, so plain reserved storage suffices.
            let mut upsampled = cube.alloc_tensor(shape);
            upsample_alignments::<R>(
                cube,
                &mut upsampled,
                ref_pyramid_lvl,
                alt_pyramid_lvl,
                prev,
                prev_tile_size,
                cfg,
            );
            upsampled
        }
        // The local search reads the initial flow, so without a previous level the
        // buffer must hold zeros rather than whatever the reserved storage contains.
        None => cube.load_tensor(&vec![0.0f32; tiles_y * tiles_x * 2], shape),
    };

    local_search::<R>(cube, &mut alignments, ref_pyramid_lvl, alt_pyramid_lvl, cfg);
    alignments
}
/// Refines `upsampled_alignments` in place by searching a window of
/// `±cfg.search_radius` pixels around each tile's current flow.
///
/// The tensor is laid out as `[tiles_y, tiles_x, 2]`. One invocation is launched per
/// tile, with the grid's x axis covering tile columns and its y axis covering tile rows,
/// matching how the kernel maps `ABSOLUTE_POS_X` / `ABSOLUTE_POS_Y` to tiles.
pub fn local_search<R: Runtime>(
    cube: &Backend<R>,
    upsampled_alignments: &mut GpuTensor<f32, 3>,
    ref_pyramid_lvl: &GpuImage<f32>,
    alt_pyramid_lvl: &GpuImage<f32>,
    cfg: &AlingLevelConfig,
) {
    let threadsperblock = CubeDim::default();
    // Dimension 0 counts tile rows and dimension 1 tile columns.
    let [tiles_y, tiles_x, _] = upsampled_alignments.shape();
    let blockspergrid_x = (tiles_x as u32).div_ceil(threadsperblock.x);
    let blockspergrid_y = (tiles_y as u32).div_ceil(threadsperblock.y);
    // SAFETY: NOTE(review) — the kernel returns early for tiles outside the alignment
    // tensor, but its image reads are unchecked; confirm tiles always lie in the image.
    unsafe {
        cube_local_search::launch_unchecked::<R>(
            cube,
            CubeCount::Static(blockspergrid_x, blockspergrid_y, 1),
            threadsperblock,
            ref_pyramid_lvl.as_tensor_ref().as_tensor_arg(1),
            alt_pyramid_lvl.as_tensor_ref().as_tensor_arg(1),
            upsampled_alignments
                .as_word_aligned_tensor_ref()
                .as_tensor_arg(1),
            ScalarArg::new(cfg.tile_size),
            ScalarArg::new(cfg.search_radius as _),
            cfg.distance,
        )
    }
}
/// Kernel: for one tile, tries every integer shift within `±search_radius` around the
/// tile's current flow and adds the shift with the smallest distance to that flow.
///
/// `upsampled_alignments` is read as the initial flow and overwritten with the result.
/// Invocations whose tile lies outside the alignment tensor return immediately.
#[cubecl::cube(launch_unchecked)]
#[allow(clippy::identity_op)]
fn cube_local_search(
ref_pyramid_lvl: &Tensor<f32>,
alt_pyramid_lvl: &Tensor<f32>,
upsampled_alignments: &mut Tensor<f32>,
tile_size: u32,
search_radius: i32,
#[comptime] distance: DistanceKind,
) {
let n_patchs_y = upsampled_alignments.shape(0);
let n_patchs_x = upsampled_alignments.shape(1);
let tile_x = ABSOLUTE_POS_X;
let tile_y = ABSOLUTE_POS_Y;
if tile_y >= n_patchs_y || tile_x >= n_patchs_x {
return;
}
// Offset of this tile's (x, y) flow pair inside the alignment tensor.
let offset = tile_y * upsampled_alignments.stride(0) + tile_x * upsampled_alignments.stride(1);
let mut local_flow = Line::empty(2);
local_flow[0] = upsampled_alignments[offset + 0];
local_flow[1] = upsampled_alignments[offset + 1];
// position of the pixel in the top left corner of the patch
let patch_pos_x = tile_x * tile_size;
let patch_pos_y = tile_y * tile_size;
// this should be rewritten to allow patches bigger than 32
// Copy of the reference tile, stored row-major with a row pitch of `tile_size`.
let mut local_ref = Array::new(tile_size * tile_size);
for i in 0..tile_size {
for j in 0..tile_size {
let idx = patch_pos_x + j;
let idy = patch_pos_y + i;
local_ref[i * tile_size + j] = ref_pyramid_lvl[idy * ref_pyramid_lvl.stride(0) + idx];
}
}
let mut min_dist = f32::new(f32::INFINITY); // init as infinity
let mut min_shift_y: i32 = 0;
let mut min_shift_x: i32 = 0;
// window search
for search_shift_y in -search_radius..=search_radius {
for search_shift_x in -search_radius..=search_radius {
// computing dist
let dist = cube_compute_dist(
&local_ref,
alt_pyramid_lvl,
&local_flow,
patch_pos_x as i32 + search_shift_x,
patch_pos_y as i32 + search_shift_y,
tile_size,
distance,
);
// Strict comparison: on ties the first shift encountered is kept.
if dist < min_dist {
min_dist = dist;
min_shift_y = search_shift_y;
min_shift_x = search_shift_x;
}
}
}
// Final flow = initial flow + best shift.
upsampled_alignments[offset + 0] = local_flow[0] + min_shift_x as f32;
upsampled_alignments[offset + 1] = local_flow[1] + min_shift_y as f32;
}
///
/// Upsamples the alignments of the previous pyramid level into `dst` so they can seed the
/// search on the current level (Section 3.2 of the IPOL article).
///
/// One kernel invocation is launched per tile of `dst`, whose shape is
/// `[tiles_y, tiles_x, 2]`.
///
pub(crate) fn upsample_alignments<R: Runtime>(
    cube: &Backend<R>,
    dst: &mut GpuTensor<f32, 3>,
    ref_pyramid_lvl: &GpuImage<f32>,
    alt_pyramid_lvl: &GpuImage<f32>,
    prev_alignments: &GpuTensor<f32, 3>,
    prev_tile_size: u32,
    cfg: &AlingLevelConfig,
) {
    let block = CubeDim::default();
    let tiles_y = dst.shape()[0] as u32;
    let tiles_x = dst.shape()[1] as u32;
    let grid = CubeCount::Static(tiles_x.div_ceil(block.x), tiles_y.div_ceil(block.y), 1);
    // SAFETY: NOTE(review) — the kernel guards against tiles outside `dst`; its image
    // reads are unchecked, so this relies on every tile lying inside the pyramid level.
    unsafe {
        cube_upsample_alignments::launch_unchecked::<R>(
            cube,
            grid,
            block,
            ref_pyramid_lvl.as_tensor_ref().as_tensor_arg(1),
            alt_pyramid_lvl.as_tensor_ref().as_tensor_arg(1),
            prev_alignments
                .as_word_aligned_tensor_ref()
                .as_tensor_arg(1),
            dst.as_word_aligned_tensor_ref().as_tensor_arg(1),
            ScalarArg::new(cfg.upsampling_factor),
            ScalarArg::new(cfg.tile_size),
            ScalarArg::new(prev_tile_size),
        )
    }
}
/// Kernel: for one tile of the current level, picks the best of three candidate flows
/// taken from the previous level (the parent tile and its nearest vertical and
/// horizontal neighbours), each multiplied by `upsampling_factor`, using the L1 distance.
///
/// Tiles not covered by any previous-level tile receive a zero flow.
// NOTE(review): `tile_size / prev_tile_size` is an integer division; if it evaluates to 0
// the `repeat_factor` computation divides by zero — confirm the configured sizes rule
// this out.
#[allow(clippy::identity_op)]
#[cubecl::cube(launch_unchecked)]
fn cube_upsample_alignments(
ref_pyramid_lvl: &Tensor<f32>,
alt_pyramid_lvl: &Tensor<f32>,
previous_alignments: &Tensor<f32>,
upsampled_alignments: &mut Tensor<f32>,
upsampling_factor: u32,
tile_size: u32,
prev_tile_size: u32,
) {
let subtile_x = ABSOLUTE_POS_X;
let subtile_y = ABSOLUTE_POS_Y;
let n_tiles_y_prev = previous_alignments.shape(0);
let n_tiles_x_prev = previous_alignments.shape(1);
let n_tiles_y_new = upsampled_alignments.shape(0);
let n_tiles_x_new = upsampled_alignments.shape(1);
// Width of the reference level, used below as the row pitch when reading pixels.
let w = ref_pyramid_lvl.shape(1);
// Number of new tiles per previous tile along each axis.
let repeat_factor = upsampling_factor / (tile_size / prev_tile_size);
if subtile_x >= n_tiles_x_new || subtile_y >= n_tiles_y_new {
return;
}
let des_offset =
subtile_y * upsampled_alignments.stride(0) + subtile_x * upsampled_alignments.stride(1);
// the new subtile is on the side of the image, and is not contained within a bigger old tile
if subtile_x >= repeat_factor * n_tiles_x_prev || subtile_y >= repeat_factor * n_tiles_y_prev {
upsampled_alignments[des_offset + 0] = 0.;
upsampled_alignments[des_offset + 1] = 0.;
return;
}
// Previous-level tile that contains this subtile.
let prev_tile_x = subtile_x / repeat_factor;
let prev_tile_y = subtile_y / repeat_factor;
// position of the top left pixel in the subtile
let subtile_pos_y = subtile_y * tile_size;
let subtile_pos_x = subtile_x * tile_size;
// copying ref patch into local memory, because it needs to be read 3 times
let mut local_ref = Array::new(tile_size * tile_size);
for i in 0..tile_size {
for j in 0..tile_size {
let idx = subtile_pos_x + j;
let idy = subtile_pos_y + i;
local_ref[i * tile_size + j] = ref_pyramid_lvl[idy * w + idx];
}
}
// position of the new tile within the old tile
let ups_subtile_x = subtile_x % repeat_factor;
let ups_subtile_y = subtile_y % repeat_factor;
// computing the ids of the 3 closest patches: step towards the neighbour on the side
// of the parent tile in which this subtile lies
let x_shift = if 2 * ups_subtile_x + 1 > repeat_factor {
1i32
} else {
-(1i32)
};
let y_shift = if 2 * ups_subtile_y + 1 > repeat_factor {
1i32
} else {
-(1i32)
};
// Choosing the best of the 3 alignments by minimising L1 dist
// `1.0 / 0.0` yields +infinity as the initial best distance.
let mut dist = f32::new(1.0f32) / 0.0;
let mut optimal_flow_x = 0.0;
let mut optimal_flow_y = 0.0;
// The 3 candidate alignments are fetched up front (by fetching them as early as possible,
// we may receive them from global memory before we even require them, as calculations are
// performed during this delay)
let candidate_alignment_0_shift = cube_alignment_flow(
previous_alignments,
prev_tile_x,
prev_tile_y,
upsampling_factor as f32,
);
// Neighbour indices are clamped so that border tiles reuse the nearest valid tile.
let candidate_alignment_vert_shift = cube_alignment_flow(
previous_alignments,
prev_tile_x,
clamp(prev_tile_y as i32 + y_shift, 0, n_tiles_y_prev as i32 - 1) as u32,
upsampling_factor as f32,
);
let candidate_alignment_horizontal_shift = cube_alignment_flow(
previous_alignments,
clamp(prev_tile_x as i32 + x_shift, 0, n_tiles_x_prev as i32 - 1) as u32,
prev_tile_y,
upsampling_factor as f32,
);
// 0 shift
let dist_ = cube_compute_dist(
&local_ref,
alt_pyramid_lvl,
&candidate_alignment_0_shift,
subtile_pos_x as i32,
subtile_pos_y as i32,
tile_size,
DistanceKind::L1,
);
if dist_ < dist {
dist = dist_;
optimal_flow_x = candidate_alignment_0_shift[0];
optimal_flow_y = candidate_alignment_0_shift[1];
}
// vertical shift
let dist_ = cube_compute_dist(
&local_ref,
alt_pyramid_lvl,
&candidate_alignment_vert_shift,
subtile_pos_x as i32,
subtile_pos_y as i32,
tile_size,
DistanceKind::L1,
);
if dist_ < dist {
dist = dist_;
optimal_flow_x = candidate_alignment_vert_shift[0];
optimal_flow_y = candidate_alignment_vert_shift[1];
}
// horizontal shift
let dist_ = cube_compute_dist(
&local_ref,
alt_pyramid_lvl,
&candidate_alignment_horizontal_shift,
subtile_pos_x as i32,
subtile_pos_y as i32,
tile_size,
DistanceKind::L1,
);
// Last candidate: `dist` itself is not needed afterwards, so only the flow is updated.
if dist_ < dist {
optimal_flow_x = candidate_alignment_horizontal_shift[0];
optimal_flow_y = candidate_alignment_horizontal_shift[1];
}
// applying best flow
upsampled_alignments[des_offset + 0] = optimal_flow_x;
upsampled_alignments[des_offset + 1] = optimal_flow_y;
}
/// Restricts `a` to the range `[min, max]` by taking `max(a, min)` and then the minimum
/// of that and `max`.
#[cubecl::cube]
fn clamp(a: i32, min: i32, max: i32) -> i32 {
// Clamp::clamp(a, min, max)
Min::min(Max::max(a, min), max)
}
/// Reads the two flow components stored for tile (`x`, `y`) in `previous_alignments` and
/// returns them multiplied by `factor`.
#[cubecl::cube]
#[allow(clippy::identity_op)]
fn cube_alignment_flow(
previous_alignments: &Tensor<f32>,
x: u32,
y: u32,
factor: f32,
) -> Line<f32> {
// Offset of the tile's first component; the second one follows directly after it.
let offset = y * previous_alignments.stride(0) + x * previous_alignments.stride(1);
let mut candidate_alignment: Line<f32> = Line::empty(2);
candidate_alignment[0] = previous_alignments[offset + 0] * factor;
candidate_alignment[1] = previous_alignments[offset + 1] * factor;
candidate_alignment
}
/// Accumulates the distance between a cached reference tile and the tile of
/// `alt_pyramid_lvl` found at (`pos_x`, `pos_y`) displaced by `candidate_alignment`.
///
/// `local_ref` must hold the `tile_size` x `tile_size` reference tile in row-major order
/// with a row pitch of `tile_size`. When a displaced pixel falls outside the alternate
/// image the distance becomes infinite, so that candidate can never win.
#[cubecl::cube]
pub(crate) fn cube_compute_dist(
    local_ref: &Array<f32>,
    alt_pyramid_lvl: &Tensor<f32>,
    candidate_alignment: &Line<f32>,
    pos_x: i32,
    pos_y: i32,
    tile_size: u32,
    #[comptime] distance: DistanceKind,
) -> f32 {
    let h = alt_pyramid_lvl.shape(0);
    let w = alt_pyramid_lvl.shape(1);
    let mut dist = 0.0f32;
    for i in 0..tile_size {
        for j in 0..tile_size {
            let new_idx = pos_x + j as i32 + candidate_alignment[0] as i32;
            let new_idy = pos_y + i as i32 + candidate_alignment[1] as i32;
            if (0 <= new_idx && new_idx < w as i32) && (0 <= new_idy && new_idy < h as i32) {
                let alt_offset = new_idy as u32 * w + new_idx as u32;
                // Index with the same row pitch the callers used to fill `local_ref`;
                // a fixed pitch of 32 is only correct for 32-pixel tiles.
                let diff = local_ref[i * tile_size + j] - alt_pyramid_lvl[alt_offset];
                dist += match distance {
                    DistanceKind::L1 => Abs::abs(diff),
                    DistanceKind::L2 => diff * diff,
                };
            } else {
                dist = f32::new(f32::INFINITY)
            }
        }
    }
    dist
}

68
src/ops/decimate_bayer.rs Normal file
View File

@ -0,0 +1,68 @@
use cubecl::{prelude::*, Runtime};
use crate::{backend::Backend, image::GpuImage};
/// Collapses every 2x2 Bayer cell of `img` into one RGB pixel and returns the
/// half-resolution, 3-channel result.
pub fn decimate_to_rgb<R: Runtime>(img: &GpuImage<f32>, client: &Backend<R>) -> GpuImage<f32> {
    let (half_w, half_h) = (img.width() / 2, img.height() / 2);
    let mut rgb = GpuImage::new(client, half_w, half_h, 3);
    let grid = rgb.cube_count(1, 1);
    // SAFETY: NOTE(review) — the kernel performs no bounds checks; this relies on the
    // grid derived from the output staying inside both tensors.
    unsafe {
        decimate::launch_unchecked::<R>(
            &client,
            grid,
            CubeDim::default(),
            img.as_tensor_ref().as_tensor_arg(1),
            rgb.as_tensor_mut().as_tensor_arg(1),
            true,
        )
    };
    rgb
}
/// Collapses every 2x2 Bayer cell of `img` into one gray value and returns the
/// half-resolution, single-channel result.
#[inline]
pub fn decimate_to_gray<R: Runtime>(client: &Backend<R>, img: &GpuImage<f32>) -> GpuImage<f32> {
    let (half_w, half_h) = (img.width() / 2, img.height() / 2);
    let mut gray = GpuImage::new(client, half_w, half_h, 1);
    decimate_to_gray_into(client, &mut gray, img);
    gray
}
/// Writes the gray decimation of `img` into the existing image `dst`.
///
/// The launch grid is derived from `dst`.
pub fn decimate_to_gray_into<R: Runtime>(
    client: &Backend<R>,
    dst: &mut GpuImage<f32>,
    img: &GpuImage<f32>,
) {
    let grid = dst.cube_count(1, 1);
    // SAFETY: NOTE(review) — the kernel performs no bounds checks; `dst` is presumably
    // expected to be half the size of `img` — confirm at call sites.
    unsafe {
        decimate::launch_unchecked::<R>(
            &client,
            grid,
            CubeDim::default(),
            img.as_tensor_ref().as_tensor_arg(1),
            dst.as_tensor_mut().as_tensor_arg(1),
            false,
        )
    };
}
/// Kernel: reduces the 2x2 input cell at (`2 * ABSOLUTE_POS_X`, `2 * ABSOLUTE_POS_Y`) to a
/// single output pixel.
///
/// The cell is read as red (top-left), two greens (top-right, bottom-left) and blue
/// (bottom-right); the two greens are averaged. With `rgb` the three values are written
/// to consecutive channels, otherwise a weighted gray value is written.
// NOTE(review): the `+ 1` column offsets assume adjacent columns are one element apart
// (a single-channel input) — confirm.
#[cube(launch_unchecked)]
fn decimate(input: &Tensor<f32>, output: &mut Tensor<f32>, #[comptime] rgb: bool) {
// Offsets of the top and bottom row of the 2x2 cell.
let in_offset_0 =
(ABSOLUTE_POS_Y * 2) * input.stride(0) + (ABSOLUTE_POS_X * 2) * input.stride(1);
let in_offset_1 =
((ABSOLUTE_POS_Y * 2) + 1) * input.stride(0) + (ABSOLUTE_POS_X * 2) * input.stride(1);
let r0 = input[in_offset_0 + 0];
let g1 = input[in_offset_0 + 1];
let g2 = input[in_offset_1 + 0];
let b0 = input[in_offset_1 + 1];
let g0 = (g1 + g2) * 0.5;
let offset = ABSOLUTE_POS_Y * output.stride(0) + ABSOLUTE_POS_X * output.stride(1);
if rgb {
output[offset + 0] = r0;
output[offset + 1] = g0;
output[offset + 2] = b0;
} else {
// Weighted sum of r, g, b (0.299 / 0.587 / 0.114) followed by a gain of 2.
output[offset] = (r0 * 0.299 + g0 * 0.587 + b0 * 0.114) * 2.0;
}
}

207
src/ops/demosaic.rs Normal file
View File

@ -0,0 +1,207 @@
use cubecl::prelude::*;
use crate::{backend::Backend, image::GpuImage};
/// Launches the demosaic kernel with `write_rgb = true` into a new 3-channel image of the
/// same size as `img` and returns that image.
pub fn demosaic_rgb<R: Runtime>(client: &Backend<R>, img: &GpuImage<f32>) -> GpuImage<f32> {
    let (cols, rows) = (img.width(), img.height());
    let mut rgb = GpuImage::new(client, cols, rows, 3);
    let grid = rgb.cube_count(1, 1);
    // SAFETY: NOTE(review) — unchecked launch; relies on the grid derived from the
    // output matching the input size and on the kernel's own border guard.
    unsafe {
        demosaic::launch_unchecked::<R>(
            &client,
            grid,
            CubeDim::default(),
            img.as_tensor_ref().as_tensor_arg(1),
            rgb.as_tensor_mut().as_tensor_arg(1),
            true,
        )
    };
    rgb
}
/// Launches the demosaic kernel with `write_rgb = false` into a new single-channel image
/// of the same size as `img` and returns that image.
pub fn demosaic_gray<R: Runtime>(client: &Backend<R>, img: &GpuImage<f32>) -> GpuImage<f32> {
    let (cols, rows) = (img.width(), img.height());
    let mut gray = GpuImage::new(client, cols, rows, 1);
    let grid = gray.cube_count(1, 1);
    // SAFETY: NOTE(review) — unchecked launch; relies on the grid derived from the
    // output matching the input size and on the kernel's own border guard.
    unsafe {
        demosaic::launch_unchecked::<R>(
            &client,
            grid,
            CubeDim::default(),
            img.as_tensor_ref().as_tensor_arg(1),
            gray.as_tensor_mut().as_tensor_arg(1),
            false,
        )
    };
    gray
}
///
/// Malvar-He-Cutler Linear Image Demosaicking
/// - https://www.ipol.im/pub/art/2011/g_mhcd/revisions/2011-08-14/article.pdf
/// - https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/Demosaicing_ICASSP04.pdf
///
/// Each invocation reconstructs r, g and b for the pixel at
/// (`ABSOLUTE_POS_X`, `ABSOLUTE_POS_Y`) from a 5x5 cross-shaped neighbourhood of the
/// single-channel Bayer `input`, which is read in RGGB order.
///
/// With `write_rgb` the three values are written to consecutive channels of `output`;
/// otherwise a single weighted gray value is written.
///
/// The stencil reaches two pixels in every direction, so a border of two pixels is
/// skipped and left unwritten in `output`.
#[cube(launch_unchecked)]
fn demosaic(input: &Tensor<f32>, output: &mut Tensor<f32>, #[comptime] write_rgb: bool) {
    // Guard the full ±2 reach of the stencil; `+ 2 <` avoids unsigned underflow.
    if ABSOLUTE_POS_Y > 1
        && ABSOLUTE_POS_Y + 2 < input.shape(0)
        && ABSOLUTE_POS_X > 1
        && ABSOLUTE_POS_X + 2 < input.shape(1)
    {
        let var_x = ABSOLUTE_POS_X & 1;
        let var_y = ABSOLUTE_POS_Y & 1;
        // 0 = red, 1 = green on a red row, 2 = green on a blue row, 3 = blue.
        let case = var_y + var_y + var_x;

        let in_offset_top2 = (ABSOLUTE_POS_Y - 2) * input.stride(0) + ABSOLUTE_POS_X;
        let in_offset_top = (ABSOLUTE_POS_Y - 1) * input.stride(0) + ABSOLUTE_POS_X;
        let in_offset_mid = ABSOLUTE_POS_Y * input.stride(0) + ABSOLUTE_POS_X;
        let in_offset_bot = (ABSOLUTE_POS_Y + 1) * input.stride(0) + ABSOLUTE_POS_X;
        let in_offset_bot2 = (ABSOLUTE_POS_Y + 2) * input.stride(0) + ABSOLUTE_POS_X;

        //           t2c0
        //      t1l1 t1c0 t1r1
        // m0l2 m0l1 m0c0 m0r1 m0r2
        //      b1l1 b1c0 b1r1
        //           b2c0
        let t2c0 = input[in_offset_top2];
        let t1c0 = input[in_offset_top];
        let m0c0 = input[in_offset_mid];
        let b1c0 = input[in_offset_bot];
        let b2c0 = input[in_offset_bot2];
        let m0l2 = input[in_offset_mid - 2];
        let m0l1 = input[in_offset_mid - 1];
        let m0r1 = input[in_offset_mid + 1];
        let m0r2 = input[in_offset_mid + 2];
        let t1l1 = input[in_offset_top - 1];
        let t1r1 = input[in_offset_top + 1];
        let b1l1 = input[in_offset_bot - 1];
        let b1r1 = input[in_offset_bot + 1];

        let mut r = 0.0f32;
        let mut g = 0.0f32;
        let mut b = 0.0f32;
        match case {
            // red
            0 => {
                r = m0c0;
                //          -1
                //           2
                //  -1   2   4   2  -1
                //           2
                //          -1
                g = (-(t2c0 + b2c0 + m0l2 + m0r2)
                    + 2.0 * (t1c0 + b1c0 + m0l1 + m0r1) //-
                    + 4.0 * m0c0)
                    / 8.0;
                //          -3/2
                //       2       2
                // -3/2      6      -3/2
                //       2       2
                //          -3/2
                b = (-1.5 * (t2c0 + b2c0 + m0l2 + m0r2)
                    + 2.0 * (t1l1 + t1r1 + b1l1 + b1r1)
                    + 6.0 * m0c0)
                    / 8.0;
            }
            // green1
            1 => {
                //          1/2
                //      -1      -1
                //  -1   4   5   4  -1
                //      -1      -1
                //          1/2
                r = (-(m0l2 + m0r2 + t1l1 + t1r1 + b1l1 + b1r1)
                    + 0.5 * (t2c0 + b2c0)
                    + 4.0 * (m0l1 + m0r1)
                    + 5.0 * m0c0)
                    / 8.0;
                g = m0c0;
                //          -1
                //      -1   4  -1
                // 1/2       5      1/2
                //      -1   4  -1
                //          -1
                b = (-(t2c0 + b2c0 + t1l1 + t1r1 + b1l1 + b1r1)
                    + 0.5 * (m0l2 + m0r2)
                    + 4.0 * (t1c0 + b1c0)
                    + 5.0 * m0c0)
                    / 8.0;
            }
            // green2
            2 => {
                //          -1
                //      -1   4  -1
                // 1/2       5      1/2
                //      -1   4  -1
                //          -1
                r = (-(t2c0 + b2c0 + t1l1 + t1r1 + b1l1 + b1r1)
                    + 0.5 * (m0l2 + m0r2)
                    + 4.0 * (t1c0 + b1c0)
                    + 5.0 * m0c0)
                    / 8.0;
                g = m0c0;
                //          1/2
                //      -1      -1
                //  -1   4   5   4  -1
                //      -1      -1
                //          1/2
                b = (-(m0l2 + m0r2 + t1l1 + t1r1 + b1l1 + b1r1)
                    + 0.5 * (t2c0 + b2c0)
                    + 4.0 * (m0l1 + m0r1)
                    + 5.0 * m0c0)
                    / 8.0;
            }
            // blue
            3 => {
                //          -3/2
                //       2       2
                // -3/2      6      -3/2
                //       2       2
                //          -3/2
                r = (-1.5 * (t2c0 + b2c0 + m0l2 + m0r2)
                    + 2.0 * (t1l1 + t1r1 + b1l1 + b1r1)
                    + 6.0 * m0c0)
                    / 8.0;
                //          -1
                //           2
                //  -1   2   4   2  -1
                //           2
                //          -1
                g = (-(t2c0 + b2c0 + m0l2 + m0r2)
                    + 2.0 * (t1c0 + b1c0 + m0l1 + m0r1) //-
                    + 4.0 * m0c0)
                    / 8.0;
                b = m0c0;
            }
            _ => {}
        }

        let out_offset = ABSOLUTE_POS_Y * output.stride(0) + ABSOLUTE_POS_X * output.stride(1);
        if write_rgb {
            // Three-channel output: one value per channel.
            output[out_offset + 0] = r;
            output[out_offset + 1] = g;
            output[out_offset + 2] = b;
        } else {
            // Single-channel output: weighted sum of r, g, b followed by a gain of 2.
            output[out_offset] = (r * 0.299 + g * 0.587 + b * 0.114) * 2.0;
        }
    }
}

120
src/ops/downsample.rs Normal file
View File

@ -0,0 +1,120 @@
use cubecl::{prelude::*, CubeDim, Runtime};
use crate::{backend::Backend, image::GpuImage};
/// Builds a normalised 1-D Gaussian kernel with `2 * radius + 1` taps.
///
/// Tap `x` (for `x` in `-radius..=radius`) is `exp(-x² / (2 * sigma²))` divided by the
/// sum of all taps, so the kernel sums to 1.
fn create_gaussian_kernel1d(sigma: f32, radius: u32) -> Vec<f32> {
    let variance = sigma * sigma;
    let half = radius as i32;

    let mut weights = Vec::new();
    for offset in -half..=half {
        weights.push(f32::exp(-0.5 / variance * (offset * offset) as f32));
    }

    let total: f32 = weights.iter().sum();
    for weight in &mut weights {
        *weight /= total;
    }
    weights
}
///
/// Blurs an image with a separable Gaussian and then keeps every `factor`-th pixel.
///
/// The Gaussian uses `sigma = factor / 2` and a radius of `round(4 * sigma)`. The
/// horizontal and the vertical pass each write to their own buffer: a pass must never
/// read from the buffer it is writing to, because neighbouring invocations would then
/// see partially filtered data.
///
/// Args:
///   client: compute backend
///   img: the input image on the device (WARNING: single channel only!)
///   factor: downsampling factor
///
/// Returns a single-channel image of size `img.width() / factor` by
/// `img.height() / factor` (before the allocation padding applied by `GpuImage::new`).
pub fn downsample<R: Runtime>(
    client: &Backend<R>,
    img: &GpuImage<f32>,
    factor: u32,
) -> GpuImage<f32> {
    let kern_host = create_gaussian_kernel1d(
        factor as f32 * 0.5,
        (4.0 * factor as f32 * 0.5 + 0.5) as u32,
    );
    let kern = client.load_tensor(&kern_host, [kern_host.len()]);

    // Pass 1: blur along x, img -> row_blurred.
    let mut row_blurred: GpuImage<f32> = GpuImage::new(client, img.width(), img.height(), 1);
    unsafe {
        conv1d::launch_unchecked::<R>(
            &client,
            row_blurred.cube_count(1, 1),
            CubeDim::default(),
            img.as_tensor_ref().as_tensor_arg(1),
            kern.as_array_arg(1),
            row_blurred.as_tensor_mut().as_tensor_arg(1),
            false,
        )
    };

    // Pass 2: blur along y, row_blurred -> blurred (separate buffer, see above).
    let mut blurred: GpuImage<f32> = GpuImage::new(client, img.width(), img.height(), 1);
    unsafe {
        conv1d::launch_unchecked::<R>(
            &client,
            blurred.cube_count(1, 1),
            CubeDim::default(),
            row_blurred.as_tensor_ref().as_tensor_arg(1),
            kern.as_array_arg(1),
            blurred.as_tensor_mut().as_tensor_arg(1),
            true,
        )
    };

    // Pass 3: one invocation per OUTPUT pixel, so the grid comes from the output image.
    // NOTE(review): the output is padded by `GpuImage::new`; invocations in the padded
    // area may read beyond the source row — confirm or add a guard to `subsample`.
    let mut output_img = GpuImage::new(client, img.width() / factor, img.height() / factor, 1);
    unsafe {
        subsample::launch_unchecked::<R>(
            &client,
            output_img.cube_count(1, 1),
            CubeDim::default(),
            blurred.as_tensor_ref().as_tensor_arg(1),
            output_img.as_tensor_mut().as_tensor_arg(1),
            ScalarArg::new(factor),
            ScalarArg::new(factor),
        )
    };
    output_img
}
/// Kernel: 1-D convolution of `input` with `kern`, evaluated for the output pixel at
/// (`ABSOLUTE_POS_X`, `ABSOLUTE_POS_Y`).
///
/// With `transpose == false` the kernel runs along x, otherwise along y. Coordinates that
/// fall outside the image are mirrored back inside.
// NOTE(review): pixels are addressed as `row * stride(0) + column`, which presumably
// assumes a single-channel image — confirm.
#[cube(launch_unchecked)]
pub fn conv1d(
input: &Tensor<f32>,
kern: &Array<f32>,
output: &mut Tensor<f32>,
#[comptime] transpose: bool,
) {
let kern_half = kern.len() as i32 / 2;
// Largest valid row / column index.
let h = output.shape(0) as i32 - 1;
let w = output.shape(1) as i32 - 1;
// Coordinate of the first tap along each axis.
let px = ABSOLUTE_POS_X as i32 - kern_half;
let py = ABSOLUTE_POS_Y as i32 - kern_half;
let mut val = 0.0f32;
for i in 0..kern.len() as u32 {
// `abs(w - abs(p - w))` reflects p about 0 and about w, mirroring at both borders.
let ox = if transpose {
ABSOLUTE_POS_X
} else {
i32::abs(w - i32::abs(px + i as i32 - w)) as u32
};
let oy = if transpose {
i32::abs(h - i32::abs(py + i as i32 - h)) as u32
} else {
ABSOLUTE_POS_Y
};
val += input[oy * input.stride(0) + ox] * kern[i];
}
output[ABSOLUTE_POS_Y * output.stride(0) + ABSOLUTE_POS_X] = val;
}
/// Decimates `input` into `output`: output pixel `(x, y)` is a copy of input
/// pixel `(x * step_x, y * step_y)`.
///
/// One thread handles one output pixel. Both tensors are indexed as
/// `y * stride(0) + x`.
#[cube(launch_unchecked)]
pub fn subsample(input: &Tensor<f32>, output: &mut Tensor<f32>, step_x: u32, step_y: u32) {
    // The kernel is launched unchecked, so threads that do not map to an
    // output pixel must exit before reading or writing anything.
    if ABSOLUTE_POS_Y >= output.shape(0) || ABSOLUTE_POS_X >= output.shape(1) {
        return;
    }

    let in_x = ABSOLUTE_POS_X * step_x;
    let in_y = ABSOLUTE_POS_Y * step_y;
    output[ABSOLUTE_POS_Y * output.stride(0) + ABSOLUTE_POS_X] =
        input[in_y * input.stride(0) + in_x];
}

442
src/ops/estimate_kernels.rs Normal file
View File

@ -0,0 +1,442 @@
use cubecl::prelude::*;
use crate::{backend::Backend, image::GpuImage, tensor::GpuTensor};
/// Tuning constants for the per-pixel kernel estimation.
///
/// NOTE(review): the field names match the parameters of `compute_k` in this
/// file; the per-field notes below describe how `compute_k` uses parameters of
/// the same name. `estimate_kernels` itself is still a `todo!()`, so confirm
/// the wiring once it is implemented.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct EstimateKernelsConfig {
/// Global scale applied to both kernel sizes `k[0]` and `k[1]`.
pub k_detail: f32,
/// Kernel size blended in with weight `d` (the clamped denoise factor).
pub k_denoise: f32,
/// Offset added when computing `d = clamp(1 - sqrt(l1) / d_tr + d_th, 0, 1)`.
pub d_th: f32,
/// Divisor of `sqrt(l1)` in the expression for `d` above.
pub d_tr: f32,
/// Value used for `k2` when the anisotropy measure exceeds 1.95.
pub k_stretch: f32,
/// `k1` becomes `1 / k_shrink` when the anisotropy measure exceeds 1.95.
pub k_shrink: f32,
}
/// Parameters of the affine noise model `std² = alpha * I + beta`
/// consumed by the generalized Anscombe transform in this file.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct NoiseConfig {
/// Slope of the noise variance with respect to the intensity `I`.
pub alpha: f32,
/// Constant (intensity-independent) part of the noise variance.
pub beta: f32,
}
/// Generalized Anscombe transform (variance-stabilizing transform).
///
/// Noise model: `std² = alpha * I + beta`, where `alpha` and `beta` are
/// ISO dependent and supplied through `nc`.
///
/// Allocates a single-channel image with the same width and height as
/// `image`, fills it via [`generalized_ascombe_transform_into`] and returns it.
///
/// # Arguments
/// * `client` - backend used for the allocation and the kernel launch.
/// * `image` - input image.
/// * `nc` - noise model parameters (`alpha`, `beta`).
///
/// # Returns
/// The input image with stabilized variance.
pub fn generalized_ascombe_transform<R: Runtime>(
    client: &Backend<R>,
    image: &GpuImage<f32>,
    nc: NoiseConfig,
) -> GpuImage<f32> {
    let (width, height) = (image.width(), image.height());
    let mut stabilized = GpuImage::new(client, width, height, 1);
    generalized_ascombe_transform_into(client, &mut stabilized, image, nc);
    stabilized
}
pub fn generalized_ascombe_transform_into<R: Runtime>(
client: &Backend<R>,
dst: &mut GpuImage<f32>,
image: &GpuImage<f32>,
nc: NoiseConfig,
) {
unsafe {
cube_gat::launch_unchecked::<R>(
client,
image.cube_count(1, 1),
CubeDim::default(),
image.as_tensor_ref().as_tensor_arg(1),
dst.as_tensor_mut().as_tensor_arg(1),
ScalarArg::new(nc.alpha),
ScalarArg::new(nc.beta),
)
};
}
/// GPU kernel of the generalized Anscombe transform.
///
/// For every pixel `I` of `image` it writes
/// `2 / alpha * sqrt(max(0, alpha * I + 3/8 * alpha² + beta))`
/// to the same offset of `vst_image`. One thread handles one pixel.
#[cube(launch_unchecked)]
fn cube_gat(image: &Tensor<f32>, vst_image: &mut Tensor<f32>, alpha: f32, beta: f32) {
    let col = ABSOLUTE_POS_X;
    let row = ABSOLUTE_POS_Y;

    // Threads outside the image have nothing to do.
    if row >= image.shape(0) || col >= image.shape(1) {
        return;
    }

    // The same offset is used for both tensors.
    let idx = row * image.stride(0) + col;

    // ISO should not appear here, since alpha and beta are
    // already iso dependent. The radicand is clamped at zero so that the
    // square root never sees a negative value.
    let radicand = f32::max(
        0.0,
        alpha * image[idx] + 3.0 / 8.0 * alpha * alpha + beta,
    );
    vst_image[idx] = 2.0 / alpha * f32::sqrt(radicand);
}
/// Estimates per-pixel kernel covariances for `img`.
///
/// NOT IMPLEMENTED YET: the function currently applies the variance
/// stabilizing transform, reads the image shape and then hits `todo!()`,
/// so calling it always panics. `cfg` is not used yet.
///
/// The comments in the body are porting notes from the reference
/// (Python/CUDA) implementation that describe the remaining steps: compute
/// the image gradients, then launch the kernel-estimation kernel to fill a
/// `[y, x, 2, 2]` covariance tensor.
///
/// # Panics
/// Always panics with `todo!()`.
pub fn estimate_kernels<R: Runtime>(
client: &Backend<R>,
img: &GpuImage<f32>,
cfg: EstimateKernelsConfig,
noise_cfg: NoiseConfig,
) -> GpuTensor<f32, 4> {
// Performing Variance Stabilization Transform
let img = generalized_ascombe_transform(client, img, noise_cfg);
// Decimate to grey
// if bayer_mode :
// img_grey = compute_grey_images_(img, method="decimating")
// else :
// img_grey = img # no need to copy now, they will be copied to gpu later.
let img_gray = img;
let [grey_imshape_y, grey_imshape_x, _] = img_gray.shape();
// Computing grads
// let th_grey_img = th.as_tensor(img_grey, dtype = DEFAULT_TORCH_FLOAT_TYPE, device = "cuda");
// Horizontal filters
// let grad_kernel1 = np.array([[[[-0.5, 0.5]]], [[[0.5, 0.5]]]]);
// Vertical filters
// let grad_kernel2 = np.array([[[[0.5], [0.5]]], [[[-0.5], [0.5]]]]);
// let tmp = conv1d(th_grey_img, grad_kernel1);
// let th_full_grad = conv1d(tmp, grad_kernel2, groups = 2);
// The default padding mode reduces the shape of grey_img of 1 pixel in each
// direction, as expected
// let cuda_full_grads =
// cuda.as_cuda_array(th_full_grad.squeeze().transpose(0, 1).transpose(1, 2));
// shape [y, x, 2]
// let covs = cuda.device_array(grey_imshape + (2, 2), DEFAULT_NUMPY_FLOAT_TYPE);
// threadsperblock = (DEFAULT_THREADS, DEFAULT_THREADS)
// blockspergrid_x = math.ceil(grey_imshape_x/threadsperblock[1])
// blockspergrid_y = math.ceil(grey_imshape_y/threadsperblock[0])
// blockspergrid = (blockspergrid_x, blockspergrid_y)
// cuda_estimate_kernel[blockspergrid, threadsperblock](cuda_full_grads,
// k_detail, k_denoise, D_th, D_tr, k_stretch, k_shrink,
// covs)
// covs
todo!()
}
/// Estimates one 2x2 kernel covariance per pixel from the local gradients.
///
/// For the pixel at (`ABSOLUTE_POS_X`, `ABSOLUTE_POS_Y`) the kernel:
/// 1. sums the structure tensor `[gx², gx·gy; gx·gy, gy²]` over the 2x2
///    gradient neighbourhood `(x - 1 ..= x, y - 1 ..= y)`, skipping
///    positions outside `full_grads`;
/// 2. computes its eigenvalues and eigenvectors;
/// 3. turns the eigenvalues into the kernel sizes `k[0]`, `k[1]` with
///    `compute_k`;
/// 4. writes `k1² · e1·e1ᵀ + k2² · e2·e2ᵀ` into the four entries of `covs`
///    belonging to this pixel.
///
/// * `full_grads` - gradients; the x and y components of a pixel are read at
///   offsets `+0` and `+1` from the pixel's base offset (assumes the
///   component axis is contiguous, shape `[y, x, 2]` - TODO confirm once the
///   launch site exists).
/// * `covs` - output; the four entries of a pixel are written at offsets
///   `+0..=+3` (assumes a contiguous trailing `2x2`, shape `[y, x, 2, 2]`).
/// * remaining parameters are forwarded unchanged to `compute_k`.
// `+ 0` offsets are kept on purpose so the component indexing reads uniformly.
#[allow(clippy::identity_op)]
#[cubecl::cube]
fn cube_estimate_kernel(
    full_grads: &Tensor<f32>,
    covs: &mut Tensor<f32>,
    k_detail: f32,
    k_denoise: f32,
    d_th: f32,
    d_tr: f32,
    k_stretch: f32,
    k_shrink: f32,
) {
    let pixel_idx = ABSOLUTE_POS_X;
    let pixel_idy = ABSOLUTE_POS_Y;
    let imshape_y = covs.shape(0);
    let imshape_x = covs.shape(1);
    if pixel_idy >= imshape_y || pixel_idx >= imshape_x {
        return;
    }

    // Each pixel owns several consecutive entries, so the base offset has to
    // advance by the tensor strides rather than by one element per pixel;
    // otherwise neighbouring pixels overwrite each other's covariance.
    let covs_offset = pixel_idy * covs.stride(0) + pixel_idx * covs.stride(1);

    let mut structure_tensor: Line<f32> = Line::empty(4);
    structure_tensor[0] = 0.0;
    structure_tensor[1] = 0.0;
    structure_tensor[2] = 0.0;
    structure_tensor[3] = 0.0;
    for i in 0..2 {
        for j in 0..2 {
            let x = (pixel_idx + j) as i32 - 1;
            let y = (pixel_idy + i) as i32 - 1;
            if (0 <= y && y < full_grads.shape(0) as i32)
                && (0 <= x && x < full_grads.shape(1) as i32)
            {
                // Same reasoning as for `covs_offset`: step by the per-pixel stride.
                let full_grad_offset =
                    y as u32 * full_grads.stride(0) + x as u32 * full_grads.stride(1);
                let full_grad_x = full_grads[full_grad_offset + 0];
                let full_grad_y = full_grads[full_grad_offset + 1];
                structure_tensor[0] += full_grad_x * full_grad_x;
                structure_tensor[1] += full_grad_x * full_grad_y;
                structure_tensor[2] += full_grad_x * full_grad_y;
                structure_tensor[3] += full_grad_y * full_grad_y;
            }
        }
    }

    let mut l = Line::empty(2);
    let mut e1 = Line::empty(2);
    let mut e2 = Line::empty(2);
    let mut k = Line::empty(2);
    get_eigen_elmts_2x2(&structure_tensor, &mut l, &mut e1, &mut e2);
    compute_k(
        &mut k, l[0], l[1], k_detail, k_denoise, d_th, d_tr, k_stretch, k_shrink,
    );

    // Covariance = k1² · e1·e1ᵀ + k2² · e2·e2ᵀ (symmetric, so [1] == [2]).
    let k_1_sq = k[0] * k[0];
    let k_2_sq = k[1] * k[1];
    covs[covs_offset + 0] = k_1_sq * e1[0] * e1[0] + k_2_sq * e2[0] * e2[0];
    covs[covs_offset + 1] = k_1_sq * e1[0] * e1[1] + k_2_sq * e2[0] * e2[1];
    covs[covs_offset + 2] = k_1_sq * e1[0] * e1[1] + k_2_sq * e2[0] * e2[1];
    covs[covs_offset + 3] = k_1_sq * e1[1] * e1[1] + k_2_sq * e2[1] * e2[1];
}
/// Solves the 2x2 linear system `A · X = B` with the closed-form
/// (Cramer's rule) solution.
///
/// * `a` - the matrix `A`, flattened as `[a00, a01, a10, a11]`.
/// * `b` - right-hand side `B`.
/// * `x` - receives the solution `X`.
///
/// The determinant is not checked here: a singular `A` yields a division
/// by zero.
#[cubecl::cube]
fn solve_2x2(a: &Line<f32>, b: &Line<f32>, x: &mut Line<f32>) {
    let det = a[0] * a[3] - a[1] * a[2];
    let num_x0 = a[3] * b[0] - a[1] * b[1];
    let num_x1 = a[0] * b[1] - a[2] * b[0];
    x[0] = num_x0 / det;
    x[1] = num_x1 / det;
}
/// Inverts a 2x2 matrix.
///
/// * `m` - matrix to invert, flattened as `[m00, m01, m10, m11]`.
/// * `m_i` - receives the inverse, in the same layout.
///
/// When `|det(m)|` is not greater than `f32::EPSILON` the matrix is treated
/// as singular and `m_i` is set to the identity instead.
#[cubecl::cube]
fn invert_2x2(m: &Line<f32>, m_i: &mut Line<f32>) {
    let determinant = m[0] * m[3] - m[1] * m[2];
    if Abs::abs(determinant) > f32::new(f32::EPSILON) {
        // Adjugate scaled by the reciprocal determinant.
        let scale = 1.0 / determinant;
        m_i[0] = m[3] * scale;
        m_i[1] = -m[1] * scale;
        m_i[2] = -m[2] * scale;
        m_i[3] = m[0] * scale;
    } else {
        // Singular (or NaN) determinant: fall back to the identity.
        m_i[0] = 1.0;
        m_i[1] = 0.0;
        m_i[2] = 0.0;
        m_i[3] = 1.0;
    }
}
/// Computes the two real roots of `a·X² + b·X + c = 0`.
///
/// Only real roots are produced, so the caller must make sure they exist.
/// On return `roots[0]` holds the root with the larger absolute value and
/// `roots[1]` the one with the smaller absolute value.
///
/// * `a`, `b`, `c` - real coefficients of the polynomial.
/// * `roots` - receives the two roots.
#[cubecl::cube]
fn get_real_polyroots_2(a: f32, b: f32, c: f32, roots: &mut Line<f32>) {
    // Rounding errors can push the discriminant slightly below zero even
    // though two real roots exist, hence the clamp at zero.
    let sqrt_disc = f32::sqrt(f32::max(b * b - 4.0 * a * c, 0.0));
    let two_a = 2.0 * a;
    let root_plus = (-b + sqrt_disc) / two_a;
    let root_minus = (-b - sqrt_disc) / two_a;

    // Order the roots by decreasing magnitude.
    if f32::abs(root_plus) >= f32::abs(root_minus) {
        roots[0] = root_plus;
        roots[1] = root_minus;
    } else {
        roots[0] = root_minus;
        roots[1] = root_plus;
    }
}
/// Computes the eigenvalues of the 2x2 matrix `m` (flattened as
/// `[m00, m01, m10, m11]`) as the roots of its characteristic polynomial
/// `X² - trace(m)·X + det(m)`.
///
/// `l[0]` receives the eigenvalue with the larger absolute value, `l[1]`
/// the other one (ordering done by `get_real_polyroots_2`).
#[cubecl::cube]
fn get_eigen_val_2x2(m: &Line<f32>, l: &mut Line<f32>) {
    let leading = 1.0;
    let neg_trace = -(m[0] + m[3]);
    let determinant = m[0] * m[3] - m[1] * m[2];
    get_real_polyroots_2(leading, neg_trace, determinant, l)
}
///
/// Computes eigenvectors `e1`, `e2` of the 2x2 matrix `m` for the
/// eigenvalues `l`, so that M.e1 = l1.e1 ; M.e2 = l2.e2
///
/// Parameters
/// ----------
/// m : Line of 4 values
/// Real symmetric 2x2 matrix, flattened as [m00, m01, m10, m11]
/// l : Line of 2 values
/// Eigenvalues of `m` as produced by `get_eigen_val_2x2`
/// (only `l[1]` is read here)
/// e1, e2 : Line of 2 values each
/// Outputs: the computed orthogonal eigenvectors. In the general branch
/// they are normalized to unit length.
///
/// Returns
/// -------
/// None, the results are written into `e1` and `e2`.
///
#[cubecl::cube]
fn get_eigen_vect_2x2(m: &Line<f32>, l: &Line<f32>, e1: &mut Line<f32>, e2: &mut Line<f32>) {
// 2x2 algorithm : https://en.wikipedia.org/wiki/Eigenvalue_algorithm
if m[1] == 0.0 && m[0] == m[3] {
// m is multiple of identity, picking 2 ortogonal eigen vectors.
e1[0] = 1.0;
e1[1] = 0.0;
e2[0] = 0.0;
e2[1] = 1.0;
} else {
// averaging 2 for increased reliability
// (e1 is the sum of the two columns of `m - l[1] * identity`)
e1[0] = m[0] + m[1] - l[1];
e1[1] = m[2] + m[3] - l[1];
if e1[0] == 0.0 {
// e1 is along the y axis: use the canonical basis (0, 1), (1, 0)
e1[1] = 1.0;
e2[0] = 1.0;
e2[1] = 0.0;
} else if e1[1] == 0.0 {
// e1 is along the x axis: use the canonical basis (1, 0), (0, 1)
e1[0] = 1.0;
e2[0] = 0.0;
e2[1] = 1.0;
} else {
// general case: normalize e1, then take e2 perpendicular to it,
// oriented so that its second component is non-negative
let norm_ = f32::sqrt(e1[0] * e1[0] + e1[1] * e1[1]);
e1[0] /= norm_;
e1[1] /= norm_;
let sign = copysign(e1[0]);
e2[1] = f32::abs(e1[0]);
e2[0] = -e1[1] * sign
}
}
}
/// Computes both the eigenvalues (`l`) and the eigenvectors (`e1`, `e2`) of
/// the 2x2 matrix `m`: first `get_eigen_val_2x2` fills `l`, then
/// `get_eigen_vect_2x2` uses `l` to fill `e1` and `e2`.
#[cubecl::cube]
fn get_eigen_elmts_2x2(m: &Line<f32>, l: &mut Line<f32>, e1: &mut Line<f32>, e2: &mut Line<f32>) {
get_eigen_val_2x2(m, l);
get_eigen_vect_2x2(m, l, e1, e2);
}
/// Computes the kernel sizes `k[0]` (k_1) and `k[1]` (k_2) from the two
/// eigenvalues and the tuning constants.
///
/// * `k` - output; receives k_1 and k_2.
/// * `l1` - lambda1, the dominant eigenvalue.
/// * `l2` - lambda2, the second eigenvalue.
/// * `k_detail`, `k_denoise`, `d_th`, `d_tr`, `k_stretch`, `k_shrink` -
///   tuning constants, all detailed in the article.
///
/// Steps:
/// * anisotropy measure `1 + sqrt((l1 - l2) / (l1 + l2))`;
/// * denoise weight `clamp(1 - sqrt(l1) / d_tr + d_th, 0, 1)`;
/// * if the anisotropy exceeds 1.95 the kernel is shrunk along one axis
///   (`1 / k_shrink`) and stretched along the other (`k_stretch`),
///   otherwise both axes use 1;
/// * both sizes are blended with `k_denoise` by the denoise weight and
///   scaled by `k_detail`.
#[cubecl::cube]
fn compute_k(
    k: &mut Line<f32>,
    l1: f32,
    l2: f32,
    k_detail: f32,
    k_denoise: f32,
    d_th: f32,
    d_tr: f32,
    k_stretch: f32,
    k_shrink: f32,
) {
    let anisotropy: f32 = 1.0 + f32::sqrt((l1 - l2) / (l1 + l2));
    let denoise_weight: f32 = clamp(1.0 - f32::sqrt(l1) / d_tr + d_th, 0.0, 1.0);

    // Isotropic default. It is also what remains when `anisotropy` is NaN,
    // because the comparison below is then false.
    let mut k1 = 1.0f32;
    let mut k2 = 1.0f32;
    // This is a very aggressive way of driving anisotropy, but it works well so far.
    if anisotropy > 1.95 {
        k1 = 1.0 / k_shrink;
        k2 = k_stretch;
    }

    k[0] = k_detail * ((1.0 - denoise_weight) * k1 + denoise_weight * k_denoise);
    k[1] = k_detail * ((1.0 - denoise_weight) * k2 + denoise_weight * k_denoise);
}
/// Returns the sign of `val` as a float: `-1.0` when `val < 0.0`,
/// `1.0` otherwise (including for `0.0` and NaN, for which the comparison
/// is false).
///
/// Despite its name this takes a single argument; it plays the role of
/// `copysign(1, val)`.
#[cubecl::cube]
fn copysign(val: f32) -> f32 {
if val < 0.0f32 {
-(1.0f32)
} else {
1.0f32
}
}
/// Restricts `a` to the range `[min, max]`, computed as
/// `min(max(a, min), max)`.
#[cubecl::cube]
fn clamp(a: f32, min: f32, max: f32) -> f32 {
// Clamp::clamp(a, min, max)
f32::min(f32::max(a, min), max)
}

3
src/ops/exposure.rs Normal file
View File

@ -0,0 +1,3 @@
use cubecl::prelude::*;
/// Placeholder for the exposure correction step.
///
/// The body is currently empty: the function takes no arguments and does
/// nothing.
pub fn correct_exposure() {}

420
src/ops/fft.rs Normal file
View File

@ -0,0 +1,420 @@
use cubecl::prelude::*;
/**
Highly-optimized inverse fast Fourier transform applied to each color channel independently.

One thread processes one tile: the tile whose top-left corner is at
(`ABSOLUTE_POS_X * tile_size`, `ABSOLUTE_POS_Y * tile_size`).

Input / output layout as used by the code below:
- `in_texture_ft` holds complex values as interleaved pairs along x: the value for column `m` is read at `2 * m` (real part) and `2 * m + 1` (imaginary part).
- `out_texture` receives only the real part of the result, multiplied by `1 / (n_textures * tile_size * tile_size)`.

The aim of this function is to provide improved performance compared to the more simple function backward_dft() while providing equal results. It uses the following features for reduced calculation times:
- the color channels of a pixel are processed together as one `Line<f32>` (created with a width of 4), so every arithmetic operation acts on all channels at once.
- the one-dimensional transformation along x-direction (first pass, row-wise) employs the fast Fourier transform algorithm: At first, 4 small DFTs are calculated and then final results are obtained by two steps of cross-combination of values (based on a so-called butterfly diagram). This approach reduces the total number of memory reads and computational steps considerably.
- the one-dimensional transformation along y-direction (second pass, column-wise) uses the same scheme.

NOTE(review): the scratch arrays are fixed-size (`tmp_data`: 16 lines, `tmp_tile`: 128 lines) while they are indexed up to `2 * tile_size - 1` and `2 * tile_size * tile_size - 1`, so the kernel presumably expects `tile_size == 8` (and it must be a multiple of 4) - confirm against the caller.
*/
#[cube(launch_unchecked)]
#[rustfmt::skip]
fn backward_fft(in_texture_ft: &Tensor<Line<f32>>, out_texture: &mut Tensor<Line<f32>>, tile_size: u32, n_textures: u32) {
// compute tile positions from gid
let m0 = ABSOLUTE_POS_X * tile_size;
let n0 = ABSOLUTE_POS_Y * tile_size;
let tile_size_14 = tile_size / 4;
let tile_size_24 = tile_size / 2;
let tile_size_34 = tile_size / 4 * 3;
// pre-calculate factors for sine and cosine calculation
let angle = -2.0 * f32::new(std::f32::consts::PI) / tile_size as f32;
// pre-initialize some vectors
let norm_factor = Line::empty(4).fill(1.0 / (n_textures * tile_size * tile_size) as f32);
// tmp_data: one row / column of interleaved (re, im) values
// tmp_tile: the whole tile after the row-wise pass, interleaved (re, im), row stride 2 * tile_size
let mut tmp_data: Array<Line<f32>> = Array::new(16);
let mut tmp_tile: Array<Line<f32>> = Array::new(128);
// row-wise one-dimensional fast Fourier transform along x-direction
for dn in 0..tile_size {
let n_tmp = dn * 2 * tile_size;
// copy data to temp vector
for dm in 0..tile_size {
let offset = (n0 + dn) * in_texture_ft.stride(0) + 2 * (m0 + dm);
tmp_data[2 * dm + 0] = in_texture_ft[offset + 0];
tmp_data[2 * dm + 1] = in_texture_ft[offset + 1];
}
// calculate 4 small discrete Fourier transforms
for dm in 0..tile_size / 4 {
// fill with zeros
let mut re0 = Line::empty(4).fill(0.0);
let mut re1 = Line::empty(4).fill(0.0);
let mut re2 = Line::empty(4).fill(0.0);
let mut re3 = Line::empty(4).fill(0.0);
let mut im0 = Line::empty(4).fill(0.0);
let mut im1 = Line::empty(4).fill(0.0);
let mut im2 = Line::empty(4).fill(0.0);
let mut im3 = Line::empty(4).fill(0.0);
// each iteration consumes 4 consecutive complex samples (8 lines);
// sample 4*dx+1 feeds re2/im2 and sample 4*dx+2 feeds re1/im1 on purpose
// (bit-reversed order expected by the butterflies below)
for dx in 0..tile_size / 4 {
let offset = dx * 8;
let dmdx = (dm * dx * 4) as f32;
// calculate coefficients
let coef_re = Line::<f32>::empty(4).fill(f32::cos(angle * dmdx));
let coef_im = Line::<f32>::empty(4).fill(f32::sin(angle * dmdx));
// DFT0
let data_re = tmp_data[offset + 0];
let data_im = tmp_data[offset + 1];
re0 += coef_re * data_re + coef_im * data_im;
im0 += coef_im * data_re - coef_re * data_im;
// DFT1
let data_re = tmp_data[offset + 2];
let data_im = tmp_data[offset + 3];
re2 += coef_re * data_re + coef_im * data_im;
im2 += coef_im * data_re - coef_re * data_im;
// DFT2
let data_re = tmp_data[offset + 4];
let data_im = tmp_data[offset + 5];
re1 += coef_re * data_re + coef_im * data_im;
im1 += coef_im * data_re - coef_re * data_im;
//DFT3
let data_re = tmp_data[offset + 6];
let data_im = tmp_data[offset + 7];
re3 += coef_re * data_re + coef_im * data_im;
im3 += coef_im * data_re - coef_re * data_im;
}
// first butterfly to combine result
let coef_re = Line::<f32>::empty(4).fill(f32::cos(angle * (2 * dm) as f32));
let coef_im = Line::<f32>::empty(4).fill(f32::sin(angle * (2 * dm) as f32));
let re_00 = re0 + coef_re * re1 - coef_im * im1;
let im_00 = im0 + coef_im * re1 + coef_re * im1;
let re_22 = re2 + coef_re * re3 - coef_im * im3;
let im_22 = im2 + coef_im * re3 + coef_re * im3;
let coef_re = Line::<f32>::empty(4).fill(f32::cos(angle * (2 * (dm + tile_size_14)) as f32));
let coef_im = Line::<f32>::empty(4).fill(f32::sin(angle * (2 * (dm + tile_size_14)) as f32));
let re_11 = re0 + coef_re * re1 - coef_im * im1;
let im_11 = im0 + coef_im * re1 + coef_re * im1;
let re_33 = re2 + coef_re * re3 - coef_im * im3;
let im_33 = im2 + coef_im * re3 + coef_re * im3;
// second butterfly to combine results
let re0 = re_00 + Line::new(f32::cos(angle * dm as f32)) * re_22 - Line::new(f32::sin(angle * dm as f32)) * im_22;
let re1 = re_11 + Line::new(f32::cos(angle * (dm + tile_size_14) as f32)) * re_33 - Line::new(f32::sin(angle * (dm + tile_size_14) as f32)) * im_33;
let re2 = re_00 + Line::new(f32::cos(angle * (dm + tile_size_24) as f32)) * re_22 - Line::new(f32::sin(angle * (dm + tile_size_24) as f32)) * im_22;
let re3 = re_11 + Line::new(f32::cos(angle * (dm + tile_size_34) as f32)) * re_33 - Line::new(f32::sin(angle * (dm + tile_size_34) as f32)) * im_33;
let im0 = im_00 + Line::new(f32::sin(angle * dm as f32)) * re_22 + Line::new(f32::cos(angle * dm as f32)) * im_22;
let im1 = im_11 + Line::new(f32::sin(angle * (dm + tile_size_14) as f32)) * re_33 + Line::new(f32::cos(angle * (dm + tile_size_14) as f32)) * im_33;
let im2 = im_00 + Line::new(f32::sin(angle * (dm + tile_size_24) as f32)) * re_22 + Line::new(f32::cos(angle * (dm + tile_size_24) as f32)) * im_22;
let im3 = im_11 + Line::new(f32::sin(angle * (dm + tile_size_34) as f32)) * re_33 + Line::new(f32::cos(angle * (dm + tile_size_34) as f32)) * im_33;
// write into temporary tile storage
// (the four results land at columns dm, dm + N/4, dm + N/2, dm + 3N/4;
// imaginary parts are stored negated)
tmp_tile[n_tmp + 2 * dm + 0] = re0;
tmp_tile[n_tmp + 2 * dm + 1] = im0 * Line::new(-1.0);
tmp_tile[n_tmp + 2 * dm + tile_size_24 + 0] = re1;
tmp_tile[n_tmp + 2 * dm + tile_size_24 + 1] = im1 * Line::new(-1.0);
tmp_tile[n_tmp + 2 * dm + tile_size + 0] = re2;
tmp_tile[n_tmp + 2 * dm + tile_size + 1] = im2 * Line::new(-1.0);
tmp_tile[n_tmp + 2 * dm + tile_size_24 * 3 + 0] = re3;
tmp_tile[n_tmp + 2 * dm + tile_size_24 * 3 + 1] = im3 * Line::new(-1.0);
}
};
// column-wise one-dimensional fast Fourier transform along y-direction
for dm in 0..tile_size {
let m = m0 + dm;
// copy data to temp vector
for dn in 0..tile_size {
tmp_data[2 * dn + 0] = tmp_tile[dn * 2 * tile_size + 2 * dm + 0];
tmp_data[2 * dn + 1] = tmp_tile[dn * 2 * tile_size + 2 * dm + 1];
}
// calculate 4 small discrete Fourier transforms
for dn in 0..tile_size / 4 {
let n = n0 + dn;
// fill with zeros
let mut re0 = Line::empty(4).fill(0.0);
let mut re1 = Line::empty(4).fill(0.0);
let mut re2 = Line::empty(4).fill(0.0);
let mut re3 = Line::empty(4).fill(0.0);
let mut im0 = Line::empty(4).fill(0.0);
let mut im1 = Line::empty(4).fill(0.0);
let mut im2 = Line::empty(4).fill(0.0);
let mut im3 = Line::empty(4).fill(0.0);
for dy in 0..tile_size / 4 {
let offset = dy * 8;
let dndy = (dn * dy * 4) as f32;
// calculate coefficients
let coef_re = Line::new(f32::cos(angle * dndy));
let coef_im = Line::new(f32::sin(angle * dndy));
// DFT0
let data_re = tmp_data[offset + 0];
let data_im = tmp_data[offset + 1];
re0 += coef_re * data_re + coef_im * data_im;
im0 += coef_im * data_re - coef_re * data_im;
// DFT1
let data_re = tmp_data[offset + 2];
let data_im = tmp_data[offset + 3];
re2 += coef_re*data_re + coef_im*data_im;
im2 += coef_im*data_re - coef_re*data_im;
// DFT2
let data_re = tmp_data[offset + 4];
let data_im = tmp_data[offset + 5];
re1 += coef_re * data_re + coef_im * data_im;
im1 += coef_im * data_re - coef_re * data_im;
// DFT3
let data_re = tmp_data[offset + 6];
let data_im = tmp_data[offset + 7];
re3 += coef_re * data_re + coef_im * data_im;
im3 += coef_im * data_re - coef_re * data_im;
}
// first butterfly to combine results
// (im_00 / im_11 are not needed: only the real part of the final result is written)
let coef_re = Line::new(f32::cos(angle * (2 * dn) as f32));
let coef_im = Line::new(f32::sin(angle * (2 * dn) as f32));
let re_00 = re0 + coef_re * re1 - coef_im * im1;
// let im_00 = im0 + coef_im * re1 + coef_re * im1;
let re_22 = re2 + coef_re * re3 - coef_im * im3;
let im_22 = im2 + coef_im * re3 + coef_re * im3;
let coef_re = Line::new(f32::cos(angle * (2 * (dn + tile_size_14)) as f32));
let coef_im = Line::new(f32::sin(angle * (2 * (dn + tile_size_14)) as f32));
let re_11 = re0 + coef_re * re1 - coef_im * im1;
// let im_11 = im0 + coef_im * re1 + coef_re * im1;
let re_33 = re2 + coef_re * re3 - coef_im * im3;
let im_33 = im2 + coef_im * re3 + coef_re * im3;
// second butterfly to combine results
let re0 = re_00 + Line::new(f32::cos(angle * dn as f32)) * re_22 - Line::new(f32::sin(angle * dn as f32)) * im_22;
let re2 = re_00 + Line::new(f32::cos(angle * (dn + tile_size_24) as f32)) * re_22 - Line::new(f32::sin(angle * (dn + tile_size_24) as f32)) * im_22;
let re1 = re_11 + Line::new(f32::cos(angle * (dn + tile_size_14) as f32)) * re_33 - Line::new(f32::sin(angle * (dn + tile_size_14) as f32)) * im_33;
let re3 = re_11 + Line::new(f32::cos(angle * (dn + tile_size_34) as f32)) * re_33 - Line::new(f32::sin(angle * (dn + tile_size_34) as f32)) * im_33;
// write into output textures
// (rows n, n + N/4, n + N/2, n + 3N/4 of column m, normalized)
out_texture[n * out_texture.stride(0) + m] = re0 * norm_factor;
out_texture[(n + tile_size_14) * out_texture.stride(0) + m] = re1 * norm_factor;
out_texture[(n + tile_size_24) * out_texture.stride(0) + m] = re2 * norm_factor;
out_texture[(n + tile_size_34) * out_texture.stride(0) + m] = re3 * norm_factor;
}
}
}
/**
Highly-optimized forward fast Fourier transform applied to each color channel independently.

One thread processes one tile: the tile whose top-left corner is at
(`ABSOLUTE_POS_X * tile_size`, `ABSOLUTE_POS_Y * tile_size`).

Input / output layout as used by the code below:
- `in_texture` holds real-valued pixels, read at `row * stride(0) + column`. Each sample is multiplied by a raised cosine window weight before being transformed.
- `out_texture_ft` receives complex values as interleaved pairs along x: the value for column `m` is written at `2 * m` (real part) and `2 * m + 1` (imaginary part).

The aim of this function is to provide improved performance compared to the more simple function forward_dft() while providing equal results. It uses the following features for reduced calculation times:
- the color channels of a pixel are processed together as one `Line<f32>` (created with a width of 4), so every arithmetic operation acts on all channels at once.
- the one-dimensional transformation along y-direction is a discrete Fourier transform. As the input image is real-valued, the frequency domain representation is symmetric and only values for N/2+1 rows have to be calculated.
- the one-dimensional transformation along x-direction employs the fast Fourier transform algorithm: At first, 4 small DFTs are calculated and then final results are obtained by two steps of cross-combination of values (based on a so-called butterfly diagram). This approach reduces the total number of memory reads and computational steps considerably.
- due to the symmetry mentioned earlier, only N/2+1 rows have to be transformed and the remaining N/2-1 rows can be directly inferred.

NOTE(review): the scratch arrays are fixed-size (`tmp_data`: 16 lines, `tmp_tile`: 80 lines) while they are indexed up to `2 * tile_size - 1` and `(tile_size / 2 + 1) * 2 * tile_size - 1`, so the kernel presumably expects `tile_size == 8` (and it must be a multiple of 4) - confirm against the caller.
*/
#[cube(launch_unchecked)]
#[rustfmt::skip]
fn forward_fft(in_texture: &Tensor<Line<f32>>, out_texture_ft: &mut Tensor<Line<f32>>, tile_size: u32) {
// compute tile positions from gid
let m0 = ABSOLUTE_POS_X * tile_size;
let n0 = ABSOLUTE_POS_Y * tile_size;
let tile_size_14 = tile_size / 4;
let tile_size_24 = tile_size / 2;
let tile_size_34 = tile_size / 4 * 3;
// pre-calculate factors for sine and cosine calculation
let angle = -2.0 * f32::new(std::f32::consts::PI) / tile_size as f32;
// pre-initialize some vectors
// tmp_data: scratch for one pair of columns (first pass) or one row of interleaved (re, im) values (second pass)
// tmp_tile: rows 0..=tile_size/2 after the column-wise pass, interleaved (re, im), row stride 2 * tile_size
let mut tmp_data: Array<Line<f32>> = Array::new(16);
let mut tmp_tile: Array<Line<f32>> = Array::new(80);
// column-wise one-dimensional discrete Fourier transform along y-direction
// (two adjacent columns, dm and dm + 1, are processed per iteration)
for dm in 0..tile_size / 2 {
let dm = dm * 2;
let m = m0 + dm;
// copy data to temp vector
for dn in 0..tile_size {
let offset = (n0 + dn) * in_texture.stride(0) + m;
tmp_data[2 * dn + 0] = in_texture[offset + 0];
tmp_data[2 * dn + 1] = in_texture[offset + 1];
}
// exploit symmetry of real dft and calculate reduced number of rows
for dn in 0..=tile_size / 2 {
let n_tmp = dn * 2 * tile_size;
// fill with zeros
let mut re0 = Line::empty(4).fill(0.0);
let mut re1 = Line::empty(4).fill(0.0);
let mut im0 = Line::empty(4).fill(0.0);
let mut im1 = Line::empty(4).fill(0.0);
for dy in 0..tile_size {
// see section "Overlapped tiles" in https://graphics.stanford.edu/papers/hdrp/hasinoff-hdrplus-sigasia16.pdf or
// section "Overlapped Tiles and Raised Cosine Window" in https://www.ipol.im/pub/art/2021/336/
// calculate modified raised cosine window weight for blending tiles to suppress artifacts
let norm_cosine0 = Line::new((0.5 - 0.5 * f32::cos(-angle * (dm as f32 + 0.5))) * (0.5 - 0.5 * f32::cos(-angle * (dy as f32 + 0.5))));
let norm_cosine1 = Line::new((0.5 - 0.5 * f32::cos(-angle * (dm as f32 + 1.5))) * (0.5 - 0.5 * f32::cos(-angle * (dy as f32 + 0.5))));
// calculate coefficients
let coef_re = Line::new(f32::cos(angle * (dn * dy) as f32));
let coef_im = Line::new(f32::sin(angle * (dn * dy) as f32));
// the input is real-valued, so only the real part contributes
let data_re = norm_cosine0 * tmp_data[2 * dy + 0];
re0 += coef_re * data_re;
im0 += coef_im * data_re;
let data_re = norm_cosine1 * tmp_data[2 * dy + 1];
re1 += coef_re * data_re;
im1 += coef_im * data_re;
}
// write into temporary tile storage
tmp_tile[n_tmp + 2 * dm + 0] = re0;
tmp_tile[n_tmp + 2 * dm + 1] = im0;
tmp_tile[n_tmp + 2 * dm + 2] = re1;
tmp_tile[n_tmp + 2 * dm + 3] = im1;
}
}
// row-wise one-dimensional fast Fourier transform along x-direction
// exploit symmetry of real dft and calculate reduced number of rows
for dn in 0..=tile_size / 2 {
let n = n0 + dn;
// copy data to temp vector
for dm in 0..tile_size {
let offset = dn * 2 * tile_size + 2 * dm;
tmp_data[2 * dm + 0] = tmp_tile[offset + 0];
tmp_data[2 * dm + 1] = tmp_tile[offset + 1];
}
// calculate 4 small discrete Fourier transforms
for dm in 0..tile_size / 4 {
// output column index in interleaved (re, im) units
let m = 2 * (m0 + dm);
let mut re0 = Line::empty(4).fill(0.0);
let mut re1 = Line::empty(4).fill(0.0);
let mut re2 = Line::empty(4).fill(0.0);
let mut re3 = Line::empty(4).fill(0.0);
let mut im0 = Line::empty(4).fill(0.0);
let mut im1 = Line::empty(4).fill(0.0);
let mut im2 = Line::empty(4).fill(0.0);
let mut im3 = Line::empty(4).fill(0.0);
// each iteration consumes 4 consecutive complex samples;
// sample dx+1 feeds re2/im2 and sample dx+2 feeds re1/im1 on purpose
// (bit-reversed order expected by the butterflies below)
for dx in 0..tile_size / 4 {
let dx = dx * 4;
// calculate coefficients
let coef_re = Line::new(f32::cos(angle * (dm * dx) as f32));
let coef_im = Line::new(f32::sin(angle * (dm * dx) as f32));
// DFT0
let data_re = tmp_data[2 * dx + 0];
let data_im = tmp_data[2 * dx + 1];
re0 += coef_re * data_re - coef_im * data_im;
im0 += coef_im * data_re + coef_re * data_im;
// DFT1
let data_re = tmp_data[2 * dx + 2];
let data_im = tmp_data[2 * dx + 3];
re2 += coef_re * data_re - coef_im * data_im;
im2 += coef_im * data_re + coef_re * data_im;
// DFT2
let data_re = tmp_data[2 * dx + 4];
let data_im = tmp_data[2 * dx + 5];
re1 += coef_re * data_re - coef_im * data_im;
im1 += coef_im * data_re + coef_re * data_im;
// DFT3
let data_re = tmp_data[2 * dx + 6];
let data_im = tmp_data[2 * dx + 7];
re3 += coef_re * data_re - coef_im * data_im;
im3 += coef_im * data_re + coef_re * data_im;
}
// first butterfly to combine results
let coef_re = Line::new(f32::cos(angle * (2 * dm) as f32));
let coef_im = Line::new(f32::sin(angle * (2 * dm) as f32));
let re_00 = re0 + coef_re * re1 - coef_im * im1;
let im_00 = im0 + coef_im * re1 + coef_re * im1;
let re_22 = re2 + coef_re * re3 - coef_im * im3;
let im_22 = im2 + coef_im * re3 + coef_re * im3;
let coef_re = Line::new(f32::cos(angle * (2 * (dm + tile_size_14)) as f32));
let coef_im = Line::new(f32::sin(angle * (2 * (dm + tile_size_14)) as f32));
let re_11 = re0 + coef_re * re1 - coef_im * im1;
let im_11 = im0 + coef_im * re1 + coef_re * im1;
let re_33 = re2 + coef_re * re3 - coef_im * im3;
let im_33 = im2 + coef_im * re3 + coef_re * im3;
// second butterfly to combine results
let re0 = re_00 + Line::new(f32::cos(angle * dm as f32)) * re_22 - Line::new(f32::sin(angle * dm as f32)) * im_22;
let im0 = im_00 + Line::new(f32::sin(angle * dm as f32)) * re_22 + Line::new(f32::cos(angle * dm as f32)) * im_22;
let re2 = re_00 + Line::new(f32::cos(angle * (dm+tile_size_24) as f32)) * re_22 - Line::new(f32::sin(angle * (dm+tile_size_24) as f32)) * im_22;
let im2 = im_00 + Line::new(f32::sin(angle * (dm+tile_size_24) as f32)) * re_22 + Line::new(f32::cos(angle * (dm+tile_size_24) as f32)) * im_22;
let re1 = re_11 + Line::new(f32::cos(angle * (dm+tile_size_14) as f32)) * re_33 - Line::new(f32::sin(angle * (dm+tile_size_14) as f32)) * im_33;
let im1 = im_11 + Line::new(f32::sin(angle * (dm+tile_size_14) as f32)) * re_33 + Line::new(f32::cos(angle * (dm+tile_size_14) as f32)) * im_33;
let re3 = re_11 + Line::new(f32::cos(angle * (dm+tile_size_34) as f32)) * re_33 - Line::new(f32::sin(angle * (dm+tile_size_34) as f32)) * im_33;
let im3 = im_11 + Line::new(f32::sin(angle * (dm+tile_size_34) as f32)) * re_33 + Line::new(f32::cos(angle * (dm+tile_size_34) as f32)) * im_33;
// write into output texture
// (the four results land at columns dm, dm + N/4, dm + N/2, dm + 3N/4 of row n)
let offset = n * out_texture_ft.stride(0) + m;
out_texture_ft[offset + 0] = re0;
out_texture_ft[offset + 1] = im0;
out_texture_ft[offset + tile_size_24 + 0] = re1;
out_texture_ft[offset + tile_size_24 + 1] = im1;
out_texture_ft[offset + tile_size + 0] = re2;
out_texture_ft[offset + tile_size + 1] = im2;
out_texture_ft[offset + tile_size_24 * 3 + 0] = re3;
out_texture_ft[offset + tile_size_24 * 3 + 1] = im3;
// exploit symmetry of real dft and set values for remaining rows
// (row tile_size - dn gets the complex conjugate, at the mirrored column)
if dn > 0 && dn != tile_size / 2 {
let n2 = n0 + tile_size - dn;
//int const m20 = 2*(m0 + (dm==0 ? 0 : tile_size-dm));
// branch-free form of the line above: min(dm, 1) is 0 for dm == 0 and 1 otherwise
let m20 = 2 * (m0 + Min::min(dm, 1) * (tile_size - dm));
let m21 = 2 * (m0 + tile_size - dm - tile_size_14);
let m22 = 2 * (m0 + tile_size - dm - tile_size_24);
let m23 = 2 * (m0 + tile_size - dm - tile_size_14 * 3);
// write into output texture
let offset = n2 * out_texture_ft.stride(0);
out_texture_ft[offset + m20 + 0] = re0;
out_texture_ft[offset + m20 + 1] = im0 * Line::new(-1.0);
out_texture_ft[offset + m21 + 0] = re1;
out_texture_ft[offset + m21 + 1] = im1 * Line::new(-1.0);
out_texture_ft[offset + m22 + 0] = re2;
out_texture_ft[offset + m22 + 1] = im2 * Line::new(-1.0);
out_texture_ft[offset + m23 + 0] = re3;
out_texture_ft[offset + m23 + 1] = im3 * Line::new(-1.0);
}
}
}
}

37
src/ops/gauss_pyramid.rs Normal file
View File

@ -0,0 +1,37 @@
use cubecl::Runtime;
use crate::{backend::Backend, image::GpuImage};
use super::downsample::downsample;
/// Per-level settings of the Gaussian pyramid.
///
/// Derives the same basic traits as the other plain configuration structs of
/// the crate so that it can be logged, copied and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PyramidLayerConfig {
    /// Tile size used at this pyramid level
    /// (presumably in pixels, for tile-based alignment - TODO confirm).
    pub tile_size: u32,
    /// Downsampling factor of this level relative to the previous, finer one
    /// (presumably the value handed to `downsample` - TODO confirm).
    pub factor: u32,
}
/// Builds an N-level Gaussian pyramid and returns it coarse-to-fine.
///
/// Every level is produced by [`downsample`] (blur followed by decimation):
/// the first factor is applied to `img`, each following factor to the level
/// created just before it.
///
/// # Arguments
/// * `cube` - backend used to run the GPU kernels.
/// * `img` - input image (expected to be a grayscale image downsampled from
///   a Bayer raw image).
/// * `factors` - downsampling factors, ordered fine-to-coarse.
///
/// # Returns
/// One image per factor, ordered from the coarsest level to the finest.
/// The vector is empty when `factors` yields nothing.
pub(crate) fn build_gauss_pyramid<R: Runtime>(
    cube: &Backend<R>,
    img: &GpuImage<f32>,
    factors: impl Iterator<Item = u32>,
) -> Vec<GpuImage<f32>> {
    // Reserve room up front, preferring the iterator's upper bound if it has one.
    let (lower, upper) = factors.size_hint();
    let mut levels = Vec::with_capacity(upper.unwrap_or(lower));

    // Levels are created fine-to-coarse, each one from the previous level.
    for factor in factors {
        let source = levels.last().unwrap_or(img);
        let next = downsample(cube, source, factor);
        levels.push(next);
    }

    // Callers expect the coarsest level first.
    levels.reverse();
    levels
}

216
src/ops/lucas_kanade.rs Normal file
View File

@ -0,0 +1,216 @@
use cubecl::prelude::*;
use crate::{image::GpuImage, tensor::GpuTensor};
/// Computes the gradients of `img` as a rank-3 tensor.
///
/// NOT IMPLEMENTED YET: the body is `todo!()`, so calling this always panics.
/// NOTE(review): presumably the result has shape `[y, x, 2]` (x and y
/// gradient per pixel), matching how `cube_hessian` reads `grad` - confirm
/// when implementing.
fn compute_grads(img: &GpuImage<f32>) -> GpuTensor<f32, 3> {
todo!()
}
/// Computes one accumulated 2x2 matrix of gradient products per
/// `tile_size` x `tile_size` patch.
///
/// NOT IMPLEMENTED YET: the body is `todo!()`, so calling this always panics.
/// NOTE(review): presumably meant to launch `cube_hessian` on `grads` -
/// confirm when implementing.
fn compute_hessian(grads: &GpuTensor<f32, 3>, tile_size: u32) -> GpuTensor<f32, 3> {
todo!()
}
/// Accumulates, for every patch, the 2x2 matrix of gradient products
/// `[Σgx², Σgx·gy; Σgx·gy, Σgy²]` over the patch's pixels.
///
/// One thread handles one patch of `tile_size` x `tile_size` pixels.
///
/// * `grad` - gradients; the x and y components of a pixel are read at
///   offsets `+0` and `+1` from `y * stride(0) + x * stride(1)`.
/// * `hess` - output; the four entries of a patch are written at offsets
///   `+0..=+3` from `patch_y * stride(0) + patch_x * stride(1)`.
/// * `tile_size` - edge length of a patch in pixels.
#[cube(launch_unchecked)]
fn cube_hessian(grad: &Tensor<f32>, hess: &mut Tensor<f32>, tile_size: u32) {
    let tile_x = ABSOLUTE_POS_X;
    let tile_y = ABSOLUTE_POS_Y;

    // Threads that do not correspond to an existing patch have nothing to do.
    if tile_y >= hess.shape(0) || tile_x >= hess.shape(1) {
        return;
    }

    // Top-left pixel of the patch on the coarse grey grid. Because of the
    // padding at the image border, parts of the patch can be out of bounds.
    let origin_x = tile_size * tile_x;
    let origin_y = tile_size * tile_y;

    let mut acc = Line::empty(4);
    acc[0] = 0.0;
    acc[1] = 0.0;
    acc[2] = 0.0;
    acc[3] = 0.0;

    for dy in 0..tile_size {
        for dx in 0..tile_size {
            let pixel_y = origin_y + dy;
            let pixel_x = origin_x + dx;
            // Only pixels that actually exist contribute to the sums.
            if pixel_y < grad.shape(0) && pixel_x < grad.shape(1) {
                let base = pixel_y * grad.stride(0) + pixel_x * grad.stride(1);
                let gx = grad[base + 0];
                let gy = grad[base + 1];
                acc[0] += gx * gx;
                acc[1] += gx * gy;
                acc[2] += gx * gy;
                acc[3] += gy * gy;
            }
        }
    }

    let out = tile_y * hess.stride(0) + tile_x * hess.stride(1);
    hess[out + 0] = acc[0];
    hess[out + 1] = acc[1];
    hess[out + 2] = acc[2];
    hess[out + 3] = acc[3];
}
/// Performs one update step of the per-tile alignment (flow) estimate.
///
/// The update relies on solving `A X = B`, a 2 by 2 system.
/// `A` is read from the precomputed `hessian`, while `B` is re-accumulated on every call
/// from `grad` and the difference between the warped `comp_img` and `ref_img`.
///
/// One invocation handles the tile at (`ABSOLUTE_POS_X`, `ABSOLUTE_POS_Y`); invocations
/// outside the tile grid described by `alignment` return immediately.
///
/// * `ref_img` - reference image, read at the un-warped pixel position.
/// * `alignment` - per-tile flow, updated in place; component 0 is added to the x
///   coordinate and component 1 to the y coordinate.
/// * `comp_img` - image sampled bilinearly at the warped position; its first two
///   extents are used as the image bounds.
/// * `grad` - per-pixel gradient, x component at offset 0 and y component at offset 1.
/// * `hessian` - per-tile 2x2 matrix stored as 4 consecutive values.
/// * `tile_size` - tile edge length in pixels.
///
/// Tiles whose matrix determinant has a magnitude below `1e-10` are left untouched.
#[cube(launch_unchecked)]
fn get_new_flow(
    ref_img: &Tensor<f32>,
    alignment: &mut Tensor<f32>,
    comp_img: &Tensor<f32>,
    grad: &Tensor<f32>,
    hessian: &Tensor<f32>,
    tile_size: u32,
) {
    let imsize_y = comp_img.shape(0);
    let imsize_x = comp_img.shape(1);
    let n_patchs_y = alignment.shape(0);
    let n_patchs_x = alignment.shape(1);
    let patch_idx = ABSOLUTE_POS_X;
    let patch_idy = ABSOLUTE_POS_Y;
    // Discard invocations that do not map to an existing tile.
    if patch_idy >= n_patchs_y || patch_idx >= n_patchs_x {
        return;
    }
    // Offsets of this tile's entries in the alignment and hessian buffers.
    let alig_offset = patch_idy * alignment.stride(0) + patch_idx * alignment.stride(1);
    let hess_offset = patch_idy * hessian.stride(0) + patch_idx * hessian.stride(1);
    // Top-left pixel of the tile.
    let patch_pos_x = tile_size * patch_idx;
    let patch_pos_y = tile_size * patch_idy;
    // A: the 2x2 system matrix, stored row-major as [a00, a01, a10, a11].
    let mut a = Line::empty(4);
    a[0] = hessian[hess_offset + 0];
    a[1] = hessian[hess_offset + 1];
    a[2] = hessian[hess_offset + 2];
    a[3] = hessian[hess_offset + 3];
    // By putting the non-solvable exit this early, the remaining calculations are
    // skipped for burned patches, which represent most of over-exposed images.
    if f32::abs(a[0] * a[3] - a[1] * a[2]) < 1e-10 {
        // The system is not solvable (determinant too close to zero).
        return;
    }
    // B: right-hand side, accumulated over the pixels of the tile.
    let mut b = Line::empty(2);
    b[0] = 0.0;
    b[1] = 0.0;
    // Current flow of this tile: [0] is applied to x, [1] to y.
    let mut local_alignment = Line::empty(2);
    local_alignment[0] = alignment[alig_offset + 0];
    local_alignment[1] = alignment[alig_offset + 1];
    // Scratch storage for the 4 neighbouring samples and the fractional position.
    let mut buffer_val = Line::empty(4);
    let mut pos = Line::empty(2); // y, x
    for i in 0..tile_size {
        for j in 0..tile_size {
            let pixel_global_idx = patch_pos_x + j; // global position on the coarse grey grid.
                                                    // Because of extremity padding, it can be out of bounds.
            let pixel_global_idy = patch_pos_y + i;
            if pixel_global_idx < imsize_x && pixel_global_idy < imsize_y {
                let grad_offset =
                    pixel_global_idy * grad.stride(0) + pixel_global_idx * grad.stride(1);
                let local_gradx = grad[grad_offset + 0];
                let local_grady = grad[grad_offset + 1];
                // Warp I with W(x; p) to compute I(W(x; p))
                let new_idx = local_alignment[0] + pixel_global_idx as f32;
                let new_idy = local_alignment[1] + pixel_global_idy as f32;
                if new_idx >= 0.0
                    && new_idx < imsize_x as f32 - 1.0 // -1 so the +1 neighbour read below stays in bounds
                    && new_idy >= 0.0
                    && new_idy < imsize_y as f32 - 1.0
                // -1 so the +1 neighbour read below stays in bounds
                {
                    // bilinear interpolation
                    // https://www.rollpie.com/post/252
                    // separating the integer (floor) part and the fractional part
                    let floor_x = f32::floor(new_idx);
                    let normalised_pos_x = new_idx - floor_x;
                    let floor_y = f32::floor(new_idy);
                    let normalised_pos_y = new_idy - floor_y;
                    let floor_x = floor_x as i32;
                    let floor_y = floor_y as i32;
                    let ceil_x = floor_x + 1;
                    let ceil_y = floor_y + 1;
                    pos[0] = normalised_pos_y;
                    pos[1] = normalised_pos_x;
                    // Fetch the 4 surrounding samples in the order expected by
                    // `bilinear_interpolation`: (y0,x0), (y0,x1), (y1,x0), (y1,x1).
                    let floor_y_offset = floor_y as u32 * comp_img.stride(0);
                    buffer_val[0] = comp_img[floor_y_offset + floor_x as u32 * comp_img.stride(1)];
                    buffer_val[1] = comp_img[floor_y_offset + ceil_x as u32 * comp_img.stride(1)];
                    let ceil_y_offset = ceil_y as u32 * comp_img.stride(0);
                    buffer_val[2] = comp_img[ceil_y_offset + floor_x as u32 * comp_img.stride(1)];
                    buffer_val[3] = comp_img[ceil_y_offset + ceil_x as u32 * comp_img.stride(1)];
                    let comp_val = bilinear_interpolation(&buffer_val, &pos);
                    let ref_img_offset =
                        pixel_global_idy * ref_img.stride(0) + pixel_global_idx * ref_img.stride(1);
                    // Difference between the warped comparison image and the reference.
                    let gradt = comp_val - ref_img[ref_img_offset];
                    b[0] += -local_gradx * gradt;
                    b[1] += -local_grady * gradt;
                }
            }
        }
    }
    let mut alignment_step = Line::empty(2);
    // Solvability is ensured by the determinant check above.
    solve_2x2(&a, &b, &mut alignment_step);
    // Write back the previous flow plus the step that was just solved for.
    alignment[alig_offset + 0] = local_alignment[0] + alignment_step[0];
    alignment[alig_offset + 1] = local_alignment[1] + alignment_step[1];
}
/// Solves the 2x2 linear system `A x = b` by Cramer's rule.
///
/// `a` holds the matrix row-major as `[a00, a01, a10, a11]`, `b` the right-hand side,
/// and the solution is written into `x`. The determinant is not checked here: the
/// caller must make sure it is non-zero.
#[cube]
fn solve_2x2(a: &Line<f32>, b: &Line<f32>, x: &mut Line<f32>) {
    let determinant = a[0] * a[3] - a[1] * a[2];
    let numerator_0 = a[3] * b[0] - a[1] * b[1];
    let numerator_1 = a[0] * b[1] - a[2] * b[0];
    x[0] = numerator_0 / determinant;
    x[1] = numerator_1 / determinant;
}
/// Bilinearly interpolates between the 4 closest neighbours of a point.
///
/// * `values` - the 4 neighbouring samples, ordered `(y0, x0)`, `(y0, x1)`, `(y1, x0)`,
///   `(y1, x1)`.
/// * `pos` - position inside the cell as `[y, x]`, each component expected in `[0, 1]`.
///
/// Returns the interpolated value.
#[cube]
fn bilinear_interpolation(values: &Line<f32>, pos: &Line<f32>) -> f32 {
    let frac_y = pos[0];
    let frac_x = pos[1];
    // Weights of the "low" neighbours along each axis.
    let rest_x = 1.0 - frac_x;
    let rest_y = 1.0 - frac_y;
    values[0] * rest_x * rest_y
        + values[1] * frac_x * rest_y
        + values[2] * rest_x * frac_y
        + values[3] * frac_x * frac_y
}

156
src/ops/merge.rs Normal file
View File

@ -0,0 +1,156 @@
// use cubecl::prelude::*;
// #[cube]
// fn accumulate(comp_img, alignments, covs, r,
// bayer_mode, iso_kernel, scale, tile_size, CFA_pattern,
// num, den) {
// let output_pixel_idx, output_pixel_idy = cuda.grid(2);
// let output_size_y, output_size_x, _ = num.shape;
// let input_size_y, input_size_x = comp_img.shape;
// if output_pixel_idx >= output_size_x || output_pixel_idy >= output_size_y {
// return;
// }
// if bayer_mode {
// n_channels = 3;
// acc = cuda.local.array(3, dtype=DEFAULT_CUDA_FLOAT_TYPE);
// val = cuda.local.array(3, dtype=DEFAULT_CUDA_FLOAT_TYPE);
// } else {
// n_channels = 1;
// acc = cuda.local.array(1, dtype=DEFAULT_CUDA_FLOAT_TYPE);
// val = cuda.local.array(1, dtype=DEFAULT_CUDA_FLOAT_TYPE);
// }
// // Copying CFA locally. We will read that 9 times, so it's worth it
// let local_cfa = cuda.local.array((2,2), uint8);
// for i in 0..2 {
// for j in 0..2 {
// local_CFA[i,j] = uint8(cfa_pattern[i,j]);
// }
// }
// let coarse_ref_sub_pos = cuda.local.array(2, dtype=DEFAULT_CUDA_FLOAT_TYPE); // y, x
// coarse_ref_sub_pos[0] = output_pixel_idy / scale;
// coarse_ref_sub_pos[1] = output_pixel_idx / scale;
// // fetch of the flow, as early as possible
// let local_optical_flow = cuda.local.array(2, dtype=DEFAULT_CUDA_FLOAT_TYPE);
// let patch_idy = int(coarse_ref_sub_pos[0] / tile_size);
// let patch_idx = int(coarse_ref_sub_pos[1] /tile_size);
// local_optical_flow[0] = alignments[patch_idy, patch_idx, 0];
// local_optical_flow[1] = alignments[patch_idy, patch_idx, 1];
// for chan in 0..n_channels {
// acc[chan] = 0.0;
// val[chan] = 0.0;
// }
// let patch_center_pos = cuda.local.array(2, DEFAULT_CUDA_FLOAT_TYPE); // y, x
// // fetching robustness
// // The robustness coefficient is known for every raw pixel, and implicitly
// // interpolated to HR using nearest-neighbour interpolation.
// let y_r = clamp(round(coarse_ref_sub_pos[0]), 0, r.shape[0]-1);
// let x_r = clamp(round(coarse_ref_sub_pos[1]), 0, r.shape[1]-1);
// let local_r = r[y_r, x_r];
// patch_center_pos[1] = coarse_ref_sub_pos[1] + local_optical_flow[0];
// patch_center_pos[0] = coarse_ref_sub_pos[0] + local_optical_flow[1];
// // updating inbound condition
// if patch_center_pos[1] >= input_size_x || patch_center_pos[0] >= input_size_y {
// return;
// }
// // computing kernel
// if !iso_kernel {
// let interpolated_cov = cuda.local.array((2, 2), dtype = DEFAULT_CUDA_FLOAT_TYPE);
// let cov_i = cuda.local.array((2, 2), dtype=DEFAULT_CUDA_FLOAT_TYPE);
// // fetching the 4 closest covs
// let close_covs = cuda.local.array((2, 2, 2 ,2), DEFAULT_CUDA_FLOAT_TYPE);
// let grey_pos = cuda.local.array(2, DEFAULT_CUDA_FLOAT_TYPE);
// if bayer_mode {
// grey_pos[0] = (patch_center_pos[0] - 0.5) /2; // grey grid is offset and twice as sparse
// grey_pos[1] = (patch_center_pos[1] - 0.5) /2;
// } else {
// grey_pos[0] = patch_center_pos[0]; // grey grid is exactly the coarse grid
// grey_pos[1] = patch_center_pos[1];
// }
// // clipping the coordinates to stay in bound
// let floor_x = int(max(math.floor(grey_pos[1]), 0));
// let floor_y = int(max(math.floor(grey_pos[0]), 0));
// let ceil_x = min(floor_x + 1, covs.shape[1]-1);
// let ceil_y = min(floor_y + 1, covs.shape[0]-1);
// for i in 0..2 {
// for j in range(0, 2) {
// close_covs[0, 0, i, j] = covs[floor_y, floor_x, i, j];
// close_covs[0, 1, i, j] = covs[floor_y, ceil_x, i, j];
// close_covs[1, 0, i, j] = covs[ceil_y, floor_x, i, j];
// close_covs[1, 1, i, j] = covs[ceil_y, ceil_x, i, j];
// }
// }
// // interpolating covs at the desired spot
// interpolate_cov(close_covs, grey_pos, interpolated_cov);
// invert_2x2(interpolated_cov, cov_i);
// }
// let center_x = round(patch_center_pos[1]);
// let center_y = round(patch_center_pos[0]);
// for i in -1..=1 {
// for j in -1..=1 {
// let pixel_idx = center_x + j;
// let pixel_idy = center_y + i;
// // in bound condition
// if (0 <= pixel_idx && pixel_idx< input_size_x) &&
// (0 <= pixel_idy && pixel_idy< input_size_y) {
// // checking if pixel is r, g or b
// let channel = if bayer_mode {
// local_CFA[pixel_idy%2, pixel_idx%2]
// } else {
// 0
// };
// // By fetching the value now, we can compute the kernel weight
// // while it is called from global memory
// let c = comp_img[pixel_idy, pixel_idx];
// // computing distance
// let dist_x = pixel_idx - patch_center_pos[1];
// let dist_y = pixel_idy - patch_center_pos[0];
// // Computing w
// let y = if iso_kernel {
// max(0, 2 * (dist_x * dist_x + dist_y * dist_y))
// } else {
// max(0, quad_mat_prod(cov_i, dist_x, dist_y))
// // y can be slightly negative because of numerical precision.
// // I clamp it to not explode the error with exp
// };
// w = math.exp(-0.5*y)
// //---
// val[channel] += c * w * local_r;
// acc[channel] += w * local_r;
// }
// }
// }
// for chan in 0..n_channels {
// num[output_pixel_idy, output_pixel_idx, chan] += val[chan];
// den[output_pixel_idy, output_pixel_idx, chan] += acc[chan];
// }
// }

12
src/ops/mod.rs Normal file
View File

@ -0,0 +1,12 @@
pub mod avg_pool;
pub mod block_matching;
pub mod decimate_bayer;
pub mod demosaic;
pub mod downsample;
pub mod estimate_kernels;
pub mod exposure;
pub mod fft;
pub mod gauss_pyramid;
pub mod lucas_kanade;
pub mod merge;
pub mod spatial_merge;

2621
src/ops/spatial_merge.rs Normal file

File diff suppressed because it is too large Load Diff

189
src/tensor.rs Normal file
View File

@ -0,0 +1,189 @@
use std::marker::PhantomData;
use cubecl::{
prelude::{ArrayArg, CubePrimitive, TensorHandleRef},
server::Handle,
CubeCount, CubeDim, Runtime,
};
/// A [`Tensor`] whose storage is a `cubecl` server [`Handle`].
pub type GpuTensor<T, const R: usize> = Tensor<T, Handle, R>;
/// A [`Tensor`] whose storage is a host-side `Vec<T>`.
pub type CpuTensor<T, const R: usize> = Tensor<T, Vec<T>, R>;
/// Rank-`R` tensor descriptor: a storage object `D` together with shape and stride
/// metadata.
///
/// The element type `T` is tracked at the type level only (through `PhantomData`).
#[derive(Debug)]
pub struct Tensor<T: CubePrimitive, D, const R: usize> {
    /// Backing storage (a `Handle` for `GpuTensor`, a `Vec<T>` for `CpuTensor`).
    pub(crate) data: D,
    /// Extent of every dimension, outermost first.
    pub(crate) shape: [usize; R],
    /// Stride of every dimension, as supplied to `with_strides` (row-major when built
    /// through `create`).
    pub(crate) stride: [usize; R],
    /// `shape` adjusted by `with_strides` for element types narrower than 4 bytes.
    // NOTE(review): presumably the shape when the buffer is viewed as 4-byte words — confirm.
    pub(crate) word_aligned_shape: [usize; R],
    /// `stride` adjusted by `with_strides` to match `word_aligned_shape`.
    pub(crate) word_aligned_stride: [usize; R],
    /// Ties the element type `T` to the descriptor without storing a value of it.
    _m: PhantomData<T>,
}
impl<T: CubePrimitive, D, const R: usize> Tensor<T, D, R> {
    /// Wraps `data` as a densely packed, row-major tensor of the given `shape`.
    ///
    /// The innermost dimension gets stride 1 and every outer stride is the product of
    /// the extents to its right.
    pub(crate) fn create(data: D, shape: [usize; R]) -> Self {
        let mut strides = [1usize; R];
        for x in (0..R - 1).rev() {
            strides[x] = strides[x + 1] * shape[x + 1];
        }
        Self::with_strides(data, shape, strides)
    }

    /// Wraps `data` with an explicit `shape` and `stride`, and derives the
    /// "word aligned" shape/stride pair.
    ///
    /// The word-aligned pair is selected from `size_of::<T>()` and the innermost extent:
    /// 4-byte elements are left unchanged, while 1- and 2-byte elements have their
    /// extents and strides divided so that they count groups of elements that fill
    /// 4 bytes. The divisions assume the affected extents are multiples of the group
    /// size; nothing checks this.
    ///
    /// # Panics
    ///
    /// Panics with `unimplemented!()` for any other combination of element size and
    /// innermost extent (for example a 1-byte element with an innermost extent of 3,
    /// or an 8-byte element).
    pub(crate) fn with_strides(data: D, shape: [usize; R], stride: [usize; R]) -> Self {
        let mut word_aligned_shape = shape;
        let mut word_aligned_stride = stride;

        match (core::mem::size_of::<T>(), shape[R - 1]) {
            // 1-byte elements, innermost extent 4: one innermost group is one word.
            (1, 4) => {
                word_aligned_shape[R - 1] /= 4;
                for s in &mut word_aligned_stride[..R - 1] {
                    *s /= 4;
                }
            }
            // 1-byte elements, innermost extent 2: two neighbouring groups share a word.
            (1, 2) => {
                word_aligned_shape[R - 1] /= 2;
                word_aligned_shape[R - 2] /= 2;
                word_aligned_stride[R - 2] /= 2;
                for s in &mut word_aligned_stride[..R - 2] {
                    *s /= 4;
                }
            }
            // 1-byte elements, innermost extent 1: four neighbouring elements share a word.
            (1, 1) => {
                word_aligned_shape[R - 2] /= 4;
                for s in &mut word_aligned_stride[..R - 2] {
                    *s /= 4;
                }
            }
            // 2-byte elements, innermost extent 4: one innermost group is two words.
            (2, 4) => {
                word_aligned_shape[R - 1] /= 2;
                for s in &mut word_aligned_stride[..R - 1] {
                    *s /= 2;
                }
            }
            // 2-byte elements, innermost extent 2: one innermost group is one word.
            (2, 2) => {
                word_aligned_shape[R - 1] /= 2;
                // Every stride except the innermost one is halved, including the
                // second-innermost (2 elements == 1 word). This mirrors the `(2, 4)` and
                // `(1, 4)` arms; stopping at `R - 2` would leave that stride at 2 words.
                for s in &mut word_aligned_stride[..R - 1] {
                    *s /= 2;
                }
            }
            // 2-byte elements, innermost extent 1: two neighbouring elements share a word.
            (2, 1) => {
                word_aligned_shape[R - 2] /= 2;
                for s in &mut word_aligned_stride[..R - 2] {
                    *s /= 2;
                }
            }
            // 4-byte elements already are one word each.
            (4, _) => {}
            _ => unimplemented!(),
        };

        Self {
            data,
            shape,
            stride,
            word_aligned_shape,
            word_aligned_stride,
            _m: PhantomData,
        }
    }

    /// Extent of every dimension, outermost first.
    #[inline]
    pub fn shape(&self) -> [usize; R] {
        self.shape
    }

    /// Stride of every dimension, outermost first.
    #[inline]
    pub fn stride(&self) -> [usize; R] {
        self.stride
    }

    /// Number of cubes to launch along x and y when dispatching over the first two
    /// dimensions of this tensor with `CubeDim::default()`.
    ///
    /// `shape[1]` is mapped to x and `shape[0]` to y; the resulting counts are then
    /// divided by `div_x` and `div_y` (integer division).
    ///
    /// # Panics
    ///
    /// Panics if the tensor has fewer than two dimensions or if `div_x`/`div_y` is zero.
    #[inline]
    pub fn cube_count(&self, div_x: u32, div_y: u32) -> CubeCount {
        let dim = CubeDim::default();
        // NOTE(review): x is rounded down while y is rounded up, so trailing columns are
        // not covered when `shape[1]` is not a multiple of `dim.x` — confirm this is
        // intended before relying on full coverage.
        let x = self.shape[1] / dim.x as usize;
        let y = self.shape[0].div_ceil(dim.y as usize);
        CubeCount::Static(x as u32 / div_x, y as u32 / div_y, 1)
    }

    /// Total number of elements (product of all extents).
    #[inline]
    fn len(&self) -> usize {
        self.shape.iter().product()
    }
}
/// Borrowed view of a GPU buffer: a reference to a `Handle` plus its own copy of the
/// shape and stride arrays, tagged with the runtime `Rt`.
#[derive(Debug)]
pub struct CubeGpuTensor<'a, Rt: Runtime, const R: usize> {
    /// Borrowed buffer handle.
    pub(crate) data: &'a Handle,
    /// Extent of every dimension.
    pub(crate) shape: [usize; R],
    /// Stride of every dimension.
    pub(crate) stride: [usize; R],
    /// Ties the runtime type `Rt` to the view without storing a value of it.
    _m: PhantomData<Rt>,
}
impl<'a, Rt: Runtime, const R: usize> CubeGpuTensor<'a, Rt, R> {
pub fn as_tensor_ref(&'a self) -> TensorHandleRef<'a, Rt> {
TensorHandleRef {
handle: self.data,
strides: &self.stride,
shape: &self.shape,
elem_size: 4,
runtime: PhantomData,
}
}
}
impl<T: CubePrimitive, const R: usize> GpuTensor<T, R> {
    /// Describes the whole buffer as a flat array of 4-byte elements.
    ///
    /// The reported length is the element count divided by the number of `T` values
    /// that fit into 4 bytes (at least 1).
    #[inline]
    pub fn as_array_arg<Rt: Runtime>(&self, vectorization: u8) -> ArrayArg<'_, Rt> {
        let elems_per_word = (4 / core::mem::size_of::<T>()).max(1);
        let word_len = self.len() / elems_per_word;
        // NOTE(review): presumably the handle must hold at least `word_len` 4-byte
        // elements for this to be sound — confirm against the `ArrayArg` contract.
        unsafe { ArrayArg::from_raw_parts_and_size(&self.data, word_len, vectorization, 4) }
    }

    /// Tensor reference using the element-wise shape and strides.
    #[inline]
    pub fn as_tensor_ref<Rt: Runtime>(&self) -> TensorHandleRef<'_, Rt> {
        self.handle_ref_with(&self.shape, &self.stride)
    }

    /// Same description as [`Self::as_tensor_ref`], but requires exclusive access to
    /// the tensor.
    #[inline]
    pub fn as_tensor_mut<Rt: Runtime>(&mut self) -> TensorHandleRef<'_, Rt> {
        self.handle_ref_with(&self.shape, &self.stride)
    }

    /// Tensor reference using the word-aligned shape and strides.
    #[inline]
    pub fn as_word_aligned_tensor_ref<Rt: Runtime>(&self) -> TensorHandleRef<'_, Rt> {
        self.handle_ref_with(&self.word_aligned_shape, &self.word_aligned_stride)
    }

    /// Builds a `TensorHandleRef` over this tensor's handle with the given layout.
    /// The element size is always reported as 4 bytes.
    #[inline]
    fn handle_ref_with<'a, Rt: Runtime>(
        &'a self,
        shape: &'a [usize],
        strides: &'a [usize],
    ) -> TensorHandleRef<'a, Rt> {
        TensorHandleRef {
            handle: &self.data,
            strides,
            shape,
            elem_size: 4,
            runtime: PhantomData,
        }
    }
}
impl<T: CubePrimitive, const R: usize> std::ops::Index<[usize; R]> for CpuTensor<T, R> {
    type Output = T;

    /// Returns the element at the multi-dimensional `index`.
    ///
    /// The flat offset is the dot product of `index` with the tensor's strides.
    /// Individual coordinates are not checked against the shape: only the final
    /// offset is bounds-checked against the backing `Vec`.
    ///
    /// # Panics
    ///
    /// Panics if the computed offset is outside the backing storage.
    fn index(&self, index: [usize; R]) -> &Self::Output {
        let offset: usize = std::iter::zip(self.stride, index)
            .map(|(s, i)| s * i)
            .sum();
        // Use the indexing expression rather than `.index(..)`: the method-call form
        // needs `std::ops::Index` to be imported, which this file does not do.
        &self.data[offset]
    }
}

BIN
test.cr2 Normal file

Binary file not shown.

BIN
test.dng Normal file

Binary file not shown.

BIN
test.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

BIN
test_burst1/im_00.dng Normal file

Binary file not shown.

783
test_burst1/im_00.dng.pp3 Normal file
View File

@ -0,0 +1,783 @@
[Version]
AppVersion=5.11
Version=351
[General]
ColorLabel=0
InTrash=false
[Exposure]
Auto=false
Clip=0.02
Compensation=0
Brightness=0
Contrast=0
Saturation=-100
Black=-13804
HighlightCompr=0
HighlightComprThreshold=0
ShadowCompr=50
HistogramMatching=false
CurveFromHistogramMatching=false
ClampOOG=true
CurveMode=FilmLike
CurveMode2=Standard
Curve=0;
Curve2=0;
[HLRecovery]
Enabled=true
Method=Coloropp
Hlbl=0
Hlth=1
[Retinex]
Enabled=false
Str=20
Scal=3
Iter=1
Grad=1
Grads=1
Gam=1.3
Slope=3
Median=false
Neigh=80
Offs=0
Vart=200
Limd=8
highl=4
skal=3
complexMethod=normal
RetinexMethod=high
mapMethod=none
viewMethod=none
Retinexcolorspace=Lab
Gammaretinex=none
CDCurve=0;
MAPCurve=0;
CDHCurve=0;
LHCurve=0;
Highlights=0
HighlightTonalWidth=80
Shadows=0
ShadowTonalWidth=80
Radius=40
TransmissionCurve=1;0;0.5;0.34999999999999998;0.34999999999999998;0.59999999999999998;0.75;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
GainTransmissionCurve=1;0;0.10000000000000001;0.34999999999999998;0;0.25;0.25;0.34999999999999998;0.34999999999999998;0.69999999999999996;0.25;0.34999999999999998;0.34999999999999998;1;0.10000000000000001;0;0;
[Local Contrast]
Enabled=false
Radius=80
Amount=0.20000000000000001
Darkness=1
Lightness=1
[Channel Mixer]
Enabled=false
Red=1000;0;0;
Green=0;1000;0;
Blue=0;0;1000;
[Black & White]
Enabled=false
Method=Desaturation
Auto=false
ComplementaryColors=true
Setting=RGB-Rel
Filter=None
MixerRed=33
MixerOrange=33
MixerYellow=33
MixerGreen=33
MixerCyan=33
MixerBlue=33
MixerMagenta=33
MixerPurple=33
GammaRed=0
GammaGreen=0
GammaBlue=0
Algorithm=SP
LuminanceCurve=0;
BeforeCurveMode=Standard
AfterCurveMode=Standard
BeforeCurve=0;
AfterCurve=0;
[Luminance Curve]
Enabled=false
Brightness=0
Contrast=0
Chromaticity=0
Gamutmunse=MUN
RedAndSkinTonesProtection=0
LCredsk=true
LCurve=0;
aCurve=0;
bCurve=0;
ccCurve=0;
chCurve=0;
lhCurve=0;
hhCurve=0;
LcCurve=0;
ClCurve=0;
[Sharpening]
Enabled=false
Contrast=30
Method=rld
Radius=0.5
BlurRadius=0.20000000000000001
Amount=200
Threshold=20;80;2000;1200;
OnlyEdges=false
EdgedetectionRadius=1.8999999999999999
EdgeTolerance=1800
HalocontrolEnabled=false
HalocontrolAmount=85
DeconvRadius=0.48999999999999999
DeconvAmount=100
DeconvDamping=0
DeconvIterations=30
[Vibrance]
Enabled=false
Pastels=40
Saturated=40
PSThreshold=0;75;
ProtectSkins=false
AvoidColorShift=true
PastSatTog=true
SkinTonesCurve=0;
[SharpenEdge]
Enabled=false
Passes=2
Strength=50
ThreeChannels=false
[SharpenMicro]
Enabled=false
Matrix=false
Strength=20
Contrast=20
Uniformity=5
[White Balance]
Enabled=false
Setting=Camera
Temperature=5149
Green=1.1970000000000001
Equal=1
TemperatureBias=0
StandardObserver=TWO_DEGREES
Itcwb_green=0
Itcwb_rangegreen=1
Itcwb_nopurple=false
Itcwb_alg=false
Itcwb_prim=beta
Itcwb_sampling=false
CompatibilityVersion=2
[Color appearance]
Enabled=false
Degree=90
AutoDegree=true
Degreeout=90
AutoDegreeout=true
Surround=Average
complex=normal
ModelCat=16
CatCat=clas
Surrsrc=Average
AdaptLum=16
Badpixsl=0
Model=RawT
Illum=i50
Algorithm=JC
J-Light=0
Q-Bright=0
C-Chroma=0
S-Chroma=0
M-Chroma=0
J-Contrast=0
Q-Contrast=0
H-Hue=0
RSTProtection=0
AdaptScene=2000
AutoAdapscen=true
YbScene=18
Autoybscen=true
SurrSource=false
Gamut=true
Tempout=5003
Autotempout=true
Greenout=1
Tempsc=5003
Greensc=1
Ybout=18
Datacie=false
Tonecie=false
CurveMode=Lightness
CurveMode2=Brightness
CurveMode3=Chroma
Curve=0;
Curve2=0;
Curve3=0;
[Impulse Denoising]
Enabled=false
Threshold=1
[Defringing]
Enabled=false
Radius=2
Threshold=13
HueCurve=1;0.16666666699999999;0;0.34999999999999998;0.34999999999999998;0.34699999999999998;0;0.34999999999999998;0.34999999999999998;0.51366742600000004;0;0.34999999999999998;0.34999999999999998;0.66894457100000004;0;0.34999999999999998;0.34999999999999998;0.82877752459999998;0.97835991;0.34999999999999998;0.34999999999999998;0.99088838270000001;0;0.34999999999999998;0.34999999999999998;
[Dehaze]
Enabled=false
Strength=50
ShowDepthMap=false
Depth=25
Saturation=50
[Directional Pyramid Denoising]
Enabled=false
Enhance=false
Median=false
Luma=48.390000000000001
Ldetail=21.890000000000001
Chroma=6.0999999999999996
Method=Lab
LMethod=SLI
CMethod=MAN
C2Method=MANU
SMethod=shal
MedMethod=soft
RGBMethod=soft
MethodMed=Lonly
Redchro=1.6000000000000001
Bluechro=-1.2
AutoGain=true
Gamma=1.7
Passes=1
LCurve=1;0.050000000000000003;0.14999999999999999;0.34999999999999998;0.34999999999999998;0.14055299539170502;0.39631336405529954;0.34999999999999998;0.34999999999999998;0.19815668202764983;0.096774193548387136;0.34999999999999998;0.34999999999999998;0.25345622119815681;0.91474654377880205;0.34999999999999998;0.34999999999999998;0.55000000000000004;0.040000000000000001;0.34999999999999998;0.34999999999999998;0.83410138248847965;0.73271889400921653;0.34999999999999998;0.34999999999999998;
CCCurve=0;
[EPD]
Enabled=false
Strength=0.5
Gamma=1
EdgeStopping=1.3999999999999999
Scale=1
ReweightingIterates=0
[FattalToneMapping]
Enabled=false
Threshold=30
Amount=20
Anchor=50
[Shadows & Highlights]
Enabled=false
Highlights=0
HighlightTonalWidth=70
Shadows=0
ShadowTonalWidth=30
Radius=40
Lab=false
[ToneEqualizer]
Enabled=false
Band0=0
Band1=0
Band2=0
Band3=0
Band4=0
Band5=0
Regularization=0
Pivot=0
[Crop]
Enabled=false
X=0
Y=0
W=4024
H=3016
FixedRatio=true
Ratio=As Image
Orientation=As Image
Guide=Frame
[Coarse Transformation]
Rotate=0
HorizontalFlip=false
VerticalFlip=false
[Common Properties for Transformations]
Method=log
AutoFill=true
[Rotation]
Degree=0
[Distortion]
Amount=0
[LensProfile]
LcMode=lfauto
LCPFile=
UseDistortion=true
UseVignette=false
UseCA=false
LFCameraMake=
LFCameraModel=
LFLens=
[Perspective]
Method=simple
Horizontal=0
Vertical=0
CameraCropFactor=0
CameraFocalLength=4.1900000000000004
CameraPitch=0
CameraRoll=0
CameraShiftHorizontal=0
CameraShiftVertical=0
CameraYaw=0
ProjectionShiftHorizontal=0
ProjectionPitch=0
ProjectionRotate=0
ProjectionShiftVertical=0
ProjectionYaw=0
ControlLineValues=
ControlLineTypes=
[Gradient]
Enabled=false
Degree=0
Feather=25
Strength=0.59999999999999998
CenterX=0
CenterY=0
[Locallab]
Enabled=false
Selspot=0
[PCVignette]
Enabled=false
Strength=0.59999999999999998
Feather=50
Roundness=50
[CACorrection]
Red=0
Blue=0
[Vignetting Correction]
Amount=0
Radius=50
Strength=1
CenterX=0
CenterY=0
[Resize]
Enabled=false
Scale=0.22
AppliesTo=Cropped area
Method=Lanczos
DataSpecified=3
Width=900
Height=900
LongEdge=900
ShortEdge=900
AllowUpscaling=false
[PostDemosaicSharpening]
Enabled=false
Contrast=17
AutoContrast=true
AutoRadius=true
DeconvRadius=0.59999999999999998
DeconvRadiusOffset=0
DeconvIterCheck=true
DeconvIterations=20
[PostResizeSharpening]
Enabled=false
Contrast=15
Method=rld
Radius=0.5
Amount=200
Threshold=20;80;2000;1200;
OnlyEdges=false
EdgedetectionRadius=1.8999999999999999
EdgeTolerance=1800
HalocontrolEnabled=false
HalocontrolAmount=85
DeconvRadius=0.45000000000000001
DeconvAmount=100
DeconvDamping=0
DeconvIterations=100
[Color Management]
InputProfile=(camera)
ToneCurve=false
ApplyLookTable=true
ApplyBaselineExposureOffset=true
ApplyHueSatMap=true
DCPIlluminant=0
WorkingProfile=ProPhoto
WorkingTRC=none
Will=D50
Wprim=def
Wcat=brad
WorkingTRCGamma=2.3999999999999999
WorkingTRCSlope=12.92
Wmidtcie=0
Wsmoothcie=false
Redx=0.73470000000000002
Redy=0.26529999999999998
Grex=0.15959999999999999
Grey=0.84040000000000004
Blux=0.036600000000000001
Bluy=0.0001
Refi=0
Shiftx=0
Shifty=0
LabGridcieALow=0.51763000000000003
LabGridcieBLow=-0.33582000000000001
LabGridcieAHigh=-0.75163000000000002
LabGridcieBHigh=-0.81799999999999995
LabGridcieGx=-0.69164000000000003
LabGridcieGy=-0.70909
LabGridcieWx=-0.18964
LabGridcieWy=-0.16636000000000001
LabGridcieMx=0
LabGridcieMy=0
Preser=0
Fbw=false
TrcExp=false
Gamut=false
OutputProfile=RTv4_sRGB
aIntent=Relative
OutputProfileIntent=Relative
OutputBPC=true
[Wavelet]
Enabled=false
Strength=100
Balance=0
Sigmafin=1
Sigmaton=1
Sigmacol=1
Sigmadir=1
Rangeab=20
Protab=0
Iter=0
MaxLev=7
TilesMethod=full
complexMethod=normal
mixMethod=mix7
sliMethod=sli
quaMethod=cons
DaubMethod=4_
ChoiceLevMethod=all
BackMethod=grey
LevMethod=4
DirMethod=all
CBgreenhigh=0
CBgreenmed=0
CBgreenlow=0
CBbluehigh=0
CBbluemed=0
CBbluelow=0
Ballum=7
Sigm=1
Levden=0
Thrden=0
Limden=0
Balchrom=0
Chromfine=0
Chromcoarse=0
MergeL=20
MergeC=20
Softrad=0
Softradend=0
Strend=50
Detend=0
Thrend=0
Expcontrast=false
Expchroma=false
Expedge=false
expbl=false
Expresid=false
Expfinal=false
Exptoning=false
Expnoise=false
Expclari=false
LabGridALow=0
LabGridBLow=0
LabGridAHigh=0
LabGridBHigh=0
Contrast1=0
Contrast2=0
Contrast3=0
Contrast4=0
Contrast5=0
Contrast6=0
Contrast7=0
Contrast8=0
Contrast9=0
Chroma1=0
Chroma2=0
Chroma3=0
Chroma4=0
Chroma5=0
Chroma6=0
Chroma7=0
Chroma8=0
Chroma9=0
ContExtra=0
HSMethod=with
HLRange=50;75;100;98;
SHRange=0;2;50;25;
Edgcont=0;10;75;40;
Level0noise=0;0;
Level1noise=0;0;
Level2noise=0;0;
Level3noise=0;0;
Leveldenoise=0;0;
Levelsigm=1;1;
ThresholdHighlight=4
ThresholdShadow=5
Edgedetect=90
Edgedetectthr=20
EdgedetectthrHi=0
Edgesensi=60
Edgeampli=10
ThresholdChroma=5
CHromaMethod=link
Medgreinf=less
Ushamethod=clari
CHSLromaMethod=SL
EDMethod=CU
NPMethod=none
BAMethod=none
TMMethod=cont
ChromaLink=0
ContrastCurve=1;0;0.25;0.34999999999999998;0.34999999999999998;0.5;0.75;0.34999999999999998;0.34999999999999998;0.90000000000000002;0;0.34999999999999998;0.34999999999999998;
blcurve=1;0;0;0;0.34999999999999998;0.5;0;0.34999999999999998;0.34999999999999998;1;0;0.34999999999999998;0.34999999999999998;
Pastlev=0;2;30;20;
Satlev=30;45;130;100;
OpacityCurveRG=1;0;0.5;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
OpacityCurveBY=1;0;0.5;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
wavdenoise=1;0;1;0.34999999999999998;0.34999999999999998;0.5;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
wavdenoiseh=1;0;1;0.34999999999999998;0.34999999999999998;0.5;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
OpacityCurveW=1;0;0.34999999999999998;0.34999999999999998;0;0.34999999999999998;0.75;0.34999999999999998;0.34999999999999998;0.59999999999999998;0.75;0.34999999999999998;0.34999999999999998;1;0.34999999999999998;0;0;
OpacityCurveWL=1;0;0.5;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
HHcurve=0;
Wavguidcurve=0;
Wavhuecurve=0;
CHcurve=0;
WavclCurve=0;
Median=false
Medianlev=false
Linkedg=false
CBenab=false
Lipst=false
Skinprotect=0
chrwav=0
bluwav=1
Hueskin=-5;25;170;120;
Edgrad=15
Edgeffect=1
Edgval=0
ThrEdg=10
AvoidColorShift=false
Showmask=false
Oldsh=true
TMr=false
Sigma=1
Offset=1
Lowthr=40
ResidualcontShadow=0
ResidualcontHighlight=0
ThresholdResidShadow=30
ThresholdResidHighLight=70
Residualradius=40
Residualchroma=0
Residualblur=0
Residualblurc=0
ResidualTM=0
ResidualEDGS=1.3999999999999999
ResidualSCALE=1
Residualgamma=1
HueRangeResidual=0
HueRange=-260;-250;-130;-140;
Contrast=0
[Spot removal]
Enabled=false
[Directional Pyramid Equalizer]
Enabled=false
Gamutlab=false
cbdlMethod=bef
Mult0=1
Mult1=1
Mult2=1
Mult3=1
Mult4=1
Mult5=1
Threshold=0.20000000000000001
Skinprotect=0
Hueskin=-5;25;170;120;
[HSV Equalizer]
Enabled=false
HCurve=0;
SCurve=0;
VCurve=0;
[SoftLight]
Enabled=false
Strength=30
[Film Simulation]
Enabled=false
ClutFilename=
Strength=100
[RGB Curves]
Enabled=false
LumaMode=false
rCurve=0;
gCurve=0;
bCurve=0;
[ColorToning]
Enabled=false
Method=LabRegions
Lumamode=true
Twocolor=Std
Redlow=0
Greenlow=0
Bluelow=0
Satlow=0
Balance=0
Sathigh=0
Redmed=0
Greenmed=0
Bluemed=0
Redhigh=0
Greenhigh=0
Bluehigh=0
Autosat=true
OpacityCurve=1;0;0.29999999999999999;0.34999999999999998;0;0.25;0.80000000000000004;0.34999999999999998;0.34999999999999998;0.69999999999999996;0.80000000000000004;0.34999999999999998;0.34999999999999998;1;0.29999999999999999;0;0;
ColorCurve=1;0.050000000000000003;0.62;0.25;0.25;0.58499999999999996;0.11;0.25;0.25;
SatProtectionThreshold=30
SaturatedOpacity=80
Strength=50
HighlightsColorSaturation=60;80;
ShadowsColorSaturation=80;208;
ClCurve=3;0;0;0.34999999999999998;0.65000000000000002;1;1;
Cl2Curve=3;0;0;0.34999999999999998;0.65000000000000002;1;1;
LabGridALow=0
LabGridBLow=0
LabGridAHigh=0
LabGridBHigh=0
LabRegionA_1=0
LabRegionB_1=0
LabRegionSaturation_1=0
LabRegionSlope_1=1
LabRegionOffset_1=0
LabRegionPower_1=1
LabRegionHueMask_1=1;0.16666666699999999;1;0.34999999999999998;0.34999999999999998;0.82877752459999998;1;0.34999999999999998;0.34999999999999998;
LabRegionChromaticityMask_1=1;0;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
LabRegionLightnessMask_1=1;0;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
LabRegionMaskBlur_1=0
LabRegionChannel_1=-1
LabRegionsShowMask=-1
[RAW]
DarkFrame=/szeva
DarkFrameAuto=false
FlatFieldFile=/szeva
FlatFieldAutoSelect=false
FlatFieldFromMetaData=false
FlatFieldBlurRadius=32
FlatFieldBlurType=Area Flatfield
FlatFieldAutoClipControl=false
FlatFieldClipControl=0
CA=false
CAAvoidColourshift=true
CAAutoIterations=2
CARed=0
CABlue=0
HotPixelFilter=false
DeadPixelFilter=false
HotDeadPixelThresh=100
PreExposure=1
[RAW Bayer]
Method=igv
Border=4
ImageNum=1
CcSteps=0
PreBlack0=0
PreBlack1=0
PreBlack2=0
PreBlack3=0
PreTwoGreen=true
LineDenoise=0
LineDenoiseDirection=3
GreenEqThreshold=0
DCBIterations=2
DCBEnhance=true
LMMSEIterations=3
DualDemosaicAutoContrast=true
DualDemosaicContrast=16
PixelShiftMotionCorrectionMethod=1
PixelShiftEperIso=0
PixelShiftSigma=1
PixelShiftShowMotion=false
PixelShiftShowMotionMaskOnly=false
pixelShiftHoleFill=true
pixelShiftAverage=false
pixelShiftMedian=false
pixelShiftGreen=true
pixelShiftBlur=true
pixelShiftSmoothFactor=0.69999999999999996
pixelShiftEqualBright=false
pixelShiftEqualBrightChannel=false
pixelShiftNonGreenCross=true
pixelShiftDemosaicMethod=amaze
PDAFLinesFilter=false
[RAW X-Trans]
Method=3-pass (best)
DualDemosaicAutoContrast=true
DualDemosaicContrast=20
Border=7
CcSteps=0
PreBlackRed=0
PreBlackGreen=0
PreBlackBlue=0
[MetaData]
Mode=1
ExifKeys=Exif.Image.Artist;Exif.Image.Copyright;Exif.Image.ImageDescription;Exif.Image.Make;Exif.Image.Model;Exif.Image.XResolution;Exif.Image.YResolution;Exif.Photo.DateTimeOriginal;Exif.Photo.ExposureBiasValue;Exif.Photo.ExposureTime;Exif.Photo.FNumber;Exif.Photo.Flash;Exif.Photo.FocalLength;Exif.Photo.ISOSpeedRatings;Exif.Photo.LensModel;Exif.Photo.UserComment;
[Film Negative]
Enabled=false
RedRatio=1.3600000000000001
GreenExponent=1.5
BlueRatio=0.85999999999999999
ColorSpace=1
RefInput=0;0;0;
RefOutput=2739.961181640625;2740.07568359375;2739.944580078125;
[RAW Preprocess WB]
Mode=1

1164
test_burst1/im_00.dng.xmp Normal file

File diff suppressed because it is too large Load Diff

BIN
test_burst1/im_01.dng Normal file

Binary file not shown.

783
test_burst1/im_01.dng.pp3 Normal file
View File

@ -0,0 +1,783 @@
[Version]
AppVersion=5.11
Version=351
[General]
ColorLabel=0
InTrash=false
[Exposure]
Auto=false
Clip=0.02
Compensation=0
Brightness=0
Contrast=0
Saturation=0
Black=0
HighlightCompr=0
HighlightComprThreshold=0
ShadowCompr=50
HistogramMatching=true
CurveFromHistogramMatching=true
ClampOOG=true
CurveMode=FilmLike
CurveMode2=Standard
Curve=0;
Curve2=0;
[HLRecovery]
Enabled=true
Method=Coloropp
Hlbl=0
Hlth=1
[Retinex]
Enabled=false
Str=20
Scal=3
Iter=1
Grad=1
Grads=1
Gam=1.3
Slope=3
Median=false
Neigh=80
Offs=0
Vart=200
Limd=8
highl=4
skal=3
complexMethod=normal
RetinexMethod=high
mapMethod=none
viewMethod=none
Retinexcolorspace=Lab
Gammaretinex=none
CDCurve=0;
MAPCurve=0;
CDHCurve=0;
LHCurve=0;
Highlights=0
HighlightTonalWidth=80
Shadows=0
ShadowTonalWidth=80
Radius=40
TransmissionCurve=1;0;0.5;0.34999999999999998;0.34999999999999998;0.59999999999999998;0.75;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
GainTransmissionCurve=1;0;0.10000000000000001;0.34999999999999998;0;0.25;0.25;0.34999999999999998;0.34999999999999998;0.69999999999999996;0.25;0.34999999999999998;0.34999999999999998;1;0.10000000000000001;0;0;
[Local Contrast]
Enabled=false
Radius=80
Amount=0.20000000000000001
Darkness=1
Lightness=1
[Channel Mixer]
Enabled=false
Red=1000;0;0;
Green=0;1000;0;
Blue=0;0;1000;
[Black & White]
Enabled=false
Method=Desaturation
Auto=false
ComplementaryColors=true
Setting=RGB-Rel
Filter=None
MixerRed=33
MixerOrange=33
MixerYellow=33
MixerGreen=33
MixerCyan=33
MixerBlue=33
MixerMagenta=33
MixerPurple=33
GammaRed=0
GammaGreen=0
GammaBlue=0
Algorithm=SP
LuminanceCurve=0;
BeforeCurveMode=Standard
AfterCurveMode=Standard
BeforeCurve=0;
AfterCurve=0;
[Luminance Curve]
Enabled=false
Brightness=0
Contrast=0
Chromaticity=0
Gamutmunse=MUN
RedAndSkinTonesProtection=0
LCredsk=true
LCurve=0;
aCurve=0;
bCurve=0;
ccCurve=0;
chCurve=0;
lhCurve=0;
hhCurve=0;
LcCurve=0;
ClCurve=0;
[Sharpening]
Enabled=false
Contrast=20
Method=usm
Radius=0.5
BlurRadius=0.20000000000000001
Amount=200
Threshold=20;80;2000;1200;
OnlyEdges=false
EdgedetectionRadius=1.8999999999999999
EdgeTolerance=1800
HalocontrolEnabled=false
HalocontrolAmount=85
DeconvRadius=0.75
DeconvAmount=100
DeconvDamping=0
DeconvIterations=30
[Vibrance]
Enabled=false
Pastels=0
Saturated=0
PSThreshold=0;75;
ProtectSkins=false
AvoidColorShift=true
PastSatTog=true
SkinTonesCurve=0;
[SharpenEdge]
Enabled=false
Passes=2
Strength=50
ThreeChannels=false
[SharpenMicro]
Enabled=false
Matrix=false
Strength=20
Contrast=20
Uniformity=5
[White Balance]
Enabled=true
Setting=Camera
Temperature=5149
Green=1.1969842088453122
Equal=1
TemperatureBias=0
StandardObserver=TWO_DEGREES
Itcwb_green=0
Itcwb_rangegreen=1
Itcwb_nopurple=false
Itcwb_alg=false
Itcwb_prim=beta
Itcwb_sampling=false
CompatibilityVersion=2
[Color appearance]
Enabled=false
Degree=90
AutoDegree=true
Degreeout=90
AutoDegreeout=true
Surround=Average
complex=normal
ModelCat=16
CatCat=clas
Surrsrc=Average
AdaptLum=16
Badpixsl=0
Model=RawT
Illum=i50
Algorithm=No
J-Light=0
Q-Bright=0
C-Chroma=0
S-Chroma=0
M-Chroma=0
J-Contrast=0
Q-Contrast=0
H-Hue=0
RSTProtection=0
AdaptScene=2000
AutoAdapscen=true
YbScene=18
Autoybscen=true
SurrSource=false
Gamut=true
Tempout=5003
Autotempout=true
Greenout=1
Tempsc=5003
Greensc=1
Ybout=18
Datacie=false
Tonecie=false
CurveMode=Lightness
CurveMode2=Brightness
CurveMode3=Chroma
Curve=0;
Curve2=0;
Curve3=0;
[Impulse Denoising]
Enabled=false
Threshold=50
[Defringing]
Enabled=false
Radius=2
Threshold=13
HueCurve=1;0.16666666699999999;0;0.34999999999999998;0.34999999999999998;0.34699999999999998;0;0.34999999999999998;0.34999999999999998;0.51366742600000004;0;0.34999999999999998;0.34999999999999998;0.66894457100000004;0;0.34999999999999998;0.34999999999999998;0.82877752459999998;0.97835991;0.34999999999999998;0.34999999999999998;0.99088838270000001;0;0.34999999999999998;0.34999999999999998;
[Dehaze]
Enabled=false
Strength=50
ShowDepthMap=false
Depth=25
Saturation=50
[Directional Pyramid Denoising]
Enabled=false
Enhance=false
Median=false
Luma=0
Ldetail=0
Chroma=15
Method=Lab
LMethod=SLI
CMethod=MAN
C2Method=AUTO
SMethod=shal
MedMethod=soft
RGBMethod=soft
MethodMed=none
Redchro=0
Bluechro=0
AutoGain=true
Gamma=1.7
Passes=1
LCurve=1;0.050000000000000003;0.14999999999999999;0.34999999999999998;0.34999999999999998;0.55000000000000004;0.040000000000000001;0.34999999999999998;0.34999999999999998;
CCCurve=1;0.050000000000000003;0.5;0.34999999999999998;0.34999999999999998;0.34999999999999998;0.050000000000000003;0.34999999999999998;0.34999999999999998;
[EPD]
Enabled=false
Strength=0.5
Gamma=1
EdgeStopping=1.3999999999999999
Scale=1
ReweightingIterates=0
[FattalToneMapping]
Enabled=false
Threshold=30
Amount=20
Anchor=50
[Shadows & Highlights]
Enabled=false
Highlights=0
HighlightTonalWidth=70
Shadows=0
ShadowTonalWidth=30
Radius=40
Lab=false
[ToneEqualizer]
Enabled=false
Band0=0
Band1=0
Band2=0
Band3=0
Band4=0
Band5=0
Regularization=0
Pivot=0
[Crop]
Enabled=false
X=-1
Y=-1
W=4025
H=3017
FixedRatio=true
Ratio=As Image
Orientation=As Image
Guide=Frame
[Coarse Transformation]
Rotate=0
HorizontalFlip=false
VerticalFlip=false
[Common Properties for Transformations]
Method=log
AutoFill=true
[Rotation]
Degree=0
[Distortion]
Amount=0
[LensProfile]
LcMode=lfauto
LCPFile=
UseDistortion=true
UseVignette=true
UseCA=false
LFCameraMake=
LFCameraModel=
LFLens=
[Perspective]
Method=simple
Horizontal=0
Vertical=0
CameraCropFactor=0
CameraFocalLength=0
CameraPitch=0
CameraRoll=0
CameraShiftHorizontal=0
CameraShiftVertical=0
CameraYaw=0
ProjectionShiftHorizontal=0
ProjectionPitch=0
ProjectionRotate=0
ProjectionShiftVertical=0
ProjectionYaw=0
ControlLineValues=
ControlLineTypes=
[Gradient]
Enabled=false
Degree=0
Feather=25
Strength=0.59999999999999998
CenterX=0
CenterY=0
[Locallab]
Enabled=false
Selspot=0
[PCVignette]
Enabled=false
Strength=0.59999999999999998
Feather=50
Roundness=50
[CACorrection]
Red=0
Blue=0
[Vignetting Correction]
Amount=0
Radius=50
Strength=1
CenterX=0
CenterY=0
[Resize]
Enabled=false
Scale=1
AppliesTo=Cropped area
Method=Lanczos
DataSpecified=3
Width=900
Height=900
LongEdge=900
ShortEdge=900
AllowUpscaling=false
[PostDemosaicSharpening]
Enabled=true
Contrast=10
AutoContrast=true
AutoRadius=true
DeconvRadius=0.75
DeconvRadiusOffset=0
DeconvIterCheck=true
DeconvIterations=20
[PostResizeSharpening]
Enabled=false
Contrast=15
Method=rld
Radius=0.5
Amount=200
Threshold=20;80;2000;1200;
OnlyEdges=false
EdgedetectionRadius=1.8999999999999999
EdgeTolerance=1800
HalocontrolEnabled=false
HalocontrolAmount=85
DeconvRadius=0.45000000000000001
DeconvAmount=100
DeconvDamping=0
DeconvIterations=100
[Color Management]
InputProfile=(cameraICC)
ToneCurve=false
ApplyLookTable=true
ApplyBaselineExposureOffset=true
ApplyHueSatMap=true
DCPIlluminant=0
WorkingProfile=ProPhoto
WorkingTRC=none
Will=def
Wprim=def
Wcat=brad
WorkingTRCGamma=2.3999999999999999
WorkingTRCSlope=12.92
Wmidtcie=0
Wsmoothcie=false
Redx=0.73470000000000002
Redy=0.26529999999999998
Grex=0.15959999999999999
Grey=0.84040000000000004
Blux=0.036600000000000001
Bluy=0.0001
Refi=0
Shiftx=0
Shifty=0
LabGridcieALow=0.51763000000000003
LabGridcieBLow=-0.33582000000000001
LabGridcieAHigh=-0.75163000000000002
LabGridcieBHigh=-0.81799999999999995
LabGridcieGx=-0.69164000000000003
LabGridcieGy=-0.70909
LabGridcieWx=-0.18964
LabGridcieWy=-0.16636000000000001
LabGridcieMx=0
LabGridcieMy=0
Preser=0
Fbw=false
TrcExp=false
Gamut=true
OutputProfile=RTv4_sRGB
aIntent=Relative
OutputProfileIntent=Relative
OutputBPC=true
[Wavelet]
Enabled=false
Strength=100
Balance=0
Sigmafin=1
Sigmaton=1
Sigmacol=1
Sigmadir=1
Rangeab=20
Protab=0
Iter=0
MaxLev=7
TilesMethod=full
complexMethod=normal
mixMethod=mix
sliMethod=sli
quaMethod=cons
DaubMethod=4_
ChoiceLevMethod=all
BackMethod=grey
LevMethod=4
DirMethod=all
CBgreenhigh=0
CBgreenmed=0
CBgreenlow=0
CBbluehigh=0
CBbluemed=0
CBbluelow=0
Ballum=7
Sigm=1
Levden=0
Thrden=0
Limden=0
Balchrom=0
Chromfine=0
Chromcoarse=0
MergeL=20
MergeC=20
Softrad=0
Softradend=0
Strend=50
Detend=0
Thrend=0
Expcontrast=false
Expchroma=false
Expedge=false
expbl=false
Expresid=false
Expfinal=false
Exptoning=false
Expnoise=false
Expclari=false
LabGridALow=0
LabGridBLow=0
LabGridAHigh=0
LabGridBHigh=0
Contrast1=0
Contrast2=0
Contrast3=0
Contrast4=0
Contrast5=0
Contrast6=0
Contrast7=0
Contrast8=0
Contrast9=0
Chroma1=0
Chroma2=0
Chroma3=0
Chroma4=0
Chroma5=0
Chroma6=0
Chroma7=0
Chroma8=0
Chroma9=0
ContExtra=0
HSMethod=with
HLRange=50;75;100;98;
SHRange=0;2;50;25;
Edgcont=0;10;75;40;
Level0noise=0;0;
Level1noise=0;0;
Level2noise=0;0;
Level3noise=0;0;
Leveldenoise=0;0;
Levelsigm=1;1;
ThresholdHighlight=4
ThresholdShadow=5
Edgedetect=90
Edgedetectthr=20
EdgedetectthrHi=0
Edgesensi=60
Edgeampli=10
ThresholdChroma=5
CHromaMethod=without
Medgreinf=less
Ushamethod=clari
CHSLromaMethod=SL
EDMethod=CU
NPMethod=none
BAMethod=none
TMMethod=cont
ChromaLink=0
ContrastCurve=1;0;0.25;0.34999999999999998;0.34999999999999998;0.5;0.75;0.34999999999999998;0.34999999999999998;0.90000000000000002;0;0.34999999999999998;0.34999999999999998;
blcurve=1;0;0;0;0.34999999999999998;0.5;0;0.34999999999999998;0.34999999999999998;1;0;0.34999999999999998;0.34999999999999998;
Pastlev=0;2;30;20;
Satlev=30;45;130;100;
OpacityCurveRG=1;0;0.5;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
OpacityCurveBY=1;0;0.5;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
wavdenoise=1;0;1;0.34999999999999998;0.34999999999999998;0.5;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
wavdenoiseh=1;0;1;0.34999999999999998;0.34999999999999998;0.5;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
OpacityCurveW=1;0;0.34999999999999998;0.34999999999999998;0;0.34999999999999998;0.75;0.34999999999999998;0.34999999999999998;0.59999999999999998;0.75;0.34999999999999998;0.34999999999999998;1;0.34999999999999998;0;0;
OpacityCurveWL=1;0;0.5;0.34999999999999998;0.34999999999999998;1;0.5;0.34999999999999998;0.34999999999999998;
HHcurve=0;
Wavguidcurve=0;
Wavhuecurve=0;
CHcurve=0;
WavclCurve=0;
Median=false
Medianlev=false
Linkedg=false
CBenab=false
Lipst=false
Skinprotect=0
chrwav=0
bluwav=1
Hueskin=-5;25;170;120;
Edgrad=15
Edgeffect=1
Edgval=0
ThrEdg=10
AvoidColorShift=false
Showmask=false
Oldsh=true
TMr=false
Sigma=1
Offset=1
Lowthr=40
ResidualcontShadow=0
ResidualcontHighlight=0
ThresholdResidShadow=30
ThresholdResidHighLight=70
Residualradius=40
Residualchroma=0
Residualblur=0
Residualblurc=0
ResidualTM=0
ResidualEDGS=1.3999999999999999
ResidualSCALE=1
Residualgamma=1
HueRangeResidual=0
HueRange=-260;-250;-130;-140;
Contrast=0
[Spot removal]
Enabled=false
[Directional Pyramid Equalizer]
Enabled=false
Gamutlab=false
cbdlMethod=bef
Mult0=1
Mult1=1
Mult2=1
Mult3=1
Mult4=1
Mult5=1
Threshold=0.20000000000000001
Skinprotect=0
Hueskin=-5;25;170;120;
[HSV Equalizer]
Enabled=false
HCurve=0;
SCurve=0;
VCurve=0;
[SoftLight]
Enabled=false
Strength=30
[Film Simulation]
Enabled=false
ClutFilename=
Strength=100
[RGB Curves]
Enabled=false
LumaMode=false
rCurve=0;
gCurve=0;
bCurve=0;
[ColorToning]
Enabled=false
Method=LabRegions
Lumamode=true
Twocolor=Std
Redlow=0
Greenlow=0
Bluelow=0
Satlow=0
Balance=0
Sathigh=0
Redmed=0
Greenmed=0
Bluemed=0
Redhigh=0
Greenhigh=0
Bluehigh=0
Autosat=true
OpacityCurve=1;0;0.29999999999999999;0.34999999999999998;0;0.25;0.80000000000000004;0.34999999999999998;0.34999999999999998;0.69999999999999996;0.80000000000000004;0.34999999999999998;0.34999999999999998;1;0.29999999999999999;0;0;
ColorCurve=1;0.050000000000000003;0.62;0.25;0.25;0.58499999999999996;0.11;0.25;0.25;
SatProtectionThreshold=30
SaturatedOpacity=80
Strength=50
HighlightsColorSaturation=60;80;
ShadowsColorSaturation=80;208;
ClCurve=3;0;0;0.34999999999999998;0.65000000000000002;1;1;
Cl2Curve=3;0;0;0.34999999999999998;0.65000000000000002;1;1;
LabGridALow=0
LabGridBLow=0
LabGridAHigh=0
LabGridBHigh=0
LabRegionA_1=0
LabRegionB_1=0
LabRegionSaturation_1=0
LabRegionSlope_1=1
LabRegionOffset_1=0
LabRegionPower_1=1
LabRegionHueMask_1=1;0.16666666699999999;1;0.34999999999999998;0.34999999999999998;0.82877752459999998;1;0.34999999999999998;0.34999999999999998;
LabRegionChromaticityMask_1=1;0;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
LabRegionLightnessMask_1=1;0;1;0.34999999999999998;0.34999999999999998;1;1;0.34999999999999998;0.34999999999999998;
LabRegionMaskBlur_1=0
LabRegionChannel_1=-1
LabRegionsShowMask=-1
[RAW]
DarkFrame=
DarkFrameAuto=false
FlatFieldFile=
FlatFieldAutoSelect=false
FlatFieldFromMetaData=false
FlatFieldBlurRadius=32
FlatFieldBlurType=Area Flatfield
FlatFieldAutoClipControl=false
FlatFieldClipControl=0
CA=true
CAAvoidColourshift=true
CAAutoIterations=2
CARed=0
CABlue=0
HotPixelFilter=false
DeadPixelFilter=false
HotDeadPixelThresh=100
PreExposure=1
[RAW Bayer]
Method=amaze
Border=4
ImageNum=1
CcSteps=0
PreBlack0=0
PreBlack1=0
PreBlack2=0
PreBlack3=0
PreTwoGreen=true
LineDenoise=0
LineDenoiseDirection=3
GreenEqThreshold=0
DCBIterations=2
DCBEnhance=true
LMMSEIterations=2
DualDemosaicAutoContrast=true
DualDemosaicContrast=20
PixelShiftMotionCorrectionMethod=1
PixelShiftEperIso=0
PixelShiftSigma=1
PixelShiftShowMotion=false
PixelShiftShowMotionMaskOnly=false
pixelShiftHoleFill=true
pixelShiftAverage=false
pixelShiftMedian=false
pixelShiftGreen=true
pixelShiftBlur=true
pixelShiftSmoothFactor=0.69999999999999996
pixelShiftEqualBright=false
pixelShiftEqualBrightChannel=false
pixelShiftNonGreenCross=true
pixelShiftDemosaicMethod=amaze
PDAFLinesFilter=false
[RAW X-Trans]
Method=3-pass (best)
DualDemosaicAutoContrast=true
DualDemosaicContrast=20
Border=7
CcSteps=0
PreBlackRed=0
PreBlackGreen=0
PreBlackBlue=0
[MetaData]
Mode=1
ExifKeys=Exif.Image.Copyright;Exif.Image.Artist;Exif.Image.ImageDescription;Exif.Photo.UserComment;Exif.Image.Make;Exif.Image.Model;Exif.Photo.LensModel;Exif.Photo.FNumber;Exif.Photo.ExposureTime;Exif.Photo.FocalLength;Exif.Photo.ISOSpeedRatings;Exif.Photo.ExposureBiasValue;Exif.Photo.Flash;Exif.Photo.DateTimeOriginal;Exif.Image.XResolution;Exif.Image.YResolution;
[Film Negative]
Enabled=false
RedRatio=1.3600000000000001
GreenExponent=1.5
BlueRatio=0.85999999999999999
ColorSpace=1
RefInput=0;0;0;
RefOutput=0;0;0;
[RAW Preprocess WB]
Mode=1

BIN
test_burst1/im_02.dng Normal file

Binary file not shown.

BIN
test_burst1/im_03.dng Normal file

Binary file not shown.

BIN
test_burst1/im_04.dng Normal file

Binary file not shown.

BIN
test_burst1/im_05.dng Normal file

Binary file not shown.

BIN
test_burst1/im_06.dng Normal file

Binary file not shown.

BIN
test_burst1/im_07.dng Normal file

Binary file not shown.

BIN
test_burst1/im_08.dng Normal file

Binary file not shown.

BIN
test_burst1/im_09.dng Normal file

Binary file not shown.

BIN
test_burst1/im_10.dng Normal file

Binary file not shown.

BIN
test_burst1/im_11.dng Normal file

Binary file not shown.

BIN
test_burst1/im_12.dng Normal file

Binary file not shown.

BIN
test_burst1/im_13.dng Normal file

Binary file not shown.

1
test_burst1/readme.txt Normal file
View File

@ -0,0 +1 @@
Download and unzip the test burst of 13 images taken with the Samsung S8 camera at https://drive.google.com/file/d/1ot0E6guY5AacM-I6-GffHqFzykVb22wV/view?usp=share_link

BIN
x.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 MiB