From aa019b802dc55341437f9b2d0ab376d2ffceda1e Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 11 May 2023 02:31:19 -0700 Subject: [PATCH 1/5] `fn mask_edges_inter::txa`: Align. --- src/lf_mask.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lf_mask.rs b/src/lf_mask.rs index ea6826719..829e75e4d 100644 --- a/src/lf_mask.rs +++ b/src/lf_mask.rs @@ -60,6 +60,9 @@ use crate::include::common::intops::iclip; use crate::include::common::intops::imax; use crate::include::common::intops::imin; use crate::src::tables::TxfmInfo; + +use crate::src::align::Align16; + unsafe extern "C" fn decomp_tx( txa: *mut [[[uint8_t; 32]; 32]; 2], from: RectTxfmSize, @@ -324,7 +327,7 @@ unsafe extern "C" fn mask_edges_inter( &*dav1d_txfm_dimensions.as_ptr().offset(max_tx as isize) as *const TxfmInfo; let mut y = 0; let mut x = 0; - let mut txa: [[[[uint8_t; 32]; 32]; 2]; 2] = [[[[0; 32]; 32]; 2]; 2]; + let mut txa: Align16<[[[[uint8_t; 32]; 32]; 2]; 2]> = Align16([[[[0; 32]; 32]; 2]; 2]); let mut y_off = 0; let mut y_0 = 0; while y_0 < h4 { @@ -332,7 +335,7 @@ unsafe extern "C" fn mask_edges_inter( let mut x_0 = 0; while x_0 < w4 { decomp_tx( - &mut *(*(*(*txa.as_mut_ptr().offset(0)).as_mut_ptr().offset(0)) + &mut *(*(*(*txa.0.as_mut_ptr().offset(0)).as_mut_ptr().offset(0)) .as_mut_ptr() .offset(y_0 as isize)) .as_mut_ptr() From 2b1b26aaa5e694120c4928f8ab864bdb51c5b4e9 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 11 May 2023 13:43:08 -0700 Subject: [PATCH 2/5] `fn dav1d_cdef_brow_{8,16}bpc::lr_bak`: Align. 
--- src/cdef_apply_tmpl_16.rs | 2 +- src/cdef_apply_tmpl_8.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cdef_apply_tmpl_16.rs b/src/cdef_apply_tmpl_16.rs index b7839718e..725ef9a00 100644 --- a/src/cdef_apply_tmpl_16.rs +++ b/src/cdef_apply_tmpl_16.rs @@ -929,7 +929,7 @@ pub unsafe extern "C" fn dav1d_cdef_brow_16bpc( layout, ); } - let mut lr_bak: [[[[pixel; 2]; 8]; 3]; 2] = [[[[0; 2]; 8]; 3]; 2]; + let mut lr_bak: Align16<[[[[pixel; 2]; 8]; 3]; 2]> = Align16([[[[0; 2]; 8]; 3]; 2]); let mut iptrs: [*mut pixel; 3] = [ptrs[0], ptrs[1], ptrs[2]]; edges = ::core::mem::transmute::( edges as libc::c_uint & !(CDEF_HAVE_LEFT as libc::c_int) as libc::c_uint, diff --git a/src/cdef_apply_tmpl_8.rs b/src/cdef_apply_tmpl_8.rs index eb2e077f7..9d72d7a10 100644 --- a/src/cdef_apply_tmpl_8.rs +++ b/src/cdef_apply_tmpl_8.rs @@ -902,7 +902,7 @@ pub unsafe extern "C" fn dav1d_cdef_brow_8bpc( layout, ); } - let mut lr_bak: [[[[pixel; 2]; 8]; 3]; 2] = [[[[0; 2]; 8]; 3]; 2]; + let mut lr_bak: Align16<[[[[pixel; 2]; 8]; 3]; 2]> = Align16([[[[0; 2]; 8]; 3]; 2]); let mut iptrs: [*mut pixel; 3] = [ptrs[0], ptrs[1], ptrs[2]]; edges = ::core::mem::transmute::( edges as libc::c_uint & !(CDEF_HAVE_LEFT as libc::c_int) as libc::c_uint, From edf8d9b3d95c55209007ac658b14b09e3800319b Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 11 May 2023 13:59:33 -0700 Subject: [PATCH 3/5] `fn dav1d_apply_grain::scaling`: `cfg` alignment based on bitdepth and arch. `Align1` is used when alignment wasn't specified in C to avoid having to `cfg` the uses of `scaling`. 
--- src/fg_apply_tmpl_16.rs | 4 ++-- src/fg_apply_tmpl_8.rs | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/fg_apply_tmpl_16.rs b/src/fg_apply_tmpl_16.rs index aaba86941..60278a5ed 100644 --- a/src/fg_apply_tmpl_16.rs +++ b/src/fg_apply_tmpl_16.rs @@ -1,6 +1,6 @@ use crate::include::stddef::*; use crate::include::stdint::*; -use crate::src::align::{Align16, Align64}; +use crate::src::align::{Align1, Align16}; use ::libc; extern "C" { pub type Dav1dRef; @@ -392,7 +392,7 @@ pub unsafe extern "C" fn dav1d_apply_grain_16bpc( in_0: *const Dav1dPicture, ) { let mut grain_lut = Align16([[[0; 82]; 74]; 3]); - let mut scaling = Align64([[0; 4096]; 3]); + let mut scaling = Align1([[0; 4096]; 3]); let rows = (*out).p.h + 31 >> 5; dav1d_prep_grain_16bpc( dsp, diff --git a/src/fg_apply_tmpl_8.rs b/src/fg_apply_tmpl_8.rs index 4deec89c1..c2619b763 100644 --- a/src/fg_apply_tmpl_8.rs +++ b/src/fg_apply_tmpl_8.rs @@ -1,7 +1,8 @@ use crate::include::stddef::*; use crate::include::stdint::*; -use crate::src::align::{Align16, Align64}; +use crate::src::align::Align16; use ::libc; +use cfg_if::cfg_if; extern "C" { pub type Dav1dRef; fn memcpy(_: *mut libc::c_void, _: *const libc::c_void, _: libc::c_ulong) -> *mut libc::c_void; @@ -348,7 +349,17 @@ pub unsafe extern "C" fn dav1d_apply_grain_8bpc( in_0: *const Dav1dPicture, ) { let mut grain_lut = Align16([[[0; 82]; 74]; 3]); - let mut scaling = Align64([[0; 256]; 3]); + cfg_if! { + if #[cfg(target_arch = "x86_64")] { + use crate::src::align::Align64; + + let mut scaling = Align64([[0; 256]; 3]); + } else { + use crate::src::align::Align1; + + let mut scaling = Align1([[0; 256]; 3]); + } + } let rows = (*out).p.h + 31 >> 5; dav1d_prep_grain_8bpc( dsp, From 7c5ca4d64ca2d26bacef420eb66a2aec0d5fa8fb Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 11 May 2023 14:21:12 -0700 Subject: [PATCH 4/5] `fn lr_sbrow::pre_lr_border`: Align. 
--- src/lr_apply_tmpl_16.rs | 2 +- src/lr_apply_tmpl_8.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lr_apply_tmpl_16.rs b/src/lr_apply_tmpl_16.rs index bef395e21..073e3b029 100644 --- a/src/lr_apply_tmpl_16.rs +++ b/src/lr_apply_tmpl_16.rs @@ -869,7 +869,7 @@ unsafe extern "C" fn lr_sbrow( let max_unit_size = unit_size + half_unit_size; let row_y = y + (8 >> ss_ver) * (y != 0) as libc::c_int; let shift_hor = 7 - ss_hor; - let mut pre_lr_border: [[[pixel; 4]; 136]; 2] = [[[0; 4]; 136]; 2]; + let mut pre_lr_border: Align16<[[[pixel; 4]; 136]; 2]> = Align16([[[0; 4]; 136]; 2]); let mut lr: [*const Av1RestorationUnit; 2] = [0 as *const Av1RestorationUnit; 2]; let mut edges: LrEdgeFlags = ((if y > 0 { LR_HAVE_TOP as libc::c_int diff --git a/src/lr_apply_tmpl_8.rs b/src/lr_apply_tmpl_8.rs index f55055f01..56d53077d 100644 --- a/src/lr_apply_tmpl_8.rs +++ b/src/lr_apply_tmpl_8.rs @@ -840,7 +840,7 @@ unsafe extern "C" fn lr_sbrow( let max_unit_size = unit_size + half_unit_size; let row_y = y + (8 >> ss_ver) * (y != 0) as libc::c_int; let shift_hor = 7 - ss_hor; - let mut pre_lr_border: [[[pixel; 4]; 136]; 2] = [[[0; 4]; 136]; 2]; + let mut pre_lr_border: Align16<[[[pixel; 4]; 136]; 2]> = Align16([[[0; 4]; 136]; 2]); let mut lr: [*const Av1RestorationUnit; 2] = [0 as *const Av1RestorationUnit; 2]; let mut edges: LrEdgeFlags = ((if y > 0 { LR_HAVE_TOP as libc::c_int From c2cbe1e757abb08b4327104641629ff625bb5f3f Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 11 May 2023 14:29:47 -0700 Subject: [PATCH 5/5] `fn cdef_filter_{4,8}x{4,8}_neon::tmp_buf`: Align. 
--- src/cdef_tmpl_8.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cdef_tmpl_8.rs b/src/cdef_tmpl_8.rs index 9a0507b00..ead658f1d 100644 --- a/src/cdef_tmpl_8.rs +++ b/src/cdef_tmpl_8.rs @@ -845,8 +845,8 @@ unsafe extern "C" fn cdef_filter_4x4_neon( damping: libc::c_int, edges: CdefEdgeFlags, ) { - let mut tmp_buf = [0; 104]; - let mut tmp = tmp_buf.as_mut_ptr().offset(2 * 8).offset(8); + let mut tmp_buf = Align16([0; 104]); + let mut tmp = tmp_buf.0.as_mut_ptr().offset(2 * 8).offset(8); dav1d_cdef_padding4_8bpc_neon(tmp, dst, stride, left, top, bottom, 4, edges); dav1d_cdef_filter4_8bpc_neon( dst, @@ -875,8 +875,8 @@ unsafe extern "C" fn cdef_filter_4x8_neon( damping: libc::c_int, edges: CdefEdgeFlags, ) { - let mut tmp_buf = [0; 104]; - let mut tmp = tmp_buf.as_mut_ptr().offset(2 * 8).offset(8); + let mut tmp_buf = Align16([0; 104]); + let mut tmp = tmp_buf.0.as_mut_ptr().offset(2 * 8).offset(8); dav1d_cdef_padding4_8bpc_neon(tmp, dst, stride, left, top, bottom, 8, edges); dav1d_cdef_filter4_8bpc_neon( dst,