@@ -929,17 +929,59 @@ pub fn sgrproj_solve<T: Pixel>(
929
929
} else {
930
930
sgrproj_box_f_r0 ( & mut f_r1, y, cdef_w, & cdeffed, fi. cpu_feature_level ) ;
931
931
}
932
- for x in 0 ..cdef_w {
933
- let u = i32:: cast_from ( cdeffed. p ( x, y) ) << SGRPROJ_RST_BITS ;
934
- let s = ( i32:: cast_from ( input. p ( x, y) ) << SGRPROJ_RST_BITS ) - u;
935
- let f2 = f_r2_01[ dy] [ x] as i32 - u;
936
- let f1 = f_r1[ x] as i32 - u;
937
- h[ 0 ] [ 0 ] += f2 as f64 * f2 as f64 ;
938
- h[ 1 ] [ 1 ] += f1 as f64 * f1 as f64 ;
939
- h[ 0 ] [ 1 ] += f1 as f64 * f2 as f64 ;
940
- c[ 0 ] += f2 as f64 * s as f64 ;
941
- c[ 1 ] += f1 as f64 * s as f64 ;
932
+
933
+ #[ inline( always) ]
934
+ fn process_line < T : Pixel > (
935
+ h : & mut [ [ f64 ; 2 ] ; 2 ] , c : & mut [ f64 ; 2 ] , cdeffed : & [ T ] , input : & [ T ] ,
936
+ f_r1 : & [ u32 ] , f_r2_ab : & [ u32 ] , cdef_w : usize ,
937
+ ) {
938
+ let cdeffed_it = cdeffed[ ..cdef_w] . iter ( ) ;
939
+ let input_it = input[ ..cdef_w] . iter ( ) ;
940
+ let f_r2_ab_it = f_r2_ab[ ..cdef_w] . iter ( ) ;
941
+ let f_r1_it = f_r1[ ..cdef_w] . iter ( ) ;
942
+
943
+ #[ derive( Debug , Copy , Clone ) ]
944
+ struct Sums {
945
+ h : [ [ i64 ; 2 ] ; 2 ] ,
946
+ c : [ i64 ; 2 ] ,
947
+ }
948
+
949
+ let sums: Sums = cdeffed_it
950
+ . zip ( input_it)
951
+ . zip ( f_r2_ab_it. zip ( f_r1_it) )
952
+ . map ( |( ( & u, & i) , ( & f2, & f1) ) | {
953
+ let u = i32:: cast_from ( u) << SGRPROJ_RST_BITS ;
954
+ let s = ( i32:: cast_from ( i) << SGRPROJ_RST_BITS ) - u;
955
+ let f2 = f2 as i32 - u;
956
+ let f1 = f1 as i32 - u;
957
+ ( s as i64 , f1 as i64 , f2 as i64 )
958
+ } )
959
+ . fold ( Sums { h : [ [ 0 ; 2 ] ; 2 ] , c : [ 0 ; 2 ] } , |sums, ( s, f1, f2) | {
960
+ let mut ret: Sums = sums;
961
+ ret. h [ 0 ] [ 0 ] += f2 * f2;
962
+ ret. h [ 1 ] [ 1 ] += f1 * f1;
963
+ ret. h [ 0 ] [ 1 ] += f1 * f2;
964
+ ret. c [ 0 ] += f2 * s;
965
+ ret. c [ 1 ] += f1 * s;
966
+ ret
967
+ } ) ;
968
+
969
+ h[ 0 ] [ 0 ] += sums. h [ 0 ] [ 0 ] as f64 ;
970
+ h[ 1 ] [ 1 ] += sums. h [ 1 ] [ 1 ] as f64 ;
971
+ h[ 0 ] [ 1 ] += sums. h [ 0 ] [ 1 ] as f64 ;
972
+ c[ 0 ] += sums. c [ 0 ] as f64 ;
973
+ c[ 1 ] += sums. c [ 1 ] as f64 ;
942
974
}
975
+
976
+ process_line (
977
+ & mut h,
978
+ & mut c,
979
+ & cdeffed[ y] ,
980
+ & input[ y] ,
981
+ & f_r1,
982
+ & f_r2_01[ dy] ,
983
+ cdef_w,
984
+ ) ;
943
985
}
944
986
}
945
987
0 commit comments