/* * file firlib\src\fir_sp_sse_r4.c * brief FIR filtering function in radix 4 vectorized C (single precision IEEE754) * warning radix 4 input and coefficients arrays sizes, SSE4.1 extension support * author * date */ #include "firlib.h" /* * FIR filtering function in radix 4 vectorized C (single precision IEEE754) * warning radix 4 input and coefficients arrays sizes, SSE4.1 extension support */ void fir_sp_sse_r4 ( const float * restrict xk, \ const float * restrict a, \ float * restrict yk, \ int na, \ int nyk){ int i, j, idx; __m128 xmm128_xk0123, xmm128_xk1234, xmm128_xk2345, xmm128_xk3456; __m128 xmm128_a0123; __m128 xmm128_acc; xmm_t xmm128_dp0, xmm128_dp1, xmm128_dp2, xmm128_dp3; /* DEBUG section code for tests */ //xmm_t debug; //debug.m128_vec = xmm128_; //printf("\nxmm xk0123 : %.6f %.6f %.6f %.6f", debug.m128_f32[0], debug.m128_f32[1], debug.m128_f32[2], debug.m128_f32[3]); /* input array loop */ for (i=0; i