diff options
Diffstat (limited to 'Transceiver52M/x86/convolve.c')
-rw-r--r-- | Transceiver52M/x86/convolve.c | 106 |
1 files changed, 81 insertions, 25 deletions
diff --git a/Transceiver52M/x86/convolve.c b/Transceiver52M/x86/convolve.c index e2a1dea..2f3b293 100644 --- a/Transceiver52M/x86/convolve.c +++ b/Transceiver52M/x86/convolve.c @@ -26,6 +26,31 @@ #include "config.h" #endif +/* Architecture dependent function pointers */ +struct convolve_cpu_context { + void (*conv_cmplx_4n) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_cmplx_8n) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_cmplx) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real4) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real8) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real12) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real16) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real20) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real4n) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real) (const float *, int, const float *, int, float *, int, + int, int, int, int); +}; +static struct convolve_cpu_context c; + /* Forward declarations from base implementation */ int _base_convolve_real(const float *x, int x_len, const float *h, int h_len, @@ -565,45 +590,77 @@ static void sse_conv_cmplx_8n(const float *x, int x_len, } #endif +/* API: Initialize convolve module */ +void convolve_init(void) +{ + c.conv_cmplx_4n = (void *)_base_convolve_complex; + c.conv_cmplx_8n = (void *)_base_convolve_complex; + c.conv_cmplx = (void *)_base_convolve_complex; + c.conv_real4 = (void *)_base_convolve_real; + c.conv_real8 = (void *)_base_convolve_real; + c.conv_real12 = (void *)_base_convolve_real; + c.conv_real16 = (void *)_base_convolve_real; + c.conv_real20 = (void 
*)_base_convolve_real; + c.conv_real4n = (void *)_base_convolve_real; + c.conv_real = (void *)_base_convolve_real; + +#ifdef HAVE_SSE3 + if (__builtin_cpu_supports("sse3")) { + c.conv_cmplx_4n = sse_conv_cmplx_4n; + c.conv_cmplx_8n = sse_conv_cmplx_8n; + c.conv_real4 = sse_conv_real4; + c.conv_real8 = sse_conv_real8; + c.conv_real12 = sse_conv_real12; + c.conv_real16 = sse_conv_real16; + c.conv_real20 = sse_conv_real20; + c.conv_real4n = sse_conv_real4n; + } +#endif +} + /* API: Aligned complex-real */ int convolve_real(const float *x, int x_len, const float *h, int h_len, float *y, int y_len, int start, int len, int step, int offset) { - void (*conv_func) (const float *, int, const float *, int, float *, int, - int, int, int, int) = (void *)_base_convolve_real; - if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) return -1; memset(y, 0, len * 2 * sizeof(float)); -#ifdef HAVE_SSE3 if (step <= 4) { switch (h_len) { case 4: - conv_func = sse_conv_real4; + c.conv_real4(x, x_len, h, h_len, y, y_len, start, len, + step, offset); break; case 8: - conv_func = sse_conv_real8; + c.conv_real8(x, x_len, h, h_len, y, y_len, start, len, + step, offset); break; case 12: - conv_func = sse_conv_real12; + c.conv_real12(x, x_len, h, h_len, y, y_len, start, len, + step, offset); break; case 16: - conv_func = sse_conv_real16; + c.conv_real16(x, x_len, h, h_len, y, y_len, start, len, + step, offset); break; case 20: - conv_func = sse_conv_real20; + c.conv_real20(x, x_len, h, h_len, y, y_len, start, len, + step, offset); break; default: if (!(h_len % 4)) - conv_func = sse_conv_real4n; + c.conv_real4n(x, x_len, h, h_len, y, y_len, + start, len, step, offset); + else + c.conv_real(x, x_len, h, h_len, y, y_len, start, + len, step, offset); } - } -#endif - - conv_func(x, x_len, h, h_len, y, y_len, start, len, step, offset); + } else + c.conv_real(x, x_len, h, h_len, y, y_len, start, len, step, + offset); return len; } @@ -614,25 +671,24 @@ int convolve_complex(const float *x, int 
x_len, float *y, int y_len, int start, int len, int step, int offset) { - void (*conv_func) (const float *, int, const float *, int, float *, int, - int, int, int, int) = - (void *)_base_convolve_complex; - if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) return -1; memset(y, 0, len * 2 * sizeof(float)); -#ifdef HAVE_SSE3 if (step <= 4) { if (!(h_len % 8)) - conv_func = sse_conv_cmplx_8n; + c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, + len, step, offset); else if (!(h_len % 4)) - conv_func = sse_conv_cmplx_4n; - } -#endif - - conv_func(x, x_len, h, h_len, y, y_len, start, len, step, offset); + c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, + len, step, offset); + else + c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + } else + c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, step, + offset); return len; } |