From 43fedb656b9e99e1a3445998834918df98c9679a Mon Sep 17 00:00:00 2001 From: Pau Espin Pedrol Date: Tue, 24 Apr 2018 15:22:57 +0200 Subject: Move arch specific fiels to arch subdir Take the chance to update some includes using files available in that subdir to have them ina more uniform way. Change-Id: Ibda3c54fd4dc3f6b845cc373f1a1e6b758c1ea82 --- Transceiver52M/arch/x86/convolve.c | 172 +++++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 Transceiver52M/arch/x86/convolve.c (limited to 'Transceiver52M/arch/x86/convolve.c') diff --git a/Transceiver52M/arch/x86/convolve.c b/Transceiver52M/arch/x86/convolve.c new file mode 100644 index 0000000..eb38f64 --- /dev/null +++ b/Transceiver52M/arch/x86/convolve.c @@ -0,0 +1,172 @@ +/* + * SSE Convolution + * Copyright (C) 2012, 2013 Thomas Tsou + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include "convolve.h" +#include "convolve_sse_3.h" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Architecture dependant function pointers */ +struct convolve_cpu_context { + void (*conv_cmplx_4n) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_cmplx_8n) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_cmplx) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real4) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real8) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real12) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real16) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real20) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real4n) (const float *, int, const float *, int, float *, + int, int, int, int, int); + void (*conv_real) (const float *, int, const float *, int, float *, int, + int, int, int, int); +}; +static struct convolve_cpu_context c; + +/* Forward declarations from base implementation */ +int _base_convolve_real(const float *x, int x_len, + const float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset); + +int _base_convolve_complex(const float *x, int x_len, + const float *h, int h_len, + float *y, int y_len, + int start, int len, + int step, int offset); + +int bounds_check(int x_len, int h_len, int y_len, + int start, int len, int step); + +/* API: Initalize convolve module */ +void convolve_init(void) +{ + c.conv_cmplx_4n = (void *)_base_convolve_complex; + c.conv_cmplx_8n = (void *)_base_convolve_complex; + c.conv_cmplx = (void *)_base_convolve_complex; + c.conv_real4 = (void *)_base_convolve_real; + c.conv_real8 = (void *)_base_convolve_real; + c.conv_real12 = (void *)_base_convolve_real; + c.conv_real16 = (void *)_base_convolve_real; + c.conv_real20 = (void *)_base_convolve_real; + c.conv_real4n = (void *)_base_convolve_real; + c.conv_real = (void *)_base_convolve_real; + +#if defined(HAVE_SSE3) && defined(HAVE___BUILTIN_CPU_SUPPORTS) + if (__builtin_cpu_supports("sse3")) { + c.conv_cmplx_4n = sse_conv_cmplx_4n; + c.conv_cmplx_8n = sse_conv_cmplx_8n; + c.conv_real4 = sse_conv_real4; + c.conv_real8 = sse_conv_real8; + c.conv_real12 = sse_conv_real12; + c.conv_real16 = sse_conv_real16; + c.conv_real20 = sse_conv_real20; + c.conv_real4n = sse_conv_real4n; + } +#endif +} + +/* API: Aligned complex-real */ +int convolve_real(const float *x, int x_len, + const float *h, int h_len, + float *y, int y_len, int start, int len, int step, int offset) +{ + if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) + return -1; + + memset(y, 0, len * 2 * sizeof(float)); + + if (step <= 4) { + switch (h_len) { + case 4: + c.conv_real4(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + break; + case 8: + c.conv_real8(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + break; + case 12: + c.conv_real12(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + break; + case 16: + c.conv_real16(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + break; + case 20: + c.conv_real20(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + break; + default: + if (!(h_len % 4)) + c.conv_real4n(x, x_len, h, h_len, y, y_len, + start, len, step, offset); + else + c.conv_real(x, x_len, h, h_len, y, y_len, start, + len, step, offset); + } + } else + c.conv_real(x, x_len, h, h_len, y, y_len, start, len, step, + offset); + + return len; +} + +/* API: Aligned complex-complex */ +int convolve_complex(const float *x, int x_len, + const float *h, int h_len, + float *y, int y_len, + int start, int len, int step, int offset) +{ + if (bounds_check(x_len, h_len, y_len, start, len, step) < 0) + return -1; + + memset(y, 0, len * 2 * sizeof(float)); + + if (step <= 4) { + if (!(h_len % 8)) + c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start, + len, step, offset); + else if (!(h_len % 4)) + c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start, + len, step, offset); + else + c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, + step, offset); + } else + c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, step, + offset); + + return len; +} -- cgit v1.2.3