about | summary | refs | log | tree | commit | diff | stats
path: root/Transceiver52M/x86/convolve.c
diff options
context:
space:
mode:
Diffstat (limited to 'Transceiver52M/x86/convolve.c')
-rw-r--r-- Transceiver52M/x86/convolve.c | 106
1 files changed, 81 insertions, 25 deletions
diff --git a/Transceiver52M/x86/convolve.c b/Transceiver52M/x86/convolve.c
index e2a1dea..2f3b293 100644
--- a/Transceiver52M/x86/convolve.c
+++ b/Transceiver52M/x86/convolve.c
@@ -26,6 +26,31 @@
#include "config.h"
#endif
+/* Architecture dependent function pointers */
+struct convolve_cpu_context {
+ void (*conv_cmplx_4n) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_cmplx_8n) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_cmplx) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real4) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real8) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real12) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real16) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real20) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real4n) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real) (const float *, int, const float *, int, float *, int,
+ int, int, int, int);
+};
+static struct convolve_cpu_context c;
+
/* Forward declarations from base implementation */
int _base_convolve_real(const float *x, int x_len,
const float *h, int h_len,
@@ -565,45 +590,77 @@ static void sse_conv_cmplx_8n(const float *x, int x_len,
}
#endif
+/* API: Initialize convolve module */
+void convolve_init(void)
+{
+ c.conv_cmplx_4n = (void *)_base_convolve_complex;
+ c.conv_cmplx_8n = (void *)_base_convolve_complex;
+ c.conv_cmplx = (void *)_base_convolve_complex;
+ c.conv_real4 = (void *)_base_convolve_real;
+ c.conv_real8 = (void *)_base_convolve_real;
+ c.conv_real12 = (void *)_base_convolve_real;
+ c.conv_real16 = (void *)_base_convolve_real;
+ c.conv_real20 = (void *)_base_convolve_real;
+ c.conv_real4n = (void *)_base_convolve_real;
+ c.conv_real = (void *)_base_convolve_real;
+
+#ifdef HAVE_SSE3
+ if (__builtin_cpu_supports("sse3")) {
+ c.conv_cmplx_4n = sse_conv_cmplx_4n;
+ c.conv_cmplx_8n = sse_conv_cmplx_8n;
+ c.conv_real4 = sse_conv_real4;
+ c.conv_real8 = sse_conv_real8;
+ c.conv_real12 = sse_conv_real12;
+ c.conv_real16 = sse_conv_real16;
+ c.conv_real20 = sse_conv_real20;
+ c.conv_real4n = sse_conv_real4n;
+ }
+#endif
+}
+
/* API: Aligned complex-real */
int convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len, int start, int len, int step, int offset)
{
- void (*conv_func) (const float *, int, const float *, int, float *, int,
- int, int, int, int) = (void *)_base_convolve_real;
-
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
-#ifdef HAVE_SSE3
if (step <= 4) {
switch (h_len) {
case 4:
- conv_func = sse_conv_real4;
+ c.conv_real4(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 8:
- conv_func = sse_conv_real8;
+ c.conv_real8(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 12:
- conv_func = sse_conv_real12;
+ c.conv_real12(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 16:
- conv_func = sse_conv_real16;
+ c.conv_real16(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 20:
- conv_func = sse_conv_real20;
+ c.conv_real20(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
default:
if (!(h_len % 4))
- conv_func = sse_conv_real4n;
+ c.conv_real4n(x, x_len, h, h_len, y, y_len,
+ start, len, step, offset);
+ else
+ c.conv_real(x, x_len, h, h_len, y, y_len, start,
+ len, step, offset);
}
- }
-#endif
-
- conv_func(x, x_len, h, h_len, y, y_len, start, len, step, offset);
+ } else
+ c.conv_real(x, x_len, h, h_len, y, y_len, start, len, step,
+ offset);
return len;
}
@@ -614,25 +671,24 @@ int convolve_complex(const float *x, int x_len,
float *y, int y_len,
int start, int len, int step, int offset)
{
- void (*conv_func) (const float *, int, const float *, int, float *, int,
- int, int, int, int) =
- (void *)_base_convolve_complex;
-
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
-#ifdef HAVE_SSE3
if (step <= 4) {
if (!(h_len % 8))
- conv_func = sse_conv_cmplx_8n;
+ c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start,
+ len, step, offset);
else if (!(h_len % 4))
- conv_func = sse_conv_cmplx_4n;
- }
-#endif
-
- conv_func(x, x_len, h, h_len, y, y_len, start, len, step, offset);
+ c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start,
+ len, step, offset);
+ else
+ c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
+ } else
+ c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, step,
+ offset);
return len;
}