about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Transceiver52M/arm/convolve.c | 7
-rw-r--r-- Transceiver52M/common/convert.h | 1
-rw-r--r-- Transceiver52M/common/convolve.h | 2
-rw-r--r-- Transceiver52M/osmo-trx.cpp | 8
-rw-r--r-- Transceiver52M/x86/convert.c | 67
-rw-r--r-- Transceiver52M/x86/convolve.c | 106
6 files changed, 142 insertions, 49 deletions
diff --git a/Transceiver52M/arm/convolve.c b/Transceiver52M/arm/convolve.c
index 2b42090..912d0c2 100644
--- a/Transceiver52M/arm/convolve.c
+++ b/Transceiver52M/arm/convolve.c
@@ -58,6 +58,13 @@ static void neon_conv_cmplx_4n(float *x, float *h, float *y, int h_len, int len)
}
#endif
+/* API: Initialize convolve module */
+void convolve_init(void)
+{
+ /* Stub */
+ return;
+}
+
/* API: Aligned complex-real */
int convolve_real(float *x, int x_len,
float *h, int h_len,
diff --git a/Transceiver52M/common/convert.h b/Transceiver52M/common/convert.h
index 4827c28..1d3a180 100644
--- a/Transceiver52M/common/convert.h
+++ b/Transceiver52M/common/convert.h
@@ -3,5 +3,6 @@
void convert_float_short(short *out, const float *in, float scale, int len);
void convert_short_float(float *out, const short *in, int len);
+void convert_init(void);
#endif /* _CONVERT_H_ */
diff --git a/Transceiver52M/common/convolve.h b/Transceiver52M/common/convolve.h
index 08bda0c..43db577 100644
--- a/Transceiver52M/common/convolve.h
+++ b/Transceiver52M/common/convolve.h
@@ -27,4 +27,6 @@ int base_convolve_complex(const float *x, int x_len,
int start, int len,
int step, int offset);
+void convolve_init(void);
+
#endif /* _CONVOLVE_H_ */
diff --git a/Transceiver52M/osmo-trx.cpp b/Transceiver52M/osmo-trx.cpp
index b07ffe8..2d35a60 100644
--- a/Transceiver52M/osmo-trx.cpp
+++ b/Transceiver52M/osmo-trx.cpp
@@ -32,6 +32,11 @@
#include <Logger.h>
#include <Configuration.h>
+extern "C" {
+#include "convolve.h"
+#include "convert.h"
+}
+
/* Samples-per-symbol for downlink path
* 4 - Uses precision modulator (more computation, less distortion)
* 1 - Uses minimized modulator (less computation, more distortion)
@@ -422,6 +427,9 @@ int main(int argc, char *argv[])
RadioDevice::InterfaceType iface = RadioDevice::NORMAL;
struct trx_config config;
+ convolve_init();
+ convert_init();
+
handle_options(argc, argv, &config);
setup_signal_handlers();
diff --git a/Transceiver52M/x86/convert.c b/Transceiver52M/x86/convert.c
index 862a2e7..db1c0fc 100644
--- a/Transceiver52M/x86/convert.c
+++ b/Transceiver52M/x86/convert.c
@@ -25,6 +25,17 @@
#include "config.h"
#endif
+/* Architecture dependent function pointers */
+struct convert_cpu_context {
+ void (*convert_si16_ps_16n) (float *, const short *, int);
+ void (*convert_si16_ps) (float *, const short *, int);
+ void (*convert_scale_ps_si16_16n)(short *, const float *, float, int);
+ void (*convert_scale_ps_si16_8n)(short *, const float *, float, int);
+ void (*convert_scale_ps_si16)(short *, const float *, float, int);
+};
+
+static struct convert_cpu_context c;
+
#ifdef HAVE_SSE3
#include <xmmintrin.h>
#include <emmintrin.h>
@@ -157,53 +168,61 @@ static void _sse_convert_scale_ps_si16_16n(short *restrict out,
_mm_storeu_si128((__m128i *) &out[16 * i + 8], m7);
}
}
-#else /* HAVE_SSE3 */
+#endif
+
+__attribute__((optimize("no-tree-vectorize")))
static void convert_scale_ps_si16(short *out, const float *in,
float scale, int len)
{
for (int i = 0; i < len; i++)
out[i] = in[i] * scale;
}
-#endif
-#ifndef HAVE_SSE4_1
+__attribute__((optimize("no-tree-vectorize")))
static void convert_si16_ps(float *out, const short *in, int len)
{
for (int i = 0; i < len; i++)
out[i] = in[i];
}
-#endif
-void convert_float_short(short *out, const float *in, float scale, int len)
+void convert_init(void)
{
- void (*conv_func)(short *, const float *, float, int);
+ c.convert_scale_ps_si16_16n = convert_scale_ps_si16;
+ c.convert_scale_ps_si16_8n = convert_scale_ps_si16;
+ c.convert_scale_ps_si16 = convert_scale_ps_si16;
+ c.convert_si16_ps_16n = convert_si16_ps;
+ c.convert_si16_ps = convert_si16_ps;
+
+#ifdef HAVE_SSE4_1
+ if (__builtin_cpu_supports("sse4.1")) {
+ c.convert_si16_ps_16n = &_sse_convert_si16_ps_16n;
+ c.convert_si16_ps = &_sse_convert_si16_ps;
+ }
+#endif
#ifdef HAVE_SSE3
+ if (__builtin_cpu_supports("sse3")) {
+ c.convert_scale_ps_si16_16n = _sse_convert_scale_ps_si16_16n;
+ c.convert_scale_ps_si16_8n = _sse_convert_scale_ps_si16_8n;
+ c.convert_scale_ps_si16 = _sse_convert_scale_ps_si16;
+ }
+#endif
+}
+
+void convert_float_short(short *out, const float *in, float scale, int len)
+{
if (!(len % 16))
- conv_func = _sse_convert_scale_ps_si16_16n;
+ c.convert_scale_ps_si16_16n(out, in, scale, len);
else if (!(len % 8))
- conv_func = _sse_convert_scale_ps_si16_8n;
+ c.convert_scale_ps_si16_8n(out, in, scale, len);
else
- conv_func = _sse_convert_scale_ps_si16;
-#else
- conv_func = convert_scale_ps_si16;
-#endif
-
- conv_func(out, in, scale, len);
+ c.convert_scale_ps_si16(out, in, scale, len);
}
void convert_short_float(float *out, const short *in, int len)
{
- void (*conv_func) (float *, const short *, int);
-
-#ifdef HAVE_SSE4_1
if (!(len % 16))
- conv_func = _sse_convert_si16_ps_16n;
+ c.convert_si16_ps_16n(out, in, len);
else
- conv_func = _sse_convert_si16_ps;
-#else
- conv_func = convert_si16_ps;
-#endif
-
- conv_func(out, in, len);
+ c.convert_si16_ps(out, in, len);
}
diff --git a/Transceiver52M/x86/convolve.c b/Transceiver52M/x86/convolve.c
index e2a1dea..2f3b293 100644
--- a/Transceiver52M/x86/convolve.c
+++ b/Transceiver52M/x86/convolve.c
@@ -26,6 +26,31 @@
#include "config.h"
#endif
+/* Architecture dependent function pointers */
+struct convolve_cpu_context {
+ void (*conv_cmplx_4n) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_cmplx_8n) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_cmplx) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real4) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real8) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real12) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real16) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real20) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real4n) (const float *, int, const float *, int, float *,
+ int, int, int, int, int);
+ void (*conv_real) (const float *, int, const float *, int, float *, int,
+ int, int, int, int);
+};
+static struct convolve_cpu_context c;
+
/* Forward declarations from base implementation */
int _base_convolve_real(const float *x, int x_len,
const float *h, int h_len,
@@ -565,45 +590,77 @@ static void sse_conv_cmplx_8n(const float *x, int x_len,
}
#endif
+/* API: Initialize convolve module */
+void convolve_init(void)
+{
+ c.conv_cmplx_4n = (void *)_base_convolve_complex;
+ c.conv_cmplx_8n = (void *)_base_convolve_complex;
+ c.conv_cmplx = (void *)_base_convolve_complex;
+ c.conv_real4 = (void *)_base_convolve_real;
+ c.conv_real8 = (void *)_base_convolve_real;
+ c.conv_real12 = (void *)_base_convolve_real;
+ c.conv_real16 = (void *)_base_convolve_real;
+ c.conv_real20 = (void *)_base_convolve_real;
+ c.conv_real4n = (void *)_base_convolve_real;
+ c.conv_real = (void *)_base_convolve_real;
+
+#ifdef HAVE_SSE3
+ if (__builtin_cpu_supports("sse3")) {
+ c.conv_cmplx_4n = sse_conv_cmplx_4n;
+ c.conv_cmplx_8n = sse_conv_cmplx_8n;
+ c.conv_real4 = sse_conv_real4;
+ c.conv_real8 = sse_conv_real8;
+ c.conv_real12 = sse_conv_real12;
+ c.conv_real16 = sse_conv_real16;
+ c.conv_real20 = sse_conv_real20;
+ c.conv_real4n = sse_conv_real4n;
+ }
+#endif
+}
+
/* API: Aligned complex-real */
int convolve_real(const float *x, int x_len,
const float *h, int h_len,
float *y, int y_len, int start, int len, int step, int offset)
{
- void (*conv_func) (const float *, int, const float *, int, float *, int,
- int, int, int, int) = (void *)_base_convolve_real;
-
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
-#ifdef HAVE_SSE3
if (step <= 4) {
switch (h_len) {
case 4:
- conv_func = sse_conv_real4;
+ c.conv_real4(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 8:
- conv_func = sse_conv_real8;
+ c.conv_real8(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 12:
- conv_func = sse_conv_real12;
+ c.conv_real12(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 16:
- conv_func = sse_conv_real16;
+ c.conv_real16(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
case 20:
- conv_func = sse_conv_real20;
+ c.conv_real20(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
break;
default:
if (!(h_len % 4))
- conv_func = sse_conv_real4n;
+ c.conv_real4n(x, x_len, h, h_len, y, y_len,
+ start, len, step, offset);
+ else
+ c.conv_real(x, x_len, h, h_len, y, y_len, start,
+ len, step, offset);
}
- }
-#endif
-
- conv_func(x, x_len, h, h_len, y, y_len, start, len, step, offset);
+ } else
+ c.conv_real(x, x_len, h, h_len, y, y_len, start, len, step,
+ offset);
return len;
}
@@ -614,25 +671,24 @@ int convolve_complex(const float *x, int x_len,
float *y, int y_len,
int start, int len, int step, int offset)
{
- void (*conv_func) (const float *, int, const float *, int, float *, int,
- int, int, int, int) =
- (void *)_base_convolve_complex;
-
if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
return -1;
memset(y, 0, len * 2 * sizeof(float));
-#ifdef HAVE_SSE3
if (step <= 4) {
if (!(h_len % 8))
- conv_func = sse_conv_cmplx_8n;
+ c.conv_cmplx_8n(x, x_len, h, h_len, y, y_len, start,
+ len, step, offset);
else if (!(h_len % 4))
- conv_func = sse_conv_cmplx_4n;
- }
-#endif
-
- conv_func(x, x_len, h, h_len, y, y_len, start, len, step, offset);
+ c.conv_cmplx_4n(x, x_len, h, h_len, y, y_len, start,
+ len, step, offset);
+ else
+ c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len,
+ step, offset);
+ } else
+ c.conv_cmplx(x, x_len, h, h_len, y, y_len, start, len, step,
+ offset);
return len;
}