aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile.am3
-rw-r--r--Transceiver52M/Makefile.am37
-rw-r--r--Transceiver52M/convolve.c676
-rw-r--r--Transceiver52M/convolve.h30
-rw-r--r--Transceiver52M/sigProcLib.cpp635
-rw-r--r--Transceiver52M/sigProcLib.h97
-rw-r--r--Transceiver52M/sigProcLibTest.cpp170
-rw-r--r--config/ax_check_compile_flag.m472
-rw-r--r--config/ax_ext.m4221
-rw-r--r--config/ax_gcc_x86_avx_xgetbv.m479
-rw-r--r--config/ax_gcc_x86_cpuid.m479
-rw-r--r--configure.ac3
12 files changed, 1524 insertions, 578 deletions
diff --git a/Makefile.am b/Makefile.am
index 058876d..e631e6b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -20,6 +20,9 @@
include $(top_srcdir)/Makefile.common
+DESTDIR :=
+
+ACLOCAL_AMFLAGS = -I config
AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(USB_INCLUDES) $(WITH_INCLUDES)
AM_CXXFLAGS = -Wall -pthread -ldl
#AM_CXXFLAGS = -Wall -O2 -NDEBUG -pthread -ldl
diff --git a/Transceiver52M/Makefile.am b/Transceiver52M/Makefile.am
index 1103e6e..da22c21 100644
--- a/Transceiver52M/Makefile.am
+++ b/Transceiver52M/Makefile.am
@@ -21,19 +21,18 @@
include $(top_srcdir)/Makefile.common
+AM_CFLAGS = $(STD_DEFINES_AND_INCLUDES) -std=gnu99 -march=native
+AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES)
+AM_CXXFLAGS = -ldl -lpthread
+
#UHD wins if both are defined
if UHD
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(UHD_CFLAGS)
+AM_CPPFLAGS += $(UHD_CFLAGS)
else
if USRP1
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES) $(USRP_CFLAGS)
-else
-#we should never be here, as this doesn't build if one of the above
-#doesn't exist
-AM_CPPFLAGS = $(STD_DEFINES_AND_INCLUDES)
+AM_CPPFLAGS += $(USRP_CFLAGS)
endif
endif
-AM_CXXFLAGS = -ldl -lpthread
rev2dir = $(datadir)/usrp/rev2
rev4dir = $(datadir)/usrp/rev4
@@ -54,14 +53,14 @@ COMMON_SOURCES = \
sigProcLib.cpp \
DriveLoop.cpp \
Transceiver.cpp \
- DummyLoad.cpp
+ DummyLoad.cpp \
+ convolve.c
libtransceiver_la_SOURCES = \
$(COMMON_SOURCES)
noinst_PROGRAMS = \
- transceiver \
- sigProcLibTest
+ transceiver
noinst_HEADERS = \
Complex.h \
@@ -74,7 +73,8 @@ noinst_HEADERS = \
USRPDevice.h \
DummyLoad.h \
rcvLPF_651.h \
- sendLPF_961.h
+ sendLPF_961.h \
+ convolve.h
transceiver_SOURCES = multiTRX.cpp
transceiver_LDADD = \
@@ -82,22 +82,14 @@ transceiver_LDADD = \
$(GSM_LA) \
$(COMMON_LA) $(SQLITE_LA)
-sigProcLibTest_SOURCES = sigProcLibTest.cpp
-sigProcLibTest_LDADD = \
- libtransceiver.la \
- $(GSM_LA) \
- $(COMMON_LA) $(SQLITE_LA)
-
#uhd wins
if UHD
libtransceiver_la_SOURCES += UHDDevice.cpp
transceiver_LDADD += $(UHD_LIBS)
-sigProcLibTest_LDADD += $(UHD_LIBS)
else
if USRP1
libtransceiver_la_SOURCES += USRPDevice.cpp
transceiver_LDADD += $(USRP_LIBS)
-sigProcLibTest_LDADD += $(USRP_LIBS)
else
#we should never be here, as one of the above mustbe defined for us to build
endif
@@ -105,10 +97,3 @@ endif
MOSTLYCLEANFILES +=
-
-#radioInterface.cpp
-#ComplexTest.cpp
-#sigProcLibTest.cpp
-#sweepGenerator.cpp
-#testRadio.cpp
-
diff --git a/Transceiver52M/convolve.c b/Transceiver52M/convolve.c
new file mode 100644
index 0000000..d52c0c3
--- /dev/null
+++ b/Transceiver52M/convolve.c
@@ -0,0 +1,676 @@
+/*
+ * SSE Convolution
+ * Copyright (C) 2012, 2013 Thomas Tsou <tom@tsou.cc>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <malloc.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SSE3
+#include <xmmintrin.h>
+#include <pmmintrin.h>
+
+/* 4-tap SSE complex-real convolution */
+static void sse_conv_real4(float *restrict x,
+ float *restrict h,
+ float *restrict y,
+ int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7;
+
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[0]);
+ m1 = _mm_load_ps(&h[4]);
+ m7 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+
+ for (int i = 0; i < len; i++) {
+ /* Load (unaligned) input data */
+ m0 = _mm_loadu_ps(&x[2 * i + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 4]);
+ m2 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m3 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Quad multiply */
+ m4 = _mm_mul_ps(m2, m7);
+ m5 = _mm_mul_ps(m3, m7);
+
+ /* Sum and store */
+ m6 = _mm_hadd_ps(m4, m5);
+ m0 = _mm_hadd_ps(m6, m6);
+
+ _mm_store_ss(&y[2 * i + 0], m0);
+ m0 = _mm_shuffle_ps(m0, m0, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m0);
+ }
+}
+
+/* 8-tap SSE complex-real convolution */
+static void sse_conv_real8(float *restrict x,
+ float *restrict h,
+ float *restrict y,
+ int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9;
+
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[0]);
+ m1 = _mm_load_ps(&h[4]);
+ m2 = _mm_load_ps(&h[8]);
+ m3 = _mm_load_ps(&h[12]);
+
+ m4 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m5 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+
+ for (int i = 0; i < len; i++) {
+ /* Load (unaligned) input data */
+ m0 = _mm_loadu_ps(&x[2 * i + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 4]);
+ m2 = _mm_loadu_ps(&x[2 * i + 8]);
+ m3 = _mm_loadu_ps(&x[2 * i + 12]);
+
+ m6 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m7 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m8 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m9 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Quad multiply */
+ m6 = _mm_mul_ps(m6, m4);
+ m7 = _mm_mul_ps(m7, m4);
+ m8 = _mm_mul_ps(m8, m5);
+ m9 = _mm_mul_ps(m9, m5);
+
+ /* Sum and store */
+ m6 = _mm_add_ps(m6, m8);
+ m7 = _mm_add_ps(m7, m9);
+ m6 = _mm_hadd_ps(m6, m7);
+ m6 = _mm_hadd_ps(m6, m6);
+
+ _mm_store_ss(&y[2 * i + 0], m6);
+ m6 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m6);
+ }
+}
+
+/* 12-tap SSE complex-real convolution */
+static void sse_conv_real12(float *restrict x,
+ float *restrict h,
+ float *restrict y,
+ int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7;
+ __m128 m8, m9, m10, m11, m12, m13, m14;
+
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[0]);
+ m1 = _mm_load_ps(&h[4]);
+ m2 = _mm_load_ps(&h[8]);
+ m3 = _mm_load_ps(&h[12]);
+ m4 = _mm_load_ps(&h[16]);
+ m5 = _mm_load_ps(&h[20]);
+
+ m12 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m13 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m14 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(0, 2, 0, 2));
+
+ for (int i = 0; i < len; i++) {
+ /* Load (unaligned) input data */
+ m0 = _mm_loadu_ps(&x[2 * i + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 4]);
+ m2 = _mm_loadu_ps(&x[2 * i + 8]);
+ m3 = _mm_loadu_ps(&x[2 * i + 12]);
+
+ m4 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m5 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m6 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m7 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ m0 = _mm_loadu_ps(&x[2 * i + 16]);
+ m1 = _mm_loadu_ps(&x[2 * i + 20]);
+
+ m8 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m9 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Quad multiply */
+ m0 = _mm_mul_ps(m4, m12);
+ m1 = _mm_mul_ps(m5, m12);
+ m2 = _mm_mul_ps(m6, m13);
+ m3 = _mm_mul_ps(m7, m13);
+ m4 = _mm_mul_ps(m8, m14);
+ m5 = _mm_mul_ps(m9, m14);
+
+ /* Sum and store */
+ m8 = _mm_add_ps(m0, m2);
+ m9 = _mm_add_ps(m1, m3);
+ m10 = _mm_add_ps(m8, m4);
+ m11 = _mm_add_ps(m9, m5);
+
+ m2 = _mm_hadd_ps(m10, m11);
+ m3 = _mm_hadd_ps(m2, m2);
+
+ _mm_store_ss(&y[2 * i + 0], m3);
+ m3 = _mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m3);
+ }
+}
+
+/* 16-tap SSE complex-real convolution */
+static void sse_conv_real16(float *restrict x,
+ float *restrict h,
+ float *restrict y,
+ int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7;
+ __m128 m8, m9, m10, m11, m12, m13, m14, m15;
+
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[0]);
+ m1 = _mm_load_ps(&h[4]);
+ m2 = _mm_load_ps(&h[8]);
+ m3 = _mm_load_ps(&h[12]);
+
+ m4 = _mm_load_ps(&h[16]);
+ m5 = _mm_load_ps(&h[20]);
+ m6 = _mm_load_ps(&h[24]);
+ m7 = _mm_load_ps(&h[28]);
+
+ m12 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m13 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m14 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(0, 2, 0, 2));
+ m15 = _mm_shuffle_ps(m6, m7, _MM_SHUFFLE(0, 2, 0, 2));
+
+ for (int i = 0; i < len; i++) {
+ /* Load (unaligned) input data */
+ m0 = _mm_loadu_ps(&x[2 * i + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 4]);
+ m2 = _mm_loadu_ps(&x[2 * i + 8]);
+ m3 = _mm_loadu_ps(&x[2 * i + 12]);
+
+ m4 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m5 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m6 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m7 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ m0 = _mm_loadu_ps(&x[2 * i + 16]);
+ m1 = _mm_loadu_ps(&x[2 * i + 20]);
+ m2 = _mm_loadu_ps(&x[2 * i + 24]);
+ m3 = _mm_loadu_ps(&x[2 * i + 28]);
+
+ m8 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m9 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m10 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m11 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Quad multiply */
+ m0 = _mm_mul_ps(m4, m12);
+ m1 = _mm_mul_ps(m5, m12);
+ m2 = _mm_mul_ps(m6, m13);
+ m3 = _mm_mul_ps(m7, m13);
+
+ m4 = _mm_mul_ps(m8, m14);
+ m5 = _mm_mul_ps(m9, m14);
+ m6 = _mm_mul_ps(m10, m15);
+ m7 = _mm_mul_ps(m11, m15);
+
+ /* Sum and store */
+ m8 = _mm_add_ps(m0, m2);
+ m9 = _mm_add_ps(m1, m3);
+ m10 = _mm_add_ps(m4, m6);
+ m11 = _mm_add_ps(m5, m7);
+
+ m0 = _mm_add_ps(m8, m10);
+ m1 = _mm_add_ps(m9, m11);
+ m2 = _mm_hadd_ps(m0, m1);
+ m3 = _mm_hadd_ps(m2, m2);
+
+ _mm_store_ss(&y[2 * i + 0], m3);
+ m3 = _mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m3);
+ }
+}
+
+/* 20-tap SSE complex-real convolution */
+static void sse_conv_real20(float *restrict x,
+ float *restrict h,
+ float *restrict y,
+ int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7;
+ __m128 m8, m9, m11, m12, m13, m14, m15;
+
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[0]);
+ m1 = _mm_load_ps(&h[4]);
+ m2 = _mm_load_ps(&h[8]);
+ m3 = _mm_load_ps(&h[12]);
+ m4 = _mm_load_ps(&h[16]);
+ m5 = _mm_load_ps(&h[20]);
+ m6 = _mm_load_ps(&h[24]);
+ m7 = _mm_load_ps(&h[28]);
+ m8 = _mm_load_ps(&h[32]);
+ m9 = _mm_load_ps(&h[36]);
+
+ m11 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m12 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m13 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(0, 2, 0, 2));
+ m14 = _mm_shuffle_ps(m6, m7, _MM_SHUFFLE(0, 2, 0, 2));
+ m15 = _mm_shuffle_ps(m8, m9, _MM_SHUFFLE(0, 2, 0, 2));
+
+ for (int i = 0; i < len; i++) {
+ /* Multiply-accumulate first 12 taps */
+ m0 = _mm_loadu_ps(&x[2 * i + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 4]);
+ m2 = _mm_loadu_ps(&x[2 * i + 8]);
+ m3 = _mm_loadu_ps(&x[2 * i + 12]);
+ m4 = _mm_loadu_ps(&x[2 * i + 16]);
+ m5 = _mm_loadu_ps(&x[2 * i + 20]);
+
+ m6 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m7 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m8 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m9 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+ m0 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(0, 2, 0, 2));
+ m1 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(1, 3, 1, 3));
+
+ m2 = _mm_mul_ps(m6, m11);
+ m3 = _mm_mul_ps(m7, m11);
+ m4 = _mm_mul_ps(m8, m12);
+ m5 = _mm_mul_ps(m9, m12);
+ m6 = _mm_mul_ps(m0, m13);
+ m7 = _mm_mul_ps(m1, m13);
+
+ m0 = _mm_add_ps(m2, m4);
+ m1 = _mm_add_ps(m3, m5);
+ m8 = _mm_add_ps(m0, m6);
+ m9 = _mm_add_ps(m1, m7);
+
+ /* Multiply-accumulate last 8 taps */
+ m0 = _mm_loadu_ps(&x[2 * i + 24]);
+ m1 = _mm_loadu_ps(&x[2 * i + 28]);
+ m2 = _mm_loadu_ps(&x[2 * i + 32]);
+ m3 = _mm_loadu_ps(&x[2 * i + 36]);
+
+ m4 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m5 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m6 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m7 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ m0 = _mm_mul_ps(m4, m14);
+ m1 = _mm_mul_ps(m5, m14);
+ m2 = _mm_mul_ps(m6, m15);
+ m3 = _mm_mul_ps(m7, m15);
+
+ m4 = _mm_add_ps(m0, m2);
+ m5 = _mm_add_ps(m1, m3);
+
+ /* Final sum and store */
+ m0 = _mm_add_ps(m8, m4);
+ m1 = _mm_add_ps(m9, m5);
+ m2 = _mm_hadd_ps(m0, m1);
+ m3 = _mm_hadd_ps(m2, m2);
+
+ _mm_store_ss(&y[2 * i + 0], m3);
+ m3 = _mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m3);
+ }
+}
+
+/* 4*N-tap SSE complex-complex convolution */
+static void sse_conv_cmplx_4n(float *x, float *h, float *y, int h_len, int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7;
+
+ for (int i = 0; i < len; i++) {
+ /* Zero */
+ m6 = _mm_setzero_ps();
+ m7 = _mm_setzero_ps();
+
+ for (int n = 0; n < h_len / 4; n++) {
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[8 * n + 0]);
+ m1 = _mm_load_ps(&h[8 * n + 4]);
+ m2 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m3 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Load (unaligned) input data */
+ m0 = _mm_loadu_ps(&x[2 * i + 8 * n + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 8 * n + 4]);
+ m4 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m5 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Quad multiply */
+ m0 = _mm_mul_ps(m2, m4);
+ m1 = _mm_mul_ps(m3, m5);
+
+ m2 = _mm_mul_ps(m2, m5);
+ m3 = _mm_mul_ps(m3, m4);
+
+ /* Sum */
+ m0 = _mm_sub_ps(m0, m1);
+ m2 = _mm_add_ps(m2, m3);
+
+ /* Accumulate */
+ m6 = _mm_add_ps(m6, m0);
+ m7 = _mm_add_ps(m7, m2);
+ }
+
+ m0 = _mm_hadd_ps(m6, m7);
+ m0 = _mm_hadd_ps(m0, m0);
+
+ _mm_store_ss(&y[2 * i + 0], m0);
+ m0 = _mm_shuffle_ps(m0, m0, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m0);
+ }
+}
+
+/* 8*N-tap SSE complex-complex convolution */
+static void sse_conv_cmplx_8n(float *x, float *h, float *y, int h_len, int len)
+{
+ __m128 m0, m1, m2, m3, m4, m5, m6, m7;
+ __m128 m8, m9, m10, m11, m12, m13, m14, m15;
+
+ for (int i = 0; i < len; i++) {
+ /* Zero */
+ m12 = _mm_setzero_ps();
+ m13 = _mm_setzero_ps();
+ m14 = _mm_setzero_ps();
+ m15 = _mm_setzero_ps();
+
+ for (int n = 0; n < h_len / 8; n++) {
+ /* Load (aligned) filter taps */
+ m0 = _mm_load_ps(&h[16 * n + 0]);
+ m1 = _mm_load_ps(&h[16 * n + 4]);
+ m2 = _mm_load_ps(&h[16 * n + 8]);
+ m3 = _mm_load_ps(&h[16 * n + 12]);
+
+ m4 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m5 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m6 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 2, 0, 2));
+ m7 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Load (unaligned) input data */
+ m0 = _mm_loadu_ps(&x[2 * i + 16 * n + 0]);
+ m1 = _mm_loadu_ps(&x[2 * i + 16 * n + 4]);
+ m2 = _mm_loadu_ps(&x[2 * i + 16 * n + 8]);
+ m3 = _mm_loadu_ps(&x[2 * i + 16 * n + 12]);
+
+ m8 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(0, 2, 0, 2));
+ m9 = _mm_shuffle_ps(m0, m1, _MM_SHUFFLE(1, 3, 1, 3));
+ m10 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(0, 2, 0, 2));
+ m11 = _mm_shuffle_ps(m2, m3, _MM_SHUFFLE(1, 3, 1, 3));
+
+ /* Quad multiply */
+ m0 = _mm_mul_ps(m4, m8);
+ m1 = _mm_mul_ps(m5, m9);
+ m2 = _mm_mul_ps(m6, m10);
+ m3 = _mm_mul_ps(m7, m11);
+
+ m4 = _mm_mul_ps(m4, m9);
+ m5 = _mm_mul_ps(m5, m8);
+ m6 = _mm_mul_ps(m6, m11);
+ m7 = _mm_mul_ps(m7, m10);
+
+ /* Sum */
+ m0 = _mm_sub_ps(m0, m1);
+ m2 = _mm_sub_ps(m2, m3);
+ m4 = _mm_add_ps(m4, m5);
+ m6 = _mm_add_ps(m6, m7);
+
+ /* Accumulate */
+ m12 = _mm_add_ps(m12, m0);
+ m13 = _mm_add_ps(m13, m2);
+ m14 = _mm_add_ps(m14, m4);
+ m15 = _mm_add_ps(m15, m6);
+ }
+
+ m0 = _mm_add_ps(m12, m13);
+ m1 = _mm_add_ps(m14, m15);
+ m2 = _mm_hadd_ps(m0, m1);
+ m2 = _mm_hadd_ps(m2, m2);
+
+ _mm_store_ss(&y[2 * i + 0], m2);
+ m2 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 3, 2, 1));
+ _mm_store_ss(&y[2 * i + 1], m2);
+ }
+}
+#endif
+
+/* Base multiply and accumulate complex-real */
+static void mac_real(float *x, float *h, float *y)
+{
+ y[0] += x[0] * h[0];
+ y[1] += x[1] * h[0];
+}
+
+/* Base multiply and accumulate complex-complex */
+static void mac_cmplx(float *x, float *h, float *y)
+{
+ y[0] += x[0] * h[0] - x[1] * h[1];
+ y[1] += x[0] * h[1] + x[1] * h[0];
+}
+
+/* Base vector complex-complex multiply and accumulate */
+static void mac_real_vec_n(float *x, float *h, float *y,
+ int len, int step, int offset)
+{
+ for (int i = offset; i < len; i += step)
+ mac_real(&x[2 * i], &h[2 * i], y);
+}
+
+/* Base vector complex-complex multiply and accumulate */
+static void mac_cmplx_vec_n(float *x, float *h, float *y,
+ int len, int step, int offset)
+{
+ for (int i = offset; i < len; i += step)
+ mac_cmplx(&x[2 * i], &h[2 * i], y);
+}
+
+/* Base complex-real convolution */
+static int _base_convolve_real(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset)
+{
+ for (int i = 0; i < len; i++) {
+ mac_real_vec_n(&x[2 * (i - (h_len - 1) + start)],
+ h,
+ &y[2 * i], h_len,
+ step, offset);
+ }
+
+ return len;
+}
+
+/* Base complex-complex convolution */
+static int _base_convolve_complex(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset)
+{
+ for (int i = 0; i < len; i++) {
+ mac_cmplx_vec_n(&x[2 * (i - (h_len - 1) + start)],
+ h,
+ &y[2 * i],
+ h_len, step, offset);
+ }
+
+ return len;
+}
+
+/* Buffer validity checks */
+static int bounds_check(int x_len, int h_len, int y_len,
+ int start, int len, int step)
+{
+ if ((x_len < 1) || (h_len < 1) ||
+ (y_len < 1) || (len < 1) || (step < 1)) {
+ fprintf(stderr, "Convolve: Invalid input\n");
+ return -1;
+ }
+
+ if ((start + len > x_len) || (len > y_len) || (x_len < h_len)) {
+ fprintf(stderr, "Convolve: Boundary exception\n");
+ fprintf(stderr, "start: %i, len: %i, x: %i, h: %i, y: %i\n",
+ start, len, x_len, h_len, y_len);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* API: Aligned complex-real */
+int convolve_real(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset)
+{
+ void (*conv_func)(float *, float *, float *, int);
+
+ if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
+ return -1;
+
+ memset(y, 0, len * 2 * sizeof(float));
+
+#ifdef HAVE_SSE3
+ if (step <= 4) {
+ switch (h_len) {
+ case 4:
+ conv_func = sse_conv_real4;
+ break;
+ case 8:
+ conv_func = sse_conv_real8;
+ break;
+ case 12:
+ conv_func = sse_conv_real12;
+ break;
+ case 16:
+ conv_func = sse_conv_real16;
+ break;
+ case 20:
+ conv_func = sse_conv_real20;
+ break;
+ default:
+ conv_func = NULL;
+ }
+ } else {
+ conv_func = NULL;
+ }
+#else
+ conv_func = NULL;
+#endif
+ if (conv_func) {
+ conv_func(&x[2 * (-(h_len - 1) + start)],
+ h, y, len);
+ } else {
+ _base_convolve_real(x, x_len,
+ h, h_len,
+ y, y_len,
+ start, len, step, offset);
+ }
+
+ return len;
+}
+
+/* API: Aligned complex-complex */
+int convolve_complex(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset)
+{
+ void (*conv_func)(float *, float *, float *, int, int);
+
+ if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
+ return -1;
+
+ memset(y, 0, len * 2 * sizeof(float));
+
+#ifdef HAVE_SSE3
+ if (step <= 4) {
+ if (!(h_len % 8))
+ conv_func = sse_conv_cmplx_8n;
+ else if (!(h_len % 4))
+ conv_func = sse_conv_cmplx_4n;
+ } else
+ conv_func = NULL;
+#else
+ conv_func = NULL;
+#endif
+ if (conv_func) {
+ conv_func(&x[2 * (-(h_len - 1) + start)],
+ h, y, h_len, len);
+ } else {
+ _base_convolve_complex(x, x_len,
+ h, h_len,
+ y, y_len,
+ start, len, step, offset);
+ }
+
+ return len;
+}
+
+/* API: Non-aligned (no SSE) complex-real */
+int base_convolve_real(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset)
+{
+ if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
+ return -1;
+
+ memset(y, 0, len * 2 * sizeof(float));
+
+ return _base_convolve_real(x, x_len,
+ h, h_len,
+ y, y_len,
+ start, len, step, offset);
+}
+
+/* API: Non-aligned (no SSE) complex-complex */
+int base_convolve_complex(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset)
+{
+ if (bounds_check(x_len, h_len, y_len, start, len, step) < 0)
+ return -1;
+
+ memset(y, 0, len * 2 * sizeof(float));
+
+ return _base_convolve_complex(x, x_len,
+ h, h_len,
+ y, y_len,
+ start, len, step, offset);
+}
+
+/* Aligned filter tap allocation */
+void *convolve_h_alloc(int len)
+{
+#ifdef HAVE_SSE3
+ return memalign(16, len * 2 * sizeof(float));
+#else
+ return malloc(len * 2 * sizeof(float));
+#endif
+}
diff --git a/Transceiver52M/convolve.h b/Transceiver52M/convolve.h
new file mode 100644
index 0000000..aef9953
--- /dev/null
+++ b/Transceiver52M/convolve.h
@@ -0,0 +1,30 @@
+#ifndef _CONVOLVE_H_
+#define _CONVOLVE_H_
+
+void *convolve_h_alloc(int num);
+
+int convolve_real(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset);
+
+int convolve_complex(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset);
+
+int base_convolve_real(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset);
+
+int base_convolve_complex(float *x, int x_len,
+ float *h, int h_len,
+ float *y, int y_len,
+ int start, int len,
+ int step, int offset);
+
+#endif /* _CONVOLVE_H_ */
diff --git a/Transceiver52M/sigProcLib.cpp b/Transceiver52M/sigProcLib.cpp
index 2ccc714..2f67a67 100644
--- a/Transceiver52M/sigProcLib.cpp
+++ b/Transceiver52M/sigProcLib.cpp
@@ -27,6 +27,10 @@
#include "sendLPF_961.h"
#include "rcvLPF_651.h"
+extern "C" {
+#include "convolve.h"
+}
+
using namespace GSM;
#define TABLESIZE 1024
@@ -45,28 +49,35 @@ signalVector *GMSKRotation = NULL;
signalVector *GMSKReverseRotation = NULL;
/*
- * RACH and midamble correlation waveforms
+ * RACH and midamble correlation waveforms. Store the buffer separately
+ * because we need to allocate it explicitly outside of the signal vector
+ * constructor. This is because C++ (prior to C++11) is unable to natively
+ * perform 16-byte memory alignment required by many SSE instructions.
*/
struct CorrelationSequence {
- CorrelationSequence() : sequence(NULL)
+ CorrelationSequence() : sequence(NULL), buffer(NULL)
{
}
~CorrelationSequence()
{
delete sequence;
+ free(buffer);
}
signalVector *sequence;
+ void *buffer;
float TOA;
complex gain;
};
/*
- * Gaussian and empty modulation pulses
+ * Gaussian and empty modulation pulses. Like the correlation sequences,
+ * store the runtime (Gaussian) buffer separately because of needed alignment
+ * for SSE instructions.
*/
struct PulseSequence {
- PulseSequence() : gaussian(NULL), empty(NULL)
+ PulseSequence() : gaussian(NULL), empty(NULL), buffer(NULL)
{
}
@@ -74,10 +85,12 @@ struct PulseSequence {
{
delete gaussian;
delete empty;
+ free(buffer);
}
signalVector *gaussian;
signalVector *empty;
+ void *buffer;
};
CorrelationSequence *gMidambles[] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL};
@@ -246,7 +259,7 @@ void initGMSKRotationTables(int sps)
bool sigProcLibSetup(int sps)
{
- if ((sps != 0) && (sps != 2) && (sps != 4))
+ if ((sps != 1) && (sps != 2) && (sps != 4))
return false;
initTrigTables();
@@ -295,174 +308,106 @@ void GMSKReverseRotate(signalVector &x) {
}
}
-
-signalVector* convolve(const signalVector *a,
- const signalVector *b,
- signalVector *c,
- ConvType spanType,
- unsigned startIx,
- unsigned len)
+signalVector *convolve(const signalVector *x,
+ const signalVector *h,
+ signalVector *y,
+ ConvType spanType, int start,
+ unsigned len, unsigned step, int offset)
{
- if ((a==NULL) || (b==NULL)) return NULL;
- int La = a->size();
- int Lb = b->size();
-
- int startIndex;
- unsigned int outSize;
- switch (spanType) {
- case FULL_SPAN:
- startIndex = 0;
- outSize = La+Lb-1;
- break;
- case OVERLAP_ONLY:
- startIndex = La;
- outSize = abs(La-Lb)+1;
- break;
- case START_ONLY:
- startIndex = 0;
- outSize = La;
- break;
- case WITH_TAIL:
- startIndex = Lb;
- outSize = La;
- break;
- case NO_DELAY:
- if (Lb % 2)
- startIndex = Lb/2;
- else
- startIndex = Lb/2-1;
- outSize = La;
- break;
- case CUSTOM:
- startIndex = startIx;
- outSize = len;
- break;
- default:
- return NULL;
- }
+ int rc, head = 0, tail = 0;
+ bool alloc = false, append = false;
+ const signalVector *_x = NULL;
-
- if (c==NULL)
- c = new signalVector(outSize);
- else if (c->size()!=outSize)
+ if (!x || !h)
return NULL;
- signalVector::const_iterator aStart = a->begin();
- signalVector::const_iterator bStart = b->begin();
- signalVector::const_iterator aEnd = a->end();
- signalVector::const_iterator bEnd = b->end();
- signalVector::iterator cPtr = c->begin();
- int t = startIndex;
- int stopIndex = startIndex + outSize;
- switch (b->getSymmetry()) {
- case NONE:
- {
- while (t < stopIndex) {
- signalVector::const_iterator aP = aStart+t;
- signalVector::const_iterator bP = bStart;
- if (a->isRealOnly() && b->isRealOnly()) {
- float sum = 0.0;
- while (bP < bEnd) {
- if (aP < aStart) break;
- if (aP < aEnd) sum += (aP->real())*(bP->real());
- aP--;
- bP++;
- }
- *cPtr++ = sum;
- }
- else if (a->isRealOnly()) {
- complex sum = 0.0;
- while (bP < bEnd) {
- if (aP < aStart) break;
- if (aP < aEnd) sum += (*bP)*(aP->real());
- aP--;
- bP++;
- }
- *cPtr++ = sum;
- }
- else if (b->isRealOnly()) {
- complex sum = 0.0;
- while (bP < bEnd) {
- if (aP < aStart) break;
- if (aP < aEnd) sum += (*aP)*(bP->real());
- aP--;
- bP++;
- }
- *cPtr++ = sum;
- }
- else {
- complex sum = 0.0;
- while (bP < bEnd) {
- if (aP < aStart) break;
- if (aP < aEnd) sum += (*aP)*(*bP);
- aP--;
- bP++;
- }
- *cPtr++ = sum;
- }
- t++;
- }
- }
+ switch (spanType) {
+ case START_ONLY:
+ start = 0;
+ head = h->size();
+ len = x->size();
+ append = true;
break;
- case ABSSYM:
- {
- complex sum = 0.0;
- bool isOdd = (bool) (Lb % 2);
- if (isOdd)
- bEnd = bStart + (Lb+1)/2;
- else
- bEnd = bStart + Lb/2;
- while (t < stopIndex) {
- signalVector::const_iterator aP = aStart+t;
- signalVector::const_iterator aPsym = aP-Lb+1;
- signalVector::const_iterator bP = bStart;
- sum = 0.0;
- if (!b->isRealOnly()) {
- while (bP < bEnd) {
- if (aP < aStart) break;
- if (aP == aPsym)
- sum+= (*aP)*(*bP);
- else if ((aP < aEnd) && (aPsym >= aStart))
- sum+= ((*aP)+(*aPsym))*(*bP);
- else if (aP < aEnd)
- sum += (*aP)*(*bP);
- else if (aPsym >= aStart)
- sum += (*aPsym)*(*bP);
- aP--;
- aPsym++;
- bP++;
- }
- }
- else {
- while (bP < bEnd) {
- if (aP < aStart) break;
- if (aP == aPsym)
- sum+= (*aP)*(bP->real());
- else if ((aP < aEnd) && (aPsym >= aStart))
- sum+= ((*aP)+(*aPsym))*(bP->real());
- else if (aP < aEnd)
- sum += (*aP)*(bP->real());
- else if (aPsym >= aStart)
- sum += (*aPsym)*(bP->real());
- aP--;
- aPsym++;
- bP++;
- }
- }
- *cPtr++ = sum;
- t++;
- }
+ case NO_DELAY:
+ start = h->size() / 2;
+ head = start;
+ tail = start;
+ len = x->size();
+ append = true;
+ break;
+ case CUSTOM:
+ if (start < h->size() - 1) {
+ head = h->size() - start;
+ append = true;
+ }
+ if (start + len > x->size()) {
+ tail = start + len - x->size();
+ append = true;
}
break;
default:
return NULL;
- break;
}
-
-
- return c;
-}
+ /*
+ * Error if the output vector is too small. Create the output vector
+ * if the pointer is NULL.
+ */
+ if (y && (len > y->size()))
+ return NULL;
+ if (!y) {
+ y = new signalVector(len);
+ alloc = true;
+ }
+
+ /* Prepend or post-pend the input vector if the parameters require it */
+ if (append)
+ _x = new signalVector(*x, head, tail);
+ else
+ _x = x;
+
+ /*
+ * Four convovle types:
+ * 1. Complex-Real (aligned)
+ * 2. Complex-Complex (aligned)
+ * 3. Complex-Real (!aligned)
+ * 4. Complex-Complex (!aligned)
+ */
+ if (h->isRealOnly() && h->isAligned()) {
+ rc = convolve_real((float *) _x->begin(), _x->size(),
+ (float *) h->begin(), h->size(),
+ (float *) y->begin(), y->size(),
+ start, len, step, offset);
+ } else if (!h->isRealOnly() && h->isAligned()) {
+ rc = convolve_complex((float *) _x->begin(), _x->size(),
+ (float *) h->begin(), h->size(),
+ (float *) y->begin(), y->size(),
+ start, len, step, offset);
+ } else if (h->isRealOnly() && !h->isAligned()) {
+ rc = base_convolve_real((float *) _x->begin(), _x->size(),
+ (float *) h->begin(), h->size(),
+ (float *) y->begin(), y->size(),
+ start, len, step, offset);
+ } else if (!h->isRealOnly() && !h->isAligned()) {
+ rc = base_convolve_complex((float *) _x->begin(), _x->size(),
+ (float *) h->begin(), h->size(),
+ (float *) y->begin(), y->size(),
+ start, len, step, offset);
+ } else {
+ rc = -1;
+ }
+
+ if (append)
+ delete _x;
+
+ if (rc < 0) {
+ if (alloc)
+ delete y;
+ return NULL;
+ }
+
+ return y;
+}
void generateGSMPulse(int sps, int symbolLength)
{
@@ -477,9 +422,17 @@ void generateGSMPulse(int sps, int symbolLength)
GSMPulse->empty->isRealOnly(true);
*(GSMPulse->empty->begin()) = 1.0f;
+ len = sps * symbolLength;
+ if (len < 4)
+ len = 4;
+
/* GSM pulse approximation */
- GSMPulse->gaussian = new signalVector(len);
+ GSMPulse->buffer = convolve_h_alloc(len);
+ GSMPulse->gaussian = new signalVector((complex *)
+ GSMPulse->buffer, 0, len);
+ GSMPulse->gaussian->setAligned(true);
GSMPulse->gaussian->isRealOnly(true);
+
signalVector::iterator xP = GSMPulse->gaussian->begin();
center = (float) (len - 1.0) / 2.0;
@@ -560,31 +513,6 @@ signalVector* reverseConjugate(signalVector *b)
return tmp;
}
-signalVector* correlate(signalVector *a,
- signalVector *b,
- signalVector *c,
- ConvType spanType,
- bool bReversedConjugated,
- unsigned startIx,
- unsigned len)
-{
- signalVector *tmp = NULL;
-
- if (!bReversedConjugated) {
- tmp = reverseConjugate(b);
- }
- else {
- tmp = b;
- }
-
- c = convolve(a,tmp,c,spanType,startIx,len);
-
- if (!bReversedConjugated) delete tmp;
-
- return c;
-}
-
-
/* soft output slicer */
bool vectorSlicer(signalVector *x)
{
@@ -599,12 +527,13 @@ bool vectorSlicer(signalVector *x)
}
return true;
}
-
+
+/* Assume input bits are not differentially encoded */
signalVector *modulateBurst(const BitVector &wBurst, int guardPeriodLength,
int sps, bool emptyPulse)
{
int burstLen;
- signalVector *pulse, modBurst;
+ signalVector *pulse, *shapedBurst, modBurst;
signalVector::iterator modBurstItr;
if (emptyPulse)
@@ -628,7 +557,9 @@ signalVector *modulateBurst(const BitVector &wBurst, int guardPeriodLength,
modBurst.isRealOnly(false);
// filter w/ pulse shape
- signalVector *shapedBurst = convolve(&modBurst, pulse, NULL, NO_DELAY);
+ shapedBurst = convolve(&modBurst, pulse, NULL, START_ONLY);
+ if (!shapedBurst)
+ return NULL;
return shapedBurst;
}
@@ -639,13 +570,12 @@ float sinc(float x)
return 1.0F;
}
-void delayVector(signalVector &wBurst,
- float delay)
+bool delayVector(signalVector &wBurst, float delay)
{
int intOffset = (int) floor(delay);
float fracOffset = delay - intOffset;
-
+
// do fractional shift first, only do it for reasonable offsets
if (fabs(fracOffset) > 1e-2) {
// create sinc function
@@ -656,7 +586,8 @@ void delayVector(signalVector &wBurst,
*sincBurstItr++ = (complex) sinc(M_PI_F*(i-10-fracOffset));
signalVector shiftedBurst(wBurst.size());
- convolve(&wBurst,&sincVector,&shiftedBurst,NO_DELAY);
+ if (!convolve(&wBurst, &sincVector, &shiftedBurst, NO_DELAY))
+ return false;
wBurst.clone(shiftedBurst);
}
@@ -861,25 +792,25 @@ bool generateMidamble(int sps, int tsc)
bool status = true;
complex *data = NULL;
signalVector *autocorr = NULL, *midamble = NULL;
- signalVector *midMidamble = NULL;
+ signalVector *midMidamble = NULL, *_midMidamble = NULL;
- if ((tsc < 0) || (tsc > 7))
+ if ((tsc < 0) || (tsc > 7))
return false;
delete gMidambles[tsc];
-
+
/* Use middle 16 bits of each TSC. Correlation sequence is not pulse shaped */
midMidamble = modulateBurst(gTrainingSequence[tsc].segment(5,16), 0, sps, true);
if (!midMidamble)
return false;
- /* Simulated receive sequence is pulse shaped */
+ /* Simulated receive sequence is pulse shaped */
midamble = modulateBurst(gTrainingSequence[tsc], 0, sps, false);
if (!midamble) {
status = false;
goto release;
}
-
+
// NOTE: Because ideal TSC 16-bit midamble is 66 symbols into burst,
// the ideal TSC has an + 180 degree phase shift,
// due to the pi/2 frequency shift, that
@@ -890,22 +821,32 @@ bool generateMidamble(int sps, int tsc)
conjugateVector(*midMidamble);
- autocorr = correlate(midamble, midMidamble, NULL, NO_DELAY);
+ /* For SSE alignment, reallocate the midamble sequence on 16-byte boundary */
+ data = (complex *) convolve_h_alloc(midMidamble->size());
+ _midMidamble = new signalVector(data, 0, midMidamble->size());
+ _midMidamble->setAligned(true);
+ memcpy(_midMidamble->begin(), midMidamble->begin(),
+ midMidamble->size() * sizeof(complex));
+
+ autocorr = convolve(midamble, _midMidamble, NULL, NO_DELAY);
if (!autocorr) {
status = false;
goto release;
}
gMidambles[tsc] = new CorrelationSequence;
- gMidambles[tsc]->sequence = midMidamble;
- gMidambles[tsc]->gain = peakDetect(*autocorr,&gMidambles[tsc]->TOA,NULL);
+ gMidambles[tsc]->buffer = data;
+ gMidambles[tsc]->sequence = _midMidamble;
+ gMidambles[tsc]->gain = peakDetect(*autocorr,&gMidambles[tsc]->TOA, NULL);
release:
delete autocorr;
delete midamble;
+ delete midMidamble;
if (!status) {
- delete midMidamble;
+ delete _midMidamble;
+ free(data);
gMidambles[tsc] = NULL;
}
@@ -917,7 +858,7 @@ bool generateRACHSequence(int sps)
bool status = true;
complex *data = NULL;
signalVector *autocorr = NULL;
- signalVector *seq0 = NULL, *seq1 = NULL;
+ signalVector *seq0 = NULL, *seq1 = NULL, *_seq1 = NULL;
delete gRACHSequence;
@@ -933,74 +874,100 @@ bool generateRACHSequence(int sps)
conjugateVector(*seq1);
- autocorr = new signalVector(seq0->size());
- if (!convolve(seq0, seq1, autocorr, NO_DELAY)) {
+ /* For SSE alignment, reallocate the midamble sequence on 16-byte boundary */
+ data = (complex *) convolve_h_alloc(seq1->size());
+ _seq1 = new signalVector(data, 0, seq1->size());
+ _seq1->setAligned(true);
+ memcpy(_seq1->begin(), seq1->begin(), seq1->size() * sizeof(complex));
+
+ autocorr = convolve(seq0, _seq1, autocorr, NO_DELAY);
+ if (!autocorr) {
status = false;
goto release;
}
gRACHSequence = new CorrelationSequence;
- gRACHSequence->sequence = seq1;
- gRACHSequence->gain = peakDetect(*autocorr,&gRACHSequence->TOA,NULL);
+ gRACHSequence->sequence = _seq1;
+ gRACHSequence->buffer = data;
+ gRACHSequence->gain = peakDetect(*autocorr,&gRACHSequence->TOA, NULL);
release:
delete autocorr;
delete seq0;
+ delete seq1;
if (!status) {
- delete seq1;
+ delete _seq1;
+ free(data);
gRACHSequence = NULL;
}
return status;
}
-
-bool detectRACHBurst(signalVector &rxBurst,
- float detectThreshold,
- int sps,
- complex *amplitude,
- float* TOA)
-{
- //static complex staticData[500];
-
- //signalVector correlatedRACH(staticData,0,rxBurst.size());
- signalVector correlatedRACH(rxBurst.size());
- correlate(&rxBurst,gRACHSequence->sequence,&correlatedRACH,NO_DELAY,true);
+int detectRACHBurst(signalVector &rxBurst,
+ float thresh,
+ int sps,
+ complex *amp,
+ float *toa)
+{
+ int start, len, num = 0;
+ float _toa, rms, par, avg = 0.0f;
+ complex _amp, *peak;
+ signalVector corr, *sync = gRACHSequence->sequence;
- float meanPower;
- complex peakAmpl = peakDetect(correlatedRACH,TOA,&meanPower);
+ if ((sps != 1) && (sps != 2) && (sps != 4))
+ return -1;
- float valleyPower = 0.0;
+ start = 40 * sps;
+ len = 24 * sps;
+ corr = signalVector(len);
- // check for bogus results
- if ((*TOA < 0.0) || (*TOA > correlatedRACH.size())) {
- *amplitude = 0.0;
- return false;
+ if (!convolve(&rxBurst, sync, &corr,
+ CUSTOM, start, len, sps, 0)) {
+ return -1;
}
- complex *peakPtr = correlatedRACH.begin() + (int) rint(*TOA);
- float numSamples = 0.0;
- for (int i = 57 * sps; i <= 107 * sps; i++) {
- if (peakPtr+i >= correlatedRACH.end())
- break;
- valleyPower += (peakPtr+i)->norm2();
- numSamples++;
- }
+ _amp = peakDetect(corr, &_toa, NULL);
+ if ((_toa < 3) || (_toa > len - 3))
+ goto notfound;
- if (numSamples < 2) {
- *amplitude = 0.0;
- return false;
+ peak = corr.begin() + (int) rint(_toa);
+
+ for (int i = 2 * sps; i <= 5 * sps; i++) {
+ if (peak - i >= corr.begin()) {
+ avg += (peak - i)->norm2();
+ num++;
+ }
+ if (peak + i < corr.end()) {
+ avg += (peak + i)->norm2();
+ num++;
+ }
}
- float RMS = sqrtf(valleyPower/(float) numSamples)+0.00001;
- float peakToMean = peakAmpl.abs()/RMS;
+ if (num < 2)
+ goto notfound;
+
+ rms = sqrtf(avg / (float) num) + 0.00001;
+ par = _amp.abs() / rms;
+ if (par < thresh)
+ goto notfound;
+
+ /* Subtract forward tail bits from delay */
+ if (toa)
+ *toa = _toa - 8 * sps;
+ if (amp)
+ *amp = _amp / gRACHSequence->gain;
- *amplitude = peakAmpl/(gRACHSequence->gain);
+ return 1;
- *TOA = (*TOA) - gRACHSequence->TOA - 8 * sps;
+notfound:
+ if (amp)
+ *amp = 0.0f;
+ if (toa)
+ *toa = 0.0f;
- return (peakToMean > detectThreshold);
+ return 0;
}
bool energyDetect(signalVector &rxBurst,
@@ -1020,120 +987,95 @@ bool energyDetect(signalVector &rxBurst,
if (avgPwr) *avgPwr = energy/windowLength;
return (energy/windowLength > detectThreshold*detectThreshold);
}
-
-bool analyzeTrafficBurst(signalVector &rxBurst,
- unsigned TSC,
- float detectThreshold,
- int sps,
- complex *amplitude,
- float *TOA,
- unsigned maxTOA,
- bool requestChannel,
- signalVector **channelResponse,
- float *channelResponseOffset)
+int analyzeTrafficBurst(signalVector &rxBurst, unsigned tsc, float thresh,
+ int sps, complex *amp, float *toa, unsigned max_toa,
+ bool chan_req, signalVector **chan, float *chan_offset)
{
+ int start, target, len, num = 0;
+ complex _amp, *peak;
+ float _toa, rms, par, avg = 0.0f;
+ signalVector corr, *sync, *_chan;
- assert(TSC<8);
- assert(amplitude);
- assert(TOA);
- assert(gMidambles[TSC]);
-
- if (maxTOA < 3*sps) maxTOA = 3*sps;
- unsigned spanTOA = maxTOA;
- if (spanTOA < 5*sps) spanTOA = 5*sps;
+ if ((tsc < 0) || (tsc > 7) || ((sps != 1) && (sps != 2) && (sps != 4)))
+ return -1;
- unsigned startIx = 66*sps-spanTOA;
- unsigned endIx = (66+16)*sps+spanTOA;
- unsigned windowLen = endIx - startIx;
- unsigned corrLen = 2*maxTOA+1;
+ target = 3 + 58 + 5 + 16;
+ start = (target - 8) * sps;
+ len = (8 + 8 + max_toa) * sps;
- unsigned expectedTOAPeak = (unsigned) round(gMidambles[TSC]->TOA + (gMidambles[TSC]->sequence->size()-1)/2);
+ sync = gMidambles[tsc]->sequence;
+ sync = gMidambles[tsc]->sequence;
+ corr = signalVector(len);
- signalVector burstSegment(rxBurst.begin(),startIx,windowLen);
-
- //static complex staticData[200];
- //signalVector correlatedBurst(staticData,0,corrLen);
- signalVector correlatedBurst(corrLen);
- correlate(&burstSegment, gMidambles[TSC]->sequence,
- &correlatedBurst, CUSTOM,true,
- expectedTOAPeak-maxTOA,corrLen);
+ if (!convolve(&rxBurst, sync, &corr,
+ CUSTOM, start, len, sps, 0)) {
+ return -1;
+ }
- float meanPower;
- *amplitude = peakDetect(correlatedBurst,TOA,&meanPower);
- float valleyPower = 0.0; //amplitude->norm2();
- complex *peakPtr = correlatedBurst.begin() + (int) rint(*TOA);
+ _amp = peakDetect(corr, &_toa, NULL);
+ peak = corr.begin() + (int) rint(_toa);
- // check for bogus results
- if ((*TOA < 0.0) || (*TOA > correlatedBurst.size())) {
- *amplitude = 0.0;
- return false;
- }
+ /* Check for bogus results */
+ if ((_toa < 0.0) || (_toa > corr.size()))
+ goto notfound;
- int numRms = 0;
- for (int i = 2*sps; i <= 5*sps;i++) {
- if (peakPtr - i >= correlatedBurst.begin()) {
- valleyPower += (peakPtr-i)->norm2();
- numRms++;
+ for (int i = 2 * sps; i <= 5 * sps; i++) {
+ if (peak - i >= corr.begin()) {
+ avg += (peak - i)->norm2();
+ num++;
}
- if (peakPtr + i < correlatedBurst.end()) {
- valleyPower += (peakPtr+i)->norm2();
- numRms++;
+ if (peak + i < corr.end()) {
+ avg += (peak + i)->norm2();
+ num++;
}
}
- if (numRms < 2) {
- // check for bogus results
- *amplitude = 0.0;
- return false;
- }
+ if (num < 2)
+ goto notfound;
- float RMS = sqrtf(valleyPower/(float)numRms)+0.00001;
- float peakToMean = (amplitude->abs())/RMS;
+ rms = sqrtf(avg / (float) num) + 0.00001;
+ par = (_amp.abs()) / rms;
+ if (par < thresh)
+ goto notfound;
- // NOTE: Because ideal TSC is 66 symbols into burst,
- // the ideal TSC has an +/- 180 degree phase shift,
- // due to the pi/4 frequency shift, that
- // needs to be accounted for.
-
- *amplitude = (*amplitude)/gMidambles[TSC]->gain;
- *TOA = (*TOA) - (maxTOA);
-
- if (requestChannel && (peakToMean > detectThreshold)) {
- float TOAoffset = maxTOA;
- delayVector(correlatedBurst,-(*TOA));
- // midamble only allows estimation of a 6-tap channel
- signalVector chanVector(6 * sps);
- float maxEnergy = -1.0;
- int maxI = -1;
- for (int i = 0; i < 7; i++) {
- if (TOAoffset + (i-5) * sps + chanVector.size() > correlatedBurst.size())
- continue;
- if (TOAoffset + (i-5) * sps < 0)
- continue;
- correlatedBurst.segmentCopyTo(chanVector,
- (int) floor(TOAoffset + (i - 5) * sps),
- chanVector.size());
- float energy = vectorNorm2(chanVector);
- if (energy > 0.95*maxEnergy) {
- maxI = i;
- maxEnergy = energy;
- }
- }
-
- *channelResponse = new signalVector(chanVector.size());
- correlatedBurst.segmentCopyTo(**channelResponse,
- (int) floor(TOAoffset + (maxI - 5) * sps),
- (*channelResponse)->size());
- scaleVector(**channelResponse, complex(1.0, 0.0) / gMidambles[TSC]->gain);
-
- if (channelResponseOffset)
- *channelResponseOffset = 5 * sps - maxI;
-
+ /*
+ * NOTE: Because ideal TSC is 66 symbols into burst,
+ * the ideal TSC has an +/- 180 degree phase shift,
+ * due to the pi/4 frequency shift, that
+ * needs to be accounted for.
+ */
+ if (amp)
+ *amp = _amp / gMidambles[tsc]->gain;
+
+ /* Delay one half of peak-centred correlation length */
+ _toa -= sps * 8;
+
+ if (toa)
+ *toa = _toa;
+
+ if (chan_req) {
+ _chan = new signalVector(6 * sps);
+
+ delayVector(corr, -_toa);
+ corr.segmentCopyTo(*_chan, target - 3, _chan->size());
+ scaleVector(*_chan, complex(1.0, 0.0) / gMidambles[tsc]->gain);
+
+ *chan = _chan;
+
+ if (chan_offset)
+ *chan_offset = 3.0 * sps;;
}
- return (peakToMean > detectThreshold);
-
+ return 1;
+
+notfound:
+ if (amp)
+ *amp = 0.0f;
+ if (toa)
+ *toa = 0.0f;
+
+ return 0;
}
signalVector *decimateVector(signalVector &wVector,
@@ -1479,10 +1421,15 @@ SoftVector *equalizeBurst(signalVector &rxBurst,
signalVector &w, // feedforward filter
signalVector &b) // feedback filter
{
+ signalVector *postForwardFull;
- delayVector(rxBurst,-TOA);
+ if (!delayVector(rxBurst, -TOA))
+ return NULL;
- signalVector* postForwardFull = convolve(&rxBurst,&w,NULL,FULL_SPAN);
+ postForwardFull = convolve(&rxBurst, &w, NULL,
+ CUSTOM, 0, rxBurst.size() + w.size() - 1);
+ if (!postForwardFull)
+ return NULL;
signalVector* postForward = new signalVector(rxBurst.size());
postForwardFull->segmentCopyTo(*postForward,w.size()-1,rxBurst.size());
diff --git a/Transceiver52M/sigProcLib.h b/Transceiver52M/sigProcLib.h
index ee152d5..194002f 100644
--- a/Transceiver52M/sigProcLib.h
+++ b/Transceiver52M/sigProcLib.h
@@ -27,13 +27,10 @@ enum Symmetry {
/** Convolution type indicator */
enum ConvType {
- FULL_SPAN = 0,
- OVERLAP_ONLY = 1,
- START_ONLY = 2,
- WITH_TAIL = 3,
- NO_DELAY = 4,
- CUSTOM = 5,
- UNDEFINED = 255
+ START_ONLY,
+ NO_DELAY,
+ CUSTOM,
+ UNDEFINED,
};
/** the core data structure of the Transceiver */
@@ -44,13 +41,14 @@ class signalVector: public Vector<complex>
Symmetry symmetry; ///< the symmetry of the vector
bool realOnly; ///< true if vector is real-valued, not complex-valued
-
+ bool aligned;
+
public:
/** Constructors */
signalVector(int dSize=0, Symmetry wSymmetry = NONE):
Vector<complex>(dSize),
- realOnly(false)
+ realOnly(false), aligned(false)
{
symmetry = wSymmetry;
};
@@ -58,26 +56,45 @@ class signalVector: public Vector<complex>
signalVector(complex* wData, size_t start,
size_t span, Symmetry wSymmetry = NONE):
Vector<complex>(NULL,wData+start,wData+start+span),
- realOnly(false)
+ realOnly(false), aligned(false)
{
symmetry = wSymmetry;
};
signalVector(const signalVector &vec1, const signalVector &vec2):
Vector<complex>(vec1,vec2),
- realOnly(false)
+ realOnly(false), aligned(false)
{
symmetry = vec1.symmetry;
};
signalVector(const signalVector &wVector):
Vector<complex>(wVector.size()),
- realOnly(false)
+ realOnly(false), aligned(false)
{
wVector.copyTo(*this);
symmetry = wVector.getSymmetry();
};
+ signalVector(size_t size, size_t start):
+ Vector<complex>(size + start),
+ realOnly(false), aligned(false)
+ {
+ mStart = mData + start;
+ symmetry = NONE;
+ };
+
+ signalVector(const signalVector &wVector, size_t start, size_t tail = 0):
+ Vector<complex>(start + wVector.size() + tail),
+ realOnly(false), aligned(false)
+ {
+ mStart = mData + start;
+ wVector.copyTo(*this);
+ memset(mData, 0, start * sizeof(complex));
+ memset(mStart + wVector.size(), 0, tail * sizeof(complex));
+ symmetry = NONE;
+ };
+
/** symmetry operators */
Symmetry getSymmetry() const { return symmetry;};
void setSymmetry(Symmetry wSymmetry) { symmetry = wSymmetry;};
@@ -85,6 +102,10 @@ class signalVector: public Vector<complex>
/** real-valued operators */
bool isRealOnly() const { return realOnly;};
void isRealOnly(bool wOnly) { realOnly = wOnly;};
+
+ /** alignment markers */
+ bool isAligned() const { return aligned; };
+ void setAligned(bool aligned) { this->aligned = aligned; };
};
/** Convert a linear number to a dB value */
@@ -110,14 +131,15 @@ void sigProcLibDestroy(void);
@param a,b The vectors to be convolved.
@param c, A preallocated vector to hold the convolution result.
@param spanType The type/span of the convolution.
- @return The convolution result.
+ @return The convolution result or NULL on error.
*/
-signalVector* convolve(const signalVector *a,
- const signalVector *b,
- signalVector *c,
- ConvType spanType,
- unsigned startIx = 0,
- unsigned len = 0);
+signalVector *convolve(const signalVector *a,
+ const signalVector *b,
+ signalVector *c,
+ ConvType spanType,
+ int start = 0,
+ unsigned len = 0,
+ unsigned step = 1, int offset = 0);
/**
Generate the GSM pulse.
@@ -169,8 +191,7 @@ signalVector *modulateBurst(const BitVector &wBurst,
float sinc(float x);
/** Delay a vector */
-void delayVector(signalVector &wBurst,
- float delay);
+bool delayVector(signalVector &wBurst, float delay);
/** Add two vectors in-place */
bool addVector(signalVector &x,
@@ -257,13 +278,13 @@ bool energyDetect(signalVector &rxBurst,
@param sps The number of samples per GSM symbol.
@param amplitude The estimated amplitude of received RACH burst.
@param TOA The estimate time-of-arrival of received RACH burst.
- @return True if burst SNR is larger that the detectThreshold value.
+ @return positive if threshold value is reached, negative on error, zero otherwise
*/
-bool detectRACHBurst(signalVector &rxBurst,
- float detectThreshold,
- int sps,
- complex *amplitude,
- float* TOA);
+int detectRACHBurst(signalVector &rxBurst,
+ float detectThreshold,
+ int sps,
+ complex *amplitude,
+ float* TOA);
/**
Normal burst correlator, detector, channel estimator.
@@ -277,18 +298,18 @@ bool detectRACHBurst(signalVector &rxBurst,
@param requestChannel Set to true if channel estimation is desired.
@param channelResponse The estimated channel.
@param channelResponseOffset The time offset b/w the first sample of the channel response and the reported TOA.
- @return True if burst SNR is larger that the detectThreshold value.
+ @return positive if threshold value is reached, negative on error, zero otherwise
*/
-bool analyzeTrafficBurst(signalVector &rxBurst,
- unsigned TSC,
- float detectThreshold,
- int sps,
- complex *amplitude,
- float *TOA,
- unsigned maxTOA,
- bool requestChannel = false,
- signalVector** channelResponse = NULL,
- float *channelResponseOffset = NULL);
+int analyzeTrafficBurst(signalVector &rxBurst,
+ unsigned TSC,
+ float detectThreshold,
+ int sps,
+ complex *amplitude,
+ float *TOA,
+ unsigned maxTOA,
+ bool requestChannel = false,
+ signalVector** channelResponse = NULL,
+ float *channelResponseOffset = NULL);
/**
Decimate a vector.
diff --git a/Transceiver52M/sigProcLibTest.cpp b/Transceiver52M/sigProcLibTest.cpp
deleted file mode 100644
index f673fcd..0000000
--- a/Transceiver52M/sigProcLibTest.cpp
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
-* Copyright 2011 Free Software Foundation, Inc.
-* Copyright 2008, 2010 Kestrel Signal Processing, Inc.
-*
-* This software is distributed under the terms of the GNU Affero Public License.
-* See the COPYING file in the main directory for details.
-*
-* This use of this software may be subject to additional restrictions.
-* See the LEGAL file in the main directory for details.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-*/
-
-/*
-Contributors:
-Harvind S. Samra, hssamra@kestrelsp.com
-*/
-
-
-#include "sigProcLib.h"
-//#include "radioInterface.h"
-#include <Logger.h>
-#include <Configuration.h>
-#include <GSMCommon.h>
-
-using namespace std;
-using namespace GSM;
-
-ConfigurationTable gConfig;
-
-int main(int argc, char **argv) {
-
- gLogInit("sigProcLibTest","DEBUG");
-
- int sps = 1;
-
- int TSC = 2;
-
- sigProcLibSetup(sps);
-
- signalVector *gsmPulse = generateGSMPulse(2,sps);
- cout << *gsmPulse << endl;
-
- BitVector RACHBurstStart = "01010101";
- BitVector RACHBurstRest = "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000";
-
- BitVector RACHBurst(BitVector(RACHBurstStart,gRACHSynchSequence),RACHBurstRest);
-
-
- signalVector *RACHSeq = modulateBurst(RACHBurst,
- *gsmPulse,
- 9,
- sps);
-
- generateRACHSequence(*gsmPulse,sps);
-
- complex a; float t;
- detectRACHBurst(*RACHSeq, 5, sps,&a,&t);
-
- //cout << *RACHSeq << endl;
- //signalVector *autocorr = correlate(RACHSeq,RACHSeq,NULL,NO_DELAY);
-
- //cout << *autocorr;
-
- //exit(1);
-
-
- /*signalVector x(6500);
- x.fill(1.0);
-
- frequencyShift(&x,&x,0.48*M_PI);
-
- signalVector *y = polyphaseResampleVector(x,96,65,NULL);
-
- cout << *y << endl;
-
- exit(1);*/
-
- //CommSig normalBurstSeg = "0000000000000000000000000000000000000000000000000000000000000";
-
- BitVector normalBurstSeg = "0000101010100111110010101010010110101110011000111001101010000";
-
- BitVector normalBurst(BitVector(normalBurstSeg,gTrainingSequence[TSC]),normalBurstSeg);
-
-
- generateMidamble(*gsmPulse,sps,TSC);
-
-
- signalVector *modBurst = modulateBurst(normalBurst,*gsmPulse,
- 0,sps);
-
-
- //delayVector(*rsVector2,6.932);
-
- complex ampl = 1;
- float TOA = 0;
-
- //modBurst = rsVector2;
- //delayVector(*modBurst,0.8);
-
- /*
- signalVector channelResponse(4);
- signalVector::iterator c=channelResponse.begin();
- *c = (complex) 9000.0; c++;
- *c = (complex) 0.4*9000.0; c++; c++;
- *c = (complex) -1.2*0;
-
- signalVector *guhBurst = convolve(modBurst,&channelResponse,NULL,NO_DELAY);
- delete modBurst; modBurst = guhBurst;
- */
-
- signalVector *chanResp;
- /*
- double noisePwr = 0.001/sqrtf(2);
- signalVector *noise = gaussianNoise(modBurst->size(),noisePwr);
- */
- float chanRespOffset;
- analyzeTrafficBurst(*modBurst,TSC,8.0,sps,&ampl,&TOA,1,true,&chanResp,&chanRespOffset);
- //addVector(*modBurst,*noise);
-
- cout << "ampl:" << ampl << endl;
- cout << "TOA: " << TOA << endl;
- //cout << "chanResp: " << *chanResp << endl;
- SoftVector *demodBurst = demodulateBurst(*modBurst,*gsmPulse,sps,(complex) ampl, TOA);
-
- cout << *demodBurst << endl;
-
- /*
- COUT("chanResp: " << *chanResp);
-
- signalVector *w,*b;
- designDFE(*chanResp,1.0/noisePwr,7,&w,&b);
- COUT("w: " << *w);
- COUT("b: " << *b);
-
-
- SoftSig *DFEBurst = equalizeBurst(*modBurst,TOA-chanRespOffset,sps,*w,*b);
- COUT("DFEBurst: " << *DFEBurst);
-
- delete gsmPulse;
- delete RACHSeq;
- delete modBurst;
- delete sendLPF;
- delete rcvLPF;
- delete rsVector;
- //delete rsVector2;
- delete autocorr;
- delete chanResp;
- delete noise;
- delete demodBurst;
- delete w;
- delete b;
- delete DFEBurst;
- */
-
- sigProcLibDestroy();
-
-}
diff --git a/config/ax_check_compile_flag.m4 b/config/ax_check_compile_flag.m4
new file mode 100644
index 0000000..c3a8d69
--- /dev/null
+++ b/config/ax_check_compile_flag.m4
@@ -0,0 +1,72 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
+#
+# DESCRIPTION
+#
+# Check whether the given FLAG works with the current language's compiler
+# or gives an error. (Warnings, however, are ignored)
+#
+# ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
+# success/failure.
+#
+# If EXTRA-FLAGS is defined, it is added to the current language's default
+# flags (e.g. CFLAGS) when the check is done. The check is thus made with
+# the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to
+# force the compiler to issue an error when a bad flag is given.
+#
+# NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
+# macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
+# Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception, the respective Autoconf Macro's copyright owner
+# gives unlimited permission to copy, distribute and modify the configure
+# scripts that are the output of Autoconf when processing the Macro. You
+# need not follow the terms of the GNU General Public License when using
+# or distributing such scripts, even though portions of the text of the
+# Macro appear in them. The GNU General Public License (GPL) does govern
+# all other use of the material that constitutes the Autoconf Macro.
+#
+# This special exception to the GPL applies to versions of the Autoconf
+# Macro released by the Autoconf Archive. When you make and distribute a
+# modified version of the Autoconf Macro, you may extend this special
+# exception to the GPL to apply to your modified version as well.
+
+#serial 2
+
+AC_DEFUN([AX_CHECK_COMPILE_FLAG],
+[AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
+AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
+AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
+ ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
+ _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+ [AS_VAR_SET(CACHEVAR,[yes])],
+ [AS_VAR_SET(CACHEVAR,[no])])
+ _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
+AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
+ [m4_default([$2], :)],
+ [m4_default([$3], :)])
+AS_VAR_POPDEF([CACHEVAR])dnl
+])dnl AX_CHECK_COMPILE_FLAGS
diff --git a/config/ax_ext.m4 b/config/ax_ext.m4
new file mode 100644
index 0000000..fbd10cc
--- /dev/null
+++ b/config/ax_ext.m4
@@ -0,0 +1,221 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_ext.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_EXT
+#
+# DESCRIPTION
+#
+# Find supported SIMD extensions by requesting cpuid. When an SIMD
+# extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
+# compiler supports it. For example, if "sse2" is available, then "-msse2"
+# is added to SIMD_FLAGS.
+#
+# This macro calls:
+#
+# AC_SUBST(SIMD_FLAGS)
+#
+# And defines:
+#
+# HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
+#
+# LICENSE
+#
+# Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
+# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice
+# and this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+#serial 12
+
+AC_DEFUN([AX_EXT],
+[
+ AC_REQUIRE([AC_CANONICAL_HOST])
+
+ case $host_cpu in
+ i[[3456]]86*|x86_64*|amd64*)
+
+ AC_REQUIRE([AX_GCC_X86_CPUID])
+ AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
+
+ AX_GCC_X86_CPUID(0x00000001)
+ ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
+ edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
+
+ AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
+ [
+ ax_cv_have_mmx_ext=no
+ if test "$((0x$edx>>23&0x01))" = 1; then
+ ax_cv_have_mmx_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
+ [
+ ax_cv_have_sse_ext=no
+ if test "$((0x$edx>>25&0x01))" = 1; then
+ ax_cv_have_sse_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
+ [
+ ax_cv_have_sse2_ext=no
+ if test "$((0x$edx>>26&0x01))" = 1; then
+ ax_cv_have_sse2_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
+ [
+ ax_cv_have_sse3_ext=no
+ if test "$((0x$ecx&0x01))" = 1; then
+ ax_cv_have_sse3_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
+ [
+ ax_cv_have_ssse3_ext=no
+ if test "$((0x$ecx>>9&0x01))" = 1; then
+ ax_cv_have_ssse3_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
+ [
+ ax_cv_have_sse41_ext=no
+ if test "$((0x$ecx>>19&0x01))" = 1; then
+ ax_cv_have_sse41_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
+ [
+ ax_cv_have_sse42_ext=no
+ if test "$((0x$ecx>>20&0x01))" = 1; then
+ ax_cv_have_sse42_ext=yes
+ fi
+ ])
+
+ AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
+ [
+ ax_cv_have_avx_cpu_ext=no
+ if test "$((0x$ecx>>28&0x01))" = 1; then
+ ax_cv_have_avx_cpu_ext=yes
+ fi
+ ])
+
+ if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
+ AX_GCC_X86_AVX_XGETBV(0x00000000)
+
+ xgetbv_eax="0"
+ if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
+ xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
+ fi
+
+ AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
+ [
+ ax_cv_have_avx_ext=no
+
+ if test "$((0x$ecx>>27&0x01))" = 1; then
+ if test "$((0x$xgetbv_eax&0x6))" = 6; then
+ ax_cv_have_avx_ext=yes
+ fi
+ fi
+ ])
+ if test x"$ax_cv_have_avx_ext" = x"no"; then
+ AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
+ fi
+ fi
+
+ if test "$ax_cv_have_mmx_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
+ if test x"$ax_cv_support_mmx_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -mmmx"
+ AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
+ else
+ AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_sse_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
+ if test x"$ax_cv_support_sse_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -msse"
+ AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
+ else
+ AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_sse2_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
+ if test x"$ax_cv_support_sse2_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -msse2"
+ AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
+ else
+ AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_sse3_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
+ if test x"$ax_cv_support_sse3_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -msse3"
+ AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
+ else
+ AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_ssse3_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
+ if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -mssse3"
+ AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
+ else
+ AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_sse41_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
+ if test x"$ax_cv_support_sse41_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -msse4.1"
+ AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
+ else
+ AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_sse42_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
+ if test x"$ax_cv_support_sse42_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -msse4.2"
+ AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
+ else
+ AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ if test "$ax_cv_have_avx_ext" = yes; then
+ AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
+ if test x"$ax_cv_support_avx_ext" = x"yes"; then
+ SIMD_FLAGS="$SIMD_FLAGS -mavx"
+ AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
+ else
+ AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
+ fi
+ fi
+
+ ;;
+ esac
+
+ AC_SUBST(SIMD_FLAGS)
+])
diff --git a/config/ax_gcc_x86_avx_xgetbv.m4 b/config/ax_gcc_x86_avx_xgetbv.m4
new file mode 100644
index 0000000..0624eeb
--- /dev/null
+++ b/config/ax_gcc_x86_avx_xgetbv.m4
@@ -0,0 +1,79 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_GCC_X86_AVX_XGETBV
+#
+# DESCRIPTION
+#
+# On later x86 processors with AVX SIMD support, with gcc or a compiler
+# that has a compatible syntax for inline assembly instructions, run a
+# small program that executes the xgetbv instruction with input OP. This
+# can be used to detect if the OS supports AVX instruction usage.
+#
+# On output, the values of the eax and edx registers are stored as
+# hexadecimal strings as "eax:edx" in the cache variable
+# ax_cv_gcc_x86_avx_xgetbv.
+#
+# If the xgetbv instruction fails (because you are running a
+# cross-compiler, or because you are not using gcc, or because you are on
+# a processor that doesn't have this instruction),
+# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
+#
+# This macro mainly exists to be used in AX_EXT.
+#
+# LICENSE
+#
+# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception, the respective Autoconf Macro's copyright owner
+# gives unlimited permission to copy, distribute and modify the configure
+# scripts that are the output of Autoconf when processing the Macro. You
+# need not follow the terms of the GNU General Public License when using
+# or distributing such scripts, even though portions of the text of the
+# Macro appear in them. The GNU General Public License (GPL) does govern
+# all other use of the material that constitutes the Autoconf Macro.
+#
+# This special exception to the GPL applies to versions of the Autoconf
+# Macro released by the Autoconf Archive. When you make and distribute a
+# modified version of the Autoconf Macro, you may extend this special
+# exception to the GPL to apply to your modified version as well.
+
+#serial 1
+
+AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
+[AC_REQUIRE([AC_PROG_CC])
+AC_LANG_PUSH([C])
+AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
+ [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
+ int op = $1, eax, edx;
+ FILE *f;
+ /* Opcodes for xgetbv */
+ __asm__(".byte 0x0f, 0x01, 0xd0"
+ : "=a" (eax), "=d" (edx)
+ : "c" (op));
+ f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
+ fprintf(f, "%x:%x\n", eax, edx);
+ fclose(f);
+ return 0;
+])],
+ [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
+ [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
+ [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
+AC_LANG_POP([C])
+])
diff --git a/config/ax_gcc_x86_cpuid.m4 b/config/ax_gcc_x86_cpuid.m4
new file mode 100644
index 0000000..7d46fee
--- /dev/null
+++ b/config/ax_gcc_x86_cpuid.m4
@@ -0,0 +1,79 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_GCC_X86_CPUID(OP)
+#
+# DESCRIPTION
+#
+# On Pentium and later x86 processors, with gcc or a compiler that has a
+# compatible syntax for inline assembly instructions, run a small program
+# that executes the cpuid instruction with input OP. This can be used to
+# detect the CPU type.
+#
+# On output, the values of the eax, ebx, ecx, and edx registers are stored
+# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
+# ax_cv_gcc_x86_cpuid_OP.
+#
+# If the cpuid instruction fails (because you are running a
+# cross-compiler, or because you are not using gcc, or because you are on
+# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
+# is set to the string "unknown".
+#
+# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
+# Copyright (c) 2008 Matteo Frigo
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# As a special exception, the respective Autoconf Macro's copyright owner
+# gives unlimited permission to copy, distribute and modify the configure
+# scripts that are the output of Autoconf when processing the Macro. You
+# need not follow the terms of the GNU General Public License when using
+# or distributing such scripts, even though portions of the text of the
+# Macro appear in them. The GNU General Public License (GPL) does govern
+# all other use of the material that constitutes the Autoconf Macro.
+#
+# This special exception to the GPL applies to versions of the Autoconf
+# Macro released by the Autoconf Archive. When you make and distribute a
+# modified version of the Autoconf Macro, you may extend this special
+# exception to the GPL to apply to your modified version as well.
+
+#serial 7
+
+AC_DEFUN([AX_GCC_X86_CPUID],
+[AC_REQUIRE([AC_PROG_CC])
+AC_LANG_PUSH([C])
+AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
+ [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
+ int op = $1, eax, ebx, ecx, edx;
+ FILE *f;
+ __asm__("cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a" (op));
+ f = fopen("conftest_cpuid", "w"); if (!f) return 1;
+ fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
+ fclose(f);
+ return 0;
+])],
+ [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
+ [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
+ [ax_cv_gcc_x86_cpuid_$1=unknown])])
+AC_LANG_POP([C])
+])
diff --git a/configure.ac b/configure.ac
index 3cf2783..a63b05a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -22,6 +22,7 @@ AC_INIT(openbts,P2.8TRUNK)
AC_PREREQ(2.57)
AC_CONFIG_SRCDIR([Transceiver52M/Makefile.am])
AC_CONFIG_AUX_DIR([.])
+AC_CONFIG_MACRO_DIR([config])
AM_CONFIG_HEADER(config.h)
AC_CANONICAL_BUILD
@@ -95,6 +96,8 @@ AS_IF([test "x$with_usrp1" = "xyes"], [
AS_IF([test "x$with_uhd" = "xyes"],[
PKG_CHECK_MODULES(UHD, uhd >= 003.004.000)
AC_DEFINE(USE_UHD, 1, Define to 1 if using UHD)
+ # Find and define supported SIMD extensions
+ AX_EXT
])
AS_IF([test "x$with_extref" = "xyes"], [