aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Tsou <tom@tsou.cc>2013-09-02 14:26:55 +0800
committerThomas Tsou <tom@tsou.cc>2013-09-05 06:07:50 -0400
commit23ee9002b4cafa22b3a905d1b9dda7eafd8dc90d (patch)
treec705a775c37bd76b60e955f8c8b213e4ea66e87c
parent98e58b911108ad5dd5b15afbee5ccf971745754f (diff)
Transceiver52M: Add SSE floating point / integer conversion
Conversions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc>
-rw-r--r--Transceiver52M/Makefile.am6
-rw-r--r--Transceiver52M/convert.c171
-rw-r--r--Transceiver52M/convert.h7
-rw-r--r--Transceiver52M/radioInterface.cpp26
4 files changed, 188 insertions, 22 deletions
diff --git a/Transceiver52M/Makefile.am b/Transceiver52M/Makefile.am
index da22c21..a7da44e 100644
--- a/Transceiver52M/Makefile.am
+++ b/Transceiver52M/Makefile.am
@@ -54,7 +54,8 @@ COMMON_SOURCES = \
DriveLoop.cpp \
Transceiver.cpp \
DummyLoad.cpp \
- convolve.c
+ convolve.c \
+ convert.c
libtransceiver_la_SOURCES = \
$(COMMON_SOURCES)
@@ -74,7 +75,8 @@ noinst_HEADERS = \
DummyLoad.h \
rcvLPF_651.h \
sendLPF_961.h \
- convolve.h
+ convolve.h \
+ convert.h
transceiver_SOURCES = multiTRX.cpp
transceiver_LDADD = \
diff --git a/Transceiver52M/convert.c b/Transceiver52M/convert.c
new file mode 100644
index 0000000..d9b6e74
--- /dev/null
+++ b/Transceiver52M/convert.c
@@ -0,0 +1,171 @@
+/*
+ * SSE type conversions
+ * Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <malloc.h>
+#include <string.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SSE3
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#ifdef HAVE_SSE4_1
+#include <smmintrin.h>
+
+/*
+ * Convert 16*N signed 16-bit integers to single precision floats.
+ *
+ * out: destination for the converted floats
+ * in:  source 16-bit signed integers
+ * len: number of values; only whole groups of 16 are converted
+ *      (len / 16 iterations), any remainder is left untouched
+ *
+ * Neither buffer has to be 16-byte aligned: unaligned load and store
+ * intrinsics are used, since callers may pass pointers at arbitrary
+ * sample offsets into their buffers.
+ */
+static void _sse_convert_si16_ps_16n(float *restrict out,
+				     short *restrict in,
+				     int len)
+{
+	__m128i m0, m1, m2, m3, m4, m5;
+	__m128 m6, m7, m8, m9;
+
+	for (int i = 0; i < len / 16; i++) {
+		/* Load (unaligned) packed 16-bit integers */
+		m0 = _mm_loadu_si128((__m128i *) &in[16 * i + 0]);
+		m1 = _mm_loadu_si128((__m128i *) &in[16 * i + 8]);
+
+		/*
+		 * Unpack: sign-extend the low four values of each register,
+		 * then swap the 64-bit halves so the upper four values can
+		 * be sign-extended the same way.
+		 */
+		m2 = _mm_cvtepi16_epi32(m0);
+		m4 = _mm_cvtepi16_epi32(m1);
+		m0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2));
+		m1 = _mm_shuffle_epi32(m1, _MM_SHUFFLE(1, 0, 3, 2));
+		m3 = _mm_cvtepi16_epi32(m0);
+		m5 = _mm_cvtepi16_epi32(m1);
+
+		/* Convert 32-bit integers to floats */
+		m6 = _mm_cvtepi32_ps(m2);
+		m7 = _mm_cvtepi32_ps(m3);
+		m8 = _mm_cvtepi32_ps(m4);
+		m9 = _mm_cvtepi32_ps(m5);
+
+		/* Store (unaligned) */
+		_mm_storeu_ps(&out[16 * i + 0], m6);
+		_mm_storeu_ps(&out[16 * i + 4], m7);
+		_mm_storeu_ps(&out[16 * i + 8], m8);
+		_mm_storeu_ps(&out[16 * i + 12], m9);
+	}
+}
+#endif /* HAVE_SSE4_1 */
+
+/*
+ * Scale 8*N single precision floats and convert to 16-bit signed integers.
+ *
+ * out:   destination for the converted 16-bit values
+ * in:    source floats
+ * scale: factor applied to every input before conversion
+ * len:   number of values; only whole groups of 8 are converted
+ *        (len / 8 iterations), any remainder is left untouched
+ *
+ * Values are converted with _mm_cvtps_epi32 and narrowed with the
+ * saturating _mm_packs_epi32. Neither buffer has to be 16-byte aligned.
+ */
+static void _sse_convert_scale_ps_si16_8n(short *restrict out,
+					  float *restrict in,
+					  float scale, int len)
+{
+	__m128 m0, m1, m2;
+	__m128i m4, m5;
+
+	/* Broadcast the scale factor once; it does not change per iteration */
+	m2 = _mm_load1_ps(&scale);
+
+	for (int i = 0; i < len / 8; i++) {
+		/* Load (unaligned) packed floats */
+		m0 = _mm_loadu_ps(&in[8 * i + 0]);
+		m1 = _mm_loadu_ps(&in[8 * i + 4]);
+
+		/* Scale */
+		m0 = _mm_mul_ps(m0, m2);
+		m1 = _mm_mul_ps(m1, m2);
+
+		/* Convert */
+		m4 = _mm_cvtps_epi32(m0);
+		m5 = _mm_cvtps_epi32(m1);
+
+		/* Pack with signed saturation and store (unaligned) */
+		m5 = _mm_packs_epi32(m4, m5);
+		_mm_storeu_si128((__m128i *) &out[8 * i], m5);
+	}
+}
+
+/*
+ * Scale 16*N single precision floats and convert to 16-bit signed integers.
+ *
+ * out:   destination for the converted 16-bit values
+ * in:    source floats
+ * scale: factor applied to every input before conversion
+ * len:   number of values; only whole groups of 16 are converted
+ *        (len / 16 iterations), any remainder is left untouched
+ *
+ * Values are converted with _mm_cvtps_epi32 and narrowed with the
+ * saturating _mm_packs_epi32. Neither buffer has to be 16-byte aligned.
+ */
+static void _sse_convert_scale_ps_si16_16n(short *restrict out,
+					   float *restrict in,
+					   float scale, int len)
+{
+	__m128 m0, m1, m2, m3, m4;
+	__m128i m5, m6, m7, m8;
+
+	/* Broadcast the scale factor once; it does not change per iteration */
+	m4 = _mm_load1_ps(&scale);
+
+	for (int i = 0; i < len / 16; i++) {
+		/* Load (unaligned) packed floats */
+		m0 = _mm_loadu_ps(&in[16 * i + 0]);
+		m1 = _mm_loadu_ps(&in[16 * i + 4]);
+		m2 = _mm_loadu_ps(&in[16 * i + 8]);
+		m3 = _mm_loadu_ps(&in[16 * i + 12]);
+
+		/* Scale */
+		m0 = _mm_mul_ps(m0, m4);
+		m1 = _mm_mul_ps(m1, m4);
+		m2 = _mm_mul_ps(m2, m4);
+		m3 = _mm_mul_ps(m3, m4);
+
+		/* Convert */
+		m5 = _mm_cvtps_epi32(m0);
+		m6 = _mm_cvtps_epi32(m1);
+		m7 = _mm_cvtps_epi32(m2);
+		m8 = _mm_cvtps_epi32(m3);
+
+		/* Pack with signed saturation and store (unaligned) */
+		m5 = _mm_packs_epi32(m5, m6);
+		m7 = _mm_packs_epi32(m7, m8);
+		_mm_storeu_si128((__m128i *) &out[16 * i + 0], m5);
+		_mm_storeu_si128((__m128i *) &out[16 * i + 8], m7);
+	}
+}
+#endif /* HAVE_SSE3 */
+
+/* Scalar fallback: convert 'len' 16-bit signed integers to floats */
+static void convert_si16_ps(float *out, short *in, int len)
+{
+	int idx = 0;
+
+	while (idx < len) {
+		out[idx] = (float) in[idx];
+		idx++;
+	}
+}
+
+/*
+ * Scalar fallback: scale 'len' floats and convert to 16-bit signed integers.
+ *
+ * In-range values are truncated toward zero by the C conversion. Values
+ * outside the 16-bit range are saturated, because converting an
+ * unrepresentable float to an integer type is undefined behaviour in C.
+ * NaN maps to the minimum value.
+ */
+static void convert_scale_ps_si16(short *out, float *in, float scale, int len)
+{
+	for (int i = 0; i < len; i++) {
+		float scaled = in[i] * scale;
+
+		if (scaled >= 32767.0f)
+			out[i] = 32767;
+		else if (!(scaled > -32768.0f))	/* also catches NaN */
+			out[i] = -32768;
+		else
+			out[i] = (short) scaled;
+	}
+}
+
+/*
+ * Scale 'len' floats by 'scale' and convert them to shorts.
+ *
+ * With SSE3 available, lengths that are a multiple of 16 or 8 go to the
+ * matching SSE kernel; every other length uses the scalar loop.
+ * NOTE(review): the SSE kernels convert with _mm_cvtps_epi32 while the
+ * scalar loop uses a plain C float-to-short conversion, so rounding of
+ * fractional values may differ between the two paths - confirm whether
+ * callers care.
+ */
+void convert_float_short(short *out, float *in, float scale, int len)
+{
+#ifdef HAVE_SSE3
+	if (len % 16 == 0) {
+		_sse_convert_scale_ps_si16_16n(out, in, scale, len);
+		return;
+	}
+
+	if (len % 8 == 0) {
+		_sse_convert_scale_ps_si16_8n(out, in, scale, len);
+		return;
+	}
+#endif
+	convert_scale_ps_si16(out, in, scale, len);
+}
+
+/*
+ * Convert 'len' shorts to floats.
+ *
+ * With SSE4.1 available, lengths that are a multiple of 16 go to the SSE
+ * kernel; every other length uses the scalar loop.
+ */
+void convert_short_float(float *out, short *in, int len)
+{
+#ifdef HAVE_SSE4_1
+	if (len % 16 == 0) {
+		_sse_convert_si16_ps_16n(out, in, len);
+		return;
+	}
+#endif
+	convert_si16_ps(out, in, len);
+}
diff --git a/Transceiver52M/convert.h b/Transceiver52M/convert.h
new file mode 100644
index 0000000..5b557bf
--- /dev/null
+++ b/Transceiver52M/convert.h
@@ -0,0 +1,7 @@
+#ifndef _CONVERT_H_
+#define _CONVERT_H_
+
+/*
+ * Multiply each of the 'len' floats in 'in' by 'scale' and store the
+ * results as shorts in 'out'. 'len' counts individual values; the callers
+ * in radioInterface.cpp pass twice the complex sample count.
+ */
+void convert_float_short(short *out, float *in, float scale, int len);
+/*
+ * Convert each of the 'len' shorts in 'in' to a float stored in 'out'.
+ * 'len' counts individual values, as above.
+ */
+void convert_short_float(float *out, short *in, int len);
+
+#endif /* _CONVERT_H_ */
diff --git a/Transceiver52M/radioInterface.cpp b/Transceiver52M/radioInterface.cpp
index 15a6b28..45d2471 100644
--- a/Transceiver52M/radioInterface.cpp
+++ b/Transceiver52M/radioInterface.cpp
@@ -25,30 +25,16 @@
#include "radioInterface.h"
#include <Logger.h>
+extern "C" {
+#include "convert.h"
+}
+
bool started = false;
/* Device side buffers */
static short *rx_buf[CHAN_MAX];
static short *tx_buf[CHAN_MAX];
-/* Complex float to short conversion */
-static void floatToShort(short *out, float *in, int num)
-{
- for (int i = 0; i < num; i++) {
- out[2 * i + 0] = (short) in[2 * i + 0];
- out[2 * i + 1] = (short) in[2 * i + 1];
- }
-}
-
-/* Complex short to float conversion */
-static void shortToFloat(float *out, short *in, int num)
-{
- for (int i = 0; i < num; i++) {
- out[2 * i + 0] = (float) in[2 * i + 0];
- out[2 * i + 1] = (float) in[2 * i + 1];
- }
-}
-
RadioInterface::RadioInterface(RadioDevice *wRadio,
int wChanM,
int wSPS,
@@ -332,7 +318,7 @@ void RadioInterface::pullBuffer()
readTimestamp += (TIMESTAMP) num_rd;
for (int i = 0; i < mChanM; i++)
- shortToFloat(rcvBuffer[i] + 2 * rcvCursor, rx_buf[i], num_rd);
+ convert_short_float(rcvBuffer[i] + 2 * rcvCursor, rx_buf[i], num_rd * 2);
rcvCursor += num_rd;
}
@@ -344,7 +330,7 @@ void RadioInterface::pushBuffer()
return;
for (int i = 0; i < mChanM; i++)
- floatToShort(tx_buf[i], sendBuffer[i], sendCursor);
+ convert_float_short(tx_buf[i], sendBuffer[i], 1.0, sendCursor * 2);
/* Write samples. Fail if we don't get what we want. */
int num_smpls = mRadio->writeSamples(tx_buf, mChanM, sendCursor,