aboutsummaryrefslogtreecommitdiffstats
path: root/Transceiver52M
diff options
context:
space:
mode:
authorThomas Tsou <tom@tsou.cc>2013-08-20 21:24:24 -0400
committerThomas Tsou <tom@tsou.cc>2013-10-18 13:10:17 -0400
commit9471d7635abeb9b5c87227d64e452890da0e65bd (patch)
tree28c28e373f42da81dac7363f89d6d0356ed8e72a /Transceiver52M
parent03e6ecf9771ea029e69fd4cdc2f2e289e93d3978 (diff)
Transceiver52M: Add SSE floating point / integer conversion
Convertions are performed in multiples of 4 or 8. All loads are considered unaligned. Signed-off-by: Thomas Tsou <tom@tsou.cc>
Diffstat (limited to 'Transceiver52M')
-rw-r--r--Transceiver52M/Makefile.am6
-rw-r--r--Transceiver52M/convert.c199
-rw-r--r--Transceiver52M/convert.h7
-rw-r--r--Transceiver52M/radioInterface.cpp95
-rw-r--r--Transceiver52M/radioInterface.h1
-rw-r--r--Transceiver52M/radioInterfaceResamp.cpp24
6 files changed, 258 insertions, 74 deletions
diff --git a/Transceiver52M/Makefile.am b/Transceiver52M/Makefile.am
index c2f8ece..d002b04 100644
--- a/Transceiver52M/Makefile.am
+++ b/Transceiver52M/Makefile.am
@@ -53,7 +53,8 @@ COMMON_SOURCES = \
sigProcLib.cpp \
Transceiver.cpp \
DummyLoad.cpp \
- convolve.c
+ convolve.c \
+ convert.c
libtransceiver_la_SOURCES = \
$(COMMON_SOURCES) \
@@ -78,7 +79,8 @@ noinst_HEADERS = \
rcvLPF_651.h \
sendLPF_961.h \
Resampler.h \
- convolve.h
+ convolve.h \
+ convert.h
USRPping_SOURCES = USRPping.cpp
USRPping_LDADD = \
diff --git a/Transceiver52M/convert.c b/Transceiver52M/convert.c
new file mode 100644
index 0000000..dc5e748
--- /dev/null
+++ b/Transceiver52M/convert.c
@@ -0,0 +1,199 @@
+/*
+ * SSE type conversions
+ * Copyright (C) 2013 Thomas Tsou <tom@tsou.cc>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <malloc.h>
+#include <string.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SSE3
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#ifdef HAVE_SSE4_1
+#include <smmintrin.h>
+
+/* 16*N 16-bit signed integer converted to single precision floats */
+static void _sse_convert_si16_ps_16n(float *restrict out,
+ short *restrict in,
+ int len)
+{
+ __m128i m0, m1, m2, m3, m4, m5;
+ __m128 m6, m7, m8, m9;
+
+ for (int i = 0; i < len / 16; i++) {
+ /* Load (unaligned) packed floats */
+ m0 = _mm_loadu_si128((__m128i *) &in[16 * i + 0]);
+ m1 = _mm_loadu_si128((__m128i *) &in[16 * i + 8]);
+
+ /* Unpack */
+ m2 = _mm_cvtepi16_epi32(m0);
+ m4 = _mm_cvtepi16_epi32(m1);
+ m0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2));
+ m1 = _mm_shuffle_epi32(m1, _MM_SHUFFLE(1, 0, 3, 2));
+ m3 = _mm_cvtepi16_epi32(m0);
+ m5 = _mm_cvtepi16_epi32(m1);
+
+ /* Convert */
+ m6 = _mm_cvtepi32_ps(m2);
+ m7 = _mm_cvtepi32_ps(m3);
+ m8 = _mm_cvtepi32_ps(m4);
+ m9 = _mm_cvtepi32_ps(m5);
+
+ /* Store */
+ _mm_storeu_ps(&out[16 * i + 0], m6);
+ _mm_storeu_ps(&out[16 * i + 4], m7);
+ _mm_storeu_ps(&out[16 * i + 8], m8);
+ _mm_storeu_ps(&out[16 * i + 12], m9);
+ }
+}
+
+/* 16*N 16-bit signed integer conversion with remainder */
+static void _sse_convert_si16_ps(float *restrict out,
+ short *restrict in,
+ int len)
+{
+ int start = len / 16 * 16;
+
+ _sse_convert_si16_ps_16n(out, in, len);
+
+ for (int i = 0; i < len % 16; i++)
+ out[start + i] = in[start + i];
+}
+#endif /* HAVE_SSE4_1 */
+
+/* 8*N single precision floats scaled and converted to 16-bit signed integer */
+static void _sse_convert_scale_ps_si16_8n(short *restrict out,
+ float *restrict in,
+ float scale, int len)
+{
+ __m128 m0, m1, m2;
+ __m128i m4, m5;
+
+ for (int i = 0; i < len / 8; i++) {
+ /* Load (unaligned) packed floats */
+ m0 = _mm_loadu_ps(&in[8 * i + 0]);
+ m1 = _mm_loadu_ps(&in[8 * i + 4]);
+ m2 = _mm_load1_ps(&scale);
+
+ /* Scale */
+ m0 = _mm_mul_ps(m0, m2);
+ m1 = _mm_mul_ps(m1, m2);
+
+ /* Convert */
+ m4 = _mm_cvtps_epi32(m0);
+ m5 = _mm_cvtps_epi32(m1);
+
+ /* Pack and store */
+ m5 = _mm_packs_epi32(m4, m5);
+ _mm_storeu_si128((__m128i *) &out[8 * i], m5);
+ }
+}
+
+/* 8*N single precision floats scaled and converted with remainder */
+static void _sse_convert_scale_ps_si16(short *restrict out,
+ float *restrict in,
+ float scale, int len)
+{
+ int start = len / 8 * 8;
+
+ _sse_convert_scale_ps_si16_8n(out, in, scale, len);
+
+ for (int i = 0; i < len % 8; i++)
+ out[start + i] = in[start + i] * scale;
+}
+
+/* 16*N single precision floats scaled and converted to 16-bit signed integer */
+static void _sse_convert_scale_ps_si16_16n(short *restrict out,
+ float *restrict in,
+ float scale, int len)
+{
+ __m128 m0, m1, m2, m3, m4;
+ __m128i m5, m6, m7, m8;
+
+ for (int i = 0; i < len / 16; i++) {
+ /* Load (unaligned) packed floats */
+ m0 = _mm_loadu_ps(&in[16 * i + 0]);
+ m1 = _mm_loadu_ps(&in[16 * i + 4]);
+ m2 = _mm_loadu_ps(&in[16 * i + 8]);
+ m3 = _mm_loadu_ps(&in[16 * i + 12]);
+ m4 = _mm_load1_ps(&scale);
+
+ /* Scale */
+ m0 = _mm_mul_ps(m0, m4);
+ m1 = _mm_mul_ps(m1, m4);
+ m2 = _mm_mul_ps(m2, m4);
+ m3 = _mm_mul_ps(m3, m4);
+
+ /* Convert */
+ m5 = _mm_cvtps_epi32(m0);
+ m6 = _mm_cvtps_epi32(m1);
+ m7 = _mm_cvtps_epi32(m2);
+ m8 = _mm_cvtps_epi32(m3);
+
+ /* Pack and store */
+ m5 = _mm_packs_epi32(m5, m6);
+ m7 = _mm_packs_epi32(m7, m8);
+ _mm_storeu_si128((__m128i *) &out[16 * i + 0], m5);
+ _mm_storeu_si128((__m128i *) &out[16 * i + 8], m7);
+ }
+}
+#else /* HAVE_SSE3 */
+static void convert_scale_ps_si16(short *out, float *in, float scale, int len)
+{
+ for (int i = 0; i < len; i++)
+ out[i] = in[i] * scale;
+}
+#endif
+
+#ifndef HAVE_SSE_4_1
+static void convert_si16_ps(float *out, short *in, int len)
+{
+ for (int i = 0; i < len; i++)
+ out[i] = in[i];
+}
+#endif
+
+void convert_float_short(short *out, float *in, float scale, int len)
+{
+#ifdef HAVE_SSE3
+ if (!(len % 16))
+ _sse_convert_scale_ps_si16_16n(out, in, scale, len);
+ else if (!(len % 8))
+ _sse_convert_scale_ps_si16_8n(out, in, scale, len);
+ else
+ _sse_convert_scale_ps_si16(out, in, scale, len);
+#else
+ convert_scale_ps_si16(out, in, scale, len);
+#endif
+}
+
+void convert_short_float(float *out, short *in, int len)
+{
+#ifdef HAVE_SSE4_1
+ if (!(len % 16))
+ _sse_convert_si16_ps_16n(out, in, len);
+ else
+ _sse_convert_si16_ps(out, in, len);
+#else
+ convert_si16_ps(out, in, len);
+#endif
+}
diff --git a/Transceiver52M/convert.h b/Transceiver52M/convert.h
new file mode 100644
index 0000000..5b557bf
--- /dev/null
+++ b/Transceiver52M/convert.h
@@ -0,0 +1,7 @@
+#ifndef _CONVERT_H_
+#define _CONVERT_H_
+
+void convert_float_short(short *out, float *in, float scale, int len);
+void convert_short_float(float *out, short *in, int len);
+
+#endif /* _CONVERT_H_ */
diff --git a/Transceiver52M/radioInterface.cpp b/Transceiver52M/radioInterface.cpp
index 1e25b5d..f39a470 100644
--- a/Transceiver52M/radioInterface.cpp
+++ b/Transceiver52M/radioInterface.cpp
@@ -25,29 +25,11 @@
#include "radioInterface.h"
#include <Logger.h>
-bool started = false;
-
-/* Device side buffers */
-static short rx_buf[OUTCHUNK * 2 * 2];
-static short tx_buf[INCHUNK * 2 * 2];
-
-/* Complex float to short conversion */
-static void floatToShort(short *out, float *in, int num)
-{
- for (int i = 0; i < num; i++) {
- out[2 * i + 0] = (short) in[2 * i + 0];
- out[2 * i + 1] = (short) in[2 * i + 1];
- }
+extern "C" {
+#include "convert.h"
}
-/* Complex short to float conversion */
-static void shortToFloat(float *out, short *in, int num)
-{
- for (int i = 0; i < num; i++) {
- out[2 * i + 0] = (float) in[2 * i + 0];
- out[2 * i + 1] = (float) in[2 * i + 1];
- }
-}
+bool started = false;
RadioInterface::RadioInterface(RadioDevice *wRadio,
int wReceiveOffset,
@@ -96,6 +78,7 @@ void RadioInterface::close()
convertSendBuffer = NULL;
}
+
double RadioInterface::fullScaleInputValue(void) {
return mRadio->fullScaleInputValue();
}
@@ -120,22 +103,14 @@ void RadioInterface::setPowerAttenuation(double atten)
int RadioInterface::radioifyVector(signalVector &wVector,
float *retVector,
- float scale,
bool zero)
{
- int i;
- signalVector::iterator itr = wVector.begin();
-
if (zero) {
memset(retVector, 0, wVector.size() * 2 * sizeof(float));
return wVector.size();
}
- for (i = 0; i < wVector.size(); i++) {
- retVector[2 * i + 0] = itr->real() * scale;
- retVector[2 * i + 1] = itr->imag() * scale;
- itr++;
- }
+ memcpy(retVector, wVector.begin(), wVector.size() * 2 * sizeof(float));
return wVector.size();
}
@@ -143,10 +118,14 @@ int RadioInterface::radioifyVector(signalVector &wVector,
int RadioInterface::unRadioifyVector(float *floatVector,
signalVector& newVector)
{
- int i;
signalVector::iterator itr = newVector.begin();
- for (i = 0; i < newVector.size(); i++) {
+ if (newVector.size() > recvCursor) {
+ LOG(ALERT) << "Insufficient number of samples in receive buffer";
+ return -1;
+ }
+
+ for (int i = 0; i < newVector.size(); i++) {
*itr++ = Complex<float>(floatVector[2 * i + 0],
floatVector[2 * i + 1]);
}
@@ -205,8 +184,7 @@ void RadioInterface::driveTransmitRadio(signalVector &radioBurst, bool zeroBurst
return;
radioifyVector(radioBurst,
- (float *) (sendBuffer->begin() + sendCursor),
- powerScaling, zeroBurst);
+ (float *) (sendBuffer->begin() + sendCursor), zeroBurst);
sendCursor += radioBurst.size();
@@ -293,36 +271,49 @@ double RadioInterface::getRxGain()
void RadioInterface::pullBuffer()
{
bool local_underrun;
+ int num_recv;
+
+ /* Outer buffer access size is fixed */
+ num_recv = mRadio->readSamples(convertRecvBuffer,
+ OUTCHUNK,
+ &overrun,
+ readTimestamp,
+ &local_underrun);
+ if (num_recv != OUTCHUNK) {
+ LOG(ALERT) << "Receive error " << num_recv;
+ return;
+ }
- /* Read samples. Fail if we don't get what we want. */
- int num_rd = mRadio->readSamples(rx_buf, OUTCHUNK, &overrun,
- readTimestamp, &local_underrun);
-
- LOG(DEBUG) << "Rx read " << num_rd << " samples from device";
- assert(num_rd == OUTCHUNK);
+ convert_short_float((float *) (recvBuffer->begin() + recvCursor),
+ convertRecvBuffer, 2 * OUTCHUNK);
underrun |= local_underrun;
- readTimestamp += (TIMESTAMP) num_rd;
+ readTimestamp += num_recv;
- shortToFloat((float *) recvBuffer->begin() + recvCursor, rx_buf, num_rd);
- recvCursor += num_rd;
+ recvCursor += num_recv;
}
/* Send timestamped chunk to the device with arbitrary size */
void RadioInterface::pushBuffer()
{
+ int num_sent;
+
if (sendCursor < INCHUNK)
return;
- floatToShort(tx_buf, (float *) sendBuffer->begin(), sendCursor);
-
- /* Write samples. Fail if we don't get what we want. */
- int num_smpls = mRadio->writeSamples(tx_buf,
- sendCursor,
- &underrun,
- writeTimestamp);
- assert(num_smpls == sendCursor);
+ convert_float_short(convertSendBuffer,
+ (float *) sendBuffer->begin(),
+ powerScaling, 2 * sendCursor);
+
+ /* Send the all samples in the send buffer */
+ num_sent = mRadio->writeSamples(convertSendBuffer,
+ sendCursor,
+ &underrun,
+ writeTimestamp);
+ if (num_sent != sendCursor) {
+ LOG(ALERT) << "Transmit error " << num_sent;
+ }
- writeTimestamp += (TIMESTAMP) num_smpls;
+ writeTimestamp += num_sent;
sendCursor = 0;
}
diff --git a/Transceiver52M/radioInterface.h b/Transceiver52M/radioInterface.h
index 690d572..a590c32 100644
--- a/Transceiver52M/radioInterface.h
+++ b/Transceiver52M/radioInterface.h
@@ -70,7 +70,6 @@ private:
/** format samples to USRP */
int radioifyVector(signalVector &wVector,
float *floatVector,
- float scale,
bool zero);
/** format samples from USRP */
diff --git a/Transceiver52M/radioInterfaceResamp.cpp b/Transceiver52M/radioInterfaceResamp.cpp
index d3dc82e..2b59203 100644
--- a/Transceiver52M/radioInterfaceResamp.cpp
+++ b/Transceiver52M/radioInterfaceResamp.cpp
@@ -49,20 +49,6 @@ static Resampler *dnsampler = NULL;
short *convertRecvBuffer = NULL;
short *convertSendBuffer = NULL;
-/* Complex float to short conversion */
-static void floatToShort(short *out, float *in, int num)
-{
- for (int i = 0; i < num; i++)
- out[i] = (short) in[i];
-}
-
-/* Complex short to float conversion */
-static void shortToFloat(float *out, short *in, int num)
-{
- for (int i = 0; i < num; i++)
- out[i] = (float) in[i];
-}
-
RadioInterfaceResamp::RadioInterfaceResamp(RadioDevice *wRadio,
int wReceiveOffset,
int wSPS,
@@ -166,8 +152,8 @@ void RadioInterfaceResamp::pullBuffer()
return;
}
- shortToFloat((float *) outerRecvBuffer->begin(),
- convertRecvBuffer, 2 * outer_len);
+ convert_short_float((float *) outerRecvBuffer->begin(),
+ convertRecvBuffer, 2 * outer_len);
underrun |= local_underrun;
readTimestamp += (TIMESTAMP) num_recv;
@@ -206,9 +192,9 @@ void RadioInterfaceResamp::pushBuffer()
LOG(ALERT) << "Sample rate downsampling error";
}
- floatToShort(convertSendBuffer,
- (float *) outerSendBuffer->begin(),
- 2 * outer_len);
+ convert_float_short(convertSendBuffer,
+ (float *) outerSendBuffer->begin(),
+ powerScaling, 2 * outer_len);
num_sent = mRadio->writeSamples(convertSendBuffer,
outer_len,