aboutsummaryrefslogtreecommitdiffstats
path: root/src/conv_acc.c
diff options
context:
space:
mode:
authorEric <ewild@sysmocom.de>2020-07-23 02:16:46 +0200
committerlaforge <laforge@osmocom.org>2020-08-06 16:47:40 +0000
commit3afc1d1777e1c0dc0fe832db2c9746fb8a767fe3 (patch)
tree96f104934eb857ba1d66516f4df7e8e6865c9790 /src/conv_acc.c
parent2c962f5de1eeea119cfac7d9d92db31c570353b9 (diff)
libosmocoding: NEON Viterbi acceleration
Configure flag required to enable this: --enable-neon. Although autodetection according to __ARM_NEON would work (because this is only defined if the FPU is one of neon, neon-fp16, neon-vfpv3, neon-vfpv4, neon-fp-armv8 or crypto-neon-fp-armv8), doing that would lead to an unknown performance impact, so it needs to be enabled manually. Speedup is about 1.3-1.5x on an unspecified single-core Cortex-A9. This requires handling a special case for RACH with len 14, which is far too short for NEON and would actually incur a performance penalty of 25%. Related: OS#4585 Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Diffstat (limited to 'src/conv_acc.c')
-rw-r--r--src/conv_acc.c28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/conv_acc.c b/src/conv_acc.c
index c16e4364..0f6f7ca2 100644
--- a/src/conv_acc.c
+++ b/src/conv_acc.c
@@ -85,6 +85,11 @@ int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n);
void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
#endif
+#ifdef HAVE_NEON
+int16_t *osmo_conv_neon_vdec_malloc(size_t n);
+void osmo_conv_neon_vdec_free(int16_t *ptr);
+#endif
+
/* Forward Metric Units */
void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
@@ -129,6 +134,21 @@ void osmo_conv_sse_avx_metrics_k7_n4(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
#endif
+#if defined(HAVE_NEON)
+void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out,
+ int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out,
+ int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out,
+ int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out,
+ int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out,
+ int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out,
+ int16_t *sums, int16_t *paths, int norm);
+#endif
+
/* Trellis State
* state - Internal lshift register value
* prev - Register values of previous 0 and 1 states
@@ -528,6 +548,12 @@ static int vdec_init(struct vdecoder *dec, const struct osmo_conv_code *code)
if (dec->k == 5) {
switch (dec->n) {
case 2:
+/* RACH (len 14) is far too short for NEON: codes with len < 100 use the generic metric routine */
+#ifdef HAVE_NEON
+ if (code->len < 100)
+ dec->metric_func = osmo_conv_gen_metrics_k5_n2;
+ else
+#endif
dec->metric_func = osmo_conv_metrics_k5_n2;
break;
case 3:
@@ -681,6 +707,8 @@ static void osmo_conv_init(void)
} else {
INIT_POINTERS(gen);
}
+#elif defined(HAVE_NEON)
+ INIT_POINTERS(neon);
#else
INIT_POINTERS(gen);
#endif