diff options
author | Eric <ewild@sysmocom.de> | 2020-07-23 02:16:46 +0200 |
---|---|---|
committer | laforge <laforge@osmocom.org> | 2020-08-06 16:47:40 +0000 |
commit | 3afc1d1777e1c0dc0fe832db2c9746fb8a767fe3 (patch) | |
tree | 96f104934eb857ba1d66516f4df7e8e6865c9790 /src/conv_acc.c | |
parent | 2c962f5de1eeea119cfac7d9d92db31c570353b9 (diff) |
libosmocoding: NEON Viterbi acceleration
configure flag required to enable this: --enable-neon
Although autodetection according to __ARM_NEON would work, because this
is only defined if the fpu is neon, neon-fp16, neon-vfpv3, neon-vfpv4,
neon-fp-armv8 or crypto-neon-fp-armv8, doing that would lead to an unknown
performance impact, so it needs to be enabled manually.
Speedup is about 1.3-1.5x on an unspecified single-core Cortex-A9. This
requires handling a special case for RACH with len 14, which is far too
short for NEON and would actually incur a performance penalty of 25%.
Related: OS#4585
Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Diffstat (limited to 'src/conv_acc.c')
-rw-r--r-- | src/conv_acc.c | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/src/conv_acc.c b/src/conv_acc.c index c16e4364..0f6f7ca2 100644 --- a/src/conv_acc.c +++ b/src/conv_acc.c @@ -85,6 +85,11 @@ int16_t *osmo_conv_sse_avx_vdec_malloc(size_t n); void osmo_conv_sse_avx_vdec_free(int16_t *ptr); #endif +#ifdef HAVE_NEON +int16_t *osmo_conv_neon_vdec_malloc(size_t n); +void osmo_conv_neon_vdec_free(int16_t *ptr); +#endif + /* Forward Metric Units */ void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); @@ -129,6 +134,21 @@ void osmo_conv_sse_avx_metrics_k7_n4(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); #endif +#if defined(HAVE_NEON) +void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +#endif + /* Trellis State * state - Internal lshift register value * prev - Register values of previous 0 and 1 states @@ -528,6 +548,12 @@ static int vdec_init(struct vdecoder *dec, const struct osmo_conv_code *code) if (dec->k == 5) { switch (dec->n) { case 2: +/* rach len 14 is too short for neon */ +#ifdef HAVE_NEON + if (code->len < 100) + dec->metric_func = osmo_conv_gen_metrics_k5_n2; + else +#endif dec->metric_func = osmo_conv_metrics_k5_n2; break; case 3: @@ -681,6 +707,8 @@ static void osmo_conv_init(void) } else { INIT_POINTERS(gen); } +#elif defined(HAVE_NEON) + INIT_POINTERS(neon); #else 
INIT_POINTERS(gen); #endif |