From a87669fead1172ebff3c68b4f436e3e829efdcdd Mon Sep 17 00:00:00 2001 From: markster Date: Tue, 24 Apr 2001 02:02:21 +0000 Subject: Version 0.1.8 from FTP git-svn-id: http://svn.digium.com/svn/asterisk/trunk@295 f38db490-d61c-443f-a65b-d21fe96a405b --- codecs/gsm/src/long_term.c | 9 +++- codecs/gsm/src/lpc.c | 32 +++++++++++- codecs/gsm/src/preprocess.c | 34 +++++++++---- codecs/gsm/src/rpe.c | 5 +- codecs/gsm/src/short_term.c | 118 ++++++++++++++++++++++++++------------------ 5 files changed, 138 insertions(+), 60 deletions(-) (limited to 'codecs/gsm/src') diff --git a/codecs/gsm/src/long_term.c b/codecs/gsm/src/long_term.c index fd67bda19..ee01c146b 100755 --- a/codecs/gsm/src/long_term.c +++ b/codecs/gsm/src/long_term.c @@ -13,7 +13,9 @@ #include "gsm.h" #include "proto.h" - +#ifdef K6OPT +#include "k6opt.h" +#endif /* * 4.2.11 .. 4.2.12 LONG TERM PREDICTOR (LTP) SECTION */ @@ -197,6 +199,9 @@ static void Calculation_of_the_LTP_parameters P4((d,dp,bc_out,Nc_out), /* Search for the maximum cross-correlation and coding of the LTP lag */ +# ifdef K6OPT + L_max = k6maxcc(wt,dp,&Nc); +# else L_max = 0; Nc = 40; /* index for the maximum cross-correlation */ @@ -234,7 +239,7 @@ static void Calculation_of_the_LTP_parameters P4((d,dp,bc_out,Nc_out), L_max = L_result; } } - +# endif *Nc_out = Nc; L_max <<= 1; diff --git a/codecs/gsm/src/lpc.c b/codecs/gsm/src/lpc.c index ac2b8a9eb..4ec52ee01 100755 --- a/codecs/gsm/src/lpc.c +++ b/codecs/gsm/src/lpc.c @@ -14,6 +14,10 @@ #include "gsm.h" #include "proto.h" +#ifdef K6OPT +#include "k6opt.h" +#endif + #undef P /* @@ -44,12 +48,19 @@ static void Autocorrelation P2((s, L_ACF), /* Search for the maximum. */ +#ifndef K6OPT smax = 0; for (k = 0; k <= 159; k++) { temp = GSM_ABS( s[k] ); if (temp > smax) smax = temp; } - +#else + { + longword lmax; + lmax = k6maxmin(s,160,NULL); + smax = (lmax > MAX_WORD) ? MAX_WORD : lmax; + } +#endif /* Computation of the scaling factor. */ if (smax == 0) scalauto = 0; @@ -62,6 +73,7 @@ static void Autocorrelation P2((s, L_ACF), */ if (scalauto > 0) { +# ifndef K6OPT # ifdef USE_FLOAT_MUL # define SCALE(n) \ @@ -83,6 +95,10 @@ static void Autocorrelation P2((s, L_ACF), SCALE(4) } # undef SCALE + +# else /* K6OPT */ + k6vsraw(s,160,scalauto); +# endif } # ifdef USE_FLOAT_MUL else for (k = 0; k <= 159; k++) float_s[k] = (float) s[k]; @@ -90,6 +106,7 @@ static void Autocorrelation P2((s, L_ACF), /* Compute the L_ACF[..]. */ +#ifndef K6OPT { # ifdef USE_FLOAT_MUL register float * sp = float_s; @@ -136,11 +153,24 @@ static void Autocorrelation P2((s, L_ACF), for (k = 9; k--; L_ACF[k] <<= 1) ; } + +#else + { + int k; + for (k=0; k<9; k++) { + L_ACF[k] = 2*k6iprod(s,s+k,160-k); + } + } +#endif /* Rescaling of the array s[0..159] */ if (scalauto > 0) { assert(scalauto <= 4); +#ifndef K6OPT for (k = 160; k--; *s++ <<= scalauto) ; +# else /* K6OPT */ + k6vsllw(s,160,scalauto); +# endif } } diff --git a/codecs/gsm/src/preprocess.c b/codecs/gsm/src/preprocess.c index 99c0709dc..83c3f6a56 100755 --- a/codecs/gsm/src/preprocess.c +++ b/codecs/gsm/src/preprocess.c @@ -42,11 +42,8 @@ void Gsm_Preprocess P3((S, s, so), word mp = S->mp; word s1; - longword L_s2; - longword L_temp; - word msp, lsp; word SO; longword ltmp; /* for ADD */ @@ -58,7 +55,8 @@ void Gsm_Preprocess P3((S, s, so), /* 4.2.1 Downscaling of the input signal */ - SO = SASR( *s, 3 ) << 2; + /* SO = SASR( *s, 3 ) << 2;*/ + SO = SASR( *s, 1 ) & ~3; s++; assert (SO >= -0x4000); /* downscaled by */ @@ -80,21 +78,38 @@ void Gsm_Preprocess P3((S, s, so), assert(s1 != MIN_WORD); + /* SJB Remark: float might be faster than the mess that follows */ + /* Compute the recursive part */ - L_s2 = s1; - L_s2 <<= 15; /* Execution of a 31 bv 16 bits multiplication */ - + { + word msp, lsp; + longword L_s2; + longword L_temp; + + L_s2 = s1; + L_s2 <<= 15; +#ifndef __GNUC__ msp = SASR( L_z2, 15 ); - lsp = L_z2-((longword)msp<<15); /* gsm_L_sub(L_z2,(msp<<15)); */ + lsp = L_z2 & 0x7fff; /* gsm_L_sub(L_z2,(msp<<15)); */ L_s2 += GSM_MULT_R( lsp, 32735 ); L_temp = (longword)msp * 32735; /* GSM_L_MULT(msp,32735) >> 1;*/ L_z2 = GSM_L_ADD( L_temp, L_s2 ); - + /* above does L_z2 = L_z2 * 0x7fd5/0x8000 + L_s2 */ +#else + L_z2 = ((long long)L_z2*32735 + 0x4000)>>15; + /* alternate (ansi) version of above line does slightly different rounding: + * L_temp = L_z2 >> 9; + * L_temp += L_temp >> 5; + * L_temp = (++L_temp) >> 1; + * L_z2 = L_z2 - L_temp; + */ + L_z2 = GSM_L_ADD(L_z2,L_s2); +#endif /* Compute sof[k] with rounding */ L_temp = GSM_L_ADD( L_z2, 16384 ); @@ -105,6 +120,7 @@ void Gsm_Preprocess P3((S, s, so), msp = GSM_MULT_R( mp, -28180 ); mp = SASR( L_temp, 15 ); *so++ = GSM_ADD( mp, msp ); + } } S->z1 = z1; diff --git a/codecs/gsm/src/rpe.c b/codecs/gsm/src/rpe.c index 8a6b81fae..6644e3797 100755 --- a/codecs/gsm/src/rpe.c +++ b/codecs/gsm/src/rpe.c @@ -18,7 +18,9 @@ */ /* 4.2.13 */ - +#ifdef K6OPT +#include "k6opt.h" +#else static void Weighting_filter P2((e, x), register word * e, /* signal [-5..0.39.44] IN */ word * x /* signal [0..39] OUT */ @@ -110,6 +112,7 @@ static void Weighting_filter P2((e, x), : (L_result > MAX_WORD ? MAX_WORD : L_result )); } } +#endif /* K6OPT */ /* 4.2.14 */ diff --git a/codecs/gsm/src/short_term.c b/codecs/gsm/src/short_term.c index 4f5fd7be7..c1921f551 100755 --- a/codecs/gsm/src/short_term.c +++ b/codecs/gsm/src/short_term.c @@ -13,7 +13,12 @@ #include "gsm.h" #include "proto.h" +#ifdef K6OPT +#include "k6opt.h" +#define Short_term_analysis_filtering Short_term_analysis_filteringx + +#endif /* * SHORT TERM ANALYSIS FILTERING SECTION */ @@ -180,9 +185,16 @@ static void LARp_to_rp P1((LARp), /* 4.2.10 */ -static void Short_term_analysis_filtering P4((S,rp,k_n,s), - struct gsm_state * S, - register word * rp, /* [0..7] IN */ +#ifndef Short_term_analysis_filtering + +/* SJB Remark: + * I tried 2 MMX versions of this function, neither is significantly + * faster than the C version which follows. MMX might be useful if + * one were processing 2 input streams in parallel. + */ +static void Short_term_analysis_filtering P4((u0,rp0,k_n,s), + register word * u0, + register word * rp0, /* [0..7] IN */ register int k_n, /* k_end - k_start */ register word * s /* [0..n-1] IN/OUT */ ) @@ -194,45 +206,45 @@ static void Short_term_analysis_filtering P4((S,rp,k_n,s), * coefficient), it is assumed that the computation begins with index * k_start (for arrays d[..] and s[..]) and stops with index k_end * (k_start and k_end are defined in 4.2.9.1). This procedure also - * needs to keep the array u[0..7] in memory for each call. + * needs to keep the array u0[0..7] in memory for each call. */ { - register word * u = S->u; - register int i; - register word di, zzz, ui, sav, rpi; - register longword ltmp; - - for (; k_n--; s++) { - - di = sav = *s; - - for (i = 0; i < 8; i++) { /* YYY */ - - ui = u[i]; - rpi = rp[i]; - u[i] = sav; - - zzz = GSM_MULT_R(rpi, di); - sav = GSM_ADD( ui, zzz); - - zzz = GSM_MULT_R(rpi, ui); - di = GSM_ADD( di, zzz ); + register word * u_top = u0 + 8; + register word * s_top = s + k_n; + + while (s < s_top) { + register word *u, *rp ; + register longword di, u_out; + di = u_out = *s; + for (rp=rp0, u=u0; u>15); + di = di + (((rpi*ui)+0x4000)>>15); + /* make the common case fastest: */ + if ((u_out == (word)u_out) && (di == (word)di)) continue; + /* otherwise do slower fixup (saturation) */ + if (u_out>MAX_WORD) u_out=MAX_WORD; + else if (u_outMAX_WORD) di=MAX_WORD; + else if (diu; register int i; float uf[8], @@ -262,6 +274,15 @@ static void Fast_Short_term_analysis_filtering P4((S,rp,k_n,s), } #endif /* ! (defined (USE_FLOAT_MUL) && defined (FAST)) */ +/* + * SJB Remark: modified Short_term_synthesis_filtering() below + * for significant (abt 35%) speedup of decompression. + * (gcc-2.95, k6 cpu) + * Please don't change this without benchmarking decompression + * to see that you haven't harmed speed. + * This function burns most of CPU time for untoasting. + * Unfortunately, didn't see any good way to benefit from mmx. + */ static void Short_term_synthesis_filtering P5((S,rrp,k,wt,sr), struct gsm_state * S, register word * rrp, /* [0..7] IN */ @@ -272,32 +293,34 @@ static void Short_term_synthesis_filtering P5((S,rrp,k,wt,sr), { register word * v = S->v; register int i; - register word sri, tmp1, tmp2; - register longword ltmp; /* for GSM_ADD & GSM_SUB */ + register longword sri; while (k--) { sri = *wt++; for (i = 8; i--;) { + register longword tmp1, tmp2; /* sri = GSM_SUB( sri, gsm_mult_r( rrp[i], v[i] ) ); */ tmp1 = rrp[i]; tmp2 = v[i]; - tmp2 = ( tmp1 == MIN_WORD && tmp2 == MIN_WORD - ? MAX_WORD - : 0x0FFFF & (( (longword)tmp1 * (longword)tmp2 - + 16384) >> 15)) ; - - sri = GSM_SUB( sri, tmp2 ); + tmp2 = (( tmp1 * tmp2 + 16384) >> 15) ; + /* saturation done below */ + sri -= tmp2; + if (sri != (word)sri) { + sri = (sri<0)? MIN_WORD:MAX_WORD; + } /* v[i+1] = GSM_ADD( v[i], gsm_mult_r( rrp[i], sri ) ); */ - tmp1 = ( tmp1 == MIN_WORD && sri == MIN_WORD - ? MAX_WORD - : 0x0FFFF & (( (longword)tmp1 * (longword)sri - + 16384) >> 15)) ; - v[i+1] = GSM_ADD( v[i], tmp1); + tmp1 = (( tmp1 * sri + 16384) >> 15) ; + /* saturation done below */ + tmp1 += v[i]; + if (tmp1 != (word)tmp1) { + tmp1 = (tmp1<0)? MIN_WORD:MAX_WORD; + } + v[i+1] = tmp1; } *sr++ = v[0] = sri; } @@ -355,7 +378,7 @@ void Gsm_Short_Term_Analysis_Filter P3((S,LARc,s), word * LARpp_j_1 = S->LARpp[ S->j ^= 1 ]; word LARp[8]; - +int i; #undef FILTER #if defined(FAST) && defined(USE_FLOAT_MUL) # define FILTER (* (S->fast \ @@ -370,19 +393,20 @@ void Gsm_Short_Term_Analysis_Filter P3((S,LARc,s), Coefficients_0_12( LARpp_j_1, LARpp_j, LARp ); LARp_to_rp( LARp ); - FILTER( S, LARp, 13, s); + FILTER( S->u, LARp, 13, s); Coefficients_13_26( LARpp_j_1, LARpp_j, LARp); LARp_to_rp( LARp ); - FILTER( S, LARp, 14, s + 13); + FILTER( S->u, LARp, 14, s + 13); Coefficients_27_39( LARpp_j_1, LARpp_j, LARp); LARp_to_rp( LARp ); - FILTER( S, LARp, 13, s + 27); + FILTER( S->u, LARp, 13, s + 27); Coefficients_40_159( LARpp_j, LARp); LARp_to_rp( LARp ); - FILTER( S, LARp, 120, s + 40); + FILTER( S->u, LARp, 120, s + 40); + } void Gsm_Short_Term_Synthesis_Filter P4((S, LARcr, wt, s), -- cgit v1.2.3