about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author markster <markster@f38db490-d61c-443f-a65b-d21fe96a405b> 2001-04-24 02:02:21 +0000
committer markster <markster@f38db490-d61c-443f-a65b-d21fe96a405b> 2001-04-24 02:02:21 +0000
commit a87669fead1172ebff3c68b4f436e3e829efdcdd (patch)
tree ab0754a7aeb97b46d7fb9eabb78f87805b980b19
parent 1f9a30535942c9b35212651b7346f33824eaf55c (diff)
Version 0.1.8 from FTP
git-svn-id: http://svn.digium.com/svn/asterisk/trunk@295 f38db490-d61c-443f-a65b-d21fe96a405b
-rwxr-xr-x codecs/gsm/Makefile 37
-rwxr-xr-x codecs/gsm/inc/private.h 40
-rwxr-xr-x codecs/gsm/src/long_term.c 9
-rwxr-xr-x codecs/gsm/src/lpc.c 32
-rwxr-xr-x codecs/gsm/src/preprocess.c 34
-rwxr-xr-x codecs/gsm/src/rpe.c 5
-rwxr-xr-x codecs/gsm/src/short_term.c 118
7 files changed, 206 insertions, 69 deletions
diff --git a/codecs/gsm/Makefile b/codecs/gsm/Makefile
index 23058a171..1c674f725 100755
--- a/codecs/gsm/Makefile
+++ b/codecs/gsm/Makefile
@@ -30,6 +30,17 @@ WAV49 = -DWAV49
######### define this, and read about the GSM_OPT_WAV49 option in the
######### manual page on gsm_option(3).
+K6OPT = -DK6OPT
+#K6OPT =
+######### Define to enable MMX(TM) optimizations for x86 architecture CPUs
+######### which support MMX instructions. This should be newer Pentiums,
+######### PPros, etc., as well as the AMD K6 and K7. The compile will
+######### probably require gcc.
+
+PG =
+#PG = -g -pg
+######### Profiling flags. If you don't know what that means, leave it blank.
+
# Choose a compiler. The code works both with ANSI and K&R-C.
# Use -DNeedFunctionPrototypes to compile with, -UNeedFunctionPrototypes to
# compile without, function prototypes in the header files.
@@ -43,7 +54,7 @@ WAV49 = -DWAV49
# CC = /usr/lang/acc
# CCFLAGS = -c -O
-CC = gcc -ansi -pedantic -O6 -mpentium -fomit-frame-pointer -fschedule-insns2
+CC = gcc -ansi -pedantic -O6 -mpentium -fschedule-insns2 -fomit-frame-pointer
CCFLAGS += -c -DNeedFunctionPrototypes=1 -finline-functions -funroll-loops
LD = $(CC)
@@ -126,20 +137,21 @@ INC = $(ROOT)/inc
# Flags
-# DEBUG = -DNDEBUG
+DEBUG = -DNDEBUG
######### Remove -DNDEBUG to enable assertions.
-CFLAGS = $(CCFLAGS) $(SASR) $(DEBUG) $(MULHACK) $(FAST) $(LTP_CUT) \
- $(WAV49) $(CCINC) -I$(INC)
+CFLAGS = $(PG) $(CCFLAGS) $(SASR) $(DEBUG) $(MULHACK) $(FAST) \
+ $(LTP_CUT) $(WAV49) $(K6OPT) $(CCINC) -I$(INC)
######### It's $(CC) $(CFLAGS)
-LFLAGS = $(LDFLAGS) $(LDINC)
+LFLAGS = $(PG) $(LDFLAGS) $(LDINC)
######### It's $(LD) $(LFLAGS)
# Targets
LIBGSM = $(LIB)/libgsm.a
+LIBGSMSO= $(LIB)/libgsm.so
TOAST = $(BIN)/toast
UNTOAST = $(BIN)/untoast
@@ -163,6 +175,7 @@ GSM_SOURCES = $(SRC)/add.c \
$(SRC)/code.c \
$(SRC)/debug.c \
$(SRC)/decode.c \
+ $(SRC)/k6opt.s \
$(SRC)/long_term.c \
$(SRC)/lpc.c \
$(SRC)/preprocess.c \
@@ -207,6 +220,7 @@ GSM_OBJECTS = $(SRC)/add.o \
$(SRC)/code.o \
$(SRC)/debug.o \
$(SRC)/decode.o \
+ $(SRC)/k6opt.o \
$(SRC)/long_term.o \
$(SRC)/lpc.o \
$(SRC)/preprocess.o \
@@ -279,7 +293,7 @@ TOAST_INSTALL_TARGETS = \
# Target rules
-all: $(LIBGSM) $(TOAST) $(TCAT) $(UNTOAST)
+all: $(LIBGSM) $(LIBGSMSO) $(TOAST) $(TCAT) $(UNTOAST)
@-echo $(ROOT): Done.
tst: $(TST)/lin2cod $(TST)/cod2lin $(TOAST) $(TST)/test-result
@@ -299,6 +313,11 @@ install: toastinstall gsminstall
# The basic API: libgsm
+######### Shared library: link libgsm.so.1.0.10 with soname libgsm.so.1, then
+######### create the soname and development symlinks beside it in $(LIB)
+######### (the same directory $@ lives in) rather than a hard-coded lib/.
+$(LIBGSMSO): $(LIB) $(GSM_OBJECTS)
+ $(LD) -o $@.1.0.10 -shared -Xlinker -soname -Xlinker libgsm.so.1 $(GSM_OBJECTS) -lc
+ ln -fs libgsm.so.1.0.10 $(LIB)/libgsm.so.1
+ ln -fs libgsm.so.1.0.10 $@
+
$(LIBGSM): $(LIB) $(GSM_OBJECTS)
-rm $(RMFLAGS) $(LIBGSM)
$(AR) $(ARFLAGS) $(LIBGSM) $(GSM_OBJECTS)
@@ -308,15 +327,15 @@ $(LIBGSM): $(LIB) $(GSM_OBJECTS)
# Toast, Untoast and Tcat -- the compress-like frontends to gsm.
$(TOAST): $(BIN) $(TOAST_OBJECTS) $(LIBGSM)
- $(LD) $(LFLAGS) -o $(TOAST) $(TOAST_OBJECTS) $(LIBGSM) $(LDLIB)
+ $(LD) $(LFLAGS) -o $(TOAST) $(TOAST_OBJECTS) $(LIBGSMSO) $(LDLIB)
$(UNTOAST): $(BIN) $(TOAST)
-rm $(RMFLAGS) $(UNTOAST)
- $(LN) $(TOAST) $(UNTOAST)
+ $(LN) toast $(UNTOAST)
$(TCAT): $(BIN) $(TOAST)
-rm $(RMFLAGS) $(TCAT)
- $(LN) $(TOAST) $(TCAT)
+ $(LN) toast $(TCAT)
# The local bin and lib directories
diff --git a/codecs/gsm/inc/private.h b/codecs/gsm/inc/private.h
index 6b538cc27..140aef1ae 100755
--- a/codecs/gsm/inc/private.h
+++ b/codecs/gsm/inc/private.h
@@ -98,6 +98,44 @@ extern word gsm_asr P((word a, int n));
# define GSM_L_MULT(a, b) /* word a, word b */ \
(((longword)(a) * (longword)(b)) << 1)
+#if defined(__GNUC__) && defined(__i386__)
+
+static __inline__ int GSM_L_ADD(int a, int b) /* Saturating signed 32-bit add (GCC inline asm, i386).
+ * Returns a + b; when the add overflows, returns 0x7fffffff for
+ * positive overflow or 0x80000000 for negative overflow.
+ */
+{
+ __asm__ __volatile__(
+
+ "addl %2,%0; jno 0f; movl $0x7fffffff,%0; adcl $0,%0; 0:" /* jno skips the fixup unless addl overflowed;
+ * movl leaves the flags alone, so adcl adds the carry left by addl:
+ * 0x7fffffff + 0 (two positive inputs) or
+ * 0x7fffffff + 1 = 0x80000000 (two negative inputs). */
+ : "=r" (a) /* %0: result register */
+ : "0" (a), "ir" (b) /* a starts out in %0's register; %2 is b, register or immediate */
+ : "cc" /* the condition codes are modified */
+ );
+ return(a);
+}
+
+static __inline__ short GSM_ADD(short a, short b) /* Saturating signed 16-bit add (GCC inline asm, i386).
+ * Returns a + b; when the add overflows, returns 0x7fff for
+ * positive overflow or 0x8000 for negative overflow.
+ */
+{
+ __asm__ __volatile__(
+ "addw %2,%0; jno 0f; movw $0x7fff,%0; adcw $0,%0; 0:" /* jno skips the fixup unless addw overflowed;
+ * movw leaves the flags alone, so adcw adds the carry left by addw:
+ * 0x7fff + 0 (two positive inputs) or
+ * 0x7fff + 1 = 0x8000 (two negative inputs). */
+ : "=r" (a) /* %0: result register */
+ : "0" (a), "ir" (b) /* a starts out in %0's register; %2 is b, register or immediate */
+ : "cc" /* the condition codes are modified */
+ );
+ return(a);
+}
+
+static __inline__ short GSM_SUB(short a, short b) /* Saturating signed 16-bit subtract (GCC inline asm, i386).
+ * Returns a - b; when the subtract overflows, returns 0x7fff for
+ * positive overflow (a >= 0, b < 0) or 0x8000 for negative overflow
+ * (a < 0, b > 0).
+ */
+{
+ __asm__ __volatile__(
+ "subw %2,%0; jno 0f; cmc; movw $0x7fff,%0; adcw $0,%0; 0:" /* jno skips the fixup unless subw overflowed.
+ * After subw the carry flag is a borrow, which is the opposite of
+ * what the add-style fixup needs: it is set for a >= 0, b < 0 (must
+ * saturate to 0x7fff) and clear for a < 0, b > 0 (must saturate to
+ * 0x8000). cmc inverts it so that movw/adcw yield 0x7fff + 0 and
+ * 0x7fff + 1 respectively; movw does not touch the flags. */
+ : "=r" (a) /* %0: result register */
+ : "0" (a), "ir" (b) /* a starts out in %0's register; %2 is b, register or immediate */
+ : "cc" /* the condition codes are modified */
+ );
+ return(a);
+}
+
+#else
+
# define GSM_L_ADD(a, b) \
( (a) < 0 ? ( (b) >= 0 ? (a) + (b) \
: (utmp = (ulongword)-((a) + 1) + (ulongword)-((b) + 1)) \
@@ -121,6 +159,8 @@ extern word gsm_asr P((word a, int n));
((ltmp = (longword)(a) - (longword)(b)) >= MAX_WORD \
? MAX_WORD : ltmp <= MIN_WORD ? MIN_WORD : ltmp)
+#endif
+
# define GSM_ABS(a) ((a) < 0 ? ((a) == MIN_WORD ? MAX_WORD : -(a)) : (a))
/* Use these if necessary:
diff --git a/codecs/gsm/src/long_term.c b/codecs/gsm/src/long_term.c
index fd67bda19..ee01c146b 100755
--- a/codecs/gsm/src/long_term.c
+++ b/codecs/gsm/src/long_term.c
@@ -13,7 +13,9 @@
#include "gsm.h"
#include "proto.h"
-
+#ifdef K6OPT
+#include "k6opt.h"
+#endif
/*
* 4.2.11 .. 4.2.12 LONG TERM PREDICTOR (LTP) SECTION
*/
@@ -197,6 +199,9 @@ static void Calculation_of_the_LTP_parameters P4((d,dp,bc_out,Nc_out),
/* Search for the maximum cross-correlation and coding of the LTP lag
*/
+# ifdef K6OPT
+ L_max = k6maxcc(wt,dp,&Nc);
+# else
L_max = 0;
Nc = 40; /* index for the maximum cross-correlation */
@@ -234,7 +239,7 @@ static void Calculation_of_the_LTP_parameters P4((d,dp,bc_out,Nc_out),
L_max = L_result;
}
}
-
+# endif
*Nc_out = Nc;
L_max <<= 1;
diff --git a/codecs/gsm/src/lpc.c b/codecs/gsm/src/lpc.c
index ac2b8a9eb..4ec52ee01 100755
--- a/codecs/gsm/src/lpc.c
+++ b/codecs/gsm/src/lpc.c
@@ -14,6 +14,10 @@
#include "gsm.h"
#include "proto.h"
+#ifdef K6OPT
+#include "k6opt.h"
+#endif
+
#undef P
/*
@@ -44,12 +48,19 @@ static void Autocorrelation P2((s, L_ACF),
/* Search for the maximum.
*/
+#ifndef K6OPT
smax = 0;
for (k = 0; k <= 159; k++) {
temp = GSM_ABS( s[k] );
if (temp > smax) smax = temp;
}
-
+#else
+ {
+ longword lmax;
+ lmax = k6maxmin(s,160,NULL);
+ smax = (lmax > MAX_WORD) ? MAX_WORD : lmax;
+ }
+#endif
/* Computation of the scaling factor.
*/
if (smax == 0) scalauto = 0;
@@ -62,6 +73,7 @@ static void Autocorrelation P2((s, L_ACF),
*/
if (scalauto > 0) {
+# ifndef K6OPT
# ifdef USE_FLOAT_MUL
# define SCALE(n) \
@@ -83,6 +95,10 @@ static void Autocorrelation P2((s, L_ACF),
SCALE(4)
}
# undef SCALE
+
+# else /* K6OPT */
+ k6vsraw(s,160,scalauto);
+# endif
}
# ifdef USE_FLOAT_MUL
else for (k = 0; k <= 159; k++) float_s[k] = (float) s[k];
@@ -90,6 +106,7 @@ static void Autocorrelation P2((s, L_ACF),
/* Compute the L_ACF[..].
*/
+#ifndef K6OPT
{
# ifdef USE_FLOAT_MUL
register float * sp = float_s;
@@ -136,11 +153,24 @@ static void Autocorrelation P2((s, L_ACF),
for (k = 9; k--; L_ACF[k] <<= 1) ;
}
+
+#else
+ {
+ int k;
+ for (k=0; k<9; k++) {
+ L_ACF[k] = 2*k6iprod(s,s+k,160-k);
+ }
+ }
+#endif
/* Rescaling of the array s[0..159]
*/
if (scalauto > 0) {
assert(scalauto <= 4);
+#ifndef K6OPT
for (k = 160; k--; *s++ <<= scalauto) ;
+# else /* K6OPT */
+ k6vsllw(s,160,scalauto);
+# endif
}
}
diff --git a/codecs/gsm/src/preprocess.c b/codecs/gsm/src/preprocess.c
index 99c0709dc..83c3f6a56 100755
--- a/codecs/gsm/src/preprocess.c
+++ b/codecs/gsm/src/preprocess.c
@@ -42,11 +42,8 @@ void Gsm_Preprocess P3((S, s, so),
word mp = S->mp;
word s1;
- longword L_s2;
- longword L_temp;
- word msp, lsp;
word SO;
longword ltmp; /* for ADD */
@@ -58,7 +55,8 @@ void Gsm_Preprocess P3((S, s, so),
/* 4.2.1 Downscaling of the input signal
*/
- SO = SASR( *s, 3 ) << 2;
+ /* SO = SASR( *s, 3 ) << 2;*/
+ SO = SASR( *s, 1 ) & ~3;
s++;
assert (SO >= -0x4000); /* downscaled by */
@@ -80,21 +78,38 @@ void Gsm_Preprocess P3((S, s, so),
assert(s1 != MIN_WORD);
+ /* SJB Remark: float might be faster than the mess that follows */
+
/* Compute the recursive part
*/
- L_s2 = s1;
- L_s2 <<= 15;
/* Execution of a 31 by 16 bits multiplication
*/
-
+ {
+ word msp, lsp;
+ longword L_s2;
+ longword L_temp;
+
+ L_s2 = s1;
+ L_s2 <<= 15;
+#ifndef __GNUC__
msp = SASR( L_z2, 15 );
- lsp = L_z2-((longword)msp<<15); /* gsm_L_sub(L_z2,(msp<<15)); */
+ lsp = L_z2 & 0x7fff; /* gsm_L_sub(L_z2,(msp<<15)); */
L_s2 += GSM_MULT_R( lsp, 32735 );
L_temp = (longword)msp * 32735; /* GSM_L_MULT(msp,32735) >> 1;*/
L_z2 = GSM_L_ADD( L_temp, L_s2 );
-
+ /* above does L_z2 = L_z2 * 0x7fdf/0x8000 + L_s2  (0x7fdf == 32735) */
+#else
+ L_z2 = ((long long)L_z2*32735 + 0x4000)>>15;
+ /* alternate (ansi) version of above line does slightly different rounding:
+ * L_temp = L_z2 >> 9;
+ * L_temp += L_temp >> 5;
+ * L_temp = (++L_temp) >> 1;
+ * L_z2 = L_z2 - L_temp;
+ */
+ L_z2 = GSM_L_ADD(L_z2,L_s2);
+#endif
/* Compute sof[k] with rounding
*/
L_temp = GSM_L_ADD( L_z2, 16384 );
@@ -105,6 +120,7 @@ void Gsm_Preprocess P3((S, s, so),
msp = GSM_MULT_R( mp, -28180 );
mp = SASR( L_temp, 15 );
*so++ = GSM_ADD( mp, msp );
+ }
}
S->z1 = z1;
diff --git a/codecs/gsm/src/rpe.c b/codecs/gsm/src/rpe.c
index 8a6b81fae..6644e3797 100755
--- a/codecs/gsm/src/rpe.c
+++ b/codecs/gsm/src/rpe.c
@@ -18,7 +18,9 @@
*/
/* 4.2.13 */
-
+#ifdef K6OPT
+#include "k6opt.h"
+#else
static void Weighting_filter P2((e, x),
register word * e, /* signal [-5..0.39.44] IN */
word * x /* signal [0..39] OUT */
@@ -110,6 +112,7 @@ static void Weighting_filter P2((e, x),
: (L_result > MAX_WORD ? MAX_WORD : L_result ));
}
}
+#endif /* K6OPT */
/* 4.2.14 */
diff --git a/codecs/gsm/src/short_term.c b/codecs/gsm/src/short_term.c
index 4f5fd7be7..c1921f551 100755
--- a/codecs/gsm/src/short_term.c
+++ b/codecs/gsm/src/short_term.c
@@ -13,7 +13,12 @@
#include "gsm.h"
#include "proto.h"
+#ifdef K6OPT
+#include "k6opt.h"
+#define Short_term_analysis_filtering Short_term_analysis_filteringx
+
+#endif
/*
* SHORT TERM ANALYSIS FILTERING SECTION
*/
@@ -180,9 +185,16 @@ static void LARp_to_rp P1((LARp),
/* 4.2.10 */
-static void Short_term_analysis_filtering P4((S,rp,k_n,s),
- struct gsm_state * S,
- register word * rp, /* [0..7] IN */
+#ifndef Short_term_analysis_filtering
+
+/* SJB Remark:
+ * I tried 2 MMX versions of this function, neither is significantly
+ * faster than the C version which follows. MMX might be useful if
+ * one were processing 2 input streams in parallel.
+ */
+static void Short_term_analysis_filtering P4((u0,rp0,k_n,s),
+ register word * u0,
+ register word * rp0, /* [0..7] IN */
register int k_n, /* k_end - k_start */
register word * s /* [0..n-1] IN/OUT */
)
@@ -194,45 +206,45 @@ static void Short_term_analysis_filtering P4((S,rp,k_n,s),
* coefficient), it is assumed that the computation begins with index
* k_start (for arrays d[..] and s[..]) and stops with index k_end
* (k_start and k_end are defined in 4.2.9.1). This procedure also
- * needs to keep the array u[0..7] in memory for each call.
+ * needs to keep the array u0[0..7] in memory for each call.
*/
{
- register word * u = S->u;
- register int i;
- register word di, zzz, ui, sav, rpi;
- register longword ltmp;
-
- for (; k_n--; s++) {
-
- di = sav = *s;
-
- for (i = 0; i < 8; i++) { /* YYY */
-
- ui = u[i];
- rpi = rp[i];
- u[i] = sav;
-
- zzz = GSM_MULT_R(rpi, di);
- sav = GSM_ADD( ui, zzz);
-
- zzz = GSM_MULT_R(rpi, ui);
- di = GSM_ADD( di, zzz );
+ register word * u_top = u0 + 8;
+ register word * s_top = s + k_n;
+
+ while (s < s_top) {
+ register word *u, *rp ;
+ register longword di, u_out;
+ di = u_out = *s;
+ for (rp=rp0, u=u0; u<u_top;) {
+ register longword ui, rpi;
+ ui = *u;
+ *u++ = u_out;
+ rpi = *rp++;
+ u_out = ui + (((rpi*di)+0x4000)>>15);
+ di = di + (((rpi*ui)+0x4000)>>15);
+ /* make the common case fastest: */
+ if ((u_out == (word)u_out) && (di == (word)di)) continue;
+ /* otherwise do slower fixup (saturation) */
+ if (u_out>MAX_WORD) u_out=MAX_WORD;
+ else if (u_out<MIN_WORD) u_out=MIN_WORD;
+ if (di>MAX_WORD) di=MAX_WORD;
+ else if (di<MIN_WORD) di=MIN_WORD;
}
-
- *s = di;
+ *s++ = di;
}
}
+#endif
#if defined(USE_FLOAT_MUL) && defined(FAST)
-static void Fast_Short_term_analysis_filtering P4((S,rp,k_n,s),
- struct gsm_state * S,
+static void Fast_Short_term_analysis_filtering P4((u,rp,k_n,s),
+ register word * u;
register word * rp, /* [0..7] IN */
register int k_n, /* k_end - k_start */
register word * s /* [0..n-1] IN/OUT */
)
{
- register word * u = S->u;
register int i;
float uf[8],
@@ -262,6 +274,15 @@ static void Fast_Short_term_analysis_filtering P4((S,rp,k_n,s),
}
#endif /* ! (defined (USE_FLOAT_MUL) && defined (FAST)) */
+/*
+ * SJB Remark: modified Short_term_synthesis_filtering() below
+ * for significant (abt 35%) speedup of decompression.
+ * (gcc-2.95, k6 cpu)
+ * Please don't change this without benchmarking decompression
+ * to see that you haven't harmed speed.
+ * This function burns most of CPU time for untoasting.
+ * Unfortunately, didn't see any good way to benefit from mmx.
+ */
static void Short_term_synthesis_filtering P5((S,rrp,k,wt,sr),
struct gsm_state * S,
register word * rrp, /* [0..7] IN */
@@ -272,32 +293,34 @@ static void Short_term_synthesis_filtering P5((S,rrp,k,wt,sr),
{
register word * v = S->v;
register int i;
- register word sri, tmp1, tmp2;
- register longword ltmp; /* for GSM_ADD & GSM_SUB */
+ register longword sri;
while (k--) {
sri = *wt++;
for (i = 8; i--;) {
+ register longword tmp1, tmp2;
/* sri = GSM_SUB( sri, gsm_mult_r( rrp[i], v[i] ) );
*/
tmp1 = rrp[i];
tmp2 = v[i];
- tmp2 = ( tmp1 == MIN_WORD && tmp2 == MIN_WORD
- ? MAX_WORD
- : 0x0FFFF & (( (longword)tmp1 * (longword)tmp2
- + 16384) >> 15)) ;
-
- sri = GSM_SUB( sri, tmp2 );
+ tmp2 = (( tmp1 * tmp2 + 16384) >> 15) ;
+ /* saturation done below */
+ sri -= tmp2;
+ if (sri != (word)sri) {
+ sri = (sri<0)? MIN_WORD:MAX_WORD;
+ }
/* v[i+1] = GSM_ADD( v[i], gsm_mult_r( rrp[i], sri ) );
*/
- tmp1 = ( tmp1 == MIN_WORD && sri == MIN_WORD
- ? MAX_WORD
- : 0x0FFFF & (( (longword)tmp1 * (longword)sri
- + 16384) >> 15)) ;
- v[i+1] = GSM_ADD( v[i], tmp1);
+ tmp1 = (( tmp1 * sri + 16384) >> 15) ;
+ /* saturation done below */
+ tmp1 += v[i];
+ if (tmp1 != (word)tmp1) {
+ tmp1 = (tmp1<0)? MIN_WORD:MAX_WORD;
+ }
+ v[i+1] = tmp1;
}
*sr++ = v[0] = sri;
}
@@ -355,7 +378,7 @@ void Gsm_Short_Term_Analysis_Filter P3((S,LARc,s),
word * LARpp_j_1 = S->LARpp[ S->j ^= 1 ];
word LARp[8];
-
+int i;
#undef FILTER
#if defined(FAST) && defined(USE_FLOAT_MUL)
# define FILTER (* (S->fast \
@@ -370,19 +393,20 @@ void Gsm_Short_Term_Analysis_Filter P3((S,LARc,s),
Coefficients_0_12( LARpp_j_1, LARpp_j, LARp );
LARp_to_rp( LARp );
- FILTER( S, LARp, 13, s);
+ FILTER( S->u, LARp, 13, s);
Coefficients_13_26( LARpp_j_1, LARpp_j, LARp);
LARp_to_rp( LARp );
- FILTER( S, LARp, 14, s + 13);
+ FILTER( S->u, LARp, 14, s + 13);
Coefficients_27_39( LARpp_j_1, LARpp_j, LARp);
LARp_to_rp( LARp );
- FILTER( S, LARp, 13, s + 27);
+ FILTER( S->u, LARp, 13, s + 27);
Coefficients_40_159( LARpp_j, LARp);
LARp_to_rp( LARp );
- FILTER( S, LARp, 120, s + 40);
+ FILTER( S->u, LARp, 120, s + 40);
+
}
void Gsm_Short_Term_Synthesis_Filter P4((S, LARcr, wt, s),