From 4d445915983a15a807fde29c70758407ba70977a Mon Sep 17 00:00:00 2001 From: markster Date: Tue, 24 Apr 2001 23:01:24 +0000 Subject: Version 0.1.8 from FTP git-svn-id: http://svn.digium.com/svn/asterisk/trunk@302 f38db490-d61c-443f-a65b-d21fe96a405b --- codecs/gsm/src/k6opt.h | 84 ++++++ codecs/gsm/src/k6opt.s | 755 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 839 insertions(+) create mode 100755 codecs/gsm/src/k6opt.h create mode 100755 codecs/gsm/src/k6opt.s (limited to 'codecs/gsm') diff --git a/codecs/gsm/src/k6opt.h b/codecs/gsm/src/k6opt.h new file mode 100755 index 000000000..16ea2ac8d --- /dev/null +++ b/codecs/gsm/src/k6opt.h @@ -0,0 +1,84 @@ +/* k6opt.h vector functions optimized for MMX extensions to x86 + * + * Copyright (C) 1999 by Stanley J. Brooks + * + * Any use of this software is permitted provided that this notice is not + * removed and that neither the authors nor the Technische Universitaet Berlin + * are deemed to have made any representations as to the suitability of this + * software for any purpose nor are held responsible for any defects of + * this software. THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE; + * not even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. + * + * Chicago, 03.12.1999 + * Stanley J. Brooks + */ + +extern void Weighting_filter P2((e, x), + const word * e, /* signal [-5..0..39..44] IN: x[0..39] is computed from e[-5..44] */ + word * x /* signal [0..39] OUT */ +) +; + +extern longword k6maxcc P3((wt,dp,Nc_out), /* returns the largest inner product of wt[0..39] with dp[-Nc..-Nc+39] over Nc = 40..120, or 0 if none is positive */ + const word *wt, /* IN: wt[0..39] */ + const word *dp, /* IN: dp[-120..-1] are read */ + word * Nc_out /* OUT: the Nc that gave the maximum (40 if none is positive) */ +) +; +/* + * k6maxmin(p,n,out[]) + * input p[n] is array of shorts (require n>0) + * returns (long) maximum absolute value.. 
+ * if out!=NULL, also returns out[0] the maximum and out[1] the minimum + */ +extern longword k6maxmin P3((p,n,out), + const word *p, + int n, + word *out /* out[0] is max, out[1] is min */ +) +; + +extern longword k6iprod P3((p,q,n), /* returns the sum of p[i]*q[i] over i = 0..n-1, accumulated in 32 bits */ + const word *p, + const word *q, + int n +) +; + +/* + * k6vsraw(p,n,bits) + * input p[n] is array of shorts (require n>0) + * shift/round each to the right by bits>=0 bits, in place: the rounding term (1<<bits)>>1 is added with saturation before the arithmetic shift; nothing is done when bits<=0. + */ +extern void k6vsraw P3((p,n,bits), + const word *p, /* IN/OUT despite the const: the implementation stores through p. NOTE(review): consider dropping the const */ + int n, + int bits +) +; + +/* + * k6vsllw(p,n,bits) + * input p[n] is array of shorts (require n>0) + * shift each to the left by bits>=0 bits, in place; nothing is done when bits<=0. + */ +extern void k6vsllw P3((p,n,bits), + const word *p, /* IN/OUT despite the const: the implementation stores through p. NOTE(review): consider dropping the const */ + int n, + int bits +) +; + +#if 1 /* there isn't any significant speed gain from mmx here: */ +extern void Short_term_analysis_filteringx P4((u0,rp0,k_n,s), + register word * u0, + register word * rp0, /* [0..7] IN */ + register int k_n, /* k_end - k_start */ + register word * s /* [0..n-1] IN/OUT */ +) +; +/* +#define Short_term_analysis_filtering Short_term_analysis_filteringx +*/ +#endif diff --git a/codecs/gsm/src/k6opt.s b/codecs/gsm/src/k6opt.s new file mode 100755 index 000000000..3be5c1853 --- /dev/null +++ b/codecs/gsm/src/k6opt.s @@ -0,0 +1,755 @@ +/* k6opt.s vector functions optimized for MMX extensions to x86 + * + * Copyright (C) 1999 by Stanley J. Brooks + * + * Any use of this software is permitted provided that this notice is not + * removed and that neither the authors nor the Technische Universitaet Berlin + * are deemed to have made any representations as to the suitability of this + * software for any purpose nor are held responsible for any defects of + * this software. THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE; + * not even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. + * + * Chicago, 03.12.1999 + * Stanley J. 
Brooks + */ + + .file "k6opt.s" + .version "01.01" +/* gcc2_compiled.: */ +.section .rodata + .align 4 + .type coefs,@object + .size coefs,24 +coefs: /* the 11 Weighting_filter taps, padded with a final 0 to 12 words so that they load as three quadwords */ + .value -134 + .value -374 + .value 0 + .value 2054 + .value 5741 + .value 8192 + .value 5741 + .value 2054 + .value 0 + .value -374 + .value -134 + .value 0 +.text + .align 4 +/* void Weighting_filter (const short *e, short *x): for k = 0..39, x[k] = the sum of coefs[i]*e[k-5+i] over i = 0..11, plus 0x1000, shifted right by 13 and saturated to 16 bits */ +.globl Weighting_filter + .type Weighting_filter,@function +Weighting_filter: + pushl %ebp + movl %esp,%ebp + pushl %edi + pushl %esi + pushl %ebx + movl 12(%ebp),%edi + movl 8(%ebp),%ebx + addl $-10,%ebx /* ebx = &e[-5] */ + emms + movl $0x1000,%eax; movd %eax,%mm5 /* for rounding */ + movq coefs,%mm1 + movq coefs+8,%mm2 + movq coefs+16,%mm3 + xorl %esi,%esi /* esi = k, the output index */ + .p2align 2 +.L21: + movq (%ebx,%esi,2),%mm0 + pmaddwd %mm1,%mm0 + + movq 8(%ebx,%esi,2),%mm4 + pmaddwd %mm2,%mm4 + paddd %mm4,%mm0 + + movq 16(%ebx,%esi,2),%mm4 /* e[k+3..k+6]; e[k+6] is multiplied by the 0 pad but is still loaded, so the k = 39 pass reads e[45] */ + pmaddwd %mm3,%mm4 + paddd %mm4,%mm0 + + movq %mm0,%mm4 + punpckhdq %mm0,%mm4 /* mm4 has high int32 of mm0 dup'd */ + paddd %mm4,%mm0; + + paddd %mm5,%mm0 /* add for roundoff */ + psrad $13,%mm0 + packssdw %mm0,%mm0 + movd %mm0,%eax /* ax has result */ + movw %ax,(%edi,%esi,2) + incl %esi + cmpl $39,%esi + jle .L21 + emms + popl %ebx + popl %esi + popl %edi + leave + ret +.Lfe1: + .size Weighting_filter,.Lfe1-Weighting_filter + /* ccstep n: mm0 += pmaddwd of the quadwords at n(%edi) and n(%esi); clobbers mm1 and mm2 */ +.macro ccstep n +.if \n + movq \n(%edi),%mm1 + movq \n(%esi),%mm2 +.else + movq (%edi),%mm1 + movq (%esi),%mm2 +.endif + pmaddwd %mm2,%mm1 + paddd %mm1,%mm0 +.endm + + .align 4 +/* long k6maxcc(const short *wt, const short *dp, short *Nc_out): for each lag Nc = 40..120 forms the inner product of wt[0..39] with dp[-Nc..-Nc+39]; returns the largest one (0 if none is positive) and stores its lag in *Nc_out */ +.globl k6maxcc + .type k6maxcc,@function +k6maxcc: + pushl %ebp + movl %esp,%ebp + pushl %edi + pushl %esi + pushl %ebx + emms + movl 8(%ebp),%edi + movl 12(%ebp),%esi + movl $0,%edx /* will be maximum inner-product */ + movl $40,%ebx + movl %ebx,%ecx /* will be index of max inner-product */ + subl $80,%esi /* esi = &dp[-40] */ + .p2align 2 +.L41: + movq (%edi),%mm0 + movq (%esi),%mm2 + pmaddwd %mm2,%mm0 + ccstep 8 + ccstep 16 + ccstep 24 + 
ccstep 32 + ccstep 40 + ccstep 48 + ccstep 56 + ccstep 64 + ccstep 72 + + movq %mm0,%mm1 + punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */ + paddd %mm1,%mm0; + movd %mm0,%eax /* eax has result */ + + cmpl %edx,%eax + jle .L40 + movl %eax,%edx + movl %ebx,%ecx + .p2align 2 +.L40: + subl $2,%esi + incl %ebx + cmpl $120,%ebx + jle .L41 + movl 16(%ebp),%eax + movw %cx,(%eax) + movl %edx,%eax + emms + popl %ebx + popl %esi + popl %edi + leave + ret +.Lfe2: + .size k6maxcc,.Lfe2-k6maxcc + + + .align 4 +/* long k6iprod (const short *p, const short *q, int n): returns the 32-bit sum of p[i]*q[i] over i = 0..n-1. Consumes 16 words per pass, then 4 words per pass, then one pair of words, then one final odd word. */ +.globl k6iprod + .type k6iprod,@function +k6iprod: + pushl %ebp + movl %esp,%ebp + pushl %edi + pushl %esi + emms + pxor %mm0,%mm0 /* mm0 holds two 32-bit partial sums */ + movl 8(%ebp),%esi + movl 12(%ebp),%edi + movl 16(%ebp),%eax + leal -32(%esi,%eax,2),%edx /* edx = top - 32 */ + + cmpl %edx,%esi; ja .L202 + + .p2align 2 +.L201: + ccstep 0 + ccstep 8 + ccstep 16 + ccstep 24 + + addl $32,%esi + addl $32,%edi + cmpl %edx,%esi; jbe .L201 + + .p2align 2 +.L202: + addl $24,%edx /* now edx = top-8 */ + cmpl %edx,%esi; ja .L205 + + .p2align 2 +.L203: + ccstep 0 + + addl $8,%esi + addl $8,%edi + cmpl %edx,%esi; jbe .L203 + + .p2align 2 +.L205: + addl $4,%edx /* now edx = top-4 */ + cmpl %edx,%esi; ja .L207 + + movd (%edi),%mm1 /* movd zero-fills the upper half, so only the two loaded words contribute */ + movd (%esi),%mm2 + pmaddwd %mm2,%mm1 + paddd %mm1,%mm0 + + addl $4,%esi + addl $4,%edi + + .p2align 2 +.L207: + addl $2,%edx /* now edx = top-2 */ + cmpl %edx,%esi; ja .L209 + + movzwl (%edi),%eax /* zero-extend, not sign-extend: pmaddwd also multiplies the upper words, and two sign-extended negatives would add (-1)*(-1) = 1 to the sum */ + movd %eax,%mm1 + movzwl (%esi),%eax + movd %eax,%mm2 + pmaddwd %mm2,%mm1 /* the low words are still multiplied as signed 16-bit values */ + paddd %mm1,%mm0 + + .p2align 2 +.L209: + movq %mm0,%mm1 + punpckhdq %mm0,%mm1 /* mm1 has high int32 of mm0 dup'd */ + paddd %mm1,%mm0; + movd %mm0,%eax /* eax has result */ + + emms + popl %esi + popl %edi + leave + ret +.Lfe3: + .size k6iprod,.Lfe3-k6iprod + + + .align 4 +/* void k6vsraw P3((short *p, int n, int bits) */ +.globl k6vsraw + .type k6vsraw,@function +k6vsraw: + pushl %ebp + movl %esp,%ebp + pushl %esi + movl 8(%ebp),%esi + movl 
16(%ebp),%ecx + andl %ecx,%ecx; jle .L399 /* bits <= 0: return without touching p */ + movl 12(%ebp),%eax + leal -16(%esi,%eax,2),%edx /* edx = top - 16 */ + emms + movd %ecx,%mm3 /* mm3 = shift count */ + movq ones,%mm2 + psllw %mm3,%mm2; psrlw $1,%mm2 /* each word of mm2 = (1 << bits) >> 1, the rounding term */ + cmpl %edx,%esi; ja .L306 + + .p2align 2 +.L302: /* 8 words per iteration */ + movq (%esi),%mm0 + movq 8(%esi),%mm1 + paddsw %mm2,%mm0 + psraw %mm3,%mm0; + paddsw %mm2,%mm1 + psraw %mm3,%mm1; + movq %mm0,(%esi) + movq %mm1,8(%esi) + addl $16,%esi + cmpl %edx,%esi + jbe .L302 + + .p2align 2 +.L306: + addl $12,%edx /* now edx = top-4 */ + cmpl %edx,%esi; ja .L310 + + .p2align 2 +.L308: /* do up to 6 words, two at a time */ + movd (%esi),%mm0 + paddsw %mm2,%mm0 + psraw %mm3,%mm0; + movd %mm0,(%esi) + addl $4,%esi + cmpl %edx,%esi + jbe .L308 + + .p2align 2 +.L310: + addl $2,%edx /* now edx = top-2 */ + cmpl %edx,%esi; ja .L315 + + movzwl (%esi),%eax /* one odd word left */ + movd %eax,%mm0 + paddsw %mm2,%mm0 + psraw %mm3,%mm0; + movd %mm0,%eax + movw %ax,(%esi) + + .p2align 2 +.L315: + emms +.L399: + popl %esi + leave + ret +.Lfe4: + .size k6vsraw,.Lfe4-k6vsraw + + .align 4 +/* void k6vsllw (short *p, int n, int bits): shifts each of p[0..n-1] left by bits, in place; returns without touching p when bits <= 0 */ +.globl k6vsllw + .type k6vsllw,@function +k6vsllw: + pushl %ebp + movl %esp,%ebp + pushl %esi + movl 8(%ebp),%esi + movl 16(%ebp),%ecx + andl %ecx,%ecx; jle .L499 /* bits <= 0: nothing to do */ + movl 12(%ebp),%eax + leal -16(%esi,%eax,2),%edx /* edx = top - 16 */ + emms + movd %ecx,%mm3 /* mm3 = shift count */ + cmpl %edx,%esi; ja .L406 + + .p2align 2 +.L402: /* 8 words per iteration */ + movq (%esi),%mm0 + movq 8(%esi),%mm1 + psllw %mm3,%mm0; + psllw %mm3,%mm1; + movq %mm0,(%esi) + movq %mm1,8(%esi) + addl $16,%esi + cmpl %edx,%esi + jbe .L402 + + .p2align 2 +.L406: + addl $12,%edx /* now edx = top-4 */ + cmpl %edx,%esi; ja .L410 + + .p2align 2 +.L408: /* do up to 6 words, two at a time */ + movd (%esi),%mm0 + psllw %mm3,%mm0; + movd %mm0,(%esi) + addl $4,%esi + cmpl %edx,%esi + jbe .L408 + + .p2align 2 +.L410: + addl $2,%edx /* now edx = top-2 */ + cmpl %edx,%esi; ja .L415 + + movzwl (%esi),%eax /* one odd word left */ + movd %eax,%mm0 + psllw %mm3,%mm0; + movd 
%mm0,%eax + movw %ax,(%esi) + + .p2align 2 +.L415: + emms +.L499: + popl %esi + leave + ret +.Lfe5: + .size k6vsllw,.Lfe5-k6vsllw + + +.section .rodata + .align 4 + .type extremes,@object + .size extremes,8 +extremes: /* seeds for k6maxmin when it has fewer than 4 words: two words of -32768, then two words of 32767 */ + .long 0x80008000 + .long 0x7fff7fff + .type ones,@object + .size ones,8 +ones: /* four words of 1; k6vsraw shifts these to build its rounding term */ + .long 0x00010001 + .long 0x00010001 + +.text + .align 4 +/* long k6maxmin (const short *p, int n, short *out): returns the larger of -min and max over p[0..n-1]; when out is not NULL it also stores the max in out[0] and the min in out[1] */ +.globl k6maxmin + .type k6maxmin,@function +k6maxmin: + pushl %ebp + movl %esp,%ebp + pushl %esi + emms + movl 8(%ebp),%esi + movl 12(%ebp),%eax + leal -8(%esi,%eax,2),%edx /* edx = top - 8 */ + + cmpl %edx,%esi + jbe .L52 + movd extremes,%mm0 /* n < 4: seed the max words with -32768 */ + movd extremes+4,%mm1 /* and the min words with 32767 */ + jmp .L58 + + .p2align 2 +.L52: + movq (%esi),%mm0 /* mm0 will be max's */ + movq %mm0,%mm1 /* mm1 will be min's */ + addl $8,%esi + cmpl %edx,%esi + ja .L56 + + .p2align 2 +.L54: + movq (%esi),%mm2 + + movq %mm2,%mm3 + pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ + movq %mm3,%mm4 + pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */ + pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */ + por %mm3,%mm4 + movq %mm4,%mm0 /* now mm0 is updated max's */ + + movq %mm1,%mm3 + pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ + pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ + pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ + por %mm3,%mm2 + movq %mm2,%mm1 /* now mm1 is updated min's */ + + addl $8,%esi + cmpl %edx,%esi + jbe .L54 + + .p2align 2 +.L56: /* merge down the 4-word max/mins to lower 2 words */ + + movq %mm0,%mm2 + psrlq $32,%mm2 + movq %mm2,%mm3 + pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ + pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */ + pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */ + por %mm3,%mm2 + movq %mm2,%mm0 /* now mm0 is updated max's */ + + movq %mm1,%mm2 + psrlq $32,%mm2 + movq %mm1,%mm3 + pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ + pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ + pandn 
%mm1,%mm3 /* mm3 is mm1 masked to its min's */ + por %mm3,%mm2 + movq %mm2,%mm1 /* now mm1 is updated min's */ + + .p2align 2 +.L58: + addl $4,%edx /* now edx = top-4 */ + cmpl %edx,%esi + ja .L62 + /* here, there are >= 2 words of input remaining */ + movd (%esi),%mm2 /* two more words; the upper half of mm2 is zero and the merge below only looks at the low two words */ + + movq %mm2,%mm3 + pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ + movq %mm3,%mm4 + pand %mm2,%mm3 /* mm3 is mm2 masked to new max's */ + pandn %mm0,%mm4 /* mm4 is mm0 masked to its max's */ + por %mm3,%mm4 + movq %mm4,%mm0 /* now mm0 is updated max's */ + + movq %mm1,%mm3 + pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ + pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ + pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ + por %mm3,%mm2 + movq %mm2,%mm1 /* now mm1 is updated min's */ + + addl $4,%esi + + .p2align 2 +.L62: + /* merge down the 2-word max/mins to 1 word */ + + movq %mm0,%mm2 + psrlq $16,%mm2 + movq %mm2,%mm3 + pcmpgtw %mm0,%mm3 /* mm3 is bitmask for words where mm2 > mm0 */ + pand %mm3,%mm2 /* mm2 is mm2 masked to new max's */ + pandn %mm0,%mm3 /* mm3 is mm0 masked to its max's */ + por %mm3,%mm2 + movd %mm2,%ecx /* cx is max so far */ + + movq %mm1,%mm2 + psrlq $16,%mm2 + movq %mm1,%mm3 + pcmpgtw %mm2,%mm3 /* mm3 is bitmask for words where mm2 < mm1 */ + pand %mm3,%mm2 /* mm2 is mm2 masked to new min's */ + pandn %mm1,%mm3 /* mm3 is mm1 masked to its min's */ + por %mm3,%mm2 + movd %mm2,%eax /* ax is min so far */ + + addl $2,%edx /* now edx = top-2 */ + cmpl %edx,%esi + ja .L65 + + /* here, there is one word of input left */ + cmpw (%esi),%cx + jge .L64 + movw (%esi),%cx + .p2align 2 +.L64: + cmpw (%esi),%ax + jle .L65 + movw (%esi),%ax + + .p2align 2 +.L65: /* (finally!) 
cx is the max, ax the min */ + movswl %cx,%ecx + movswl %ax,%eax + + movl 16(%ebp),%edx /* ptr to output max,min vals */ + andl %edx,%edx; jz .L77 /* out == NULL: skip the two stores */ + movw %cx,(%edx) /* max */ + movw %ax,2(%edx) /* min */ + .p2align 2 +.L77: + /* now calculate max absolute val: eax = the larger of -min and max */ + negl %eax + cmpl %ecx,%eax + jge .L81 + movl %ecx,%eax + .p2align 2 +.L81: + emms + popl %esi + leave + ret +.Lfe6: + .size k6maxmin,.Lfe6-k6maxmin + +/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s); the symbol actually defined below is Short_term_analysis_filteringx */ + .equiv pm_u0,8 + .equiv pm_rp0,12 + .equiv pm_kn,16 + .equiv pm_s,20 + .equiv lv_u_top,-4 + .equiv lv_s_top,-8 + .equiv lv_rp,-40 /* local version of rp0 with each word twice */ + .align 4 +.globl Short_term_analysis_filteringx + .type Short_term_analysis_filteringx,@function +Short_term_analysis_filteringx: + pushl %ebp + movl %esp,%ebp + subl $40,%esp /* room for lv_u_top, lv_s_top and the 32-byte lv_rp */ + pushl %edi + pushl %esi + + movl pm_rp0(%ebp),%esi; + leal lv_rp(%ebp),%edi; + cld + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + emms + movl $0x4000,%eax; + movd %eax,%mm4; + punpckldq %mm4,%mm4 /* (0x00004000,0x00004000) for rounding dword product pairs */ + + movl pm_u0(%ebp),%eax + addl $16,%eax + movl %eax,lv_u_top(%ebp) /* UTOP */ + movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */ + movl pm_kn(%ebp),%eax + leal (%edx,%eax,2),%eax + movl %eax,lv_s_top(%ebp) + cmpl %eax,%edx + jae .L179 /* kn <= 0: no samples to filter */ + .p2align 2 +.L181: + leal lv_rp(%ebp),%esi /* RP */ + movl pm_u0(%ebp),%edi /* U */ + movw (%edx),%ax /* (0,DI) */ + roll $16,%eax + movw (%edx),%ax /* (DI,DI) */ + .p2align 2 +.L185: /* RP is %esi */ + movl %eax,%ecx + movw (%edi),%ax /* (DI,U) */ + movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */ + movw %cx,(%edi) + + movd %eax,%mm2 /* mm2 is (0,0,DI,U) */ + rorl $16,%eax + movd %eax,%mm1 /* mm1 is (0,0,U,DI) */ + + movq %mm1,%mm0 + pmullw %mm3,%mm0 + pmulhw %mm3,%mm1 + punpcklwd %mm1,%mm0 
/* mm0 is (RP*U,RP*DI) */ + paddd %mm4,%mm0 /* mm4 is 0x00004000,0x00004000 */ + psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */ + packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */ + paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */ + movd %mm0,%eax /* (DI,U') */ + + addl $2,%edi + addl $4,%esi + cmpl lv_u_top(%ebp),%edi + jb .L185 + + rorl $16,%eax + movw %ax,(%edx) /* last DI goes to *s */ + addl $2,%edx /* next s */ + cmpl lv_s_top(%ebp),%edx + jb .L181 + .p2align 2 +.L179: + emms + popl %esi + popl %edi + leave + ret +.Lfe7: + .size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx + /* NOTE(review): GNU as stops reading at the .end directive, so the STEP macro and the second Short_term_analysis_filteringx that follow are never assembled. That second copy reuses the labels .L179, .L181, .L185 and .Lfe7 and the symbol name from the copy above, so it cannot simply be re-enabled by deleting .end. */ +.end + +/* 'as' macros seem to be case-insensitive: STEP is defined here but invoked as "step" below */ +.macro STEP n +.if \n + movd \n(%esi),%mm3 /* mm3 is (0,0,RP,RP) */ +.else + movd (%esi),%mm3 /* mm3 is (0,0,RP,RP) */ +.endif + movq %mm5,%mm1; + movd %mm4,%ecx; movw %cx,%ax /* (DI,U) */ + psllq $48,%mm1; psrlq $16,%mm4; por %mm1,%mm4 + psllq $48,%mm0; psrlq $16,%mm5; por %mm0,%mm5 + + movd %eax,%mm2 /* mm2 is (0,0,DI,U) */ + rorl $16,%eax + movd %eax,%mm1 /* mm1 is (0,0,U,DI) */ + + movq %mm1,%mm0 + pmullw %mm3,%mm0 + pmulhw %mm3,%mm1 + punpcklwd %mm1,%mm0 /* mm0 is (RP*U,RP*DI) */ + paddd %mm6,%mm0 /* mm6 is 0x00004000,0x00004000 */ + psrad $15,%mm0 /* (RP*U,RP*DI) adjusted */ + packssdw %mm0,%mm0 /* (*,*,RP*U,RP*DI) adjusted and saturated to word */ + paddsw %mm2,%mm0 /* mm0 is (?,?, DI', U') */ + movd %mm0,%eax /* (DI,U') */ +.endm + +/* void Short_term_analysis_filtering (short *u0, const short *rp0, int kn, short *s): variant that keeps the 8 u's in mm4/mm5 and writes them back to u0 on exit */ + .equiv pm_u0,8 + .equiv pm_rp0,12 + .equiv pm_kn,16 + .equiv pm_s,20 + .equiv lv_rp_top,-4 + .equiv lv_s_top,-8 + .equiv lv_rp,-40 /* local version of rp0 with each word twice */ + .align 4 +.globl Short_term_analysis_filteringx + .type Short_term_analysis_filteringx,@function +Short_term_analysis_filteringx: + pushl %ebp + movl %esp,%ebp + subl $56,%esp + pushl %edi + pushl %esi + pushl %ebx + + movl pm_rp0(%ebp),%esi; + leal lv_rp(%ebp),%edi; + cld + lodsw; stosw; 
stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + lodsw; stosw; stosw + movl %edi,lv_rp_top(%ebp) /* edi is now one past the 16 duplicated rp words */ + emms + + movl $0x4000,%eax; + movd %eax,%mm6; + punpckldq %mm6,%mm6 /* (0x00004000,0x00004000) for rounding dword product pairs */ + + movl pm_u0(%ebp),%ebx + movq (%ebx),%mm4; movq 8(%ebx),%mm5 /* the 8 u's */ + movl pm_s(%ebp),%edx /* edx is local s ptr throughout below */ + movl pm_kn(%ebp),%eax + leal (%edx,%eax,2),%eax + movl %eax,lv_s_top(%ebp) + cmpl %eax,%edx + jae .L179 + .p2align 2 +.L181: + leal lv_rp(%ebp),%esi /* RP */ + movw (%edx),%ax /* (0,DI) */ + roll $16,%eax + movw (%edx),%ax /* (DI,DI) */ + movd %eax,%mm0 + .p2align 2 +.L185: /* RP is %esi; four steps per pass, so two passes cover the 8 coefficients */ + step 0 + step 4 + step 8 + step 12 +/* + step 16 + step 20 + step 24 + step 28 +*/ + addl $16,%esi + cmpl lv_rp_top(%ebp),%esi + jb .L185 + + rorl $16,%eax + movw %ax,(%edx) /* last DI goes to *s */ + addl $2,%edx /* next s */ + cmpl lv_s_top(%ebp),%edx + jb .L181 +.L179: + movq %mm4,(%ebx); movq %mm5,8(%ebx) /* the 8 u's */ + emms + popl %ebx + popl %esi + popl %edi + leave + ret +.Lfe7: + .size Short_term_analysis_filteringx,.Lfe7-Short_term_analysis_filteringx + .ident "GCC: (GNU) 2.95.2 19991109 (Debian GNU/Linux)" -- cgit v1.2.3