/* Source path: codecs/mp3/src/x86intel.c */
/* Blob: d9f0e612543a3b6d94912b04f5ef0ede5919f3a5 */
/* *************************************************** */
/* ************ DO NOT EDIT THIS FILE!!!! ************ */
/* *************************************************** */
/* This file was automatically generated by gas2intel. */
/* Edit the original gas version instead. */


/*	FreeAmp - The Free MP3 Player */

/*	Based on MP3 decoder originally Copyright (C) 1995-1997 */
/*	Xing Technology Corp.  http://www.xingtech.com */

/*	Copyright (C) 1999 Mark H. Weaver <mhw@netris.org> */

/*	This program is free software; you can redistribute it and/or modify */
/*	it under the terms of the GNU General Public License as published by */
/*	the Free Software Foundation; either version 2 of the License, or */
/*	(at your option) any later version. */

/*	This program is distributed in the hope that it will be useful, */
/*	but WITHOUT ANY WARRANTY; without even the implied warranty of */
/*	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the */
/*	GNU General Public License for more details. */

/*	You should have received a copy of the GNU General Public License */
/*	along with this program; if not, write to the Free Software */
/*	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

/*	$Id$ */
/*	Generated from Id: x86gas.s,v 1.9 1999/03/05 08:58:18 mhw Exp $ */


/* Coefficient table read by window_dual(): that routine walks 16*16 + 8 = 264 */
/* entries forwards, then steps back and re-reads entries in descending order. */
/* The table itself is defined elsewhere. */
extern float wincoef[264];
/* Coefficient table read by asm_fdct32(): its forward stages consume */
/* 16 + 8 + 4 + 2 + 1 = 31 entries.  The table itself is defined elsewhere. */
extern float coef32[31];

/* window_dual: produce 32 saturated 16-bit samples from vbuf using wincoef. */
/* */
/*   vbuf    float buffer; every index used is wrapped with "& 511" after */
/*           stepping by 64, so it is treated as a 512-entry circular buffer. */
/*   vb_ptr  starting index into vbuf.  The very first access uses */
/*           vb_ptr + 16 without masking -- presumably callers keep vb_ptr */
/*           small enough for that; TODO confirm against the caller. */
/*   pcm     output pointer.  One 16-bit sample is stored per step and the */
/*           pointer then advances by 4 bytes, i.e. every other short is */
/*           written (presumably interleaved two-channel output, hence */
/*           "dual" -- confirm against the caller). */
/* */
/* Three phases, 16 + 1 + 15 = 32 samples in total: */
/*   First 16:  16 taps each; vbuf[si] products are added and vbuf[bx] */
/*              products are subtracted while the coef pointer advances. */
/*   Special:   8 taps, all from vbuf[bx], all added. */
/*   Last 15:   16 taps each, all added, while the coef pointer walks */
/*              backwards through wincoef. */
/* */
/* Each sum is converted to an integer with fistp (which uses the current */
/* x87 rounding mode) and clamped to [-32768, 32767]: */
/*   ((x >> 15) + 1) >> 1 is zero exactly when x fits in 16 bits; otherwise */
/*   a further >> 16 leaves 0 (too large) or -1 (too small), and 32767 XOR */
/*   that value gives 32767 or -32768 respectively. */
/* */
/* Register roles inside the asm block: */
/*   edi = vbuf, esi = si, ebx = bx, ecx = coef pointer, */
/*   ebp = 511 index mask (temporarily reused to hold the sample), */
/*   dh = outer counter i, dl = inner counter j, eax = scratch. */
/* The parameters are loaded into registers before ebp is pushed and esp is */
/* moved; after that point only registers, [esp+L_*] slots and the global */
/* wincoef are referenced.  ebp is saved and restored by hand. */

/* Offsets (from esp) of the two 4-byte locals reserved by "sub esp,8". */
#define L_tmp 0	/* integer result of fistp */
#define L_pcm 4	/* current output pointer */
void window_dual(float *vbuf, int vb_ptr, short *pcm)
{
__asm {

	mov esi,vb_ptr
	mov edi,vbuf
	mov ecx,pcm
	push ebp
	sub esp,8
	mov DWORD PTR [esp+L_pcm],ecx

	mov ebp,511		; ebp = 511
	lea ecx,wincoef	; coef = wincoef
	add esi,16		; si = vb_ptr + 16
	mov ebx,esi
	add ebx,32
	and ebx,ebp		; bx = (si + 32) & 511

; First 16
	mov dh,16		; i = 16
	align 4
FirstOuter:
	fldz 			; sum = 0.0
	mov dl,2		; j = 2
	align 4
FirstInner:
; REPEAT 4		; Unrolled loop
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	add ecx,4		; Advance coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	add ecx,4		; Advance coef pointer
	and ebx,ebp		; bx &= 511
	fsubp st(1),st	; Subtract from sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	add ecx,4		; Advance coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	add ecx,4		; Advance coef pointer
	and ebx,ebp		; bx &= 511
	fsubp st(1),st	; Subtract from sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	add ecx,4		; Advance coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	add ecx,4		; Advance coef pointer
	and ebx,ebp		; bx &= 511
	fsubp st(1),st	; Subtract from sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	add ecx,4		; Advance coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	add ecx,4		; Advance coef pointer
	and ebx,ebp		; bx &= 511
	fsubp st(1),st	; Subtract from sum
;--
; END REPEAT

	dec dl		; --j
	jg FirstInner		; Jump back if j > 0

	/* 8 steps of 64 wrapped si and bx back to their starting values; */
	/* now move si up and bx down by one for the next output sample. */
	fistp DWORD PTR [esp+L_tmp]	; tmp = (long) round (sum)
	inc esi		; si++
	mov eax,DWORD PTR [esp+L_tmp]
	dec ebx		; bx--
	mov ebp,eax
	sar eax,15
	inc eax
	sar eax,1
	jz FirstInRange	; Jump if in range

	sar eax,16		; Out of range
	mov ebp,32767
	xor ebp,eax
FirstInRange:
	mov eax,DWORD PTR [esp+L_pcm]
	mov WORD PTR [eax],bp		; Store sample in *pcm
	add eax,4		; Increment pcm
	mov ebp,511		; Reload ebp with 511
	mov DWORD PTR [esp+L_pcm],eax

	dec dh		; --i
	jg FirstOuter		; Jump back if i > 0


; Special case
	fldz 			; sum = 0.0
	mov dl,4		; j = 4
	align 4
SpecialInner:
; REPEAT 2		; Unrolled loop
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	add ecx,4		; Increment coef pointer
	and ebx,ebp		; bx &= 511
	faddp st(1),st	; Add to sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	add ecx,4		; Increment coef pointer
	and ebx,ebp		; bx &= 511
	faddp st(1),st	; Add to sum
;--
; END REPEAT

	dec dl		; --j
	jg SpecialInner	; Jump back if j > 0

	/* From here on si moves down and bx moves up between samples. */
	fistp DWORD PTR [esp+L_tmp]	; tmp = (long) round (sum)
	dec esi		; si--
	mov eax,DWORD PTR [esp+L_tmp]
	inc ebx		; bx++
	mov ebp,eax
	sar eax,15
	inc eax
	sar eax,1
	jz SpecialInRange	; Jump if within range

	sar eax,16		; Out of range
	mov ebp,32767
	xor ebp,eax
SpecialInRange:
	/* ecx is 264 floats past wincoef here; stepping back 36 bytes */
	/* (9 floats) leaves it at entry 255 for the descending pass. */
	mov eax,DWORD PTR [esp+L_pcm]
	sub ecx,36		; Readjust coef pointer for last round
	mov WORD PTR [eax],bp		; Store sample in *pcm
	add eax,4		; Increment pcm
	mov ebp,511		; Reload ebp with 511
	mov DWORD PTR [esp+L_pcm],eax


; Last 15
	mov dh,15		; i = 15
	align 4
LastOuter:
	fldz 			; sum = 0.0
	mov dl,2		; j = 2
	align 4
LastInner:
; REPEAT 4		; Unrolled loop
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	sub ecx,4		; Back up coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	sub ecx,4		; Back up coef pointer
	and ebx,ebp		; bx &= 511
	faddp st(1),st	; Add to sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	sub ecx,4		; Back up coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	sub ecx,4		; Back up coef pointer
	and ebx,ebp		; bx &= 511
	faddp st(1),st	; Add to sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	sub ecx,4		; Back up coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	sub ecx,4		; Back up coef pointer
	and ebx,ebp		; bx &= 511
	faddp st(1),st	; Add to sum
;--
	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+esi*4]	; Multiply by vbuf[si]
	add esi,64		; si += 64
	sub ecx,4		; Back up coef pointer
	and esi,ebp		; si &= 511
	faddp st(1),st	; Add to sum

	fld DWORD PTR [ecx]		; Push *coef
	fmul DWORD PTR [edi+ebx*4]	; Multiply by vbuf[bx]
	add ebx,64		; bx += 64
	sub ecx,4		; Back up coef pointer
	and ebx,ebp		; bx &= 511
	faddp st(1),st	; Add to sum
;--
; END REPEAT

	dec dl		; --j
	jg LastInner		; Jump back if j > 0

	fistp DWORD PTR [esp+L_tmp]	; tmp = (long) round (sum)
	dec esi		; si--
	mov eax,DWORD PTR [esp+L_tmp]
	inc ebx		; bx++
	mov ebp,eax
	sar eax,15
	inc eax
	sar eax,1
	jz LastInRange		; Jump if in range

	sar eax,16		; Out of range
	mov ebp,32767
	xor ebp,eax
LastInRange:
	mov eax,DWORD PTR [esp+L_pcm]
	mov WORD PTR [eax],bp		; Store sample in *pcm
	add eax,4		; Increment pcm
	mov ebp,511		; Reload ebp with 511
	mov DWORD PTR [esp+L_pcm],eax

	dec dh		; --i
	jg LastOuter		; Jump back if i > 0

	/* Release the two locals and restore the caller's ebp. */
	add esp,8
	pop ebp

  }
}

/*--------------------------------------------------------------------------- */

#define L_mi 0
#define L_m 4
#define L_dummy 8
#define L_in 12
#define L_out 16
#define L_buf 20	/* Temporary buffer */
#define L_locals 148	/* Bytes used for locals */
void asm_fdct32(float in[], float out[])
{
__asm {

	mov edi,in		; edi = x
	mov esi,out		; esi = f
	push ebp
	sub esp,L_locals

	lea ecx,coef32-128	; coef = coef32 - (32 * 4)
	mov DWORD PTR [esp+4],1		; m = 1
	mov ebp,16		; n = 32 / 2

	lea ebx,DWORD PTR [esp+L_buf]
	mov DWORD PTR [esp+L_out],ebx	; From now on, use temp buf instead of orig x
	jmp ForwardLoopStart

	align 4
ForwardOuterLoop:
	mov edi,DWORD PTR [esp+L_in]	; edi = x
	mov esi,DWORD PTR [esp+L_out]	; esi = f
	mov DWORD PTR [esp+L_out],edi	; Exchange mem versions of f/x for next iter
ForwardLoopStart:
	mov DWORD PTR [esp+L_in],esi
	mov ebx,DWORD PTR [esp+L_m]	; ebx = m (temporarily)
	mov DWORD PTR [esp+L_mi],ebx	; mi = m
	sal ebx,1		; Double m for next iter
	lea ecx,DWORD PTR [ecx+ebp*8]	; coef += n * 8
	mov DWORD PTR [esp+L_m],ebx	; Store doubled m
	lea ebx,DWORD PTR [esi+ebp*4]	; ebx = f2 = f + n * 4
	sal ebp,3		; n *= 8

	align 4
ForwardMiddleLoop:
	mov eax,ebp		; q = n
	xor edx,edx		; p = 0
	test eax,8
	jnz ForwardInnerLoop1

	align 4
ForwardInnerLoop:
	sub eax,4		; q -= 4
	fld DWORD PTR [edi+eax]	; push x[q]
	fld DWORD PTR [edi+edx]	; push x[p]
	fld st(1)		; Duplicate top two stack entries
	fld st(1)
	faddp st(1),st
	fstp DWORD PTR [esi+edx]	; f[p] = x[p] + x[q]
	fsubrp st(1),st
	fmul DWORD PTR [ecx+edx]
	fstp DWORD PTR [ebx+edx]	; f2[p] = coef[p] * (x[p] - x[q])
	add edx,4		; p += 4

ForwardInnerLoop1:
	sub eax,4		; q -= 4
	fld DWORD PTR [edi+eax]	; push x[q]
	fld DWORD PTR [edi+edx]	; push x[p]
	fld st(1)		; Duplicate top two stack entries
	fld st(1)
	faddp st(1),st
	fstp DWORD PTR [esi+edx]	; f[p] = x[p] + x[q]
	fsubrp st(1),st
	fmul DWORD PTR [ecx+edx]
	fstp DWORD PTR [ebx+edx]	; f2[p] = coef[p] * (x[p] - x[q])
	add edx,4		; p += 4

	cmp edx,eax
	jb ForwardInnerLoop	; Jump back if (p < q)

	add esi,ebp		; f += n
	add ebx,ebp		; f2 += n
	add edi,ebp		; x += n
	dec DWORD PTR [esp+L_mi]		; mi--
	jg ForwardMiddleLoop	; Jump back if mi > 0

	sar ebp,4		; n /= 16
	jg ForwardOuterLoop	; Jump back if n > 0


; Setup back loop
	mov ebx,8		; ebx = m = 8 (temporarily)
	mov ebp,ebx		; n = 4 * 2

	align 4
BackOuterLoop:
	mov esi,DWORD PTR [esp+L_out]	; esi = f
	mov DWORD PTR [esp+L_mi],ebx	; mi = m
	mov edi,DWORD PTR [esp+L_in]	; edi = x
	mov DWORD PTR [esp+L_m],ebx	; Store m
	mov DWORD PTR [esp+L_in],esi	; Exchange mem versions of f/x for next iter
	mov ebx,edi
	mov DWORD PTR [esp+L_out],edi
	sub ebx,ebp		; ebx = x2 = x - n
	sal ebp,1		; n *= 2

	align 4
BackMiddleLoop:
	mov ecx,DWORD PTR [ebx+ebp-4]
	mov DWORD PTR [esi+ebp-8],ecx	; f[n - 8] = x2[n - 4]
	fld DWORD PTR [edi+ebp-4]	; push x[n - 4]
	fst DWORD PTR [esi+ebp-4]	; f[n - 4] = x[n - 4], without popping
	lea eax,DWORD PTR [ebp-8]	; q = n - 8
	lea edx,DWORD PTR [ebp-16]	; p = n - 16

	align 4
BackInnerLoop:
	mov ecx,DWORD PTR [ebx+eax]
	mov DWORD PTR [esi+edx],ecx	; f[p] = x2[q]
	fld DWORD PTR [edi+eax]	; push x[q]
	fadd st(1),st
	fxch 
	fstp DWORD PTR [esi+edx+4]	; f[p + 4] = x[q] + x[q + 4]
	sub eax,4		; q -= 4
	sub edx,8		; p -= 8
	jge BackInnerLoop	; Jump back if p >= 0

	fstp DWORD PTR [esp+L_dummy]	; Pop (XXX is there a better way to do this?)
	add esi,ebp		; f += n
	add ebx,ebp		; x2 += n
	add edi,ebp		; x += n
	dec DWORD PTR [esp+L_mi]		; mi--
	jg BackMiddleLoop	; Jump back if mi > 0

	mov ebx,DWORD PTR [esp+L_m]	; ebx = m (temporarily)
	sar ebx,1		; Halve m for next iter
	jg BackOuterLoop	; Jump back if m > 0

	add esp,L_locals
	pop ebp

  }
}