File: imisc.s

package info (click to toggle)
allegro4.4 2%3A4.4.3.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, sid
  • size: 20,744 kB
  • sloc: ansic: 164,458; asm: 17,620; cpp: 3,848; javascript: 3,053; objc: 1,687; sh: 1,107; python: 676; pascal: 179; makefile: 57; perl: 29; lisp: 1
file content (603 lines) | stat: -rw-r--r-- 14,646 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
/*         ______   ___    ___ 
 *        /\  _  \ /\_ \  /\_ \ 
 *        \ \ \L\ \\//\ \ \//\ \      __     __   _ __   ___ 
 *         \ \  __ \ \ \ \  \ \ \   /'__`\ /'_ `\/\`'__\/ __`\
 *          \ \ \/\ \ \_\ \_ \_\ \_/\  __//\ \L\ \ \ \//\ \L\ \
 *           \ \_\ \_\/\____\/\____\ \____\ \____ \ \_\\ \____/
 *            \/_/\/_/\/____/\/____/\/____/\/___L\ \/_/ \/___/
 *                                           /\____/
 *                                           \_/__/
 *
 *      Math routines, compiled sprite wrapper, etc.
 *
 *      By Shawn Hargreaves.
 *
 *      fixsqrt() and fixhypot() routines by David Kuhling.
 *
 *      See readme.txt for copyright information.
 */


#include "asmdefs.inc"

.text



/* empty bank switch routine for the standard VGA mode and memory bitmaps.
 *  No bank is switched: the routine only loads entry number %eax of the
 *  table found at offset BMP_LINE from %edx and returns it in %eax. Other
 *  routines in this file index that same table with a y coordinate to get
 *  the start address of a bitmap line.
 */
FUNC(_stub_bank_switch)
   movl BMP_LINE(%edx, %eax, 4), %eax
   ret

/* returns immediately without touching any register; presumably the
 * matching no-op "unbank" hook for bitmaps that need no bank switching
 * (confirm against the code that installs it)
 */
FUNC(_stub_unbank_switch)
   ret

/* also just returns. NOTE(review): the name suggests this symbol marks the
 * end of the stub routines above, so its position in the file may matter --
 * confirm before moving it
 */
FUNC(_stub_bank_switch_end)
   ret




/* void apply_matrix_f(MATRIX_f *m, float x, float y, float z, 
 *                                  float *xout, float *yout, float *zout);
 *  Floating point vector by matrix multiplication routine.
 *
 *  Computes, in single precision operands on the x87 stack:
 *     *xout = v00*x + v01*y + v02*z + t0
 *     *yout = v10*x + v11*y + v12*z + t1
 *     *zout = v20*x + v21*y + v22*z + t2
 *  where vNN and tN are the matrix fields at offsets M_Vnn and M_Tn.
 *
 *  The three rows are interleaved (with fxch) so that independent
 *  multiplies and adds overlap; the instruction order is deliberate.
 *  The trailing comments show the x87 stack, top of stack first, using
 *     a=v00*x  b=v01*y  c=v02*z     xo = a+b+c+t0
 *     d=v10*x  e=v11*y  f=v12*z     yo = d+e+f+t1
 *     g=v20*x  h=v21*y  i=v22*z     zo = g+h+i+t2
 *  Nine values are pushed and nine are popped, so the x87 stack depth on
 *  exit equals the depth on entry. %ebx is saved and restored; %eax, %ecx
 *  and %edx are overwritten.
 */
FUNC(apply_matrix_f)

   #define MTX    ARG1
   #define X      ARG2
   #define Y      ARG3
   #define Z      ARG4
   #define XOUT   ARG5
   #define YOUT   ARG6
   #define ZOUT   ARG7

   pushl %ebp
   movl %esp, %ebp
   pushl %ebx

   movl MTX, %edx                /* edx = matrix pointer */
   movl XOUT, %eax               /* eax = xout */
   movl YOUT, %ebx               /* ebx = yout */
   movl ZOUT, %ecx               /* ecx = zout */

   flds  M_V00(%edx) 
   fmuls X                       /* a */
   flds  M_V01(%edx) 
   fmuls Y                       /* b  a */
   flds  M_V02(%edx) 
   fmuls Z                       /* c  b  a */
   fxch  %st(2)                  /* a  b  c */

   faddp %st(0), %st(1)          /* a+b  c */
   flds  M_V10(%edx)             /* v10  a+b  c */
   fxch  %st(2)                  /* c  a+b  v10 */

   faddp %st(0), %st(1)          /* a+b+c  v10 */
   fxch  %st(1)                  /* v10  a+b+c */

   fmuls X                       /* d  a+b+c */
   fxch  %st(1)                  /* a+b+c  d */

   fadds M_T0(%edx)              /* xo  d */
   flds  M_V11(%edx)             /* v11  xo  d */

   fmuls Y                       /* e  xo  d */
   flds  M_V12(%edx)             /* v12  e  xo  d */

   fmuls Z                       /* f  e  xo  d */
   fxch  %st(1)                  /* e  f  xo  d */

   faddp %st(0), %st(3)          /* f  xo  d+e */
   flds  M_V20(%edx)             /* v20  f  xo  d+e */
   fxch  %st(3)                  /* d+e  f  xo  v20 */

   faddp %st(0), %st(1)          /* d+e+f  xo  v20 */
   fxch  %st(2)                  /* v20  xo  d+e+f */

   fmuls X                       /* g  xo  d+e+f */
   fxch  %st(2)                  /* d+e+f  xo  g */

   fadds M_T1(%edx)              /* yo  xo  g */
   flds  M_V21(%edx)             /* v21  yo  xo  g */

   fmuls Y                       /* h  yo  xo  g */
   flds  M_V22(%edx)             /* v22  h  yo  xo  g */

   fmuls Z                       /* i  h  yo  xo  g */
   fxch  %st(4)                  /* g  h  yo  xo  i */

   faddp %st(0), %st(1)          /* g+h  yo  xo  i */
   fxch  %st(1)                  /* yo  g+h  xo  i */
   fstps (%ebx)                  /* *yout = yo;  g+h  xo  i */

   faddp %st(0), %st(2)          /* xo  g+h+i */
   fstps (%eax)                  /* *xout = xo;  g+h+i */

   fadds M_T2(%edx)              /* zo */
   fstps (%ecx)                  /* *zout = zo;  stack back at entry depth */

   popl %ebx
   movl %ebp, %esp
   popl %ebp
   ret                              /* end of apply_matrix_f() */




#undef X
#undef Y




/* void draw_compiled_sprite(BITMAP *bmp, COMPILED_SPRITE *sprite, int x, y)
 *  Draws a compiled sprite onto the specified bitmap at the specified
 *  position, _ignoring_ clipping. The bitmap must be in the same format
 *  that the sprite was compiled for.
 *
 *  This wrapper does no drawing itself: it sets up registers and calls the
 *  drawer code whose address is stored in the sprite at CMP_DRAW. When
 *  CMP_PLANAR is nonzero the drawer table is walked four times (one call
 *  per plane, entries 8 bytes apart); otherwise a single drawer is called.
 *  %ebx, %esi and %edi are saved on entry and restored on exit.
 */
FUNC(draw_compiled_sprite)

   #define BMP       ARG1
   #define SPRITE    ARG2
   #define X         ARG3
   #define Y         ARG4

   pushl %ebp
   movl %esp, %ebp
   subl $4, %esp                 /* 1 local variable: */

   #define PLANE     -4(%ebp)

   pushl %ebx
   pushl %esi
   pushl %edi

   movl BMP, %edx                /* bitmap pointer in edx */
 #ifdef USE_FS
   movw BMP_SEG(%edx), %fs       /* load segment selector into fs */
 #endif

   movl SPRITE, %ebx
   cmpw $0, CMP_PLANAR(%ebx)     /* is the sprite planar or linear? */
   je linear_compiled_sprite

   /* ---- planar sprite: one drawer call per plane ---- */

   movl X, %ecx                  /* get write plane mask in bx */
   andb $3, %cl                  /* cl = x & 3 */
   movl $0x1102, %ebx            /* bl = 0x02, bh = 0x11 */
   shlb %cl, %bh                 /* bh = 0x11 << (x & 3): the mask bit is
				  * duplicated in both nibbles so that the
				  * rolb below wraps around and sets carry */

   movl BMP_LINE+4(%edx), %ecx   /* get line width in ecx */
   subl BMP_LINE(%edx), %ecx     /* = address of line 1 - address of line 0 */

   movl X, %esi                  /* get destination address in edi */
   shrl $2, %esi                 /* x / 4: four pixels share one address */
   movl Y, %edi
   movl BMP_LINE(%edx, %edi, 4), %edi
   addl %esi, %edi

   movl $0x3C4, %edx             /* port address in dx */

   movl $0, PLANE                /* zero the plane counter */

   _align_
planar_compiled_sprite_loop:
   movl %ebx, %eax               /* set the write plane */
   outw %ax, %dx                 /* al = 0x02, ah = plane mask, to port 0x3C4 */

   movl %edi, %eax               /* get address in eax */

   movl PLANE, %esi              /* get the drawer function in esi */
   shll $3, %esi                 /* per-plane entries are 8 bytes apart */
   addl SPRITE, %esi
   movl CMP_DRAW(%esi), %esi

   /* at this point: eax = destination address, ecx = line width,
    * edx = 0x3C4. The loop relies on the drawer leaving ebx, ecx, edx
    * and edi intact, since they are reused on the next iteration.
    */
   call *%esi                    /* and draw the plane! */

   incl PLANE                    /* next plane */
   cmpl $4, PLANE
   jge draw_compiled_sprite_done /* all four planes done */

   rolb $1, %bh                  /* advance the plane position */
   adcl $0, %edi                 /* carry is set only when the mask wrapped
				  * from plane 3 to plane 0: step to the
				  * next address */
   jmp planar_compiled_sprite_loop

   /* ---- linear sprite: a single drawer call ---- */

   _align_
linear_compiled_sprite:
   movl X, %eax
   movzwl CMP_COLOR_DEPTH(%ebx), %ecx
   cmpl $24, %ecx
   jne normal_linear_compiled_sprite
   leal (%eax, %eax, 2), %eax    /* 24 bpp: eax = x * 3 */
   jmp end24bpp_linear_compiled_sprite

   _align_
normal_linear_compiled_sprite:
   addl $7, %ecx                 /* cl = (depth + 7) >> 4, which gives */
   shrl $4, %ecx                 /* 0 for 8, 1 for 15 and 16, 2 for 32 */
   shll %cl, %eax                /* eax = x << cl */

end24bpp_linear_compiled_sprite:
   movl %eax, %ecx               /* scaled x coordinate in ecx */
   movl Y, %edi                  /* y coordinate in edi */
   movl BMP_WBANK(%edx), %esi    /* bank switch function in esi */
   movl CMP_DRAW(%ebx), %ebx     /* drawer function in ebx */

   /* edx still holds the bitmap pointer loaded at the top */
   call *%ebx                    /* and draw it! */

draw_compiled_sprite_done:
   movl BMP, %edx                /* reload: edx was the port number in the
				  * planar path */
   UNWRITE_BANK()

   popl %edi
   popl %esi
   popl %ebx
   movl %ebp, %esp               /* also discards the PLANE local */
   popl %ebp
   ret                           /* end of draw_compiled_sprite() */




/* void _do_stretch(BITMAP *source, BITMAP *dest, void *drawer, 
 *                  int sx, fixed sy, fixed syd, int dx, int dy, int dh, 
 *                  int color_depth);
 *
 *  Helper function for stretch_blit(), calls the compiled line drawer.
 *
 *  For each of the dh destination lines this routine puts the source line
 *  address (+ sx) in %esi and the destination line address (+ dx) in %edi,
 *  calls the drawer, then advances sy by syd (16.16 fixed point, the line
 *  number is sy >> 16) and dy by one. %es holds the destination segment
 *  while the drawer runs. sx and dx are converted in place from pixels to
 *  bytes for 15/16/24/32 bit modes. An unrecognised color depth on a
 *  non-planar destination draws nothing.
 *
 *  Every loop is bottom-tested, so the drawer is called at least once
 *  even if dh <= 0. The argument slots SX, DX, SY, DY and DH on the stack
 *  are used as working variables and are modified.
 */
FUNC(_do_stretch)

   #define SOURCE       ARG1
   #define DEST         ARG2
   #define DRAWER       ARG3
   #define SX           ARG4
   #define SY           ARG5
   #define SYD          ARG6
   #define DX           ARG7
   #define DY           ARG8
   #define DH           ARG9
   #define COL_DEPTH    ARG10

   pushl %ebp
   movl %esp, %ebp
   pushl %edi
   pushl %esi
   pushl %ebx
   pushw %es                     /* 2-byte push; popped first at stretch_done */

   movl DEST, %edx
   movw BMP_SEG(%edx), %es       /* load destination segment */
   movl DRAWER, %ebx             /* the actual line drawer */

   movl BMP_ID(%edx), %eax       /* planar destination takes its own loop */
   testl $BMP_ID_PLANAR, %eax
   jnz stretch_modex_loop
   movl COL_DEPTH, %eax          /* otherwise dispatch on the color depth */
   cmpl $8, %eax
   je stretch_normal_loop
   cmpl $15, %eax
   je stretch_bpp_16
   cmpl $16, %eax
   je stretch_bpp_16
   cmpl $24, %eax
   je stretch_bpp_24
   cmpl $32, %eax
   je stretch_bpp_32
   jmp stretch_done              /* unknown depth: nothing is drawn */


   /* special loop for 24 bit */
   _align_
stretch_bpp_24:
   movl SX, %eax                 /* SX *= 3 */
   leal (%eax, %eax, 2), %eax
   movl %eax, SX
   movl DX, %eax                 /* DX *= 3 */
   leal (%eax, %eax, 2), %eax
   movl %eax, DX

   _align_
stretch_loop24:
   movl SOURCE, %edx             /* get source line (in esi) and bank */
   movl SY, %eax
   shrl $16, %eax                /* integer part of the fixed point sy */
   READ_BANK()                   /* as used here: line number in, address
				  * out, both in eax */
   movl %eax, %esi
   addl SX, %esi

   movl DEST, %edx               /* get dest line (in edi) and bank */
   movl DY, %eax
   WRITE_BANK()
   movl %eax, %edi
   addl DX, %edi
   pushl %edx                    /* this loop saves edx and ebx itself */
   pushl %ebx                    /* because the 24 bit drawer clobbers them */

   call *%ebx                    /* draw (clobbers eax, ebx, ecx, edx) */

   popl %ebx
   popl %edx
   movl SYD, %eax                /* next line in source bitmap */
   addl %eax, SY
   incl DY                       /* next line in dest bitmap */
   decl DH
   jg stretch_loop24             /* loop while DH is still > 0 */
   jmp stretch_done


   /* special loop for mode-X */
   _align_
stretch_modex_loop:
   /* line addresses are read straight from the BMP_LINE tables here,
    * without the READ_BANK()/WRITE_BANK() macros used by the other loops
    */
   movl SOURCE, %edx             /* get source line (in esi) and bank */
   movl SY, %eax
   shrl $16, %eax
   movl BMP_LINE(%edx, %eax, 4), %esi
   addl SX, %esi

   movl DEST, %edx               /* get dest line (in edi) and bank */
   movl DY, %eax
   movl BMP_LINE(%edx, %eax, 4), %edi
   addl DX, %edi

   call *%ebx                    /* draw the line (clobbers eax and ecx) */

   movl SYD, %eax                /* next line in source bitmap */
   addl %eax, SY
   incl DY                       /* next line in dest bitmap */
   decl DH
   jg stretch_modex_loop
   jmp stretch_done


   _align_
stretch_bpp_16:
   shll $1, SX                   /* 2 bytes per pixel */
   shll $1, DX
   jmp stretch_normal_loop

   _align_
stretch_bpp_32:
   shll $2, SX                   /* 4 bytes per pixel */
   shll $2, DX
				 /* falls through into the normal loop */


   /* normal stretching loop */
   _align_
stretch_normal_loop:
   movl SOURCE, %edx             /* get source line (in esi) and bank */
   movl SY, %eax
   shrl $16, %eax
   READ_BANK()
   movl %eax, %esi
   addl SX, %esi

   movl DEST, %edx               /* get dest line (in edi) and bank */
   movl DY, %eax
   WRITE_BANK()
   movl %eax, %edi
   addl DX, %edi

   call *%ebx                    /* draw the line (clobbers eax and ecx) */

   movl SYD, %eax                /* next line in source bitmap */
   addl %eax, SY
   incl DY                       /* next line in dest bitmap */
   decl DH
   jg stretch_normal_loop
				 /* falls through when all lines are done */


stretch_done:
   popw %es                      /* matches the pushw in the prologue */

   movl SOURCE, %edx             /* release both bitmaps */
   UNWRITE_BANK()

   movl DEST, %edx
   UNWRITE_BANK()

   popl %ebx
   popl %esi
   popl %edi
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of _do_stretch() */




/* unsigned long _blender_trans24(unsigned long x, y, n);
 *  24 bit trans blender function. See colblend.c for the others.
 *
 *  A nonzero n is first bumped by one. The red and blue bytes are then
 *  processed together as one 0xFF00FF-masked pair and the green byte on
 *  its own under a 0xFF00 mask, each as
 *     ((x_masked - y_masked) * n >> 8) + y
 *  after which both parts are masked again and merged into %eax.
 *  %esi, %ecx and %ebx are preserved; %edx is overwritten.
 */
FUNC(_blender_trans24)
   pushl %ebp
   movl %esp, %ebp
   pushl %esi
   pushl %ecx
   pushl %ebx

   movl ARG3, %ecx               /* ecx = n */
   movl ARG2, %ebx               /* ebx = y */
   movl ARG1, %esi               /* esi = x */

   testl %ecx, %ecx              /* n == 0 stays 0, anything else gets +1 */
   jz trans24_factor_ready

   incl %ecx

trans24_factor_ready:
   /* red + blue, both at once */
   movl %ebx, %edx
   movl %esi, %eax
   andl $0xFF00FF, %edx          /* edx = y & 0xFF00FF */
   andl $0xFF00FF, %eax          /* eax = x & 0xFF00FF */
   subl %edx, %eax
   imull %ecx, %eax
   shrl $8, %eax
   addl %ebx, %eax               /* adds the whole of y; stray bits are */
   andl $0xFF00FF, %eax          /* removed by this mask */

   /* green */
   andl $0xFF00, %esi            /* esi = x & 0xFF00 */
   andl $0xFF00, %ebx            /* ebx = y & 0xFF00 */
   subl %ebx, %esi
   imull %ecx, %esi
   shrl $8, %esi
   addl %ebx, %esi
   andl $0xFF00, %esi

   orl %esi, %eax                /* merge green back in: result in eax */

   popl %ebx
   popl %ecx
   popl %esi
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of _blender_trans24() */




/* fixed fixsqrt(fixed x);
 *  Fixed point square root routine. This code is based on the fixfloat
 *  library by Arne Steinarson.
 *
 *  Returns the result in %eax. For x == 0 it returns 0. For x < 0 it
 *  stores ERANGE through the allegro_errno pointer and returns 0.
 *  %ecx and %edx are overwritten.
 */
FUNC(fixsqrt)
   pushl %ebp
   movl %esp, %ebp

   /* This routine is based upon the following idea:
    *    sqrt (x) = sqrt (x/d) * sqrt(d)
    *    d = 2^(2n)
    *    sqrt (x) = sqrt (x / 2^(2n)) * 2^n
    * `x/2^(2n)' has to fall into the range 0..255 so that we can use the
    * square root lookup table. So `2n' is the number of bits `x' has to be
    * shifted to the right to become smaller than 256. The best way to find
    * `2n' is to do a reverse bit scan on `x'. This is achieved by the i386
    * ASM instruction `bsr'.
    */

   movl ARG1, %eax               /* eax = `x' */
   orl %eax, %eax                /* set the flags from `x'... */
   jle  sqrt_error_check         /* jump to error-checking if x <= 0 */

   movl %eax, %edx               /* bit-scan is done on edx */
   shrl $6, %edx                 /* scan x >> 6, so the shifted x keeps its
				  * top bit at position 6 or 7 */
   xorl %ecx, %ecx               /* if no bit set: default %cl = 2n = 0 */
   bsrl %edx, %ecx               /* NOTE: this relies on bsr leaving ecx
				  * untouched when edx is zero */
   andb $0xFE, %cl               /* make result even -->  %cl = 2n */
   shrl %cl, %eax                /* shift x to fall into range 0..255 */

				 /* table lookup (16 bit entries)... */
   movzwl GLOBL(_sqrt_table)(,%eax,2), %eax

   shrb $1, %cl                  /* %cl = n */
   shll %cl, %eax                /* multiply `sqrt(x/2^(2n))' by `2^n' */
   shrl $4, %eax                 /* adjust the result */
   jmp sqrt_done

   _align_
sqrt_error_check:                /* here we go if x<=0 */
   /* the flags are still those set by the orl above */
   jz sqrt_done                  /* if zero, return eax=0 */

   /* x is negative.
    * NOTE(review): a negative argument is a domain error rather than a
    * range error -- check whether EDOM was intended here, and whether
    * EDOM is defined for the assembler sources, before changing it.
    */
   movl GLOBL(allegro_errno), %edx
   movl $ERANGE, (%edx)          /* on negative input, set errno */
   xorl %eax, %eax               /* return zero */

   _align_
sqrt_done:
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of fixsqrt() */




/* fixed fixhypot(fixed x, fixed y);
 *  Return fixed point sqrt (x*x+y*y), which is the length of the 
 *  hypotenuse of a right triangle with sides of length x and y, or the 
 *  distance of point (x|y) from the origin. This routine is faster and more 
 *  accurate than using the direct formula 
 *  fixsqrt (fixmul (x,x) + fixmul (y,y)). It will also return correct 
 *  results for x>=256 or y>=256 where fixmul (x,x) or fixmul (y,y) would 
 *  overflow.
 *
 *  Returns the result in %eax. If the upper half of the 64 bit sum
 *  x*x + y*y is above 0x3FFFFFFF, ERANGE is stored through the
 *  allegro_errno pointer and 0x7FFFFFFF is returned. %ecx and %edx are
 *  overwritten.
 */
FUNC(fixhypot)
   pushl %ebp
   movl %esp, %ebp

   /* The idea of this routine is:
    *    sqrt (x^2+y^2) = sqrt ((x/d)^2+(y/d)^2) * d
    *    d = 2^n
    * Since `x' and `y' are fixed point numbers, they are multiplied in the 
    * following way:
    *    x^2 = (x*x)/2^16
    * so we come to the formula:
    *    sqrt(x^2+y^2) = sqrt((x*x + y*y)/2^(16+2n)) * 2^n
    * and this is almost the same problem as calculating the square root in
    * `fixsqrt': find `2n' so that `(x*x+y*y)/2^(16+2n)' is in the range 0..255
    * so that we can use the square root lookup table.
    */

   movl ARG1, %eax               /* edx:eax = x*x */
   imull %eax
   movl %eax, %ecx               /* save edx:eax (low half in ecx, */
   pushl %edx                    /* high half on the stack) */
   movl ARG2, %eax               /* edx:eax = y*y */
   imull %eax
   addl %ecx, %eax               /* edx:eax = x*x + y*y */
   popl %ecx
   adcl %ecx, %edx               /* high halves plus carry from the low add */
   cmpl $0x3FFFFFFF, %edx        /* check for overflow */
   ja hypot_overflow             /* unsigned compare; this also keeps the
				  * shift count computed below under 32 */

   /* And now we're doing a bit-scan on `x*x+y*y' to find out by how 
    * many bits it needs to be shifted to fall into the range 0..255. 
    * Since the intermediate result is 64 bit we may need two bitscans 
    * in case that no bit is set in the upper 32 bit.
    */ 
   bsrl %edx, %ecx               /* sets ZF when edx is zero */
   jz hypot_part2

   /* we got the bit with the first step */
   incb %cl                      /* make cl even: */
   incb %cl                      /* cl = (bit index + 2) & ~1, so after the */
   andb $0xFE, %cl               /* shrdl the top set bit is bit 30 or 31 */
   shrdl %cl, %edx, %eax         /* make eax fall into range 0..255 */
   shrl $24, %eax
				 /* eax = table lookup square root */
   movzwl GLOBL(_sqrt_table)(,%eax,2), %eax
   shrb $1, %cl                  /* adjust result... */
   shll %cl, %eax                /* no `shrl $4' on this path: shifting by
				  * 24 rather than 16 above removed 8 more
				  * bits, which is 4 bits of the root */
   jmp hypot_done

   /* we didn't get the bit with the first step -- so we make another
    * scan on the remaining bits in `eax' to get `2n'.
    */
   _align_
hypot_part2:
   /* edx is zero here; from this point the steps are the same as in
    * fixsqrt() applied to (x*x+y*y)/2^16
    */
   shrl $16, %eax                /* eax = (x*x+y*y)/2^16 */
   movl %eax, %edx               /* edx is used for scanning */
   shrl $6, %edx 
   xorl %ecx, %ecx               /* default `2n' if no bit is set */
   bsrl %edx, %ecx               /* NOTE: this relies on bsr leaving ecx
				  * untouched when edx is zero */
   andb $0xFE, %cl               /* make cl=2n even */
   shrl %cl, %eax                /* make eax fall into range 0..255 */
				 /* eax = table lookup square root */
   movzwl GLOBL(_sqrt_table)(,%eax,2), %eax
   shrb $1, %cl                  /* cl = n */
   shll %cl, %eax                /* adjust result... */
   shrl $4, %eax 
   jmp hypot_done

   _align_
hypot_overflow:                  /* overflow */
   movl GLOBL(allegro_errno), %eax
   movl $ERANGE, (%eax)          /* set errno */
   movl $0x7FFFFFFF, %eax        /* and return MAXINT */

   _align_
hypot_done:
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of fixhypot() */