File: umul8x16r24.s

package info (click to toggle)
cc65 2.19-2
  • links: PTS
  • area: main
  • in suites: forky, sid, trixie
  • size: 20,268 kB
  • sloc: ansic: 117,151; asm: 66,339; pascal: 4,248; makefile: 1,009; perl: 607
file content (69 lines) | stat: -rw-r--r-- 1,738 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
;
; Ullrich von Bassewitz, 2011-07-10
;
; CC65 runtime: 8x16 => 24 unsigned multiplication
;

        .export         umul8x16r24, umul8x16r24m
        .export         umul8x16r16, umul8x16r16m

        .include        "zeropage.inc"

        .macpack        cpu

;---------------------------------------------------------------------------
; Unsigned multiply of a 16 bit value by an 8 bit value, giving a 24 bit
; product. Callers that only need the low 16 bits of the product use the
; same code through the umul8x16r16* labels, which are plain aliases.
;
;  entry point                   16 bit operand   8 bit operand
; -----------------------------------------------------------------------
;  umul8x16r24, umul8x16r16      a/x (lo/hi)      ptr1 (low byte)
;  umul8x16r24m, umul8x16r16m    ptr3             ptr1 (low byte)
;
; On return:
;   a  and ptr1     = product bits 0..7
;   x  and ptr1+1   = product bits 8..15
;   sreg (low byte) = product bits 16..23
;   y               = 0
;
; The register entry points copy a/x into ptr3 first; the "m" entry points
; only read ptr3. Either way ptr3 holds the 16 bit operand on return. One
; byte of stack is used temporarily while a partial product is added.
;

umul8x16r24:
umul8x16r16:
        stx     ptr3+1          ; Park the 16 bit operand in ptr3 ...
        sta     ptr3            ; ... and fall through to the memory entry

umul8x16r24m:
umul8x16r16m:
        ; Clear the upper 16 bits of the 24 bit accumulator sreg:ptr1+1:a
.if (.cpu .bitand ::CPU_ISET_65SC02)
        stz     sreg
        stz     ptr1+1
.else
        ldx     #0
        stx     sreg
        stx     ptr1+1
.endif

        lda     ptr1            ; 8 bit operand; product bits replace it
        ldx     ptr3            ; Low byte of 16 bit operand stays in x
        ldy     #8              ; One pass per bit of the 8 bit operand
        ror     a               ; Bit 0 of the operand -> carry. The stale
                                ; carry entering bit 7 here is shifted out
                                ; again by the eighth pass of the loop.

@TestBit:
        bcc     @Shift          ; Bit clear: nothing to add on this pass

        ; Bit set: add the 16 bit operand to sreg:ptr1+1. a holds the
        ; remaining operand bits, so it is kept on the stack meanwhile.
        pha
        txa
        clc
        adc     ptr1+1
        sta     ptr1+1
        lda     ptr3+1
        adc     sreg
        sta     sreg
        pla

        ; Rotate the 24 bit value right by one. The carry left by the
        ; addition (clear if the addition was skipped) enters at the top
        ; of sreg; the next operand bit drops out of a into the carry.
@Shift: ror     sreg
        ror     ptr1+1
        ror     a
        dey
        bne     @TestBit

        ldx     ptr1+1          ; Middle byte of the product
        sta     ptr1            ; Low byte of the product
        rts