File: joinsf.s

package info (click to toggle)
brickos 0.9.0-1
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 1,700 kB
  • ctags: 1,727
  • sloc: ansic: 9,139; cpp: 860; makefile: 717; asm: 693; sh: 123; perl: 61
file content (204 lines) | stat: -rw-r--r-- 5,786 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
/*
 *  joinsf.s
 *
 *  Joins a sign byte, an exponent, and a mantissa into a single float.
 *
 *  The contents of this file are subject to the Mozilla Public License
 *  Version 1.0 (the "License"); you may not use this file except in
 *  compliance with the License. You may obtain a copy of the License at
 *  http://www.mozilla.org/MPL/
 *
 *  Software distributed under the License is distributed on an "AS IS"
 *  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
 *  License for the specific language governing rights and limitations
 *  under the License.
 *
 *  The Original Code is Librcx floating point code, released May 27, 1999.
 *
 *  The Initial Developer of the Original Code is Kekoa Proudfoot.
 *  Portions created by Kekoa Proudfoot are Copyright (C) 1999
 *  Kekoa Proudfoot. All Rights Reserved.
 *
 *  Contributor(s): Kekoa Proudfoot <kekoa@graphics.stanford.edu>
 */

; possible optimizations:
;  - factor sticky shift (4 shift ops, 2 bit ops) into a bsr

    .section .text

;;
;; function: joinsf
;;
;; Packs a sign byte, an exponent and a fixed-point mantissa into a
;; single-precision float, rounding to nearest even and producing
;; denormals, zero or infinity where the exponent requires it.
;;
;; input:  sign in r3l  (0x00 = positive, 0x80 = negative; or'ed into the
;;                       top byte of the result)
;;         exp  in r4   (signed 16-bit; stored directly in the exponent
;;                       field, so values < 1 select the denormal path and
;;                       values > 254 produce infinity)
;;         mant in r5r6 (2.29 or 1.29 fixed point: "two bit" = bit 6 of r5h,
;;                       "one bit" = bit 5 of r5h; the low 6 bits of r6l are
;;                       guard bits, with bit 0 of r6l used as sticky bit)
;; output: float in r0r1
;; modifies: r0, r1, r4, r5, r6 and the condition codes; r3 is only read
;;

    .global ___joinsf

___joinsf:

    ; Check for and fix overflow, converting 2.29 to 1.29

    bsr     fixoverflow

    ; Is exponent < 1 and is mantissa one bit set?
    ; If so, the value has to be turned into a denorm (or zero).

    mov.w   r4,r4               ; set flags from exponent (if exponent < 1)
    bgt     endif_0             ; greater than implies false
    btst    #5,r5h              ; if mantissa one bit set (Z = 1 when clear)
    beq     endif_0             ; zero (one bit clear) implies false

        ; Handle a denorm.

        ; Is exponent < -23?

        mov.w   #-23,r0         ; load -23 to r0, which is currently free
        cmp.w   r0,r4           ; if exponent < -23
        bge     else_1          ; greater than or equal implies false

            ; Set mantissa and exponent to zero

            sub.w   r5,r5       ; clear mantissa
            sub.w   r6,r6
            sub.w   r4,r4       ; clear exponent

            bra     endif_1

        else_1:

            ; Shift mantissa right 1 - exponent places, maintaining sticky bit
            ; Note that since -23 <= exponent < 1, can use byte for counter
            ; (the count is therefore in the range 1..24)
            ; Also note that 1 - exponent is at least 1, so use do/while
            ; Use r0l for counter

            mov.b   #1,r0l      ; load 1 to counter (r0l)
            sub.b   r4l,r0l     ; subtract exponent

            dowhile_2:

                ; Shift mantissa right one place, maintaining sticky bit

                shlr.b  r5h     ; shift mantissa right 1 place
                rotxr.b r5l     ; carry links the four byte shifts together
                rotxr.b r6h
                rotxr.b r6l     ; last shift places old sticky bit in carry

                bor     #0,r6l  ; or lsb with old sticky bit
                bst     #0,r6l  ; store new sticky bit

                dec.b   r0l;    ; decrement counter and repeat if non-zero
                bne     dowhile_2

            ; Set exponent to 1
            ; If the one bit is still clear after rounding, the exponent is
            ; cleared to zero further below; if rounding carries into the
            ; one bit, exponent 1 is kept.

            mov.w   #1,r4       ; load 1 to exponent (r4)

        endif_1:

    endif_0:

    ; Round to nearest even by adding 0x1f if lsb is zero, 0x20 if lsb is one
    ; The lsb in this case is that of the rounded mantissa (the 1 << 6 bit)

    ; We do math by computing mantissa + 0x1f + carry, where lsb is in carry

    bld     #6,r6l              ; load lsb to carry
    addx.b  #0x1f,r6l           ; add 0x1f with carry
    addx.b  #0,r6h              ; finish addition (propagate carry upward)
    addx.b  #0,r5l
    addx.b  #0,r5h

    ; Since that might have overflowed, check for and fix overflow

    bsr     fixoverflow

    ; Check for infinite result (exponent > 254)

    mov.w   #254,r0             ; load 254 to r0
    cmp.w   r0,r4               ; if exponent > 254
    ble     endif_2             ; less than or equal indicates false

        ; Return +inf if positive (sign==0x00), -inf if negative (sign==0x80)
        ; The mantissa in r5r6 is discarded on this path.

        mov.w   #0x7f80,r0      ; load 0x7f800000 to r0r1
	sub.w   r1,r1
        or.b    r3l,r0h         ; or upper byte with sign byte to set sign

        bra     return

    endif_2:

    ; Is one bit zero (indicating denorm or zero)?

    btst    #5,r5h              ; is one bit set? (Z = 1 when clear)
    bne     endif_3             ; one bit set: keep exponent as is

        ; Set exponent to zero

        sub.w   r4,r4           ; clear exponent to zero

    endif_3:

    ; Shift result right 6 places to remove guard bits
    ; This moves the one bit from bit 5 of r5h down to bit 7 of r5l

    mov.b   #6,r0l              ; use r0l as counter, set to 6

    dowhile_4:

        shlr.b  r5h             ; shift mantissa right 1 place
        rotxr.b r5l
        rotxr.b r6h
        rotxr.b r6l

        dec.b   r0l             ; decrement counter
        bne     dowhile_4       ; repeat if counter not yet zero

    ; Pack exponent (note 0 <= exp <= 254)
    ; The upper 7 exponent bits go to r5h, the exponent lsb replaces the
    ; one bit in bit 7 of r5l

    mov.b   r4l,r5h             ; store in upper mantissa byte
    shlr.b  r5h                 ; shift right one place (exponent lsb to carry)
    bst     #7,r5l              ; store exponent lsb in msb of next byte

    ; Set sign bit

    or.b    r3l,r5h             ; or in the sign byte

    ; Move result to r0r1

    mov.w   r5,r0
    mov.w   r6,r1

return:

    rts



;;
;; function: fixoverflow
;;
;; Renormalizes a mantissa that has grown into the "two bit" (bit 6 of
;; r5h): the mantissa is shifted right one place, with the bit shifted out
;; folded into the sticky bit (bit 0 of r6l), and the exponent is bumped by
;; one. A mantissa whose two bit is clear is returned untouched.
;;
;; input: exponent in r4, 2.29 or 1.29 mantissa in r5r6
;; output: exponent in r4, 1.29 mantissa in r5r6
;;

fixoverflow:

    btst    #6,r5h              ; test the two bit (Z = 1 when it is clear)
    bne     fixoverflow_shift   ; two bit set: mantissa is 2.29, go fix it

    rts                         ; already 1.29, nothing to do

fixoverflow_shift:

    ; One-place right shift across all four mantissa bytes; the carry flag
    ; hands each byte's dropped bit to the next byte down

    shlr.b  r5h
    rotxr.b r5l
    rotxr.b r6h
    rotxr.b r6l                 ; carry now holds the bit that fell off

    ; Merge the dropped bit into the sticky bit so it is not lost to rounding

    bor     #0,r6l              ; carry = carry | current lsb
    bst     #0,r6l              ; write merged value back as the sticky bit

    ; Compensate for the shift in the exponent

    adds.w  #1,r4               ; exponent = exponent + 1

    rts