File: fp_long.s

package info (click to toggle)
gbdk 2.0.17-3
  • links: PTS
  • area: non-free
  • in suites: potato
  • size: 8,472 kB
  • ctags: 9,307
  • sloc: ansic: 42,333; asm: 7,010; makefile: 912; yacc: 375; awk: 154; csh: 144; sh: 59
file content (1890 lines) | stat: -rw-r--r-- 33,705 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
	.include "global.s"

;	This is a set of routines for floating point handling for C

;	The format of a floating point number is as follows:
;
;			------------
;			*   sign   *	1 bit
;			*----------*
;			* exponent *	7 bits
;			*----------*
;			* mantissa *	24 bits, normalized
;			------------
;
;		Note that the number is stored with the mantissa in the
;		low order bytes, i.e. the sign is the most significant
;		bit of the most significant byte.

	.area   _BSS

	; Scratch storage shared by the floating point and 32-bit integer
	; helpers below. Labels stacked on a single .ds name the same
	; bytes, so routines using different names for one location must
	; not need it at the same time.

	; Temporary registers
.ldivloopcount:			; shares its byte with .scratch
.scratch:	.ds	1	; fsetup: saved H; flmul/fldiv: bits shifted out of the mantissa
	; Working float
.res:	
		.ds	4	; .mul32 accumulates the high word of its product in .res+0/.res+1
.mul:
		.ds	4	; .mul32: copy of the HLDE operand, LSB at .mul+0

.mulloops:			; .realmul8b bit counter
.fdiv32loops:			; fldiv iteration counter
.faddscratch:	.ds	1	; fladd: sign/exponent of the result
.fmulcount:	.ds 	1	; mult26/mult8 bit counter
.fw:		.ds	4	; second float operand, LSB at .fw+0, sign/exponent at .fw+3
.q:				; quotient bytes (fldiv, .div32)
.ft:				; flmul: low two mantissa bytes of the multiplicand
		.ds	4
fperr:		.ds	1	; floating over/underflow flag

	.area _CODE


;	Set the floating overflow flag and return zero. Floating exceptions
;	may be caught in which case the appropriate routine will be called.
;
;	fpovrflw: stores 1 in fperr (destroying A) and drops into fpzero.
;	fpzero:   returns HLDE = 0 without touching A or the flags.

fpovrflw:
	ld	a,#1
	ld	(fperr),a	; remember that an over/underflow happened
fpzero:
	ld	de,#0		; clear the low word...
	ld	h,d		; ...and copy the zero into the high word
	ld	l,e
	ret

;	Negate the mantissa in LDE.
;	Computes LDE = 0 - LDE as a 24-bit two's complement value, with E
;	the least significant byte. H is not touched.
;	Destroys A (left equal to the new L); the flags come from the last sbc.
negmant::
	xor	a		; Zero a, reset carry
	sub	e		; low byte: 0 - E, borrow left in carry
	ld	e,a
	ld	a,#0		; ld rather than xor so the borrow survives
	sbc	d		; middle byte: 0 - D - borrow
	ld	d,a
	
	ld	a,#0
	sbc	l		;negate the hi byte
	ld	l,a		;put back
	ret			;and return

;	Load the working float .fw into HLDE (H = .fw+3 down to E = .fw+0).
;	fladd jumps here to return the operand it parked in .fw.
;	Leaves A equal to E; the flags are not changed.
fladd_getother:
	ld	hl,#.fw+1	; start at the second byte and walk upwards
	ld	a,(hl+)
	ld	d,a		; D = .fw+1
	ld	a,(hl+)		; A = .fw+2, HL now points at .fw+3
	ld	h,(hl)		; H = .fw+3 (the pointer is finished with)
	ld	l,a		; L = .fw+2
	ld	a,(.fw+0)	; lowest byte last
	ld	e,a
	ret

;	Swap the two floating pt registers HLDE and .fw (fw3fw2fw1fw0).
;	AF is preserved. Destroys BC, which is left holding the HL that
;	was passed in.
fladd_swap::
	push	af		; callers rely on A and flags surviving
	ld	b,h		; BC = incoming high word, HL becomes the pointer
	ld	c,l
	ld	hl,#.fw
	ld	a,(hl)		; exchange E with .fw+0
	ld	(hl),e
	ld	e,a
	inc	hl
	ld	a,(hl)		; exchange D with .fw+1
	ld	(hl),d
	ld	d,a
	inc	hl
	ld	a,(hl)		; A = old .fw+2
	ld	(hl),c		; .fw+2 = incoming L
	inc	hl
	ld	h,(hl)		; H = old .fw+3 (pointer no longer needed)
	ld	l,a		; L = old .fw+2
	ld	a,b
	ld	(.fw+3),a	; .fw+3 = incoming H
	pop	af
	ret
	

;	Floating subtraction. The value on the stack is subtracted from the
;	value in HLDE. To keep it simple this is done as
;
;	A-B == A+(-B)
;
;	i.e. the sign bit of the stacked operand is flipped in place and
;	control drops straight into fladd. Destroys A.
.fsub32::
flsub:
	push	hl		; HL is borrowed as a pointer
	lda	hl,7(sp)	; skip saved HL and return address: top byte of operand
	ld	a,#0x80
	xor	(hl)		; flip the sign bit
	ld	(hl),a
	pop	hl

	;fall through to fladd


;	Floating addition:
;		Add the value in HLDE to the value on the stack, and
;		return with the argument removed from the stack.

;	Timings for adding 1976.0 and 10.0
;		Initial version				- 4080
;		Removed .exxs, replaced with fadd_swap	- 2500
;		Removed swaps around actual add		- 1860
;		Optimised fpnorm			- 1620
;		Improved setup				- 1184
;		Improved neg mant detect code		- 952
;		Found bug in fpnorm			- 1060
;		 Note that the speed depends on the order
;		 that the operands are in
;		 If HLDE is > stack, then the routine is faster
;		Optimised so that fpnorm and round arnt - 816
;		 used unless the number overflows into
;		 H		 

; 	Analysis of routine
;	fladd:
;		Recover right operand
;		If either operand is zero, return the other
;		Make the smaller number current
;		Compute the number of bits difference (BD)
;		If BD > 24, return the larger
;		Adjust smaller until both have the same exponent
;		Save the exponent of either (=exponent of result) (E)
;		Fiddle with mag+sign on both
;			Make H=0x0ff if num is negative
;			Else H=0
;		Add
;		Rotate right once, saving LSB
;		Increase exponent to make up for RR'ing number
;		Restore sign and new exponent
;		Negate mantissa if new is negative
;		Round if LSB was one
;		Normalise

;	Entry:	HLDE = first operand, second operand on the stack above the
;		return address (low word nearest the return address).
;	Exit:	HLDE = sum, operand removed from the stack.
;	Uses .fw and .faddscratch. The code runs off its end into fpnorm,
;	so the result is normalised on the way out.
.fadd32::
fladd:
	ld	a,l		;check 1st operand for zero
	or	d
	or	e		;only need to check mantissa
	jr	nz,5$		; Mantissa is not zero
	pop	bc		; mantissa is zero - return other operand
	pop	de
	pop	hl
	push	bc
	ret
5$:
	ld	a,e		; Store the current operand
	ld	(.fw+0),a
	ld	a,d
	ld	(.fw+1),a
	ld	a,l
	ld	(.fw+2),a
	ld	a,h
	ld	(.fw+3),a
	
	pop	bc		; return address
	pop	de		; low word of 2nd operand
	pop	hl		; hi word
	push	bc		; put return address back on stack
	ld	a,l		; check for zero 2nd arg
	or	d
	or	e		;if zero, just return the 1st operand
	jr	nz,6$		; Not zero - so continue
	jp	fladd_getother	; Zero - return other operand
6$:
	ld	a,(.fw+3)
	res	7,a		;clear sign
	ld	c,h		;get exponent
	res	7,c		;and clear sign
	; NOTE(review): the label below exports a global symbol called 'a'
	; and nothing in view references it - looks like a debugging
	; leftover. As a normal label it presumably also starts a new
	; local-symbol scope; confirm before removing or renaming it.
a::
	sub	c		;find difference
	jr	nc,1$		;if negative,
	call	fladd_swap	; switch operands
	ld	c,a		; Make the difference positive
	xor	a		; (A = 0)
	sub	c

	; From here HLDE holds the operand with the smaller exponent and
	; .fw the larger; A is the exponent difference.
1$:
	cp	#24		; if less than 24 bits difference,
	jr	c,2$		; we can do the add
	jp	fladd_getother	; otherwise just return the larger value
2$:
	or	a		; check for zero difference
	call	nz,fpadjust	; adjust till equal
	ld	a,h		; save exponent of result
	ld	(.faddscratch),a
	bit	7,h		; test sign, do we need to negate?
	ld	h,#0		; zero fill in case +ve (ld leaves the flags from bit alone)
	jr	z,3$		; no
	call	negmant		; yes
	ld	h,#0x0ff	; 1 fill top byte
3$:
	ld	a,(.fw+3)
	bit	7,a		;test sign, do we need to negate?
	ld	a,#0		;zero fill in case +ve
	ld	(.fw+3),a
	jr	z,4$		;no
	call	fladd_swap	; bring the other operand into HLDE to negate it;
				; it is not swapped back as the add is symmetric
	call	negmant		;yes
	ld	h,#0x0ff	;1 fill top byte
4$:
	ld	c,l		; BC = high word, HL becomes the pointer into .fw
	ld	b,h
	ld	hl,#.fw
	ld	a,(hl+)
	add	e
	ld	e,a
	ld	a,(hl+)
	adc	d
	ld	d,a
	ld	a,(hl+)
	adc	c
	ld	c,a
	ld	a,(hl)
	adc	b
	ld	h,a
	ld	l,c

	sra	h		; now shift down 1 bit to compensate
	rr	l		; Rotate in the carry bit
	rr	d		; propagate the shift
	rr	e		; carry now holds the bit shifted out of E

	push    af              ;save carry flag
	ld	a,(.faddscratch)
        res     7,a             ;clear sign from exponent
        inc     a               ;increment to compensate for shift above
        ld      c,a             ;save it
        ld      a,h
        and     #0x80           ;mask off low bits
        or      c               ;or in exponent
        ld      h,a             ;now have it!
	bit	7,h
	call	nz,negmant
        pop     af              ;restore carry flag
        call    c,round         ;round up if necessary
        		        ;normalize and return!!

;	fpnorm	- passed a floating point number in HLDE (sign and exponent
;		in H) - returns with it normalized.
;
;	Points to note:
;		Normalization consists of shifting the mantissa until there
;		is a 1 bit in the MSB of the mantissa.
;
;	Each left shift of the mantissa LDE costs one from the exponent.
;	A zero mantissa returns via fpzero; an exponent that would drop
;	below zero returns via fpovrflw (fperr set, result zero).
;	Destroys A, B and C.
;
fpnorm::
	bit	7,l		; If it's already normalised, then do nothing
	ret	nz

	ld	a,l		;check for zero mantissa
	or	d
	or	e
	jp	z,fpzero	;make it a clean zero

	ld	b,h		; Store the exponent in B
	ld	c,b		;copy into c
	res	7,c		;reset the sign bit

	; We know that bit 7 is zero due to test above
5$:
	dec	c		;decrement exponent
	bit 	7,c
	jp	nz,fpovrflw	; Exp is <0 - underflow

	or	a		; Clear carry
	rl	e		; Rotate LDE left
	rl	d
	rl	l

	bit	7,l		; Is HLDE normalised?
	jr	z,5$		; no - loop

3$:				; (nothing in this routine branches here)
	bit	7,b		;test sign
	jr	z,4$		;skip if clear
	set	7,c		;set the new sign bit
4$:
	ld	h,c		;put exponent and sign back where it belongs
	ret			;finished

;	Round the number in HLDE up by one, because of a shift of bits out
;	earlier
;
;	Entry:	H = sign/exponent, LDE = mantissa
;	Exit:	the mantissa has had 1 added. If it wraps from 0xFFFFFF the
;		mantissa becomes 0x800000 and the exponent in H is
;		incremented (the sign bit of H is kept).
;	Destroys A and C on the wrap path only.
;
;	The wrap path used to rotate the carry flag into L. inc does not
;	change carry, so that was only right because the callers in this
;	file all enter with carry set. LDE is known to be zero at that
;	point, so the halved value is now loaded directly and the routine
;	no longer depends on the caller's flags.

round:
	inc	e
	ret	nz
	inc	d
	ret	nz
	inc	l
	ret	nz

	; Mantissa wrapped to zero: it really is 0x1000000, so store it as
	; 0x800000 and bump the exponent by one.
	ld	l,#0x80		; D and E are already zero
	ld	a,h		; get exponent/sign
	and	#0x07f		; get exponent only
	inc	a		; add one
	ld	c,a
	ld	a,h
	and	#0x080
	or	c		;now exponent and sign again
	ld	h,a
	ret

;	Adjust the floating number in HLDE by increasing the exponent by the
;	contents of A. The mantissa must be shifted right to compensate.
;
;	Only the low five bits of A are used as the count. The loop tests
;	the count after the first shift, so the caller must not pass a
;	count of zero (fladd only calls this when the difference is non-zero).
;	Bits shifted out of E are discarded. Destroys A.

fpadjust:
	and	#0x01F		;mask of hi bits - irrelevant
1$:
	srl	l		; Rotate mantissa right
	rr	d
	rr	e
	inc	h		; increment exponent - it will not overflow
	dec	a
	jr 	nz,1$		; loop if more
	ret

;	Get the right operand into .fw, leave the left operand
;	where it is in HLDE, but make both of them +ve. The original
;	exponents/signs are left in C and B, left and right operands
;	respectively.
;
;	Called from flmul/fldiv, so the stack on entry holds this routine's
;	return address, then the caller's return address, then the 4-byte
;	right operand. The operand is removed from the stack here.
;	Uses .scratch; destroys A.

fsetup::
	push	hl
	lda	hl,6(sp)	; past saved HL and both return addresses
	ld	a,(hl+)
	ld	(.fw+0),a	; lower word of right operand
	ld	a,(hl+)
	ld	(.fw+1),a
	ld	a,(hl+)		; high word of right operand
	ld	(.fw+2),a
	ld	a,(hl)
	ld	(.fw+3),a
	
	pop	hl
	ld	a,h		; Store HL
	ld	(.scratch),a
	ld	a,l		; L rides in A while HL is used for the return address

	pop	hl		; this routine's return address
	pop	bc		; the caller's return address
	lda	sp,4(sp)	; Unjunk stack
	push	bc
	push	hl

	ld	l,a		; Recover HL
	ld	a,(.scratch)
	ld	h,a
	ld	c,a		; Store the exponent
	res	7,h		; Make the working copy positive
	ld	a,(.fw+3)
	ld	b,a		; B = sign/exponent of the right operand
	res	7,a
	ld	(.fw+3),a
	ret

;	Floating multiplication. The number in HLDE is multiplied by the
;	number on the stack under the return address. The stack is cleaned
;	up and the result returned in HLDE.
;
;	The mantissa of the stacked operand (.fw) is the multiplier and is
;	consumed a byte at a time by mult26/mult8; the mantissa of HLDE is
;	the multiplicand, held in E and .ft+1/.ft+0. The product builds up
;	in HLBC. Uses .scratch for the bits shifted out below the mantissa.
;
;	The result is rounded on the top shifted-out bit for either sign.
;	(Positive results used to return before the rounding step, so only
;	negative products were ever rounded.)
;
;	Timings: multiply 1976.0 by 10.0
;		Initial					- ~60000
;		Much hacking afterwards			- 6268
;		Added mulx0 = 8 shift hack		- 5228
;		Trimmed some old instruction		- 5148
;		Improved fsetup				- 4436

.fmul32::
flmul:
	call	fsetup		;get operands, make them +ve.

	push	bc		;save exponents etc.

	ld	a,d		; low two mantissa bytes of the multiplicand -> .ft
	ld	(.ft+1),a
	ld	a,e
	ld	(.ft+0),a
	ld	e,l		; top mantissa byte in E; D is zeroed below

	xor	a		; Zero product
	ld	(.fw+3),a
	ld	h,a
	ld	l,a
	ld	b,a
	ld	c,a
	ld	d,a

	ld	a,(.fw+0)	; get low 8 bits of multiplier
	call	mult26		; do 8 bits of multiply

	ld	a,(.fw+1)
	call	mult8		;next 8 bits

	ld	a,(.fw+2)	;next 8 bits
	call	mult8		;do next chunk

	ld	d,b		; product HLBC -> A:L:D:E
	ld	e,c
	ld	a,h		;get hi byte
	ld	h,#0		; H now counts the shifts
	ld	c,h		;zero lower byte
	jr	1$		;skip forward
2$:
	srl	a
	rr	l
	rr	d
	rr	e
	rr	c		;save carry bit in c
	inc	h
1$:
	or	a		;hi byte zero yet?
	jr	nz,2$		;no, keep shifting down
	ld	a,c		;copy shifted-out bits
	ld	(.scratch),a
	pop	bc		;get exponents
	bit	7,l		;check for zero mantissa
	jp	z,fpzero	;return a clean zero if so
	ld	a,c
	res	7,a		;mask off sign
	sub	#0x41		;remove bias, allow one bit shift
	add	a,h		;add in shift count
	sub	#6		;compensate for shift up earlier
	ld	h,b		;the other
	res	7,h		;mask off signs
	add	a,h		;add them together
	ld	h,a		;put exponent in
	ld	a,c		;now check signs
	xor	b

	bit	7,a
	jr	z,3$		;+ve result: leave the sign bit clear

	set	7,h		;set sign flag
3$:
	ld	a,(.scratch)
	rla			;shift top bit out
	ret	nc		;return if no carry
	jp	round		;round it

; 	Register useage
;		HL  1
;		HL' 1
;		DE  11
;		DE' 11


;	First multiplier byte of flmul. A = multiplier byte, HLBC = product
;	so far, D:E:.ft+1:.ft+0 = multiplicand.
;	The low six bits are handled by adding the multiplicand into the
;	product and then shifting the multiplicand left; the last two bits
;	are handed to the shared loop at mul8_4 (with .fmulcount = 2), which
;	shifts the product right instead and returns to the caller.
mult26::
	push	af
	ld	a,#6
	ld	(.fmulcount),a
3$:	; 3
	pop	af
	srl	a		;shift LSB of multiplier into carry
	jr	nc,1$		; 1f
	push	af
	
	ld	a,(.ft+0)	; BC += low word of the multiplicand
	add	c
	ld	c,a
	ld	a,(.ft+1)
	adc	b
	ld	b,a

	jr	nc,2$
	inc	hl		; carry out of the low word
2$:
	add	hl,de		; HL += high word of the multiplicand
	pop	af
1$:	; 1
	push	af
	or	a		; clear carry before the 32-bit left shift
	push	hl
	ld	hl,#.ft
	rl	(hl)
	inc	hl
	rl	(hl)
	pop	hl
	rl	e
	rl	d

	ld	a,(.fmulcount)
	dec	a
	ld	(.fmulcount),a

	jr 	nz,3$

	ld	a,#2		; two multiplier bits left for the shared loop
	ld	(.fmulcount),a
	pop	af
	jr	mul8_4		; 4f

; Register useage count
;		HL  11
;		HL' 11
;		DE  1
;		DE' 1

;	One multiplier byte of flmul.
;	Entry:	A = multiplier byte, HLBC = product so far,
;		DE:.ft+1:.ft+0 = multiplicand
;	Exit:	HLBC updated. For each bit, LSB first, the product is
;		shifted right one place and the multiplicand added if the
;		bit is set.
;	mul8_4 is also entered from mult26 to finish its last two bits.
;
;	A zero byte is just an 8-bit right shift of HLBC, done by moving
;	bytes. (The move of H into L used to be missing, which dropped the
;	top byte of the product.)

mult8::
				; Encapsulate it
	cp	#0		; Simple hack to speed up mul if A = 0
	jr	nz,mul8_normal
				; If A = 0, then it's just rr HLBC 8 times
	ld	c,b
	ld	b,l
	ld	l,h
	ld	h,a		; (A=0)
	ret

mul8_normal:
	push	af
	ld	a,#8
	ld	(.fmulcount),a
mul8_3:
	pop	af
	srl	h		; product >>= 1
	rr	l
	rr	b
	rr	c
mul8_4: ; 4
	srl	a		;shift LSB into carry
	jr	nc,1$		; bit clear: nothing to add
	push	af
	ld	a,(.ft+0)	; BC += low word of the multiplicand
	add	c
	ld	c,a
	ld	a,(.ft+1)
	adc	b
	ld	b,a

	jr	nc,2$
	inc	hl		; carry out of the low word
2$:
	add	hl,de		; HL += high word of the multiplicand
	pop	af
1$:
	push	af		; keep the remaining multiplier bits
	ld	a,(.fmulcount)
	dec	a
	ld	(.fmulcount),a
	jr	nz,mul8_3		;more?

				; De-encapsulate
	pop	af
	ret			;no, return as is


;	Floating division. The number in HLDE is divided by the
;	number on the stack under the return address. The stack is cleaned
;	up and the result returned in HLDE.
;
;	Working registers in the loop:
;		HLBC	dividend / running remainder (H is the overflow byte)
;		E, .fw+1, .fw+0	divisor mantissa; its top byte is always zero
;		.q+3, .q+2, D, .q+0	quotient (D stands in for .q+1)
;
;	Two fixes relative to the earlier code:
;	  - The quick compare at 3$ tested H against D as if D were the top
;	    byte of the divisor, but D holds quotient bits. H is now tested
;	    against zero, which is what the divisor's top byte always is.
;	  - lda sp,n(sp) changes the carry flag, and carry becomes the
;	    quotient bit at 5$, so carry is now cleared explicitly after
;	    the stack is dropped at 4$.
;
;	Timings Divide 1976.0 by 10.0 giving 197.600006-ish
;	(measured on earlier revisions of this routine)
;		Initial					- 111272
;		Removed .exx's around 3$		- 72512
;		Removed all .exx's up to 5$		- 20192
;		Swapped BCBC' for q4..q0		- 19708
;		Swapped HL' for BC			- 14428
;		Removed .exafaf's			- 14120
;		Found a redundant scf			- 14060
;		Found that D was free - removed q1	- 13060
;		Better shift of q			- 9856
;	Profile counts
;		Useage of	HL  11(.5)1
;				HL' 11(.5)1
;				DE  1
;				DE' 1
;		Useage of	q3  11
;				q1  11

.fdiv32::
fldiv:
	call	fsetup		; get operands, make them +ve.
				; Left operand in HLDE, right operand in .fw,
				; original signs/exponents in C and B
	push	bc		; save exponents etc.
	ld	b,d		; low word of the dividend -> BC
	ld	c,e
	ld	a,(.fw+2)
	ld	e,a		; E = top mantissa byte of the divisor

	xor	a		; Zero a
	ld	(.q+0),a	; ...and the quotient
	ld	d,a		; D is quotient byte 1
	ld	(.q+2),a
	ld	(.q+3),a

	ld	h,a		; Zero the overflow byte of the dividend

	ld	a,#24+6		;number of bits in dividend and then some
	ld	(.fdiv32loops),a

3$:
	ld	a,h		; overflow byte set?
	or	a
	jr	nz,8$		; yes: dividend certainly >= divisor
	ld	a,l		; otherwise let the top mantissa bytes decide
	cp	e
	jr	c,5$		; dividend < divisor (carry set -> quotient bit 0)
8$:
	push	bc
	push	hl		;save dividend - hl is now free

	ld	hl,#.fw
				; Subtract 0:E:fw1:fw0 from HLBC
	ld	a,c		; Subtract fw1fw0 from BC
	sub	(hl)
	ld	c,a
	inc	hl
	ld	a,b
	sbc	(hl)
	ld	b,a

	pop	hl		; Recover HL
	push	hl

	ld	a,l		; Subtract high words
	sbc	e
	ld	l,a
	ld	a,h
	sbc	#0
	ld	h,a
	jr	nc,4$
	pop	hl		; divisor is greater than HLBC
	pop	bc		; restore dividend
	jr	5$		; carry is still set -> quotient bit 0

4$:
	lda	sp,4(sp)	;unjunk stack
	or	a		; that altered carry: clear it -> quotient bit 1
5$:
	ccf			; complement carry bit
	push	hl
	ld	hl,#.q
	rl	(hl)		; shift the new bit into the quotient
	inc	hl
	rl	d
	inc	hl
	rl	(hl)
	inc	hl
	rl	(hl)
	pop	hl

	or	a		; clear carry flag
	rl	c		; Shift HLBC left
	rl	b
	rl	l
	rl	h

	ld	a,(.fdiv32loops)
	dec	a		;decrement loop count
	ld	(.fdiv32loops),a
	jr	nz,3$

	ld	hl,#.q		; quotient -> A:L:D:E
	ld	a,(hl+)
	ld	e,a
	inc	hl		; D is taken care of above
	ld	l,(hl)
	ld	a,(.q+3)

	ld	h,#0		; H counts the shifts
	ld	c,h		;zero lower byte
	jr	1$		;skip forward
2$:
	srl	a
	rr	l
	rr	d
	rr	e
	rr	c		;save carry bit in c
	inc	h
1$:
	or	a		;hi byte zero yet?
	jr	nz,2$		;no, keep shifting down

	push	af
	ld	a,c		;copy shifted-out bits
	ld	(.scratch),a
	pop	af

	pop	bc		;restore exponents
	push	bc		;save signs
	ld	a,c
	res	7,a
	res	7,b
	sub	b
	add	#0x041-6		;compensate
	add	a,h
	ld	h,a
	pop	bc
	ld	a,c
	xor	b		; get sign
	bit	7,a		; Jump if a is positive
	jr	z,6$

	set	7,h
6$:
	ld	a,(.scratch)
	rla
	call	c,round		; round if necessary
	jp	fpnorm		; normalize it and return

; .add32 - add HLDE and stack
;  Adds the 4 byte long on the stack to HLDE and returns the sum in HLDE.
;  The stack grows downwards from the top, so on entry SP+0 is the return
;  address, SP+2 the least significant byte of the operand and SP+5 the
;  most significant (i.e. the caller did push hl; push de).
;  The operand is removed from the stack. Destroys A and BC.
.add32::
	ld	b,h		; high word waits in BC while HL is the pointer
	ld	c,l
	lda	hl,2(sp)	; HL = LSB of operand
	ld	a,(hl+)		; byte 0
	add	e
	ld	e,a
	ld	a,(hl+)		; byte 1, carry chained from here on
	adc	d
	ld	d,a
	ld	a,(hl+)		; byte 2
	adc	c
	ld	c,a
	ld	a,(hl)		; byte 3
	adc	b
	ld	h,a
	ld	l,c
	pop	bc		; return address
	lda	sp,4(sp)	; drop the operand
	push	bc
	ret

; .sub32 - subtract stack from HLDE
;  Subtract the 4 byte long on the stack at SP+2 from HLDE
;  Result is returned in HLDE and the operand is removed from the stack.
;  Destroys A and BC. The final stack adjustment rewrites the flags, so
;  the borrow out of the top byte is not available to the caller.
.sub32::
	LD	B,H		; High word waits in BC while HL is the pointer
	LD	C,L
	LDA	HL,2(SP)	; HL points to the operand
	LD	A,E
	SUB	(HL)		; Byte 0
	LD	E,A
	INC	HL		; 16-bit INC leaves the borrow alone
	LD	A,D
	SBC	(HL)		; Byte 1
	LD	D,A
	INC	HL
	LD	A,C
	SBC	(HL)		; Byte 2
	LD	C,A
	INC	HL
	LD	A,B
	SBC	(HL)		; Byte 3
	LD	H,A
	LD	L,C
	POP	BC		; Return address
	LDA	SP,4(SP)	; Remove the operand from the stack
	PUSH	BC		; Put return address back on stack
	RET

; .neg32 - negate HLDE
;  HLDE is in two's complement form, so the result is 0 - HLDE, worked
;  from the least significant byte up so the borrow can ripple.
;  Destroys A. (This entry point used to carry the bitwise complement
;  code and so returned -HLDE - 1.)
.neg32::
	XOR	A		; Zero A, clear carry
	SUB	E
	LD	E,A
	LD	A,#0x00		; LD rather than XOR so the borrow survives
	SBC	D
	LD	D,A
	LD	A,#0x00
	SBC	L
	LD	L,A
	LD	A,#0x00
	SBC	H
	LD	H,A
	RET

; .cpl32 - complement HLDE
;  Bitwise (one's) complement of every byte. The order in which the
;  registers are complemented is unimportant. Destroys A.
;  (This entry point used to carry the negation code.)
.cpl32::
	LD	A,E
	CPL
	LD	E,A
	LD	A,D
	CPL
	LD	D,A
	LD	A,L
	CPL
	LD	L,A
	LD	A,H
	CPL
	LD	H,A
	RET

; .xor32 - logical XOR of HLDE with the stack
;  The 4 byte operand sits above the return address, LSB at SP+2. The
;  result is returned in HLDE and the operand is removed from the stack.
;  Destroys A and BC.
.xor32::
	ld	b,h		; high word waits in BC while HL is the pointer
	ld	c,l
	lda	hl,2(sp)
	ld	a,(hl+)		; byte 0
	xor	e
	ld	e,a
	ld	a,(hl+)		; byte 1
	xor	d
	ld	d,a
	ld	a,(hl+)		; byte 2
	xor	c
	ld	c,a
	ld	a,(hl)		; byte 3
	xor	b
	ld	h,a
	ld	l,c
	pop	bc		; return address
	lda	sp,4(sp)	; drop the operand
	push	bc
	ret

; .or32 - logical OR of HLDE with the stack
;  The 4 byte operand sits above the return address, LSB at SP+2. The
;  result is returned in HLDE and the operand is removed from the stack.
;  Destroys A and BC.
.or32::
	ld	b,h		; high word waits in BC while HL is the pointer
	ld	c,l
	lda	hl,2(sp)
	ld	a,(hl+)		; byte 0
	or	e
	ld	e,a
	ld	a,(hl+)		; byte 1
	or	d
	ld	d,a
	ld	a,(hl+)		; byte 2
	or	c
	ld	c,a
	ld	a,(hl)		; byte 3
	or	b
	ld	h,a
	ld	l,c
	pop	bc		; return address
	lda	sp,4(sp)	; drop the operand
	push	bc
	ret

; .and32 - logical AND of HLDE with the stack
;  The 4 byte operand sits above the return address, LSB at SP+2. The
;  result is returned in HLDE and the operand is removed from the stack.
;  Destroys A and BC.
.and32::
	ld	b,h		; high word waits in BC while HL is the pointer
	ld	c,l
	lda	hl,2(sp)
	ld	a,(hl+)		; byte 0
	and	e
	ld	e,a
	ld	a,(hl+)		; byte 1
	and	d
	ld	d,a
	ld	a,(hl+)		; byte 2
	and	c
	ld	c,a
	ld	a,(hl)		; byte 3
	and	b
	ld	h,a
	ld	l,c
	pop	bc		; return address
	lda	sp,4(sp)	; drop the operand
	push	bc
	ret

; .asl32 - arithmetic shift left of HLDE 'A' times
;  Entry: HLDE = value, A = shift count
;  Exit:  HLDE shifted, zeros entering at bit 0; A = 0
;  A count of zero returns HLDE unchanged (the loop used to run 256
;  times in that case because the count is tested after decrementing).
.asl32::
	OR	A		; nothing to do for a zero count
	RET	Z
1$:
	SLA	E
	RL	D
	RL	L
	RL	H
	DEC	A
	JR	NZ,1$
	RET

; .asr32 - arithmetic shift right of HLDE 'A' times
;  Entry: HLDE = value, A = shift count
;  Exit:  HLDE shifted with the sign bit of H replicated; A = 0
;  A count of zero returns HLDE unchanged (the loop used to run 256
;  times in that case because the count is tested after decrementing).
.asr32::
	OR	A		; nothing to do for a zero count
	RET	Z
1$:
	SRA	H
	RR	L
	RR	D
	RR	E
	DEC	A
	JR	NZ,1$
	RET

; .lsl32 - logical shift left of HLDE 'A' times
;  Entry: HLDE = value, A = shift count
;  Exit:  HLDE shifted, zeros entering at bit 0; A = 0
;  The shift of E had been commented out (it was written as SLL, which
;  this CPU does not have), so E never moved and D picked up a stale
;  carry. SLA shifts a zero into bit 0, which is what a logical left
;  shift needs.
;  A count of zero returns HLDE unchanged.
.lsl32::
	OR	A		; nothing to do for a zero count
	RET	Z
1$:
	SLA	E
	RL	D
	RL	L
	RL	H
	DEC	A
	JR	NZ,1$
	RET

; .lsr32 - logical shift right of HLDE 'A' times
;  Entry: HLDE = value, A = shift count
;  Exit:  HLDE shifted, zeros entering at bit 31; A = 0
;  A count of zero returns HLDE unchanged (the loop used to run 256
;  times in that case because the count is tested after decrementing).
.lsr32::
	OR	A		; nothing to do for a zero count
	RET	Z
1$:
	SRL	H
	RR	L
	RR	D
	RR	E
	DEC	A
	JR	NZ,1$
	RET

; .cmp32 - check if HLDE is negative, positive or zero
;  Can be used with a subtraction to compare numbers
;   If ( A-B > 0 ) A > B
;   If ( A-B = 0 ) B = A
;   If ( A-B < 0 ) A < B
;  Returns Z = 1 if HLDE = 0, C = 1 if HLDE < 0
;  Destroys A (left holding the OR of all four bytes).
.cmp32::
	BIT	7,H		; Sign bit set?
	JR	NZ,1$		; yes - negative
	LD	A,E		; Positive or zero: Z from the OR of all bytes
	OR	D
	OR	L
	OR	H		; OR also leaves carry clear
	RET
1$:
	LD	A,E		; Negative: same OR (never zero, H has bit 7 set)
	OR	D
	OR	L
	OR	H
	SCF			; carry = 1 flags the negative value
	RET

	;; Long multiplication for Z80.
	;;
	;; Called with 1st arg in HLDE, 2nd arg on stack. Returns with
	;;  result in HLDE, other argument removed from stack.

;	Long multiplication for Z80

;	Called with 1st arg in HLDE, 2nd arg on stack. Returns with
;	result in HLDE, other argument removed from stack

;	global	almul, llmul

;	psect	text
;almul:
;llmul:
;

; Tests:
;	Square 27A3, giving 62311C9
;	Initial: 6796
;	Change final exx for simple moves - 6360
;	Change middle exx to simple moves - 6040
;	Changed to mul DEBC, adding to HLHL' - 5672
;	Cleaned up afterwards	- 5460
;	Tried changing push af to ld (.scratch),a in mul8 - 5540
;	Changed so that mul by 256 (0) is simple swap - 3476
;       Fixed 32 cycle offset in timer - 3444

;	Entry:	HLDE = one operand, the other on the stack above the return
;		address (low word at SP+2, high word at SP+4).
;	Exit:	HLDE = low 32 bits of the product, operand removed from
;		the stack.
;	HLDE is copied to .mul and used a byte at a time as the multiplier;
;	the stacked operand is held in BC:DE as the multiplicand. The
;	product builds up in .res+1/.res+0 (high word) and HL (low word).
.mul32::			; hl=1,de=2,sp+4=3,sp+2=4
	; None of this mucking about...
	; HLDE to mul3 mul2 mul1 mul0
	; Begin profiling
	ld a,h
	ld (.mul+3),a		; mulB
	ld a,l
	ld (.mul+2),a		; mulC
	ld a,d
	ld (.mul+1),a		; .Bp
	ld a,e
	ld (.mul+0),a		; - 80 cycles .Cp
	
	pop hl			; HL is ret address
	pop de			; Low word of the stacked operand
	pop bc			; High word of the stacked operand
	push hl			; Put ret address back
				; - 132 cycles

	xor a			; Zero HLHL'
	ld h,a			; (the result)
	ld l,a
	ld (.res+1),a
	ld (.res+0),a		; - 176 cycles
	
	
	ld a,(.mul+0)		; Do the actual multiply
	call .mul8b		; - 1704 cycles

	ld a,(.mul+1)
	call .mul8b		; - 3232 cycles

	ld a,(.mul+2)
	call .mul8b		; - 3304 cycles

	ld a,(.mul+3)
	call .mul8b		; - 3376 cycles

	ld d,h			; Low word of the product -> DE
	ld e,l
	ld a,(.res+1)		; High word of the product -> HL
	ld h,a
	ld a,(.res+0)
	ld l,a			; - 3424 cycles

	ret

;	One multiplier byte of .mul32.
;	Entry:	A = multiplier byte, BC:DE = multiplicand,
;		.res+1/.res+0 : HL = product so far
;	Exit:	for each set bit of A, LSB first, the multiplicand has been
;		added to the product; the multiplicand ends up shifted left
;		by 8 bits ready for the next byte.
;	Uses .mulloops.
.mul8b:
	cp a,#0
	jr nz,.realmul8b
	; Simple hack so that if we're multiplying by zero then just
	;  the shift is performed.
	; The multiplicand must move LEFT one byte, exactly as the eight
	;  SLA/RL steps below would leave it. (The bytes used to be moved
	;  the other way, which wiped out the low byte of the multiplicand.)
	ld b,c
	ld c,d
	ld d,e
	ld e,a			; (A = 0)
	ret

.realmul8b:
	push af
	ld a,#8
	ld (.mulloops),a
1$:
	pop af
	SRL	A		; Shift A right, LSB into carry
	JP	NC,2$		; LSB of A was zero, so continue
	ADD	HL,DE		; Add low words
	; Originally 149 cycles, now 100
	PUSH	AF		; Keeps the remaining multiplier bits; carry is unchanged
	LD	A,(.res+0)	; Add the high word (BC) plus carry to .res
	ADC	c
	LD	(.res+0),A
	LD	A,(.res+1)
	ADC	b
	LD	(.res+1),A
				; Hee hee - these two were around the wrong way
	POP	AF
	; To here
2$:
	SLA	E		; Shift the multiplicand (BC:DE) left
	RL	D
	; This section took 90 cycles, now 16
	rl	c
	rl	b

	push af
	ld a,(.mulloops)
	DEC	a		; Loop until all 8 bits are done
	ld (.mulloops),a
	JR	NZ,1$
	pop af
	RET
; Long division routines for Z80.
;
; Called with dividend in HLDE, divisor on stack under 2 return
;  addresses. Returns with dividend in HL/HL', divisor in DE/DE'
;  on return the HIGH words are selected.
; Interface between C type HLDE/stack operands and that required for divide
; In divide,
;	dividend is HLHL'
;	divisor  is DEBC
;	divisor  is removed from stack
;	
;	Notes:
;	+0	HL
;	+2	ret outer
;	+4	ret inner
;	+6	div.l
;	+8	div.h

;	32-bit modulus: dividend in HLDE, divisor on the stack above the
;	return address.
;	After the divide, DE is reloaded from .div+0/.div+1 and HL is
;	returned as divide left it.
;	NOTE(review): divide and .div are defined outside this view;
;	presumably divide leaves the remainder's high word in HL and its
;	low word in .div - confirm against divide.
.mod32::
	call	.lregset	; HL/.div = dividend, DE:BC = divisor, stack cleaned
	call	divide
	ld	a,(.div+0)
	ld	e,a
	ld	a,(.div+1)
	ld	d,a
	ret
	
;	32-bit division: dividend in HLDE, divisor on the stack above the
;	return address. Returns HLDE loaded from the quotient bytes
;	.q+3 ... .q+0 once divide has run. Leaves A equal to E.
.div32::
	call	.lregset	; sort out the operands and the stack
	call	divide
	ld	hl,#.q+1	; collect the quotient, walking up from byte 1
	ld	a,(hl+)
	ld	d,a		; D = .q+1
	ld	a,(hl+)		; A = .q+2, HL now points at .q+3
	ld	h,(hl)		; H = .q+3 (the pointer is finished with)
	ld	l,a		; L = .q+2
	ld	a,(.q+0)	; lowest byte last
	ld	e,a
	ret

;	Set up the operands for divide.
;
;	Entry:	HLDE = dividend; the stack holds this routine's return
;		address (the "inner" one, back into .div32/.mod32), then
;		the caller's return address ("outer"), then the divisor
;		(low word first).
;	Exit:	HL = high word of the dividend (unchanged),
;		.div+0/.div+1 = low word of the dividend,
;		BC = low word of the divisor, DE = high word of the divisor,
;		divisor removed from the stack: after the ret, the outer
;		return address is on top.
;	Destroys A.
;
;	The previous version moved SP above values it still needed (the
;	saved HL and the relocated return addresses), so an interrupt taken
;	in that window would overwrite them. This version swaps the return
;	addresses into the divisor's slots through a pointer, and SP never
;	sits above anything that is still wanted.
.lregset:
	ld	a,e		; Low word of dividend into .div
	ld	(.div+0),a
	ld	a,d
	ld	(.div+1),a	; DE is now free
	pop	bc		; BC = inner return address
	pop	de		; DE = outer return address
	push	hl		; Keep dividend.H; stack is now HL, div.L, div.H
	lda	hl,2(sp)	; HL -> low byte of divisor.L
	ld	a,(hl)		; Swap BC with divisor.L: the inner return
	ld	(hl),c		;  address lands in the lower slot
	ld	c,a
	inc	hl
	ld	a,(hl)
	ld	(hl),b
	ld	b,a		; BC = divisor.L
	inc	hl
	ld	a,(hl)		; Swap DE with divisor.H: the outer return
	ld	(hl),e		;  address lands in the upper slot
	ld	e,a
	inc	hl
	ld	a,(hl)
	ld	(hl),d
	ld	d,a		; DE = divisor.H
	pop	hl		; Recover dividend.H
	ret			; Through the inner return address

; .lregset:
; 	POP	BC		; Get top return address
; 	CALL	.exx		; Select other bank
; 	POP	BC		; Return address of call to this module
; 	POP	DE		; Get low word of divisor
; 	CALL	.exx		; Select hi bank
; 	EX	DE,HL		; Dividend.low -> HL
; 	EX	(SP),HL		; Divisor.high -> HL
; 	EX	DE,HL		; Dividend.high -> HL
; 	CALL	.exx		; Back to low bank
; 	PUSH	BC		; Put outer r.a. back on stack
; 	POP	HL		; Return address
; 	EX	(SP),HL		; Dividend.low -> HL
; 	CALL	.exx
; 	PUSH	BC		; Top return address
; 	RET

; ;	Much the same as lregset, except that on entry the dividend
; ;	is pointed to by HL.
; ;	The pointer is saved in iy for subsequent updating of memory

; iregset:
; 	pop	de		;immediate return address
; 	call	lregset		;returns with hi words selected
; 	push	hl		;save a copy for 'ron
; 	ex	(sp),iy		;get it in iy, saving old iy
; 	ld	h,(iy+3)	;high order byte
; 	ld	l,(iy+2)	;byte 2
; 	exx			;back to low bank
; 	push	hl		;return address
; 	ld	h,(iy+1)	;byte 1
; 	ld	l,(iy+0)	;and LSB
; 	exx			;restore hi words
; 	ret			;now return

; ;	Called with hi words selected, performs division on the absolute
; ;	values of the dividend and divisor. Quotient is positive

; sgndiv:
; 	call	negif		;make dividend positive
; 	exx
; 	ex	de,hl		;put divisor in HL/HL'
; 	exx
; 	ex	de,hl
; 	call	negif		;make divisor positive
; 	ex	de,hl		;restore divisor to DE/DE'
; 	exx
; 	ex	de,hl
; 	exx			;select high words again
; 	jp	divide		;do division

; asaldiv:
; 	call	iregset
; 	call	dosdiv
; store:
; 	ld	(iy+0),e
; 	ld	(iy+1),d
; 	ld	(iy+2),l
; 	ld	(iy+3),h
; 	pop	iy		;restore old iy
; 	ret

; aldiv:
;	call	lregset		;get args

; ;	Called with high words selected, performs signed division by
; ;	the rule that the quotient is negative iff the signs of the dividend
; ;	and divisor differ
; ;	returns quotient in HL/DE

; dosdiv:
; 	ld	a,h
; 	xor	d
; 	ex	af,af'		;sign bit is now sign of quotient
; 	call	sgndiv		;do signed division
; 	ex	af,af'		;get sign flag back
; 	push	bc		;high word
; 	exx
; 	pop	hl
; 	ld	e,c		;low word of quotient
; 	ld	d,b
; 	jp	m,negat		;negate quotient if necessary
; 	ret

; lldiv:	call	lregset

; ;	Called with high words selected, performs unsigned division
; ;	returns with quotient in HL/DE

; doudiv:
; 	call	divide		;unsigned division
; 	push	bc		;high word of quotien
; 	exx
; 	pop	hl
; 	ld	e,c		;low word
; 	ld	d,b
; 	ret

; aslldiv:
; 	call	iregset
; 	call	doudiv
; 	jp	store


; almod:
; 	call	lregset

; ;	Called with high words selected, performs signed modulus - the rule
; ;	is that the sign of the remainder is the sign of the dividend

; dosrem:
; 	ld	a,h		;get sign of dividend
; 	ex	af,af'		;save it
; 	call	sgndiv		;do signed division
; 	push	hl		;high word
; 	exx
; 	pop	de
; 	ex	de,hl		;put high word in hl
; 	ex	af,af'		;get sign bit back
; 	or	a
; 	jp	m,negat		;negate if necessary
; 	ret

; asalmod:
; 	call	iregset
; 	call	dosrem
; 	jp	store

; llmod:
; 	call	lregset

; ;	Called with high words selected, perform unsigned modulus

; dourem:
; 	call	divide
; 	push	hl		;high word of remainder
; 	exx
; 	pop	de
; 	ex	de,hl		;high word in hl
; 	ret

; asllmod:
; 	call	iregset
; 	call	dourem
; 	jp	store

; ;	Negate the long in HL/DE

; negat:	push	hl	;save high word
; 	ld	hl,0
; 	or	a
; 	sbc	hl,de
; 	ex	de,hl
; 	pop	bc		;get high word back
; 	ld	hl,0
; 	sbc	hl,bc
; 	ret		;finito

; negif:	;called with high word in HL, low word in HL'
; 	;returns with positive value

; 	bit	7,h		;check sign
; 	ret	z		;already positive
; 	exx			;select low word
; 	ld	c,l
; 	ld	b,h
; 	ld	hl,0
; 	or	a
; 	sbc	hl,bc
; 	exx
; 	ld	c,l
; 	ld	b,h
; 	ld	hl,0
; 	sbc	hl,bc
; 	ret			;finito

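;	Called with the dividend's high word in HL and its low word in
;	.div+0/.div+1 (originally HLHL'), and the divisor in DEBC.
;	Returns with the quotient in .q+0-.q+3 (q3q2q1q0; the copy of it
;	into DEBC at the end of the routine is currently commented out)
;	and the remainder in HL (high word) and .div+0/.div+1 (low word).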


;	Tests on div 62311C9 by 27A3 = 27A3 
;	Initial conversion	- 102096
;	Replaced exx and shift at end	-  90688
;	Shifted loop counter from AF to -  87216
;	 mem, freeing AF
;	Removed need for exx's around 1$ -  81068
;	Changed shift right DEDE' to	-  62708
;	 something simpler
;	Much cleaning and removing of	-  20904
;	 exx's

; From the analysis, S is the most used register.  I'll make S DEBC and
;  Q .q0,.q1,.q2,.q3
;	New time		-  16024
;	Further trimming and the quick	-   8548
;	 rotate optimization

;	Algorithm
;	Given dividend A and divisor S, return quotient Q and
;	remainder R such that
;	A	= ( S * Q ) + R
;	HLHL'	is A
;	DEDE'	is S
;	Returns	Q in BCBC'
;		R in HLHL'
;
;	Simplified
;	Init
;	Set	Q=0
;	Set	loops=1
;	Make S bigger than A by rotating
;	If S > A, continue
;	Rotate S left
;	Increase loops
;	If MSB(S)==1, continue
;	 else loop
;	One step of the divide
;	If S > A, then LSB(Q)=0
;	 else
;		LSB(Q)=1
;		Subtract S from A
;	Rotate Q left
;	Rotate S right
;	Decrease loop counter
;	Loop while loop counter>0
;----------------------------------------------------
;	Every time
;	Parts:
;		divide - 
;		Init Q (BCBC')=0
;		Return if S (DEDE')=0
;		Set loops left to 1
;		1$ -
;		Check to see if S is greater than A
;		If yes,
;			Goto 2 with C set
;		If no,
;			Rotate S (DEDE') left
;			Increase the number of loops left
;			If MSB S !=1, goto 1$ (at 3$)
;		2$ -
;		6$ -
;		Subtract S from A
;		If S is less than A, then goto 5$ (C=0)	
;		Else, restore value of A (C=1), goto 5$
;		5$ -
;		Complement the carry flag
;		Rotate BCBC' left, shifting in C
;		Rotate DEDE' right
;		Decrease loop count
;		Loop to 6$ while loop count > 0
;		

;	divide - unsigned 32 bit / 32 bit shift-and-subtract division.
;
;	In:	HL		high word of the dividend A
;		.div+0, .div+1	low word of the dividend A (LSB in .div+0)
;		DEBC		divisor S (D is the MSB, C is the LSB)
;	Out:	.q+0 - .q+3	quotient Q (LSB in .q+0)
;		HL, .div+0/+1	remainder R
;		If S == 0 the routine returns at once with Q == 0 and the
;		dividend untouched.
;	Uses:	A, flags, DEBC (modified by the shifting), .ldivloopcount
divide:
;	rst	0x08
;	.asciz "divide "
	xor	a		; Set quotient to zero
	ld	(.q+0),a
	ld	(.q+1),a
	ld	(.q+2),a
	ld	(.q+3),a
	ld	a,e		; check for zero divisor
	or	d
	or	c
	or	b
	ret	z		; return with quotient == 0
	ld	a,#1		; loop count
	ld	(.ldivloopcount),a

	; Simple optimisation
	; If H <> 0 and D == E == 0, then DEBC is at least 8 bits shorter
	;  than the dividend, so move it up one whole byte with register
	;  copies instead of eight single bit shifts
	xor	a		; Is H<>0 ?
	cp	h
	jp	z,3$		; Cant hack
	ld	a,d
	or	e
	jp	nz,3$		; Cant hack

	ld	d,e		; DE=0 and H!=0
	ld	e,b		; Shift DEBC 8 bits to the left
	ld	b,c
	ld	c,a		; A is zero
	ld	a,#9		; Loop count = 1 + the 8 shifts just done
	ld	(.ldivloopcount),a

	jp	3$		; enter loop in middle

	; Normalise: shift the divisor left, counting the shifts, until it
	; is bigger than the dividend or its top bit is set
1$:
	or	a		; clear carry
	ld	a,(.div+0)	; Subtract DEBC from the dividend
	sub	c		; to compare them; only the flags are kept
	ld	a,(.div+1)	; C=1 - DEBC > dividend
	sbc	b
	ld	a,l
	sbc	e
	ld	a,h
	sbc	d

	jr	c,2$		; finished - divisor is big enough
	ld	a,(.ldivloopcount)
	inc	a		; increment count
	ld	(.ldivloopcount),a

	or	a		; Shift DEBC left
	rl	c
	rl	b
	rl	e
	rl	d
3$:
	bit	7,d		; test for max divisor
	jp	z,1$		; loop if msb not set
2$:	; arrive here with the shifted divisor in DEBC and the number of
	; quotient bits to produce in .ldivloopcount

	; One divide step per pass: try dividend - divisor, keep the result
	; if it did not borrow, otherwise put the old dividend back
6$:
	push	hl		; save dividend
	ld	a,(.div+0)
	push	af
	ld	a,(.div+1)
	push	af

	or	a		; clear carry
	ld	a,(.div+0)	; Subtract DEBC from the dividend
	sbc	c
	ld	(.div+0),a
	ld	a,(.div+1)
	sbc	b
	ld	(.div+1),a

	ld	a,l
	sbc	e
	ld	l,a
	ld	a,h
	sbc	d
	ld	h,a

	jp	nc,4$		; dividend was >= DEBC, keep the difference
	pop	af		; borrow: restore the saved dividend
	ld	(.div+1),a
	pop	af
	ld	(.div+0),a
	pop	hl		; hi word
	scf			; C junked by POP AF
	jr	5$
4$:
	lda	sp,6(sp)	; unjunk stack
	or	a		; C junked by the SP adjust (it is an ADD SP,n,
				;  which rewrites H and C) - force C=0 again
5$:
	ccf			; complement carry bit: C is now the new
				;  quotient bit
	ld	a,(.q+0)	; Rotate quotient Q left
	rl	a		; Rotate in C flag
	ld	(.q+0),a
	ld	a,(.q+1)
	rl	a
	ld	(.q+1),a
	ld	a,(.q+2)
	rl	a
	ld	(.q+2),a
	ld	a,(.q+3)
	rl	a
	ld	(.q+3),a

	srl	d		; Shift divisor right
	rr	e
	rr	b
	rr	c

	ld	a,(.ldivloopcount)
	dec	a		; decrement loop count
	ld	(.ldivloopcount),a	; the store leaves Z from the DEC intact
	jr	nz,6$

;	Setup the expected return values
;	ld	a,(.q3)
;	ld	d,a
;	ld	a,(.q2)
;	ld	e,a
;	ld	a,(.q1)
;	ld	b,a
;	ld	a,(.q0)
;	ld	c,a
	ret			; finished
;	Conversion of integer type things to floating. Uses routines out
;	of float.as.

;	psect	text

;	global	altof, lltof, aitof, litof, abtof, lbtof
;	global	fpnorm

;	lbtof - convert the unsigned byte in A to floating point
;	litof - convert the unsigned 16 bit value in HL to floating point
;	b3tof - entry used when the 24 bit value is already in L:D:E
;	All three leave through fpnorm with the value in L:D:E and the
;	exponent byte 64+24 in H.
lbtof:
	ld	l,a		; litof takes its argument in HL (it copies HL
	ld	h,#0		;  to DE), so zero extend the byte into HL
litof:
	push	hl		; DE = HL, the 16 bit argument
	pop	de
;	ex	de,hl		;put arg in de
	ld	l,#0		; zero top byte
b3tof:
	ld	h,#64+24	; exponent for an unshifted 24 bit value
	jp	fpnorm

;	abtof - convert the signed byte in A to floating point
;	aitof - convert the signed 16 bit value in HL to floating point
;	Negative values are negated, converted through litof and then get
;	bit 7 of H set in the result.
abtof:
	ld	l,a		; aitof tests and negates HL, so sign extend
	rla			;  the byte into HL
	sbc	a,a		; A = 0xFF if bit 7 of the byte was set, else 0
	ld	h,a

aitof:
	bit	7,h		; negative?
	jp	z,litof		; no, treat as unsigned
	; Negate HL
	xor	a
	sub	l
	ld	l,a
	ld	a,#0		; LD leaves the borrow from the low byte alone
	sbc	h
	ld	h,a
	call	litof
	set	7,h		; set sign flag
	ret

;	lltof - convert the unsigned 32 bit value in HLDE to floating point.
;	When the top byte (H) is zero the value already fits L:D:E and is
;	handed to b3tof; otherwise the value is moved down one byte (the
;	old low byte E is dropped) and the exponent is raised by 8 instead.
lltof:
	ld	a,h		;anything in top byte?
	or	a
	jr	z,b3tof		;no, just do 3 bytes
	ld	e,d		;shift down 8 bits
	ld	d,l		; (order matters: each register is copied
	ld	l,h		;  before it is overwritten)
	ld	h,#64+24+8	;the 8 compensates for the shift
	jp	fpnorm		;and normalize it

;	altof - convert the signed 32 bit value in HLDE to floating point.
;	Non-negative values go straight to lltof. Negative values are
;	negated first, converted by lltof and then flagged negative by
;	setting bit 7 of H in the result.
altof:
	bit	7,h		; sign bit of the long
	jr	z,lltof		; clear: convert as unsigned
	; HLDE = -HLDE, computed as (~HLDE) + 1 a byte at a time.
	; CPL and LD leave the carry flag alone, so the carry ripples
	; from one byte to the next.
	ld	a,e
	cpl
	add	a,#1
	ld	e,a
	ld	a,d
	cpl
	adc	a,#0
	ld	d,a
	ld	a,l
	cpl
	adc	a,#0
	ld	l,a
	ld	a,h
	cpl
	adc	a,#0
	ld	h,a

	call	lltof		; convert the magnitude
	set	7,h		; mark the result negative
	ret

;	ftol - convert floating to long, by using lower bits can also
;	be used to convert from float to int or char

;	psect	text
;	global	ftol
;	global	alrsh, allsh, negmant

;	ftol - float in HLDE (H = sign/exponent, L:D:E = mantissa) to long.
;
;	NOTE(review): this routine looks like an unfinished port:
;	 - every jump/call to allsh/alrsh is commented out, so the shift
;	   count worked out in B is never applied;
;	 - there is no ret or jump after the final `inc hl`, so execution
;	   runs on into whatever code follows this routine;
;	 - the rounding add uses `add d` after `ld a,#0`, which adds D to
;	   zero and ignores the carry out of the low byte (an `adc` would
;	   be needed to propagate it);
;	 - `inc hl` is now unconditional because the `jp nc` that guarded
;	   it is commented out.
;	Confirm whether anything still calls ftol before relying on it.
ftol:
	bit	7,h		;test sign
	call	nz,negmant	;negate mantissa if required
	ld	a,h		;get exponent
	res	7,a		;mask sign off
	sub	#64+24		;remove offset
	ld	b,a		;save shift count
	ld	a,h		;get exponent, sign
	rla
	sbc	a,a		;sign extend
	ld	h,a		;put back
	bit	7,b		;test sign (result unused: the jump below is disabled)
;	jp	z,allsh		;shift it left
	ld	a,#0		; Get the count
	sub	b		; A = -B
;	neg			;make +ve
	dec	a		;and reduce it one
	ld	b,a		;put back in b
;	call	nz,alrsh	;shift right
	; add one for rounding
	ld	a,#1
	add	e
	ld	e,a
	ld	a,#0
	add	d		; plain add: the carry from E is not included
	ld	d,a
;	jp	nc,alrsh	;and shift down one more
	inc	hl		;add in carry first
;	jp	alrsh
; LWORD _fbcd(float x, WORD *exp, char *buf)
;
; Split x into mantissa and decimal exponent parts.
; Return value is the (long) mantissa part, exponent part is
;  stored in *exp as two's complement. Mantissa is stored into buf
;  as an ascii string.

	.NDIG		= 8	; Number of decimal digits (.sbcd writes this many
				;  characters to buf, then a terminating zero)

	.globl	.lldiv,.llmod	; Long divide / modulus routines called by .sbcd

; .hasfrac - does the float in HLDE have a fractional part?
;
; In:	H = sign/exponent byte, L:D:E = 24 bit mantissa (E is the low byte)
; Out:	A = (exponent - (64+24)) + position of the lowest set mantissa
;	bit, with the flags set by that final addition. The value is
;	negative (bit 7 set) when there is a fractional part.
; Uses:	A, C, flags. H, L, D and E are not modified.
.hasfrac:
	LD	C,#0x00		; Zero number
	LD	A,E		; Check low 8 bits
	OR	A
	JR	NZ,1$		; Non zero bit in low 8 bits
	LD	C,#8		; Bump count
	LD	A,D		; Check next 8 bits
	OR	A		; Is there a bit there?
	JR	NZ,1$		; Yup
	LD	C,#16
	LD	A,L		; Now check top 8 bits of the mantissa (L, not
				;  H: H holds the sign and exponent)
	OR	A
	JR	NZ,1$
	LD	C,#24		; Mantissa is all zero: there is no bit for the
	JR	2$		;  scan below to find, so skip it
1$:
	RRA			; Shift bottom bit out
	JR	C,2$		; Found a bit!
	INC	C		; Increment count
	JR	1$		; And loop

2$:
	LD	A,H		; Get exponent
	RES	7,A		; Clear sign bit - should be zero anyway
	SUB	#64+24		; Normalize - remove bias
	ADD	A,C		; Add in bit position
	RET			; Return with value in a and flags set

	.area	_BSS

	; Scratch bytes used by __fbcd
.fexp:
	.ds	0x01		; Floating exponent temporary
.fsgn:
	.ds	0x01		; Floating sign temporary

	.area	_DATA

	; Floating point constants used by __fbcd. __fbcd loads byte +0
	; into E, +1 into D, +2 into L and +3 into H.
	; NOTE(review): these are initialised bytes placed in _DATA - confirm
	; that this area is initialised at startup on the target.
.ftenth:
        ;; 0.1
	.db	0xcc
	.db	0xcc
	.db	0xcc
	.db	0x3d
.ften:
	;; 10.0
	.db	0x0
	.db	0x0
	.db	0xa0
	.db	0x44

	.area	_CODE

; __fbcd - entry point for LWORD _fbcd(float x, WORD *exp, char *buf)
;
; Stack on entry, after the PUSH BC below:
;	0(SP)  saved BC		2(SP)  return address
;	4(SP)  x (4 bytes)	8(SP)  exp	10(SP)  buf
; Working registers: HLDE = x, with H = sign/exponent and L:D:E = mantissa.
; The routine scales x by 0.1 / 10.0 until .hasfrac reports no fractional
; part, keeps the decimal exponent in .fexp, turns the mantissa into a
; plain integer in HLDE, stores the exponent through exp and then falls
; through into .sbcd.
;
; The .fmul32 calls pass one operand on the stack and the other in HLDE
; and rely on .fmul32 removing the stacked operand (nothing is popped
; after the call) - TODO confirm against .fmul32.
;
; NOTE(review): .fsgn is cleared here and tested at the end, but nothing
; in between ever sets it - the sign bit of x is simply dropped by the
; RES 7,H at 1$, so the negation at the end never runs.
__fbcd::
	PUSH	BC		; Preserve the caller's BC

	LDA	HL,9(SP)	; Skip return address and registers
	LD	B,(HL)		; BC = exp
	DEC	HL
	LD	C,(HL)
	LDA	HL,4(SP)
	LD	E,(HL)		; HLDE = x
	INC	HL
	LD	D,(HL)
	INC	HL
	LD	A,(HL+)		; Byte 2 of x belongs in L ...
	LD	H,(HL)		; ... and byte 3 (sign/exponent) in H
	LD	L,A
	XOR	A
	LD	(.fexp),A	; Zero it
	LD	(.fsgn),A
	LD	(BC),A		; And the returned exp value - both bytes, so
	INC	BC		;  that *exp is a clean 0 when x == 0.0
	LD	(BC),A		;  (BC is reloaded from the stack at 6$)
	LD	A,H		; Check for zero exponent
	AND	#0x7F		; Zero exponent means 0.0
	JP	NZ,1$		; Carry on if x != 0.0
	LD	L,A		; Zero mantissa just in case
	LD	E,A
	LD	D,A
	LD	H,A		; And sign/exponent
	JP	.sbcd		; Return with mantissa = 0, exponent = 0
1$:
	RES	7,H		; Drop the sign bit, work on the magnitude
2$:
	; While x has no fractional part: x *= 0.1, .fexp++
	CALL	.hasfrac	; Any fractional part?
	BIT	7,A
	JP	NZ,3$		; Negative if there is fractional part
	PUSH	HL		; Put x on stack
	PUSH	DE
	LD	A,(.ftenth+3)
	LD	H,A
	LD	A,(.ftenth+2)
	LD	L,A
	LD	A,(.ftenth+1)
	LD	D,A
	LD	A,(.ftenth)
	LD	E,A
	CALL	.fmul32		; Returns with value in HLDE
	LD	A,(.fexp)
	INC	A		; Increment exponent
	LD	(.fexp),A
	JR	2$		; Now check again
3$:
	; Until x has no fractional part: x *= 10.0, .fexp--
	PUSH	HL
	PUSH	DE		; Pass x as argument
	LD	A,(.ften+3)
	LD	H,A
	LD	A,(.ften+2)
	LD	L,A
	LD	A,(.ften+1)
	LD	D,A
	LD	A,(.ften)
	LD	E,A
	CALL	.fmul32		; Multiply it
	LD	A,(.fexp)
	DEC	A		; And decrement exponent
	LD	(.fexp),A
	CALL	.hasfrac	; Check for fractional part
	BIT	7,A
	JP	NZ,3$		; Loop if still fractional
	LD	A,H		; Get exponent
	LD	H,#0x00		; Zero top byte
	SUB	#64+24		; Offset exponent
4$:
	; Shift the mantissa by the remaining binary exponent in A so that
	; HLDE holds it as a plain integer
	OR	A		; Check for zero
	JR	Z,6$		; Return if finished
	BIT	7,A
	JP	Z,5$
	SRL	L		; Shift L down
	RR	D		; Rotate the rest
	RR	E
	INC	A		; Increment count
	JR	4$
5$:
	SLA	E
	RL	D
	RL	L
	RL	H
	DEC	A
	JR	4$
6$:
	LD	A,(.fexp)
	PUSH	HL		; Keep the high word of the mantissa
	LDA	HL,11(SP)	; exp argument: 9(SP) at entry, two bytes
				;  deeper because of the PUSH HL
	LD	B,(HL)		; BC = exp
	DEC	HL
	LD	C,(HL)
	POP	HL
	LD	(BC),A		; Store exponent
	INC	BC
	RLA
	SBC	A
	LD	(BC),A		; Sign extend it
	LD	A,(.fsgn)
	BIT	0,A		; Test sign
	JP	Z,.sbcd		; Return if no negation needed
	XOR	A		; Negate low word
	SUB	E
	LD	E,A
	LD	A,#0x00
	SBC	D
	LD	D,A
	LD	A,#0x00		; Negate the hi word
	SBC	L
	LD	L,A
	LD	A,#0x00
	SBC	H
	LD	H,A
				; Falls through into .sbcd

; .sbcd - tail of __fbcd: write the mantissa in HLDE to buf as .NDIG
; ascii decimal digits followed by a zero byte, then return HLDE to
; the caller of __fbcd.
;
; Entered by jump / fall through from __fbcd with the BC saved by __fbcd
; still on the stack, so on entry buf is at 10(SP).
; The digits are produced last one first: .llmod by 10 gives the digit,
; .lldiv by 10 moves on to the next. Both are called with the value in
; HLDE and the long 10 pushed on the stack, and the code relies on them
; removing that stacked operand (the pops that follow each call fetch the
; values saved before it) - TODO confirm against .lldiv/.llmod.
.sbcd:				; Now store as ascii
	PUSH	HL
	PUSH	DE		; Save return value
	PUSH	HL
	LDA	HL,17(SP)	; buf argument: 10(SP) on entry, six bytes
				;  deeper after the three pushes above
	LD	B,(HL)		; BC = buf
	DEC	HL
	LD	C,(HL)
	LD	HL,#.NDIG
	ADD	HL,BC		; Point to end of buffer
	LD	(HL),#0x00	; Null terminate
	LD	B,H		; BC = pointer
	LD	C,L
	POP	HL
	LD	A,#.NDIG
1$:
	PUSH	AF		; Save count
	PUSH	BC		; Save pointer
	PUSH	HL		; Save value
	PUSH	DE
	LD	BC,#0x0000
	PUSH	BC		; Pass 10 on stack
	LD	BC,#0x000A
	PUSH	BC
	CALL	.llmod
	LD	A,E		; Get remainder
	ADD	A,#'0		; Asciize
	POP	DE
	POP	HL		; Restore value
	POP	BC		; Restore pointer
	DEC	BC
	LD	(BC),A
	PUSH	BC		; Save pointer
	LD	BC,#0x0000	; Now divide by 10
	PUSH	BC
	LD	BC,#0x000A
	PUSH	BC
	CALL	.lldiv
	POP	BC		; Restore pointer
	POP	AF		; Restore count
	DEC	A
	JR	NZ,1$		; Loop if more to do
	POP	DE		; Restore return value
	POP	HL

	POP	BC		; BC saved on entry to __fbcd
	RET			; All done