File: burnMMX.S

package info (click to toggle)
cpuburn 1.4a-1
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 320 kB
  • ctags: 91
  • sloc: asm: 644; sh: 117; makefile: 55
file content (161 lines) | stat: -rw-r--r-- 3,173 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#  cpuburn-1.4:	 burnMMX   Chipset/DRAM Loading Utility
#  Copyright 2000  Robert J. Redelmeier.  All Right Reserved
#  Licensed under GNU General Public Licence 2.0.  No warrantee.
#  *** USE AT YOUR OWN RISK ***
 
.text
# Program entry.  Both variants leave the block-size selector in %ecx
# and reserve 12 bytes of stack scratch space; the code that follows
# keeps its two saved values at 4(%esp) and 8(%esp).  The selector is
# the value 6 when no argument is given, otherwise the first character
# of the first argument, zero-extended.  Only its low nibble is used.
#ifdef WINDOWS
# C runtime entry: main(argc, argv) with both arguments on the stack.
.globl _main
_main:
	movl	4(%esp), %eax	# eax = argc
	movl	$6, %ecx	# default selector, same as F = 64 kB
	subl	$1, %eax	# argc == 1: no parameter given
	jz	no_size

	movl	8(%esp), %eax	# eax = argv, the address of the strings
	movl	4(%eax), %eax	# eax = argv[1], the first parameter
	movzbl	(%eax), %ecx	# selector = its first character
no_size:
	subl	$12, %esp	# stack space
#else
# Raw process entry.  Per the i386 start-up stack layout, (%esp) holds
# argc, 4(%esp) argv[0] and 8(%esp) argv[1] or NULL.
.globl _start
_start:
	subl	$12, %esp	# stack space
	movl	20(%esp), %eax	# eax = argv[1]: offset 8 plus the 12 above
	movl	$6, %ecx	# default selector, same as F = 64 kB
	testl	%eax, %eax	# NULL: no parameter given
	jz	no_size
	movzbl	(%eax), %ecx	# selector = first character only
no_size:
#endif
	# Reduce the selector in %ecx to n = (selector - 1) & 15, then
	# store the block size in dwords, 256 << n, at 4(%esp) and the
	# number of block passes per outer loop, 256K >> n, at 8(%esp).
	# The multiplier constant at rt is loaded into %mm0.
	decl	%ecx			# selector is 1-based
	andl	$0x0f, %ecx		# mask off ASCII bits
	emms
	movq	rt, %mm0

	movl	$0x100, %eax
	shll	%cl, %eax
	movl	%eax, 4(%esp)		# save blocksize

	movl	$0x40000, %eax
	shrl	%cl, %eax
	movl	%eax, 8(%esp)		# save count blks / 512 MB

	# Initial fill of buffer.  Each pass of the loop stores 16 dwords
	# (64 bytes), so blocksize / 16 passes cover the whole block.
	# The pattern register %eax starts as all ones and is rotated
	# left through the carry flag once per pass, so a single zero bit
	# walks through the 32 register bits plus CF.  %edx always holds
	# the complement.  Both dwords of every qword get the same value,
	# which is what the data check relies on.
	movl	4(%esp), %ecx	# initial fill of 2 cachelines
	shrl	$4, %ecx	# pass count = blocksize / 16
	movl	$buffer, %edi	# edi = write pointer
	xorl	%eax, %eax	# also clears CF for the first rcll
	notl	%eax		# eax = all ones; notl leaves flags alone
more:
	movl	%eax, %edx	# qwords F-F-0-F , F-0-F-0 
	notl	%edx		# edx = complement of the pattern
	movl	%eax,  0(%edi)	# qword 0: pattern
	movl	%eax,  4(%edi)
	movl	%eax,  8(%edi)	# qword 1: pattern
	movl	%eax, 12(%edi)
	movl	%edx, 16(%edi)	# qword 2: complement
	movl	%edx, 20(%edi)
	movl	%eax, 24(%edi)	# qword 3: pattern
	movl	%eax, 28(%edi)

	movl	%eax, 32(%edi)	# qword 4: pattern
	movl	%eax, 36(%edi)
	movl	%edx, 40(%edi)	# qword 5: complement
	movl	%edx, 44(%edi)
	movl	%eax, 48(%edi)	# qword 6: pattern
	movl	%eax, 52(%edi)
	movl	%edx, 56(%edi)	# qword 7: complement
	movl	%edx, 60(%edi)
	rcll	$1, %eax	# walking zero, 33 cycle
	leal	64(%edi), %edi	# odd inst to preserve CF
	decl	%ecx		# decl does not modify CF either
	jnz	more

# Main stress loop.  One pass of mov_again copies the whole block from
# buffer to buf2, then copies all but the last 32 bytes of buf2 back
# to buffer + 32, and finally copies the remaining 32 bytes of buf2 to
# the start of buffer, so the block ends up rotated by 32 bytes.
# Alongside the copies, mm1 and mm2 are reset from mm0 and then put
# through the same pmaddwd sequence, so they must stay equal; the
# check that follows the loop compares them.  Because of the reset,
# that check sees the values produced by the final pass only.
# Register use: edx = passes left, ecx = negative byte index counting
# up to zero, esi/edi = end-of-region source and destination pointers,
# mm7 = the qword in flight.
thrash:				# OUTER LOOP
	movl	8(%esp), %edx	#   reset count for 512 MB
mov_again:			# one forward plus one backward copy
	movq	%mm0, %mm1
	movq	%mm0, %mm2
	movl	$buffer, %esi
	movl	$buf2, %edi
	movl	4(%esp), %ecx
	shll	$2, %ecx	# move block up
	addl	%ecx, %esi	# esi = buffer + block bytes
	addl	%ecx, %edi	# edi = buf2 + block bytes
	negl	%ecx		# ecx = -block bytes
.align 16, 0x90
0:				    # WORKLOOP 7 uops/ 3 clks in L1
	movq	0(%esi,%ecx),%mm7
	pmaddwd %mm0, %mm1
	pmaddwd %mm0, %mm2
	movq	%mm7, 0(%edi,%ecx)
	addl	$8, %ecx	# next qword; ZF set once the index hits 0
	jnz	0b

	movl	$buffer + 32, %edi	# move block back
	movl	$buf2, %esi		# shifting by
	movl	4(%esp), %ecx		# one cacheline
	subl	$8, %ecx		# leave out the last 8 dwords
	shll	$2, %ecx		# ecx = block bytes - 32
	addl	%ecx, %esi		# esi = buf2 + block bytes - 32
	addl	%ecx, %edi		# edi = buffer + block bytes
	negl	%ecx
.align 16, 0x90
0:				   # second workloop
	movq	0(%esi,%ecx),%mm7
	pmaddwd %mm0, %mm1
	pmaddwd %mm0, %mm2
	movq	%mm7, 0(%edi,%ecx)
	addl	$8, %ecx
	jnz	0b

	# esi was not changed by the loop and still points at the last
	# 32 bytes of the block in buf2.  The eight movsl below assume
	# the direction flag is clear (TODO confirm for each entry path)
	# and leave edi = buffer + 32, which the data check relies on.
	movl	$buffer, %edi
	movsl			# replace last c line
	movsl
	movsl
	movsl
	movsl
	movsl
	movsl
	movsl
	decl	%edx		# do again for 512 MB.
	jnz	mov_again

	# DATA CHECK.  %ebx carries the status handed to the error path:
	# minus one while the MMX results are compared, minus two while
	# the buffer is scanned.  On success control returns to thrash.
	movl	$-1, %ebx		# status for an MMX mismatch
	pcmpeqd	%mm2, %mm1		# all ones where mm1 equals mm2
	psrlq	$16, %mm1		# bring bits 16..47 into the low dword
	movd	%mm1, %eax
	addl	$1, %eax		# all ones wraps to zero
	jne	error			# MMX calcs OK?

	movl	$-2, %ebx		# status for a data mismatch
	leal	-32(%edi), %edi		# edi arrives as buffer + 32; rewind
	xorl	%ecx, %ecx		# ecx = dword index into the block
test:					# Check data (NOT optimized)
	movl	(%edi,%ecx,4), %eax
	cmpl	%eax, 4(%edi,%ecx,4)	# both halves of a qword must match
	jne	error
	addl	$2, %ecx		# next qword
	cmpl	4(%esp), %ecx
	jb	test			# until the index reaches blocksize
	jmp	thrash

# Abnormal end, reached from the checks with the failure status in %ebx.
# NOTE(review): in the second branch eax = 1 presumably selects the exit
# system call with %ebx as its status, and the two pushes look meant for
# a kernel that takes its arguments from the stack; confirm per target.
# NOTE(review): the first branch returns 1 whatever %ebx holds, and
# %ebx, %esi and %edi are never restored for the caller; confirm that
# this is acceptable for the runtime that calls _main.
error:					# error abend
	movl	$1, %eax
	emms
#ifdef WINDOWS
	addl	$12, %esp		# deallocate stack
	ret
#else
	pushl	%ebx
	pushl	%eax
	int	$0x80
#endif
# Multiplier loaded into mm0 at startup: two dwords of 0x7fffffff.
rt:
	.long	0x7fffffff
	.long	0x7fffffff

# Working storage.  buffer holds the test pattern that is filled at
# startup and verified by the data check; buf2 is the bounce area the
# work loops copy it through.  Each is 32 << 20 bytes = 32 MB, which is
# exactly the largest block the selector can request: 256 << 15 dwords.
# NOTE(review): the startup code does not clamp the selector, so if
# these sizes are reduced the larger selectors would run past the end
# of the buffers unless the selector is limited to match.
.bss				# Data allocation
.align 32
.lcomm	buffer,	 32 <<20	# reduce both to 8 <<20 for only
.lcomm	buf2,	 32 <<20	# 16 MB virtual memory available

#