File: parse.py

package info (click to toggle)
magma 2.9.0%2Bds-2
  • links: PTS, VCS
  • area: contrib
  • in suites: forky, sid, trixie
  • size: 83,212 kB
  • sloc: cpp: 709,115; fortran: 121,916; ansic: 32,343; python: 25,603; f90: 15,208; makefile: 942; xml: 253; csh: 232; sh: 203; perl: 104
file content (202 lines) | stat: -rwxr-xr-x 4,852 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env python
#
# Parses MAGMA output and generates a python file,
# storing each run into its own numpy array.
#
# @author Mark Gates

import sys
import os
import re
import numpy


# --------------------
class data_t( object ):
	"""Accumulates the pieces of one result table while a file is parsed.

	Attributes (all set by reset()):
	  cmd         list of stripped header/usage lines belonging to the table
	  name        table name taken from a header line, '' if none seen
	  name_usage  fallback name taken from a 'Usage:' line, '' if none seen
	  rows        list of data rows, each a list of field strings
	"""
	
	def __init__( self ):
		# a new table starts out empty
		self.reset()
	
	def reset( self ):
		"""Return the table to its empty state."""
		# new list objects are created on every call, so the two lists are
		# never shared with each other or with a previous state
		self.rows, self.cmd = [], []
		self.name_usage = ''
		self.name = ''
#end


# --------------------
# output one run as numpy array
def output( data ):
	"""Print one table to stdout as ``<name> = array([ ... ])`` and reset it.

	data -- object with the data_t attributes: cmd (list of strings printed
	        as leading '#' comment lines), name / name_usage (table name and
	        its fallback) and rows (list of lists of field strings), plus a
	        reset() method.

	Nothing is printed and data is left untouched when data.rows is empty.
	Otherwise data.name is filled in from data.name_usage, or 'unknown',
	if it was empty, and data.reset() is called after printing.

	Rows whose field count differs from the first row's are reported with a
	'# error' line, ignored when sizing columns, and emitted as a '# ERROR'
	comment line inside the array.

	Written to run unchanged on Python 2 and Python 3: every print() takes a
	single pre-formatted string.
	"""
	if ( not data.rows ):
		return
	
	# name precedence: header line, then usage line, then a placeholder
	if ( not data.name ):
		data.name = data.name_usage or 'unknown'
	
	# find maximum width of each column and make printf format
	n = len( data.rows[0] )
	maxwidths = [0] * n
	for row in data.rows:
		if ( len(row) != n ):
			print( '# error: row has %d fields; first row had %d fields' % (len(row), n) )
			continue
		maxwidths = [ max( width, len(field) ) for (width, field) in zip( maxwidths, row ) ]
	# each column becomes a right-aligned %<width>s
	fmt = '\t[ ' + ',  '.join( [ '%%%ds' % width for width in maxwidths ] ) + ' ],'
	
	# output table
	for cmd in data.cmd:
		print( '# %s' % (cmd,) )
	print( '%s = array([' % (data.name,) )
	for row in data.rows:
		# only the formatting can fail (wrong number of fields for fmt);
		# keep the print out of the try so I/O errors are not masked
		try:
			text = fmt % tuple(row)
		except (TypeError, ValueError):
			text = '# ERROR %s %s' % (fmt, row)
		print( text )
	print( '])\n' )
	
	data.reset()
# end


# --------------------
# find or create the table that collects every run of one routine
def _get_table( name, keys, tables ):
	"""Return the data_t registered under name, creating it on first use.

	name   -- table name (routine name plus any option suffixes).
	keys   -- list of table names in order of first appearance; a newly
	          created table's name is appended to it.
	tables -- dict mapping table name to its data_t; updated together
	          with keys.
	"""
	data = tables.get( name )
	if ( data is None ):
		data = data_t()
		data.name = name
		keys.append( name )
		tables[name] = data
	return data
# end


# --------------------
# process one file
def process( filename ):
	"""Parse one MAGMA tester output file and print its tables to stdout.

	filename -- path of the text file to read.

	Lines are classified as:
	  * header lines (``testing_<name> ...``, optionally prefixed by
	    numactl), which select the table named after the routine plus any
	    -L/-U, -U?/-V? and -J?/-R?/-L? option flags found on the line;
	  * usage lines (``Usage: ./testing_<name>``), which start a new table
	    only if the current one already has rows, and otherwise just record
	    a fallback name;
	  * data lines, which contain a decimal number and none of ``%#:=/,``.
	Runs sharing a table name are merged, and the tables are printed with
	output() in order of first appearance once the whole file is read.
	Data lines with a non-numeric field are reported on stderr and skipped.

	NOTE(review): rows read before the first header line go to an initial
	table that is never registered, so they are not printed -- confirm this
	is intended.
	"""
	# option flags that distinguish variants of the same tester
	option_patterns = (
		r'-([LU])\b',        # lower/upper
		r'-([UV][ASON])\b',  # svd U & V vectors
		r'-([JRL][NV])\b',   # syev job vectors, geev right & left vectors
	)
	warmup = 0
	
	print( '# ------------------------------------------------------------' )
	print( '# file: %s' % (filename,) )
	
	data   = data_t()
	keys   = []
	tables = {}
	
	# 'with' closes the file even if parsing raises
	with open( filename ) as infile:
		for line in infile:
			# look for header line
			m = re.search( r'^(?:numactl.*)?testing_(\w+)', line )
			if ( m ):
				name = m.group(1)
				for pattern in option_patterns:
					m2 = re.search( pattern, line )
					if ( m2 ):
						name += '_' + m2.group(1)
				
				data = _get_table( name, keys, tables )
				data.cmd.append( line.strip() )
				warmup = 2
				continue
			# end
			
			# look for usage line (in case no header line)
			m = re.search( r'Usage: ./testing_(\w+)', line )
			if ( m ):
				name = m.group(1)
				if ( data.rows ):
					# new table with no header
					data = _get_table( name, keys, tables )
					data.cmd.append( line.strip() )
					warmup = 2
					continue
				# table had header
				data.name_usage = name
			# end
			
			# look for data lines
			# differentiating data lines from other output is not so easy.
			# look for lines containing numbers and excluding certain punctuation
			if ( not re.search( r'\b\d+\.\d+\b', line ) ):
				continue
			if ( re.search( r'[%#:=/,]', line ) ):
				continue
			
			# remove () parens
			# convert --- and words (usually options like "S") to nan
			line2 = re.sub( r'[()]',          ' ',     line  )
			line2 = re.sub( r'\b[a-zA-Z]+\b', ' nan ', line2 )
			line2 = re.sub( r'\s---\s',       ' nan ', line2 )
			line2 = line2.strip()
			
			# gesvd has two job columns, usually the same, while gesdd has
			# only one job column. This eliminates 2nd job column for gesvd.
			if ( re.search( 'gesvd', data.name )):
				line2 = re.sub( r'^( *nan) +nan', r'\1', line2 )
			
			fields = re.split( ' +', line2 )
			
			# verify that everything is numeric; an explicit loop so the
			# check actually runs on Python 3, where map() is lazy
			try:
				for field in fields:
					float( field )
			except ValueError:
				sys.stderr.write( 'ignoring: ' + line.strip() + '\n' )
				continue
			
			# skip warmup runs: within the first two data lines after a
			# header, drop those whose size is 100, 1000, 123, or 1234
			if ( warmup > 0 ):
				warmup -= 1
				if ( re.search( r'^ *([a-zA-Z]+ +)*(1000?|1234?)\b', line ) ):
					continue
			
			# for gesvd, skip second field, jobv
			# this makes it match gesdd, which has only job, not jobu and jobv
			# NOTE(review): for a table named exactly ?gesvd the regex above
			# may already have removed the second job column, so this can
			# drop a further column -- confirm the double removal is intended.
			if ( data.name[1:] == 'gesvd' ):
				fields = fields[0:1] + fields[2:]
			
			data.rows.append( fields )
		# end
	# end
	for key in keys:
		output( tables[key] )
# end


# --------------------
# preamble of the generated python file
# (every print() takes one pre-formatted string so that the script behaves
# the same on Python 2 and Python 3)
print( 'import numpy' )
print( 'from numpy import array, nan, inf' )
print( '' )

if ( len(sys.argv) > 1 ):
	# version, cuda version, and device are taken from the path of the first
	# input file, which must contain <version>/cuda<x.y>-<device>/
	# (raw string, so \d and \. reach the regex engine unchanged)
	m = re.search( r'v?(\d+\.\d+\.\d+|trunk)/cuda(\d+\.\d+)-(.*)/', sys.argv[1] )
	if ( m ):
		print( "version = '%s'" % (m.group(1)) )
		print( "cuda    = '%s'" % (m.group(2)) )
		print( "device  = '%s'" % (m.group(3)) )
		print( "cpu     = 'unknown'" )
		print( '' )
	else:
		print( "version = 'unknown'" )
		print( "cuda    = 'unknown'" )
		print( "device  = 'unknown'" )
		print( "cpu     = 'unknown'" )
		print( '' )
		sys.stderr.write( "\nWarning: no version information\n\n" )
	# end
# end

# parse every file named on the command line, in order
for f in sys.argv[1:]:
	process( f )