File: findliterals.py

package info (click to toggle)
simpleparse 2.1.0a1-6
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd, wheezy
  • size: 2,776 kB
  • ctags: 4,332
  • sloc: python: 7,036; ansic: 6,395; makefile: 22
file content (48 lines) | stat: -rwxr-xr-x 1,315 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os, string
from simpleparse.parser import Parser

# Grammar text handed to the simpleparse Parser constructed below.
# "myfile" is the root production: one or more (notliteral, literal) pairs
# followed by a trailing notliteral.  A "literal" is a run delimited by
# matching single or double quotes whose contents are either non-quote,
# non-backslash characters or a backslash escape (special char or octal).
# NOTE(review): SPECIALESCAPEDCHAR does not list the quote characters, so an
# escaped quote (\" or \') inside a literal presumably does not match as an
# ESCAPEDCHAR -- confirm whether that is intended.
declaration = r'''
myfile := (notliteral,literal)+, notliteral

# not-a-literal, not reported, repeating
<notliteral> := -literal*

literal             :=  ("'",(CHARNOSNGLQUOTE/ESCAPEDCHAR)*,"'")  /  ('"',(CHARNODBLQUOTE/ESCAPEDCHAR)*,'"')

CHARNOSNGLQUOTE     :=  -[\\']+
CHARNODBLQUOTE      :=  -[\\"]+
ESCAPEDCHAR         :=  '\\',( SPECIALESCAPEDCHAR / OCTALESCAPEDCHAR )
SPECIALESCAPEDCHAR  :=  [\\abfnrtv]
OCTALESCAPEDCHAR    :=  [0-7],[0-7]?,[0-7]?
'''
# Module-level parser built from the grammar above, with "myfile" named as
# the production passed alongside it; used as the default parser by bigtest().
parser = Parser( declaration, "myfile" )

def bigtest( file, parser = parser  ):
	"""Parse *file* with *parser*, report how far the parse got, and return the result.

	file -- the text to parse (a string, despite the name)
	parser -- object whose parse() method is called on the text; defaults
		to the module-level parser

	Returns whatever parser.parse() returned.  The last item of that result
	is printed as the number of characters parsed, next to len(file).
	"""
	val = parser.parse( file )
	# A parenthesised single-argument print produces identical output under
	# Python 2's print statement and Python 3's print function.
	print( 'parsed %s characters of %s characters' % (val[-1], len(file)) )
	return val

def test():
	"""Run bigtest() over the built-in sample inputs, in order."""
	samples = (
		''' "this" "that" "them" ''',
		''' "this" 'that' "th'em" ''',
	)
	for sample in samples:
		bigtest( sample )
	

# Help text printed by the __main__ block when no filename argument is given.
usage =''' findliterals filename
Finds all single and double-quoted literals in a file and prints them to stdout.
Is not triple-quoted string aware.'''

if __name__ == '__main__':
	# The built-in samples always run first, whether or not a filename follows.
	test()
	import sys
	if sys.argv[1:]:
		import time
		filename = sys.argv[1]
		# Context manager closes the handle as soon as the text has been read.
		with open( filename ) as source:
			text = source.read()
		started = time.time()
		val = bigtest( text )
		# Elapsed time is end minus start; the reverse order gave a negative value.
		elapsed = time.time() - started
		# Single-argument print(...) behaves the same on Python 2 and Python 3.
		print( 'Parsing Time: %s' % (elapsed,) )
		# Each entry of val[1] is unpacked as (report, start, stop, children);
		# start/stop are used as slice offsets into the text that was parsed.
		for report, start, stop, children in val[1]:
			# Show only the first line of each match, capped at 75 characters.
			print( text[ start: stop ].split( '\n' )[0][:75] )
	else:
		print( usage )