File: lexTests.py

package info (click to toggle)
codequery 0.21.1%2Bdfsg1-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 11,012 kB
  • sloc: cpp: 114,603; xml: 16,576; python: 6,512; ansic: 3,262; perl: 444; makefile: 324; sql: 160; sh: 43; objc: 19; ruby: 6; php: 1
file content (179 lines) | stat: -rw-r--r-- 6,371 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# -*- coding: utf-8 -*-
# Requires Python 2.7 or later

import io, os, sys, unittest

if sys.platform == "win32":
	import XiteWin as Xite
else:
	import XiteQt as Xite

# Keyword sets used by the "hypertext" lexer tests.
# LexExample hands element i of this list to SetKeyWords as keyword set i.
keywordsHTML = [
	b"b body content head href html link meta name rel script strong title type xmlns",
	b"function",
	b"sub",
]

# Keywords used by the Perl lexer tests.
# The adjacent bytes literals below are not separated by commas, so they
# concatenate into ONE space-separated bytes string: the list has a single
# element, which LexExample passes to SetKeyWords as keyword set 0.
# The trailing space inside each literal keeps words on neighbouring lines apart.
keywordsPerl = [
b"NULL __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD "
b"BEGIN CORE DESTROY END EQ GE GT INIT LE LT NE CHECK abs accept "
b"alarm and atan2 bind binmode bless caller chdir chmod chomp chop "
b"chown chr chroot close closedir cmp connect continue cos crypt "
b"dbmclose dbmopen defined delete die do dump each else elsif endgrent "
b"endhostent endnetent endprotoent endpwent endservent eof eq eval "
b"exec exists exit exp fcntl fileno flock for foreach fork format "
b"formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname "
b"gethostent getlogin getnetbyaddr getnetbyname getnetent getpeername "
b"getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent "
b"getpwent getpwnam getpwuid getservbyname getservbyport getservent "
b"getsockname getsockopt glob gmtime goto grep gt hex if index "
b"int ioctl join keys kill last lc lcfirst le length link listen "
b"local localtime lock log lstat lt map mkdir msgctl msgget msgrcv "
b"msgsnd my ne next no not oct open opendir or ord our pack package "
b"pipe pop pos print printf prototype push quotemeta qu "
b"rand read readdir readline readlink readpipe recv redo "
b"ref rename require reset return reverse rewinddir rindex rmdir "
b"scalar seek seekdir select semctl semget semop send setgrent "
b"sethostent setnetent setpgrp setpriority setprotoent setpwent "
b"setservent setsockopt shift shmctl shmget shmread shmwrite shutdown "
b"sin sleep socket socketpair sort splice split sprintf sqrt srand "
b"stat study sub substr symlink syscall sysopen sysread sysseek "
b"system syswrite tell telldir tie tied time times truncate "
b"uc ucfirst umask undef unless unlink unpack unshift untie until "
b"use utime values vec wait waitpid wantarray warn while write "
b"xor "
b"given when default break say state UNITCHECK __SUB__ fc"
]

class TestLexers(unittest.TestCase):
	"""Regression tests that compare lexer styling with stored results.

	Each test loads a file from the "examples" directory into the editor,
	lexes it, renders the styling with AsStyled and compares that rendering
	with the neighbouring "<example>.styled" file.
	"""

	def setUp(self):
		# Start every test from an empty document with no undo history.
		self.xite = Xite.xiteFrame
		self.ed = self.xite.ed
		self.ed.ClearAll()
		self.ed.EmptyUndoBuffer()

	def AsStyled(self, withWindowsLineEnds):
		"""Return the document bytes with a "{N}" marker before each style run.

		A marker holding the decimal style number is written whenever the
		style at a position differs from the style of the previous position
		(and always before the first byte).

		withWindowsLineEnds: when true, every LF in the result is replaced
		by CRLF before returning.
		"""
		text = self.ed.Contents()
		data = io.BytesIO()
		prevStyle = -1
		for o in range(self.ed.Length):
			styleNow = self.ed.GetStyleAt(o)
			if styleNow != prevStyle:
				data.write(("{%0d}" % styleNow).encode('utf-8'))
				prevStyle = styleNow
			# Slice rather than index so a bytes object is written on Python 3 too.
			data.write(text[o:o+1])
		styled = data.getvalue()
		if withWindowsLineEnds:
			return styled.replace(b"\n", b"\r\n")
		return styled

	def LexExample(self, name, lexerName, keywords, fileMode="b"):
		"""Lex examples/<name> and check the styling against examples/<name>.styled.

		name: file name of the example inside the "examples" directory.
		lexerName: value assigned to the editor's LexerLanguage.
		keywords: sequence of keyword lists; element i is passed to SetKeyWords(i, ...).
		fileMode: "b" loads the file as is and, after a successful whole-file
			comparison, repeats the lex from the start of every line.
			"t" skips the per-line check; on Windows the example's CRLF line
			ends are converted to LF before loading and the styled output is
			converted back to CRLF before comparing.

		On a mismatch the new styling is written to examples/<name>.new and
		the test fails. A missing ".styled" file is treated as empty, so the
		first run for a new example fails and leaves a ".new" file behind.
		"""
		windowsText = fileMode == "t" and sys.platform == "win32"

		self.ed.ClearAll()
		self.ed.EmptyUndoBuffer()
		self.ed.SetCodePage(65001)
		self.ed.LexerLanguage = lexerName
		bits = self.ed.StyleBitsNeeded
		# Mask with the low 'bits' bits set. Parenthesised explicitly because
		# '-' binds tighter than '<<', which otherwise yields a single bit.
		mask = (1 << bits) - 1
		self.ed.StyleBits = bits
		for i, keywordList in enumerate(keywords):
			self.ed.SetKeyWords(i, keywordList)

		nameExample = os.path.join("examples", name)
		namePrevious = nameExample + ".styled"
		nameNew = nameExample + ".new"
		with open(nameExample, "rb") as f:
			prog = f.read()
		if windowsText:
			prog = prog.replace(b"\r\n", b"\n")
		# Drop a UTF-8 byte order mark so it is not lexed as document content.
		BOM = b"\xEF\xBB\xBF"
		if prog.startswith(BOM):
			prog = prog[len(BOM):]
		lenDocument = len(prog)
		self.ed.AddText(lenDocument, prog)
		self.ed.Colourise(0, lenDocument)
		self.assertEqual(self.ed.EndStyled, lenDocument)
		try:
			with open(namePrevious, "rb") as f:
				prevStyled = f.read()
		except EnvironmentError:
			# No stored result: compare against empty bytes (same type as progStyled).
			prevStyled = b""
		progStyled = self.AsStyled(windowsText)
		if progStyled != prevStyled:
			with open(nameNew, "wb") as f:
				f.write(progStyled)
			print("Incorrect lex for " + name)
			print(progStyled)
			print(prevStyled)
			self.assertEqual(progStyled, prevStyled)
			# The whole file doesn't parse like it did before so don't try line by line
			# as that is likely to fail many times.
			return

		if fileMode == "b":	# "t" files are large and this is a quadratic check
			# Try partial lexes from the start of every line which should all be identical.
			for line in range(self.ed.LineCount):
				lineStart = self.ed.PositionFromLine(line)
				# Move the styling position back to this line so the lex restarts here.
				self.ed.StartStyling(lineStart, mask)
				self.assertEqual(self.ed.EndStyled, lineStart)
				self.ed.Colourise(lineStart, lenDocument)
				progStyled = self.AsStyled(windowsText)
				if progStyled != prevStyled:
					# 'line' is an int, so it must be converted before concatenation.
					print("Incorrect partial lex for " + name + " at line " + str(line))
					with open(nameNew, "wb") as f:
						f.write(progStyled)
					self.assertEqual(progStyled, prevStyled)
					# Give up after one failure
					return

	# Test lexing just once from beginning to end in text form.
	# This is used for test cases that are too long to be exhaustively tested by lines and
	# may be sensitive to line ends so are tested as if using Unix LF line ends.
	# The fileMode parameter is accepted for signature parity with LexExample but is
	# not used: text mode ("t") is always passed on.
	def LexLongCase(self, name, lexerName, keywords, fileMode="b"):
		self.LexExample(name, lexerName, keywords, "t")

	def testCXX(self):
		self.LexExample("x.cxx", b"cpp", [b"int"])

	def testPython(self):
		self.LexExample("x.py", b"python",
			[b"class def else for if import in print return while"])

	def testHTML(self):
		self.LexExample("x.html", b"hypertext", keywordsHTML)

	def testASP(self):
		self.LexExample("x.asp", b"hypertext", keywordsHTML)

	def testPHP(self):
		self.LexExample("x.php", b"hypertext", keywordsHTML)

	def testVB(self):
		self.LexExample("x.vb", b"vb", [b"as dim or string"])

	def testLua(self):
		self.LexExample("x.lua", b"lua", [b"function end"])

	def testRuby(self):
		self.LexExample("x.rb", b"ruby", [b"class def end"])

	def testPerl(self):
		self.LexExample("x.pl", b"perl", keywordsPerl)

	def testPerl52(self):
		self.LexLongCase("perl-test-5220delta.pl", b"perl", keywordsPerl)

	def testPerlPrototypes(self):
		self.LexLongCase("perl-test-sub-prototypes.pl", b"perl", keywordsPerl)

	def testD(self):
		self.LexExample("x.d", b"d",
			[b"keyword1", b"keyword2", b"", b"keyword4", b"keyword5",
			b"keyword6", b"keyword7"])

# When run as a script, hand control to the Xite harness, passing "lexTests"
# (presumably this module's name, so the harness can load its tests - confirm in Xite).
if __name__ == "__main__":
	Xite.main("lexTests")