File: columngrammar.py

package info (click to toggle)
gavodachs 2.3%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,260 kB
  • sloc: python: 58,359; xml: 8,882; javascript: 3,453; ansic: 661; sh: 158; makefile: 22
file content (161 lines) | stat: -rw-r--r-- 5,186 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
A grammar that just splits the source into input lines and then
lets you name character ranges.
"""

#c Copyright 2008-2020, the GAVO project
#c
#c This program is free software, covered by the GNU GPL.  See the
#c COPYING file in the source distribution.



from gavo import base
from gavo import utils
from gavo.grammars.common import Grammar, FileRowIterator, FileRowAttributes
from gavo.utils import parsetricks


class SplitLineIterator(FileRowIterator):
	"""A row iterator producing one rowdict per line of the input file.

	Each line is cut up according to the grammar's colRanges; lines
	starting with the grammar's commentIntroducer (if one is given) are
	skipped.  inputFile, recNo, and sourceToken are not set in this class;
	presumably they come from FileRowIterator -- TODO confirm.
	"""

	def __init__(self, grammar, sourceToken, **kwargs):
		FileRowIterator.__init__(self, grammar, sourceToken, **kwargs)
		# Discard the header lines the grammar asks us to ignore, but start
		# the line counter behind them so locators still refer to lines
		# of the source file.
		skipCount = self.grammar.topIgnoredLines
		for _ in range(skipCount):
			self.inputFile.readline()
		self.lineNo = skipCount

	def _iterRows(self):
		"""yields a rowdict for each non-comment line of the input.

		When the input is exhausted, the input file is closed and the
		reference to the grammar is dropped.
		"""
		while True:
			# the counter is bumped before reading, so it is also
			# incremented for the final, empty read at end of file.
			self.lineNo += 1
			rawLine = self.inputFile.readline()
			if not rawLine:
				break

			commentIntro = self.grammar.commentIntroducer
			isComment = (commentIntro is not base.NotGiven
				and rawLine.startswith(commentIntro))
			if not isComment:
				yield self._parse(rawLine)
				# only lines actually turned into rows count as records
				self.recNo += 1

		self.inputFile.close()
		self.grammar = None

	def _parse(self, inputLine):
		"""returns a dictionary mapping the keys of the grammar's colRanges
		to the corresponding pieces of inputLine.

		The pieces are whitespace-stripped if the grammar's strip attribute
		is true.
		"""
		doStrip = self.grammar.strip
		res = {}
		try:
			for key, colSlice in self.grammar.colRanges.items():
				field = inputLine[colSlice]
				res[key] = field.strip() if doStrip else field
		except IndexError:
			# NOTE(review): slicing a str does not raise IndexError for
			# out-of-range bounds, so as long as colRanges only contains
			# slice objects this handler presumably never fires and short
			# lines just yield truncated or empty values -- confirm intent.
			raise base.ui.logOldExc(base.SourceParseError("Short line", inputLine, 
				self.getLocator(), self.sourceToken))
		return res

	def getLocator(self):
		"""returns a string giving the number of the current input line.
		"""
		return "line %d"%(self.lineNo,)


class ColRangeAttribute(base.UnicodeAttribute):
	"""A range of indices.

	Ranges can be specified as either <int1>-<int2>, just <int>
	(which is equivalent to <int>-<int>), or as half-open ranges 
	(<int>- or -<int>).  Ranges are, contrary to
	python slices, inclusive on both sides, and start counting
	from one.
	"""
	def parse(self, value):
		"""returns a python slice for the range literal value.

		Slice objects are passed through unchanged.  A LiteralParseError
		is raised (via base.ui.logOldExc) if value contains more than one
		dash or has bounds that are not integer literals.
		"""
		if isinstance(value, slice):
			# nothing to do, this has been parsed before
			return value

		try:
			if "-" not in value:
				# a single column n is the one-character slice [n-1:n]
				index = int(value)
				return slice(index-1, index)

			# unpacking fails with a ValueError for more than one dash,
			# which is turned into a LiteralParseError below.
			lowerLit, upperLit = value.split("-")
			# One-based inclusive bounds become python slice bounds by
			# decrementing the lower bound only; an empty bound leaves
			# that side of the slice open.
			lower = int(lowerLit)-1 if lowerLit.strip() else None
			upper = int(upperLit) if upperLit.strip() else None
			return slice(lower, upper)
		except ValueError:
			raise base.ui.logOldExc(
				base.LiteralParseError("colRanges", value, hint="A column range,"
				" (either int1-int2 or just an int) is expected here."))


class ColumnGrammar(Grammar, FileRowAttributes):
	"""A grammar that builds rowdicts out of character index ranges.

	This works by using the colRanges attribute like <col key="mag">12-16</col>,
	which will take the characters 12 through 16 inclusive from each input
	line to build the input column mag.

	As a shortcut, you can also use the colDefs attribute; it contains
	a string of the form {<key>:<range>}, i.e.,
	a whitespace-separated list of colon-separated items of key and range
	as accepted by cols, e.g.::
		
		<colDefs>
			a: 3-4
			_u: 7
		</colDefs>
	"""
	name_ = "columnGrammar"

	_til = base.IntAttribute(
		"topIgnoredLines",
		default=0,
		description="Skip this many lines at the top of each source file.",
		copyable=True)
	_cols = base.DictAttribute(
		"colRanges",
		description="Mapping of"
			" source keys to column ranges.",
		itemAttD=ColRangeAttribute("col"),
		copyable=True)
	_colDefs = base.ActionAttribute(
		"colDefs",
		description="Shortcut"
			" way of defining cols",
		methodName="_parseColDefs")
	_commentIntroducer = base.UnicodeAttribute(
		"commentIntroducer",
		default=base.NotGiven,
		description="A character sequence"
			" that, when found at the beginning of a line makes this line"
			" ignored",
		copyable=True)
	_strip = base.BooleanAttribute(
		"strip",
		default=True,
		description="Strip all parsed strings?",
		copyable=True)

	def _getColDefGrammar(self):
		"""returns a pyparsing grammar for the content of colDefs.

		Each clause of the grammar is reduced by a parse action to a pair
		of the column key and the range literal.
		"""
		with parsetricks.pyparsingWhitechars("\n\t\r "):
			intLiteral = parsetricks.Word(parsetricks.nums)
			# need to manually swallow whitespace after literals
			blindWhite = parsetricks.Suppress(
				parsetricks.Optional(parsetricks.White()))
			dash = blindWhite + parsetricks.Literal("-") + blindWhite

			# "-<int>" on the one hand; "<int>", "<int>-", and
			# "<int>-<int>" on the other.
			openStart = dash + blindWhite + intLiteral
			fromInt = intLiteral + parsetricks.Optional(
				dash + parsetricks.Optional(intLiteral))
			colRange = parsetricks.Combine(openStart | fromInt)
			colRange.setName("Column range")

			colKey = parsetricks.Regex(utils.identifierPattern.pattern[:-1])
			colKey.setName("Column key")

			# the tokens of a clause are key, colon, and range (blindWhite
			# is suppressed), so the parse action picks items 0 and 2.
			clause = colKey + parsetricks.Literal(":") + blindWhite + colRange
			clause = clause.addParseAction(lambda s,p,t: (t[0], t[2]))

			# (for debugging, call setDebug(True) on colRange, colKey,
			# or clause)
			return parsetricks.ZeroOrMore(clause)+parsetricks.StringEnd()

	def _parseColDefs(self, ctx):
		"""parses the shortcut colDefs and enters the results into colRanges.

		This is the handler named as methodName in the colDefs attribute;
		ctx is not used here.  Literals the colDefs grammar rejects yield
		a LiteralParseError.
		"""
		try:
			parsedPairs = utils.pyparseString(
				self._getColDefGrammar(), self.colDefs)
			for colKey, rangeLiteral in parsedPairs:
				self.colRanges[colKey] = self._cols.itemAttD.parse(rangeLiteral)
		except parsetricks.ParseException as ex:
			raise base.LiteralParseError("colDefs", self.colDefs,
				hint="colDefs is a whitespace-separated list of key:range pairs."
				" Your literal doesn't look like this, and here's what the"
				" parser had to complain: %s"%ex)

	# the class used to iterate over the rows of a source
	rowIterator = SplitLineIterator