1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
|
"""
SIL, the Simple Instance Language, is an attempt to allow
data model instances to be written in a simple, JSON-like language.
"""
#c Copyright 2008-2020, the GAVO project
#c
#c This program is free software, covered by the GNU GPL. See the
#c COPYING file in the source distribution.
import re
from gavo import utils
from gavo.dm import common
# sentinels for further processing
class Atom(str):
    """a sentinel class for atomic values of roles.

    Instances are ordinary strings that additionally know how to write
    themselves as SIL literals (see asSIL).
    """
    # Values consisting only of word characters, underscores and dots are
    # written without quotes.  This has to be a raw string: "\w" in
    # a plain literal is an invalid escape sequence that current python
    # versions warn about.
    noQuotesOkRE = re.compile(r"[\w_.]+$")

    def asSIL(self):
        """returns this value as a SIL literal.

        Values matching noQuotesOkRE are returned unchanged (as a plain
        str); everything else is enclosed in double quotes, with embedded
        double quotes doubled.
        """
        if self.noQuotesOkRE.match(self):
            return str(self)
        return '"%s"'%(self.replace('"', '""'))

    def __repr__(self):
        # str.__repr__ always starts with a quote character, so there is
        # no "u" prefix left to strip.
        return "a"+str.__repr__(self)
class Reference(str):
    """a sentinel class for roles referencing something else.

    The string value is the identifier of whatever is being referenced.
    """
    def asSIL(self):
        """returns this reference as a SIL literal, i.e., the identifier
        with an @ prepended.
        """
        return "@"+self
# parse methods, used by getGrammar, by nonterminal name there
def _pa_attributeDef(s, p, toks):
    """parse action for attributeDef.

    toks is (name, ":", value); the result is an ("attr", name, value)
    triple.  s and p are not used.
    """
    attName, attValue = toks[0], toks[2]
    return ("attr", attName, attValue)
def _pa_typeAnnotation(s, p, toks):
    """parse action for typeAnnotation.

    toks is ("(", typeName, ")"); only the type name between the
    parentheses is kept.  s and p are not used.
    """
    typeName = toks[1]
    return typeName
def _pa_collection(s, p, toks):
    """parse action for collection.

    Returns a ("coll", type, items) triple.  When there is only a single
    token, there was no explicit type annotation and type is None.
    s and p are not used.
    """
    if len(toks)!=1:
        # type annotation followed by the sequence body
        return ("coll", toks[0], toks[1])
    # just the sequence body
    return ("coll", None, toks[0])
def _pa_obj(s, p, toks):
    """parse action for obj.

    Returns an ("obj", type, attributes) triple, where attributes is
    taken from the third element of the object body's parse result.
    Without a type annotation, type is None; a default type for the DM
    should be added later on in that case.  s and p are not used.
    """
    if len(toks)==2:
        declaredType, body = toks[0], toks[1]
    else:
        declaredType, body = None, toks[0]
    return ("obj", declaredType, body[2])
def _pa_objectBody(s, p, toks):
    """parse action for objectBody.

    toks is ("{", attributes, "}"); the result is an untyped
    ("uobj", None, attributes) triple, with attributes turned into
    a list through its asList method.  s and p are not used.
    """
    attributes = toks[1].asList()
    return ("uobj", None, attributes)
def _pa_sequenceBody(s, p, toks):
    """parse action for sequenceBody.

    toks is ("[", items, "]"); the result is a one-element list
    containing the items as a list (obtained through asList).
    s and p are not used.
    """
    items = toks[1].asList()
    return [items]
def _pa_reference(s, p, toks):
    """parse action for reference.

    toks is ("@", identifier); the identifier is returned wrapped into
    a Reference.  s and p are not used.
    """
    target = toks[1]
    return Reference(target)
def _pa_simpleImmediate(s, p, toks):
    """parse action for simpleImmediate.

    The first token, i.e., the literal's text, is returned wrapped into
    an Atom.  s and p are not used.
    """
    literal = toks[0]
    return Atom(literal)
def _pa_nullLiteral(s, p, toks):
    """parse action for nullLiteral.

    Whatever was matched is replaced by a single None.  None of the
    arguments are used.
    """
    # the None comes wrapped in a list -- presumably so pyparsing takes it
    # as a replacement token; confirm against the pyparsing docs.
    nullValue = None
    return [nullValue]
class getGrammar(utils.CachedResource):
    """returns a grammar for parsing a SIL object description.

    The grammar is built in impl; the top-level symbol is obj, i.e.,
    an optionally type-annotated object body in curly braces.
    """
    @classmethod
    def impl(cls):
        """builds the pyparsing grammar and returns its root symbol (obj).

        As a side effect, all local names (in particular, the grammar
        symbols) are stored in cls.symbols; enableDebuggingOutput reads
        them from there.
        """
        from gavo.utils.parsetricks import (Word, Literal, alphas, alphanums,
            QuotedString, Forward, ZeroOrMore, Group, Optional, cStyleComment,
            pyparsingWhitechars)

        # NOTE(review): presumably this sets pyparsing's whitespace
        # characters while the grammar is being built -- confirm in
        # gavo.utils.parsetricks.
        with pyparsingWhitechars("\t\n\r "):
            # identifiers: qualified ones may contain colons, external
            # ones (used in references) may contain slashes and hashes.
            qualifiedIdentifier = Word(alphas+"_:", alphanums+"-._:")
            plainIdentifier = Word(alphas+"_", alphanums+"-._")
            externalIdentifier = Word(alphas+"_", alphanums+"._/#-")

            # atomic values: unquoted words or double-quoted strings in
            # which a literal double quote is written as "".
            plainLiteral = Word(alphanums+"_-.")
            quotedLiteral = QuotedString(quoteChar='"', escQuote='""')
            nullLiteral = Literal("__NULL__")
            reference = (Literal('@') + externalIdentifier)

            # objects and collections can contain values that are objects
            # or collections themselves, hence the forward declaration.
            complexImmediate = Forward()
            simpleImmediate = plainLiteral | quotedLiteral
            value = (nullLiteral | reference | complexImmediate | simpleImmediate)

            attributeDef = (plainIdentifier
                + Literal(":")
                + value)
            typeAnnotation = (Literal('(')
                + qualifiedIdentifier
                + Literal(')'))
            objectBody = (Literal('{')
                + Group(ZeroOrMore( attributeDef ))
                + Literal('}'))
            obj = Optional(typeAnnotation) + objectBody

            sequenceBody = (Literal('[')
                + Group(ZeroOrMore(value | objectBody))
                + Literal(']'))
            collection = Optional(typeAnnotation) + sequenceBody

            complexImmediate << ( obj | collection )

        # have the structural symbols ignore C-style comments
        for sym in [complexImmediate, collection, sequenceBody,
                objectBody, typeAnnotation, attributeDef]:
            sym.ignore(cStyleComment)

        # every module-level function called _pa_<name> becomes the parse
        # action of the local grammar symbol <name>; hence, for each such
        # function a local of the matching name must exist here.
        for n, func in globals().items():
            if n.startswith("_pa_"):
                locals()[n[4:]].setParseAction(func)

        # NOTE(review): this also captures helper locals like n, func, and
        # sym; sym is still bound to attributeDef here, so
        # enableDebuggingOutput will see that element under the name "sym"
        # as well -- confirm that is harmless.
        cls.symbols = locals()
        return obj

    @classmethod
    def enableDebuggingOutput(cls):
        """(not user-servicable)

        Switches on debugging for all ParserElement instances in
        cls.symbols and names them after the local variables they were
        bound to.  cls.symbols is only set by impl, so the grammar must
        have been built before this is called.
        """
        from gavo.utils.parsetricks import ParserElement
        for name, sym in cls.symbols.items():
            # cls.symbols contains non-grammar locals, too; skip these.
            if isinstance(sym, ParserElement):
                sym.setDebug(True)
                sym.setName(name)
def _iterAttrs(node, seqType, roleName):
    """generates parse events for nodes with attribute children
    (see _parseTreeToEvents).

    node[2] must be a sequence of ("attr", name, value) triples.  Atom and
    Reference values result in ('attr', name, value) events, tuple values
    (nested nodes) are expanded recursively with name as their role name,
    and None values (from __NULL__) produce no events at all.

    seqType and roleName are ignored here; they are only present so
    the signature matches that of _iterObjs.
    """
    for attrDef in node[2]:
        assert attrDef[0]=='attr'
        attName, attValue = attrDef[1], attrDef[2]

        if attValue is None:
            # swallow __NULL__ attributes
            continue

        if isinstance(attValue, (Reference, Atom)):
            yield ('attr', attName, attValue)
        elif isinstance(attValue, tuple):
            # a nested object or collection
            yield from _parseTreeToEvents(attValue, roleName=attName)
        else:
            assert False, "Bad object as parsed value: %s"%repr(attValue)
def _iterObjs(node, seqType, roleName):
    """generates parse events for the members of a collection node
    (see _parseTreeToEvents).

    Atom and Reference members of node[2] result in ('item', member, None)
    events.  Everything else is considered a nested node and expanded
    recursively, with seqType and roleName handed through.
    """
    # NOTE(review): a None member (__NULL__ is a valid value inside
    # a sequence as far as the grammar goes) is not special-cased here
    # and would be passed on to _parseTreeToEvents -- confirm whether
    # that is intended.
    for member in node[2]:
        if not isinstance(member, (Reference, Atom)):
            # complex child -- yield events
            yield from _parseTreeToEvents(member, seqType=seqType,
                roleName=roleName)
            continue
        yield ('item', member, None)
# Maps the tags of parse tree nodes to triples of
#   (event name to emit, type source, child parser).
# A type source of 'fromNode' means the node type is taken from the node
# itself, 'seqType' means it is the type passed in by the caller.  The
# child parser is the generator function producing the events for the
# node's children.
_PARSER_EVENT_MAPPING = dict(
    obj=('obj', 'fromNode', _iterAttrs),
    uobj=('obj', 'seqType', _iterAttrs),
    coll=('coll', 'fromNode', _iterObjs),
)
def _parseTreeToEvents(node, seqType=None, roleName=None):
    """helps iterparse by turning a parse tree node into parse events.

    node is a (tag, type, children) triple, with tag being one of the
    keys of _PARSER_EVENT_MAPPING.  This yields an opening event
    (event name, roleName, node type), then the events for the node's
    children, and finally a ('pop', None, None) event.

    seqType is the node type used when the mapping says the type does not
    come from the node itself.
    """
    evName, typeSource, iterChildren = _PARSER_EVENT_MAPPING[node[0]]

    if typeSource=='seqType':
        nodeType = seqType
    elif typeSource=='fromNode':
        nodeType = node[1]
    else:
        assert False

    yield (evName, roleName, nodeType)
    yield from iterChildren(node, nodeType, roleName)
    yield ('pop', None, None)
def iterparse(silLiteral):
    """yields parse events for a SIL literal in a string.

    The parse events are triples of one of the forms:

    * ('attr', roleName, value) add an attribute to the current annotation
    * ('obj', roleName, type) create a new object annotation of type
    * ('coll', roleName, type) create a new collection annotation (type
      can be None)
    * ('item', val, None) add an atomic value to the current collection
    * ('pop', None, None) finish current annotation and add it to its
      container

    The whole of silLiteral must match the grammar (it is parsed with
    parseAll=True).
    """
    grammar = getGrammar()
    parseTree = grammar.parseString(silLiteral, parseAll=True)
    return _parseTreeToEvents(parseTree[0])
def getAnnotation(silLiteral, annotationFactory):
    """returns an annotation object parsed from silLiteral.

    annotationFactory is a callable that is invoked as
    annotationFactory(root, name, value) for each atomic attribute and
    each atomic collection item; value is either an Atom or a Reference
    in these cases, root is the annotation of the outermost object.
    Whatever it returns is added to the annotation currently being built.

    A utils.StructureError is raised if no result comes out of the parse
    or if the root object has no type.
    """
    events = iterparse(silLiteral)

    # make the root of the DM instance tree from the first event
    evType, arg1, arg2 = next(events)
    assert evType=='obj'
    root = common.ObjectAnnotation(arg1, arg2, None)

    result = None
    # the annotations currently under construction, innermost last
    obStack = [root]

    for evType, arg1, arg2 in events:
        if evType=='attr':
            obStack[-1].add( #noflake: the del obStack below is conditional
                annotationFactory(root, arg1, arg2))

        elif evType=='item':
            collection = obStack[-1] #noflake: see above
            assert isinstance(collection, common.CollectionAnnotation)
            collection.add(
                annotationFactory(root, collection.name, arg1))

        elif evType=='obj':
            obStack.append(common.ObjectAnnotation(arg1, arg2, root))

        elif evType=='coll':
            obStack.append(common.CollectionAnnotation(arg1, arg2, root))

        elif evType=='pop':
            finished = obStack.pop()
            if not obStack:
                # we've just popped the total result.  Make sure
                # any further operations fail.
                del obStack
                result = finished
            else:
                obStack[-1].add(finished)

        else:
            assert False

    if result is None:
        raise utils.StructureError("Data model annotation yielded no result.")

    if result.type is None:
        raise utils.StructureError(
            "Root of Data Model annotation must have a type.")

    return result
if __name__=="__main__":
    # ad-hoc smoke test: parse a small literal with pyparsing debugging
    # switched on and print the resulting parse tree.
    sample = """
(:testclass) {
seq: [a "b c d" @e]}"""
    grammar = getGrammar()
    getGrammar.enableDebuggingOutput()
    print(grammar.parseString(sample, parseAll=True)[0])
|