1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639
|
"""
Grammalecte: compile rules
Create Directed Acyclic Rule Graphs (DARGs)
"""
import re
import os
import time
import concurrent.futures
import darg
import compile_rules_js_convert as jsconv
import helpers
import graphspell
#### PROCESS POOL EXECUTOR ####
xProcessPoolExecutor = None


def initProcessPoolExecutor (nMultiCPU=None):
    """(re)create the module-level process pool executor

    An executor launched by a previous call is shut down (without waiting) before
    the new one is created.

    <nMultiCPU>: requested number of worker processes. When it is None or outside
    the range [1, number of CPUs - 1], the maximum of that range is used instead.
    """
    global xProcessPoolExecutor
    if xProcessPoolExecutor:
        # we shutdown the ProcessPoolExecutor which may have been launched previously
        print(" ProcessPoolExecutor shutdown.")
        xProcessPoolExecutor.shutdown(wait=False)
    # os.cpu_count() returns None when the number of CPUs cannot be determined:
    # fall back to a single CPU instead of failing on <None - 1>
    nMaxCPU = max((os.cpu_count() or 1) - 1, 1)
    if nMultiCPU is None or not (1 <= nMultiCPU <= nMaxCPU):
        nMultiCPU = nMaxCPU
    print(" CPU processes used for workers: ", nMultiCPU)
    xProcessPoolExecutor = concurrent.futures.ProcessPoolExecutor(max_workers=nMultiCPU)
def rewriteCode (sCode):
    "translate the simplified rule-code syntax of <sCode> into a string of Python code"
    # a leading "=" is only a marker: it is not part of the code
    if sCode.startswith("="):
        sCode = sCode[1:]
    # plain keyword substitutions
    for sKeyword, sPython in (
        ("__also__", "bCondMemo"),
        ("__else__", "not bCondMemo"),
        ("sContext", "_sAppContext"),
    ):
        sCode = sCode.replace(sKeyword, sPython)
    # regex substitutions: they are applied in this exact order, the last ones
    # (bare references) must only see what the function-call rewrites left over
    lSubstitution = (
        # functions taking a token reference (\n counted from the start, \-n from the end)
        (r"\b(morph0?|morphVC|value|tag|meta|info)[(]\\(\d+)", 'g_\\1(lToken[nTokenOffset+\\2]'),
        (r"\b(morph0?|morphVC|value|tag|meta|info)[(]\\-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]'),
        (r"\b(select|define|definefrom|rewrite|addmorph|setmeta)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2]'),
        (r"\b(select|define|definefrom|rewrite|addmorph|setmeta)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1]'),
        # functions taking two token references
        (r"\b(agreement|suggAgree)[(][\\](\d+), *[\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], lToken[nTokenOffset+\\3]'),
        (r"\b(agreement|suggAgree)[(][\\](\d+), *[\\]-(\d+)", 'g_\\1(lToken[nTokenOffset+\\2], lToken[nLastToken-\\3+1]'),
        (r"\b(agreement|suggAgree)[(][\\]-(\d+), *[\\](\d+)", 'g_\\1(lToken[nLastToken-\\2+1], lToken[nTokenOffset+\\3]'),
        (r"\b(agreement|suggAgree)[(][\\]-(\d+), *[\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], lToken[nLastToken-\\3+1]'),
        # functions which also receive the tags dictionary
        (r"\b(tagbefore|tagafter)[(][\\](\d+)", 'g_\\1(lToken[nTokenOffset+\\2], dTags'),
        (r"\b(tagbefore|tagafter)[(][\\]-(\d+)", 'g_\\1(lToken[nLastToken-\\2+1], dTags'),
        # functions which receive the referenced token and the following one
        (r"\bspace[(][\\](\d+)", 'g_space(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]'),
        (r"\bspace[(][\\]-(\d+)", 'g_space(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]'),
        (r"\bmorph2[(][\\](\d+)", 'g_morph2(lToken[nTokenOffset+\\1], lToken[nTokenOffset+\\1+1]'),
        (r"\bmorph2[(][\\]-(\d+)", 'g_morph2(lToken[nLastToken-\\1+1], lToken[nLastToken-\\1+2]'),
        # tokens outside the matched sequence: >n (next tokens), <n (previous tokens)
        (r"\b(morph0?|tag|meta|value|info)\(>1", 'g_\\1(lToken[nLastToken+1]'),
        (r"\b(morph0?|tag|meta|value|info)\(<1", 'g_\\1(lToken[nTokenOffset]'),
        (r"\b(morph0?|tag|meta|value|info)\(>(\d+)", 'g_\\1(g_token(lToken, nLastToken+\\2)'),
        (r"\b(morph0?|tag|meta|value|info)\(<(\d+)", 'g_\\1(g_token(lToken, nTokenOffset+1-\\2)'),
        (r"\bspace[(](>1)", 'g_space(lToken[nLastToken+1], g_token(lToken, nLastToken+2)'),
        (r"\bspace[(](<1)", 'g_space(lToken[nTokenOffset], lToken[nTokenOffset+1]'),
        # spellchecking
        (r"\bspell *[(]", '_oSpellChecker.isValid('),
        # lookups in the text before/after the matched tokens
        (r"\bbefore\(\s*", 'look(sSentence[:lToken[1+nTokenOffset]["nStart"]], '),
        (r"\bafter\(\s*", 'look(sSentence[lToken[nLastToken]["nEnd"]:], '),
        (r"\bbefore0\(\s*", 'look(sSentence0[:lToken[1+nTokenOffset]["nStart"]], '),
        (r"\bafter0\(\s*", 'look(sSentence0[lToken[nLastToken]["nEnd"]:], '),
        # remaining bare references stand for the value of the token
        (r"[\\](\d+)", 'lToken[nTokenOffset+\\1]["sValue"]'),
        (r"[\\]-(\d+)", 'lToken[nLastToken-\\1+1]["sValue"]'),
        (r">1", 'lToken[nLastToken+1]["sValue"]'),
        (r"<1", 'lToken[nTokenOffset]["sValue"]'),
    )
    for sPattern, sReplacement in lSubstitution:
        sCode = re.sub(sPattern, sReplacement, sCode)
    return sCode
def changeReferenceToken (sText, dPos):
    "replace each group reference \\i found in <sText> by \\<dPos[i]>"
    # nothing to do when the text holds no reference at all
    if "\\" in sText:
        # highest group number first; the lookahead keeps \1 from matching inside \10
        for iGroup in reversed(range(1, len(dPos) + 1)):
            sNewReference = "\\" + str(dPos[iGroup])
            sText = re.sub(rf"\\{iGroup}(?![0-9])", lambda _m, s=sNewReference: s, sText)
    return sText
def checkTokenNumbers (sText, sActionId, nToken):
    "debugging helper: report every token reference \\n in <sText> where n is greater than <nToken>"
    for sIndex in re.findall(r"\\(\d+)", sText):
        if int(sIndex) <= nToken:
            # valid reference
            continue
        print("# Error in token index at line " + sActionId + " ("+str(nToken)+" tokens only)")
        print(sText)
def checkIfThereIsCode (sText, sActionId):
    "debugging helper: warn when <sText> looks like code although it is handled as plain text"
    # hints of code: a method call, a sugg…() call, a reference used as first argument, a slice
    if re.search(r"[.]\w+[(]|sugg\w+[(]|\(\\[0-9]|\[(?:[0-9]:|:)", sText) is None:
        return
    print("# Warning at line " + sActionId + ": This message looks like code. Line should probably begin with =")
    print(sText)
class GraphBuilder:
    "build the token graph, the actions and the callables of one named rules graph"

    def __init__ (self, sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority):
        """store the identity of the graph and the shared definitions

        <sGraphName>, <sGraphCode>: name and code of the graph (the code is used
            to build action identifiers and function names)
        <sLang>: language code, handed over to darg.DARG
        <dDef>: definitions, used to expand {name} tokens
        <dDecl>: suffix code -> list of suffixes, used to expand tokens in [a|b] blocks
        <dOptPriority>: option name -> priority, used when a rule has no priority
        """
        self.sGraphName = sGraphName
        self.sGraphCode = sGraphCode
        self.sLang = sLang
        self.dDef = dDef
        self.dDecl = dDecl
        self.dOptPriority = dOptPriority
        self.dAntiPatterns = {}     # rule name -> list of token lists to discard
        self.dActions = {}          # action name -> action (list)
        self.dFuncName = {}         # function type -> { code: number }
        self.dFunctions = {}        # function name -> code
        self.dLemmas = {}           # lemma -> line where it has been first seen

    def createGraphAndActions (self, lRuleLine):
        """create a graph as a dictionary with <lRuleLine>

        Each element of <lRuleLine> is a tuple
        (iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority).
        Returns (dGraph, dActions, sPyCallables, sJSCallables, dLemmas).
        """
        fStartTimer = time.time()
        print("{:>8,} rules in {:<30} ".format(len(lRuleLine), f"<{self.sGraphName}|{self.sGraphCode}>"), end="")
        lPreparedRule = []
        for i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority in lRuleLine:
            for aRule in self.createRule(i, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
                lPreparedRule.append(aRule)
        # Debugging
        if False:
            print("\nRULES:")
            for e in lPreparedRule:
                if e[-2] == "##2211":
                    print(e)
        # Graph creation
        oDARG = darg.DARG(lPreparedRule, self.sLang)
        dGraph = oDARG.createGraph()
        print(oDARG, end="")
        # debugging
        if False:
            print("\nGRAPH:", self.sGraphName)
            for k, v in dGraph.items():
                print(k, "\t", v)
        print("\tin {:>8.2f} s".format(time.time()-fStartTimer))
        sPyCallables, sJSCallables = self.createCallables()
        return dGraph, self.dActions, sPyCallables, sJSCallables, self.dLemmas

    def _genTokenLines (self, sTokenLine):
        """tokenize a string and return a list of lines of tokens

        Generator: yields (lToken, nFirstNullable, nLastNullable) for every
        combination produced by optional tokens (?token¿) and alternatives ([a|b]).
        <nFirstNullable> is the position (from 1) of the first optional token,
        0 if there is none, -1 as soon as a token is written between parentheses.
        <nLastNullable> is the position of the last optional token, counted
        backwards from the end of the line (negative value).
        """
        lTokenLines = []
        nFirstNullable = 0
        nLastNullable = 0
        n = 0   # stays at 0 for an empty line, <n> is read after the loop
        for n, sTokBlock in enumerate(sTokenLine.split(), 1):
            # replace merger characters by spaces
            if "␣" in sTokBlock:
                sTokBlock = sTokBlock.replace("␣", " ")
            # optional token?
            bNullPossible = sTokBlock.startswith("?") and sTokBlock.endswith("¿")
            if bNullPossible:
                sTokBlock = sTokBlock[1:-1]
                if nFirstNullable == 0:
                    nFirstNullable = n
                nLastNullable = n
            # token with definition?
            if sTokBlock.startswith("(") and sTokBlock.endswith(")"):
                nFirstNullable = -1
            if sTokBlock.startswith("({") and sTokBlock.endswith("})") and sTokBlock[1:-1] in self.dDef:
                sTokBlock = "(" + self.dDef[sTokBlock[1:-1]] + ")"
            elif sTokBlock.startswith("{") and sTokBlock.endswith("}") and sTokBlock in self.dDef:
                sTokBlock = self.dDef[sTokBlock]
            if ( (sTokBlock.startswith("[") and sTokBlock.endswith("]")) or (sTokBlock.startswith("([") and sTokBlock.endswith("])")) ):
                # multiple token
                bSelectedGroup = sTokBlock.startswith("(") and sTokBlock.endswith(")")
                if bSelectedGroup:
                    sTokBlock = sTokBlock[1:-1]
                lToken = self._createTokenList(sTokBlock)
                if not lTokenLines:
                    lTokenLines = [ ["("+s+")"] for s in lToken ] if bSelectedGroup else [ [s] for s in lToken ]
                    if bNullPossible:
                        lTokenLines.extend([ [] for i in range(len(lToken)+1) ])
                else:
                    lNewTemp = []
                    if bNullPossible:
                        # existing lines are kept as they are (token absent),
                        # and duplicated once for each alternative
                        for aRule in lTokenLines:
                            for sElem in lToken:
                                aNewRule = list(aRule)
                                aNewRule.append(sElem)
                                lNewTemp.append(aNewRule)
                    else:
                        # the first alternative extends the existing lines in place,
                        # the other alternatives extend copies of them
                        sElem1 = lToken.pop(0)
                        for aRule in lTokenLines:
                            for sElem in lToken:
                                aNewRule = list(aRule)
                                aNewRule.append("(" + sElem + ")" if bSelectedGroup else sElem)
                                lNewTemp.append(aNewRule)
                            aRule.append("(" + sElem1 + ")" if bSelectedGroup else sElem1)
                    lTokenLines.extend(lNewTemp)
            else:
                # simple token
                if not lTokenLines:
                    lTokenLines = [[sTokBlock], []] if bNullPossible else [[sTokBlock]]
                else:
                    if bNullPossible:
                        lNewTemp = []
                        for aRule in lTokenLines:
                            lNew = list(aRule)
                            lNew.append(sTokBlock)
                            lNewTemp.append(lNew)
                        lTokenLines.extend(lNewTemp)
                    else:
                        for aRule in lTokenLines:
                            aRule.append(sTokBlock)
        # position of the last nullable token, counted from the end of the line
        nLastNullable = nLastNullable - n - 1
        for aRule in lTokenLines:
            yield aRule, nFirstNullable, nLastNullable

    def _createTokenList (self, sTokBlock):
        """return a list of tokens from a block of tokens

        <sTokBlock> is written [a|b|c]. A token ending with a suffix code found
        in <self.dDecl> is replaced by its stem followed by stem+suffix for each
        declared suffix.
        """
        lToken = []
        for sToken in sTokBlock[1:-1].split("|"):
            if "+" in sToken and not sToken.startswith("+"):
                # NOTE(review): a token holding "+" which ends with no known
                # suffix code is silently dropped -- confirm this is intended
                for sCode in self.dDecl:
                    if sToken.endswith(sCode):
                        sToken = sToken[:-len(sCode)]
                        lToken.append(sToken)
                        for sSuffix in self.dDecl[sCode]:
                            lToken.append(sToken+sSuffix)
                        break
            else:
                lToken.append(sToken)
        return lToken

    def createRule (self, iLine, sRuleName, sTokenLine, iActionBlock, lActions, nPriority):
        """generator: create rule as list

        A token line written !!…¡¡ is an antipattern: its token lists are stored
        and nothing is yielded. For a pattern, yields one list
        [token, …, "##"+line number, action name] per token line and per action.
        The process exits if an action is empty or cannot be created.
        """
        # print(iLine, "//", sRuleName, "//", sTokenLine, "//", lActions, "//", nPriority)
        if sTokenLine.startswith("!!") and sTokenLine.endswith("¡¡"):
            # antipattern
            sTokenLine = sTokenLine[2:-2].strip()
            if sRuleName not in self.dAntiPatterns:
                self.dAntiPatterns[sRuleName]= []
            for lToken, _, _ in self._genTokenLines(sTokenLine):
                self.dAntiPatterns[sRuleName].append(lToken)
        else:
            # pattern
            for lToken, nFirstNullable, nLastNullable in self._genTokenLines(sTokenLine):
                if sRuleName in self.dAntiPatterns and lToken in self.dAntiPatterns[sRuleName]:
                    # <lToken> matches an antipattern -> discard
                    continue
                # Calculate positions
                dPos = {}   # key: iGroup, value: iToken
                iGroup = 0
                #if iLine == 15818: # debug
                #    print(" ".join(lToken))
                for i, sToken in enumerate(lToken):
                    if sToken.startswith("(") and sToken.endswith(")"):
                        lToken[i] = sToken[1:-1]
                        iGroup += 1
                        dPos[iGroup] = i + 1    # we add 1, for we count tokens from 1 to n (not from 0)
                    # check lemmas
                    if sToken.startswith(">") and sToken != ">" and sToken[1:] not in self.dLemmas:
                        self.dLemmas[sToken[1:]] = iLine
                # Parse actions
                for iAction, (iActionLine, sAction) in enumerate(lActions, 1):
                    sAction = sAction.strip()
                    if sAction:
                        sActionId = f"{self.sGraphCode}__{sRuleName}__b{iActionBlock}_a{iAction}"
                        aAction = self.createAction(sActionId, sAction, nPriority, len(lToken), dPos, iActionLine, nFirstNullable, nLastNullable)
                        if aAction:
                            sActionName = self.storeAction(sActionId, aAction)
                            lResult = list(lToken)
                            lResult.extend(["##"+str(iLine), sActionName])
                            #if iLine == 13341:
                            #    print(" ".join(lToken))
                            #    print(sActionId, aAction)
                            yield lResult
                        else:
                            print("# Error on action at line:", iLine)
                            print(sTokenLine, "\n", lActions)
                            exit()
                    else:
                        print("No action found for ", iActionLine)
                        exit()

    def createAction (self, sActionId, sAction, nPriority, nToken, dPos, iActionLine, nFirstNullable, nLastNullable):
        """create action rule as a list

        <sAction> is written: [/option/] [condition] <action mark> <action> [&& message [| URL]]
        The content of the returned list depends on the kind of action
        (- error, ~ text processor, = disambiguator, ! and / tags, > condition only).
        Returns None if the action is not valid; the process exits on a missing
        action mark, a missing message or an empty action.
        """
        sLineId = "#" + str(iActionLine)
        # Option
        sOption = False
        m = re.match("/(\\w+)/", sAction)
        if m:
            sOption = m.group(1)
            sAction = sAction[m.end():].strip()
        if nPriority == -1:
            nPriority = self.dOptPriority.get(sOption, 4)
        # valid action?
        m = re.search(r"(?P<action>[-=~/!>])(?P<start>-?\d+\.?|)(?P<end>:\.?-?\d+|)(?P<casing>:|)>>", sAction)
        if not m:
            print("\n# Error. No action found at: ", sLineId, sActionId)
            exit()
        # Condition
        sCondition = sAction[:m.start()].strip()
        if sCondition:
            sCondition = changeReferenceToken(sCondition, dPos)
            # token references have to be checked on the code itself: once the
            # function is created, <sCondition> is only the name of this function
            checkTokenNumbers(sCondition, sActionId, nToken)
            sCondition = self.createFunction("cond", sCondition)
        else:
            sCondition = ""
        # Case sensitivity
        bCaseSensitivity = not bool(m.group("casing"))
        # Action
        cAction = m.group("action")
        sAction = sAction[m.end():].strip()
        sAction = changeReferenceToken(sAction, dPos)
        # target
        cStartLimit = "<"
        cEndLimit = ">"
        if not m.group("start"):
            iStartAction = 1
            iEndAction = 0
        else:
            if cAction != "-" and (m.group("start").endswith(".") or m.group("end").startswith(":.")):
                print("\n# Error. Wrong selection on tokens at: ", sLineId ,sActionId)
                return None
            if m.group("start").endswith("."):
                cStartLimit = ">"
            iStartAction = int(m.group("start").rstrip("."))
            if not m.group("end"):
                iEndAction = iStartAction
            else:
                if m.group("end").startswith(":."):
                    cEndLimit = "<"
                iEndAction = int(m.group("end").lstrip(":."))
        if dPos and m.group("start"):
            # targets written as group numbers are converted to token positions
            iStartAction = dPos.get(iStartAction, iStartAction)
            if iEndAction:
                iEndAction = dPos.get(iEndAction, iEndAction)
        if iStartAction < 0:
            iStartAction += 1
        if iEndAction < 0:
            iEndAction += 1
        # check target
        if nFirstNullable > -1:
            if nFirstNullable > 0 and iStartAction > 0 and iEndAction != 0 and (iStartAction > nFirstNullable or iStartAction == nFirstNullable == iEndAction):
                print(f"# Error. At {sLineId}, {sActionId}, target start is bigger than first nullable token.")
            if nFirstNullable > 0 and iEndAction > 0 and iStartAction != 1 and (iEndAction > nFirstNullable or iStartAction == nFirstNullable == iEndAction):
                print(f"# Error. At {sLineId}, {sActionId}, target end is bigger than first nullable token.")
            if nLastNullable < 0 and iStartAction < 0 and iEndAction != 0 and ((iStartAction-1) < nLastNullable or (iStartAction-1) == nFirstNullable == (iEndAction-1)):
                print(f"# Error. At {sLineId}, {sActionId}, target start is lower than last nullable token.")
            if nLastNullable < 0 and iEndAction < 0 and iStartAction != 1 and ((iEndAction-1) < nLastNullable or (iStartAction-1) == nFirstNullable == (iEndAction-1)):
                print(f"# Error. At {sLineId}, {sActionId}, target end is lower than last nullable token.")
        if cAction == "-":
            ## error
            iMsg = sAction.find(" && ")
            if iMsg == -1:
                print("\n# Error. No message at: ", sLineId, sActionId)
                exit()
            else:
                sMsg = sAction[iMsg+4:].strip()
                sAction = sAction[:iMsg].strip()
                sURL = ""
                mURL = re.search("[|] *(https?://.*)", sMsg)
                if mURL:
                    sURL = mURL.group(1).strip()
                    sMsg = sMsg[:mURL.start(0)].strip()
                checkTokenNumbers(sMsg, sActionId, nToken)  # check tokens in message
                if sMsg[0:1] == "=":
                    sMsg = self.createFunction("msg", sMsg, True)
                else:
                    checkIfThereIsCode(sMsg, sActionId)
        # checking token consistency (the condition has already been checked above)
        checkTokenNumbers(sAction, sActionId, nToken)   # check tokens in action
        if cAction == ">":
            ## no action, break loop if condition is False
            return [sLineId, sOption, sCondition, cAction, ""]
        if not sAction and cAction != "!":
            print(f"\n# Error in action at line <{sLineId}/{sActionId}>: This action is empty.")
            exit()
        if sAction[0:1] != "=" and cAction != "=":
            checkIfThereIsCode(sAction, sActionId)
        if cAction == "-":
            ## error detected --> suggestion
            if sAction[0:1] == "=":
                sAction = self.createFunction("sugg", sAction, True)
            elif sAction.startswith('"') and sAction.endswith('"'):
                sAction = sAction[1:-1]
            if not sMsg:
                print(f"\n# Error in action at line <{sLineId}/{sActionId}>: The message is empty.")
                exit()
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, cStartLimit, cEndLimit, bCaseSensitivity, nPriority, sMsg, sURL]
        if cAction == "~":
            ## text processor
            if sAction[0:1] == "=":
                sAction = self.createFunction("tp", sAction, True)
            elif sAction.startswith('"') and sAction.endswith('"'):
                sAction = sAction[1:-1]
            elif sAction not in "␣*_":
                # one replacement per rewritten token is expected
                nToken = sAction.count("|") + 1
                if iStartAction > 0 and iEndAction > 0:
                    if (iEndAction - iStartAction + 1) != nToken:
                        print(f"\n# Error in action at line <{sLineId}/{sActionId}>: numbers of modified tokens modified.")
                elif iStartAction < 0 or iEndAction < 0 and iStartAction != iEndAction:
                    print(f"\n# Warning in action at line <{sLineId}/{sActionId}>: rewriting with possible token position modified.")
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction, bCaseSensitivity]
        if cAction in "!/":
            ## tags
            return [sLineId, sOption, sCondition, cAction, sAction, iStartAction, iEndAction]
        if cAction == "=":
            ## disambiguator
            sAction = self.createFunction("da", sAction)
            return [sLineId, sOption, sCondition, cAction, sAction]
        print("\n# Unknown action at ", sLineId, sActionId)
        return None

    def storeAction (self, sActionId, aAction):
        "store <aAction> in <self.dActions> avoiding duplicates and return action name"
        # names are <sActionId>_1, <sActionId>_2…: the first free name is used,
        # unless an identical action is already stored under a previous name
        nVar = 1
        while True:
            sActionName = sActionId + "_" + str(nVar)
            if sActionName not in self.dActions:
                self.dActions[sActionName] = aAction
                return sActionName
            if aAction == self.dActions[sActionName]:
                return sActionName
            nVar += 1

    def showActions (self):
        "debugging function: print all the stored actions"
        print("\nActions:")
        for sActionName, aAction in self.dActions.items():
            print(sActionName, aAction)

    def createFunction (self, sType, sCode, bStartWithEqual=False):
        """create a function (stored in <self.dFunctions>) and return function name

        The returned name is prefixed with "=" if <bStartWithEqual> is True.
        """
        sCode = rewriteCode(sCode)
        sFuncName = self._getNameForCode(sType, sCode)
        self.dFunctions[sFuncName] = sCode
        return sFuncName if not bStartWithEqual else "="+sFuncName

    def _getNameForCode (self, sType, sCode):
        "create and get a name for a code (the same code of the same type always gets the same name)"
        if sType not in self.dFuncName:
            self.dFuncName[sType] = {}
        if sCode not in self.dFuncName[sType]:
            self.dFuncName[sType][sCode] = len(self.dFuncName[sType])+1
        return "_g_" + sType + "_" + self.sGraphCode + "_" + str(self.dFuncName[sType][sCode])

    def createCallables (self):
        "return callables for Python and JavaScript, as two strings of source code"
        sPyCallables = ""
        sJSCallables = ""
        for sFuncName, sReturn in self.dFunctions.items():
            # parameters depend on the type of the function, found in its name
            if sFuncName.startswith("_g_cond_"): # condition
                sParams = "lToken, nTokenOffset, nLastToken, sCountry, bCondMemo, dTags, sSentence, sSentence0"
            elif sFuncName.startswith("_g_msg_"): # message
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_sugg_"): # suggestion
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_tp_"): # text preprocessor
                sParams = "lToken, nTokenOffset, nLastToken"
            elif sFuncName.startswith("_g_da_"): # disambiguator
                sParams = "lToken, nTokenOffset, nLastToken"
            else:
                print("# Unknown function type in [" + sFuncName + "]")
                continue
            # Python
            sPyCallables += f"def {sFuncName} ({sParams}):\n"
            sPyCallables += f" return {sReturn}\n"
            # JavaScript
            sJSCallables += f" {sFuncName}: function ({sParams}) {{\n"
            sJSCallables += " return " + jsconv.py2js(sReturn) + ";\n"
            sJSCallables += " },\n"
        return sPyCallables, sJSCallables
def processing (sGraphName, sGraphCode, sLang, lRuleLine, dDef, dDecl, dOptPriority):
    "worker entry point (to be run in a separate process): build one graph and return its results with its name"
    tBuilt = GraphBuilder(sGraphName, sGraphCode, sLang, dDef, dDecl, dOptPriority).createGraphAndActions(lRuleLine)
    dGraph, dActions, sPy, sJS, dLemmas = tBuilt
    # the graph name comes first, so that the caller knows which graph has been built
    return (sGraphName, dGraph, dActions, sPy, sJS, dLemmas)
def make (lRule, sLang, dDef, dDecl, dOptPriority):
    """compile rules, returns a dictionary of values

    <lRule>: iterable of (line number, line) describing the graphs and their rules
    <sLang>, <dDef>, <dDecl>, <dOptPriority>: handed over to each graph builder

    The lines are first merged into rule blocks (token lines + actions), a block
    being closed by an empty line. Then each graph is built in the process pool.

    Returns a dictionary of strings (graphs, actions, URLs and callables, for
    Python and JavaScript), or an error message (str) if the executor failed.
    The process exits on a syntax error in the rules.
    """
    # for clarity purpose, don’t create any file here
    # removing comments, zeroing empty lines, creating definitions, storing tests, merging rule lines
    print(" parsing graph rules...")
    # NOTE(review): indentation widths are inferred from the 12 characters
    # removed from action lines (indentation + "<<- ") -- confirm with the
    # format of the rules files. With a single space everywhere, the test for
    # token lines would catch every other kind of line.
    sTokenIndent = " " * 4
    sActionIndent = " " * 8
    nActionPrefix = len(sActionIndent + "<<- ")
    # mark which ends an action: [action type][target]>>
    zActionMark = re.compile(r"[-=~/!>](?:-?\d\.?(?::\.?-?\d+|)|):?>>")
    lTokenLine = []
    lActions = []
    bActionBlock = False
    nPriority = -1
    dAllGraph = {}
    dGraphCode = {}
    sGraphName = ""
    iActionBlock = 0
    aRuleName = set()
    oDictionary = graphspell.SpellChecker("fr")
    for iLine, sLine in lRule:
        sLine = sLine.rstrip()
        if "\t" in sLine:
            # tabulation not allowed
            print("# Error. Tabulation at line: ", iLine)
            exit()
        elif sLine.startswith("@@@@GRAPH: "):
            # rules graph call
            m = re.match(r"@@@@GRAPH: *(\w+) *[|] *(\w+)", sLine.strip())
            if m:
                sGraphName = m.group(1)
                sGraphCode = m.group(2)
                # <dGraphCode> is indexed by graph names: codes are its values
                if sGraphName in dAllGraph or sGraphCode in dGraphCode.values():
                    print(f"# Error at line {iLine}. Graph name <{sGraphName}> or graph code <{sGraphCode}> already exists.")
                    exit()
                dAllGraph[sGraphName] = []
                dGraphCode[sGraphName] = sGraphCode
            else:
                print("# Error. Graph name not found at line", iLine)
                exit()
        elif sLine.startswith("__") and sLine.endswith("__"):
            # new rule group
            m = re.match("__(\\w+)(!\\d|)__", sLine)
            if m:
                sRuleName = m.group(1)
                if sRuleName in aRuleName:
                    print(f"# Error at line {iLine}. Rule name <{sRuleName}> already exists.")
                    exit()
                aRuleName.add(sRuleName)
                iActionBlock = 1
                nPriority = int(m.group(2)[1:]) if m.group(2) else -1
            else:
                print("# Syntax error in rule group: ", sLine, " -- line:", iLine)
                exit()
        elif re.match(sTokenIndent + "\\S", sLine):
            # tokens line
            lTokenLine.append([iLine, sLine.strip()])
        elif sLine.startswith(sActionIndent + "||"):
            # tokens line continuation
            iPrevLine, sPrevLine = lTokenLine[-1]
            lTokenLine[-1] = [iPrevLine, sPrevLine + " " + sLine.strip()[2:]]
        elif sLine.startswith(sActionIndent + "<<- "):
            # actions
            lActions.append([iLine, sLine[nActionPrefix:].strip()])
            if not zActionMark.search(sLine):
                # the action mark will be found on one of the following lines
                bActionBlock = True
        elif sLine.startswith(sActionIndent + "&& "):
            # action message
            iPrevLine, sPrevLine = lActions[-1]
            lActions[-1] = [iPrevLine, sPrevLine + sLine]
        elif sLine.startswith(sActionIndent) and bActionBlock:
            # action line continuation
            iPrevLine, sPrevLine = lActions[-1]
            lActions[-1] = [iPrevLine, sPrevLine + " " + sLine.strip()]
            if zActionMark.search(sLine):
                bActionBlock = False
        elif re.match("[ ]*$", sLine):
            # empty line to end merging
            if not lTokenLine:
                continue
            if bActionBlock or not lActions:
                print("# Error. No action found at line:", iLine)
                print(bActionBlock, lActions)
                exit()
            if not sGraphName:
                print("# Error. All rules must belong to a named graph. Line: ", iLine)
                exit()
            # every token line of the block receives its own copy of the actions
            for j, sTokenLine in lTokenLine:
                dAllGraph[sGraphName].append((j, sRuleName, sTokenLine, iActionBlock, list(lActions), nPriority))
            lTokenLine.clear()
            lActions.clear()
            iActionBlock += 1
        else:
            print("# Unknown line at:", iLine)
            print(sLine)
            exit()

    # processing rules
    print(" processing graph rules...")
    initProcessPoolExecutor(len(dAllGraph))
    fStartTimer = time.time()
    # build graph
    lResult = []
    nRule = 0
    for sGraphName, lRuleLine in dAllGraph.items():
        nRule += len(lRuleLine)
        try:
            xFuture = xProcessPoolExecutor.submit(processing, sGraphName, dGraphCode[sGraphName], sLang, lRuleLine, dDef, dDecl, dOptPriority)
            lResult.append(xFuture)
        except (concurrent.futures.TimeoutError, concurrent.futures.CancelledError):
            return "Analysis aborted (time out or cancelled)"
        except concurrent.futures.BrokenExecutor:
            return "Executor broken. The server failed."
    # merging results
    xProcessPoolExecutor.shutdown(wait=True) # waiting that everything is finished
    dAllActions = {}
    sPyCallables = ""
    sJSCallables = ""
    for xFuture in lResult:
        sGraphName, dGraph, dActions, sPy, sJS, dLemmas = xFuture.result()
        # the list of rule lines is replaced by the built graph
        dAllGraph[sGraphName] = dGraph
        dAllActions.update(dActions)
        sPyCallables += sPy
        sJSCallables += sJS
        # check lemmas
        for sLemma, iLine in dLemmas.items():
            if sLemma not in oDictionary.getLemma(sLemma):
                print(f" # Error at line {iLine}: <{sLemma}> is not a known lemma")
    # create a dictionary of URL
    # in error actions ("-"), the URL (last element) is replaced by its index
    dTempURL = { "": 0 }
    i = 1
    for lValue in dAllActions.values():
        if lValue[3] == "-":
            if lValue[-1]:
                if lValue[-1] not in dTempURL:
                    dTempURL[lValue[-1]] = i
                    i += 1
                lValue[-1] = dTempURL[lValue[-1]]
            else:
                lValue[-1] = 0
    dURL = { v: k for k, v in dTempURL.items() } # reversing key and values
    # end
    print(" Total: ", nRule, "rules, ", len(dAllActions), "actions")
    print(" Build time: {:.2f} s".format(time.time() - fStartTimer))
    return {
        # the graphs describe paths of tokens to actions which eventually execute callables
        "rules_graphs": str(dAllGraph), # helpers.convertDictToString(dAllGraph)
        "rules_actions": helpers.convertDictToString(dAllActions), # str(dAllActions)
        "rules_graph_URL": helpers.convertDictToString(dURL), # str(dURL)
        "rules_graphsJS": str(dAllGraph),
        "rules_actionsJS": jsconv.pyActionsToString(dAllActions),
        "rules_graph_URLJS": str(dURL),
        "graph_callables": sPyCallables,
        "graph_callablesJS": sJSCallables
    }
|