File: fql.py

package info (click to toggle)
python-pynlpl 1.2.9-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,900 kB
  • sloc: python: 25,677; sh: 73; makefile: 3
file content (2041 lines) | stat: -rw-r--r-- 100,101 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
#---------------------------------------------------------------

# PyNLPl - FoLiA Query Language
#   by Maarten van Gompel
#   Centre for Language Studies
#   Radboud University Nijmegen
#   http://proycon.github.com/folia
#   http://www.github.com/proycon/pynlpl
#   proycon AT anaproy DOT nl
#
#   Module for reading, editing and writing FoLiA XML using
#   the FoLiA Query Language
#
#   Licensed under GPLv3
#
#----------------------------------------------------------------


from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import

from pynlpl.formats import folia
from copy import copy
import json
import re
import sys
import random
import datetime

OPERATORS = ('=','==','!=','>','<','<=','>=','CONTAINS','NOTCONTAINS','MATCHES','NOTMATCHES')
MASK_NORMAL = 0
MASK_LITERAL = 1
MASK_EXPRESSION = 2
MAXEXPANSION = 99

FOLIAVERSION = '1.5.0'
FQLVERSION = '0.4.1'

class SyntaxError(Exception):
    pass

class QueryError(Exception):
    pass


def getrandomid(query,prefix=""):
    randomid = ""
    while not randomid or randomid in query.doc.index:
        randomid =  prefix + "%08x" % random.getrandbits(32) #generate a random ID
    return randomid

class UnparsedQuery(object):
    """This class takes care of handling grouped blocks in parentheses and handling quoted values"""
    def __init__(self, s, i=0):
        self.q = []
        self.mask = []
        l = len(s)
        begin = 0
        while i < l:
            c = s[i]
            if c == " ":
                #process previous word
                if begin < i:
                    w = s[begin:i]
                    self.q.append(w)
                    self.mask.append(MASK_NORMAL)
                begin = i + 1
            elif i == l - 1:
                #process last word
                w = s[begin:]
                self.q.append(w)
                self.mask.append(MASK_NORMAL)

            if c == '(': #groups
                #find end quote and process block
                level = 0
                quoted = False
                s2 = ""
                for j in range(i+1,l):
                    c2 = s[j]
                    if c2 == '"':
                        if s[j-1] != "\\": #check it isn't escaped
                            quoted = not quoted
                    if not quoted:
                        if c2 == '(':
                            level += 1
                        elif c2 == ')':
                            if level == 0:
                                s2 = s[i+1:j]
                                break
                            else:
                                level -= 1
                if s2:
                    self.q.append(UnparsedQuery(s2))
                    self.mask.append(MASK_EXPRESSION)
                    i = j
                    begin = i+1
                else:
                    raise SyntaxError("Unmatched parenthesis at char " + str(i))
            elif c == '"': #literals
                if i == 0 or (i > 0 and s[i-1] != "\\"): #check it isn't escaped
                    #find end quote and process block
                    s2 = None
                    for j in range(i+1,l):
                        c2 = s[j]
                        if c2 == '"':
                            if s[j-1] != "\\": #check it isn't escaped
                                s2 = s[i+1:j]
                                break
                    if not s2 is None:
                        self.q.append(s2.replace('\\"','"').replace("\\n","\n")) #undo escaped quotes and newlines
                        self.mask.append(MASK_LITERAL)
                        i = j
                        begin = i+1
                    else:
                        raise SyntaxError("Unterminated string literal at char " + str(i))

            i += 1

        remove = []
        #process shortcut notation
        for i, (w,m) in enumerate(zip(self.q,self.mask)):
            if m == MASK_NORMAL and w[0] == ':':
                #we have shortcut notation for a HAS statement, rewrite:
                self.q[i] = UnparsedQuery(w[1:] + " HAS class " + self.q[i+1] + " \"" + self.q[i+2] + "\"")
                self.mask[i] = MASK_EXPRESSION
                remove += [i+1,i+2]

        if remove:
            for index in reversed(remove):
                del self.q[index]
                del self.mask[index]




    def __iter__(self):
        for w in self.q:
            yield w

    def __len__(self):
        return len(self.q)

    def __getitem__(self, index):
        try:
            return self.q[index]
        except:
            return ""

    def kw(self, index, value):
        try:
            if isinstance(value, tuple):
                return self.q[index] in value and self.mask[index] == MASK_NORMAL
            else:
                return self.q[index] == value and self.mask[index] == MASK_NORMAL
        except:
            return False


    def __exists__(self, keyword):
        for k,m in zip(self.q,self.mask):
            if keyword == k and m == MASK_NORMAL:
                return True
        return False

    def __setitem__(self, index, value):
        self.q[index] = value


    def __str__(self):
        s = []
        for w,m in zip(self.q,self.mask):
            if m == MASK_NORMAL:
                s.append(w)
            elif m == MASK_LITERAL:
                s.append('"' + w.replace('"','\\"') + '"')
            elif m == MASK_EXPRESSION:
                s.append('(' + str(w) + ')')
        return " ".join(s)






class Filter(object): #WHERE ....
    def __init__(self, filters, negation=False,disjunction=False):
        self.filters = filters
        self.negation = negation
        self.disjunction = disjunction

    @staticmethod
    def parse(q, i=0):
        filters = []
        negation = False
        logop = ""

        l = len(q)
        while i < l:
            if q.kw(i, "NOT"):
                negation = True
                i += 1
            elif isinstance(q[i], UnparsedQuery):
                filter,_  = Filter.parse(q[i])
                filters.append(filter)
                i += 1
                if q.kw(i,"AND") or q.kw(i, "OR"):
                    if logop and q[i] != logop:
                        raise SyntaxError("Mixed logical operators, use parentheses: " + str(q))
                    logop = q[i]
                    i += 1
                else:
                    break #done
            elif i == 0 and (q[i].startswith("PREVIOUS") or q[i].startswith("NEXT") or q.kw(i, ("LEFTCONTEXT","RIGHTCONTEXT","CONTEXT","PARENT","ANCESTOR","CHILD") )):
                #we have a context expression, always occuring in its own subquery
                modifier = q[i]
                i += 1
                selector,i =  Selector.parse(q,i)
                filters.append( (modifier, selector,None) )
                break
            elif q[i+1] in OPERATORS and q[i] and q[i+2]:
                operator = q[i+1]
                if q[i] == "class":
                    v = lambda x,y='cls': getattr(x,y)
                elif q[i] in ("text","value","phon"):
                    v = lambda x,y='text': getattr(x,'value') if isinstance(x, (folia.Description, folia.Comment, folia.Content)) else getattr(x,'phon') if isinstance(x,folia.PhonContent) else getattr(x,'text')()
                else:
                    v = lambda x,y=q[i]: getattr(x,y)
                if q[i] == 'confidence':
                    cnv = float
                else:
                    cnv =  lambda x: x
                if operator == '=' or operator == '==':
                    filters.append( lambda x,y=q[i+2],v=v : v(x) == y )
                elif operator == '!=':
                    filters.append( lambda x,y=q[i+2],v=v : v(x) != y )
                elif operator == '>':
                    filters.append( lambda x,y=cnv(q[i+2]),v=v : False if v(x) is None else v(x) > y )
                elif operator == '<':
                    filters.append( lambda x,y=cnv(q[i+2]),v=v : False if v(x) is None else v(x) < y )
                elif operator == '>=':
                    filters.append( lambda x,y=cnv(q[i+2]),v=v : False if v(x) is None else v(x) >= y )
                elif operator == '<=':
                    filters.append( lambda x,y=cnv(q[i+2]),v=v : False if v(x) is None else v(x) <= y )
                elif operator == 'CONTAINS':
                    filters.append( lambda x,y=q[i+2],v=v : v(x).find( y ) != -1 )
                elif operator == 'NOTCONTAINS':
                    filters.append( lambda x,y=q[i+2],v=v : v(x).find( y ) == -1 )
                elif operator == 'MATCHES':
                    filters.append( lambda x,y=re.compile(q[i+2]),v=v : y.search(v(x)) is not None  )
                elif operator == 'NOTMATCHES':
                    filters.append( lambda x,y=re.compile(q[i+2]),v=v : y.search(v(x)) is None  )

                if q.kw(i+3,("AND","OR")):
                    if logop and q[i+3] != logop:
                        raise SyntaxError("Mixed logical operators, use parentheses: " + str(q))
                    logop = q[i+3]
                    i += 4
                else:
                    i += 3
                    break #done
            elif 'HAS' in q[i:]:
                #has statement (spans full UnparsedQuery by definition)
                selector,i =  Selector.parse(q,i)
                if not q.kw(i,"HAS"):
                    raise SyntaxError("Expected HAS, got " + str(q[i]) + " at position " + str(i) + " in: " + str(q))
                i += 1
                subfilter,i = Filter.parse(q,i)
                filters.append( ("CHILD",selector,subfilter) )
            else:
                raise SyntaxError("Expected comparison operator, got " + str(q[i+1]) + " in: " + str(q))

        if negation and len(filters) > 1:
            raise SyntaxError("Expecting parentheses when NOT is used with multiple conditions")

        return Filter(filters, negation, logop == "OR"), i

    def __call__(self, query, element, debug=False):
        """Tests the filter on the specified element, returns a boolean"""
        match = True
        if debug: print("[FQL EVALUATION DEBUG] Filter - Testing filter [" + str(self) + "] for ", repr(element),file=sys.stderr)
        for filter in self.filters:
            if isinstance(filter,tuple):
                modifier, selector, subfilter = filter
                if debug: print("[FQL EVALUATION DEBUG] Filter - Filter is a subfilter of type " + modifier + ", descending...",file=sys.stderr)
                #we have a subfilter, i.e. a HAS statement on a subelement
                match = False
                if modifier == "CHILD":
                    for subelement,_ in selector(query, [element], True, debug): #if there are multiple subelements, they are always treated disjunctly
                        if not subfilter:
                            match = True
                        else:
                            match = subfilter(query, subelement, debug)
                        if match: break #only one subelement has to match by definition, then the HAS statement is matched
                elif modifier == "PARENT":
                    match = selector.match(query, element.parent,debug)
                elif modifier == "NEXT":
                    neighbour = element.next()
                    if neighbour:
                        match = selector.match(query, neighbour,debug)
                elif modifier == "PREVIOUS":
                    neighbour = element.previous()
                    if neighbour:
                        match = selector.match(query, neighbour,debug)
                else:
                    raise NotImplementedError("Context keyword " + modifier + " not implemented yet")
            elif isinstance(filter, Filter):
                #we have a nested filter (parentheses)
                match = filter(query, element, debug)
            else:
                #we have a condition function we can evaluate
                match = filter(element)

            if self.negation:
                match = not match
            if match:
                if self.disjunction:
                    if debug: print("[FQL EVALUATION DEBUG] Filter returns True",file=sys.stderr)
                    return True
            else:
                if not self.disjunction: #implies conjunction
                    if debug: print("[FQL EVALUATION DEBUG] Filter returns False",file=sys.stderr)
                    return False

        if debug: print("[FQL EVALUATION DEBUG] Filter returns ", str(match),file=sys.stderr)
        return match

    def __str__(self):
        q = ""
        if self.negation:
            q += "NOT "
        for i, filter in enumerate(self.filters):
            if i > 0:
                if self.disjunction:
                    q += "OR "
                else:
                    q += "AND "
            if isinstance(filter, Filter):
                q += "(" + str(filter) + ") "
            elif isinstance(filter, tuple):
                modifier,selector,subfilter = filter
                q += "(" + modifier + " " + str(selector) + " HAS " + str(subfilter) + ") "
            else:
                #original filter can't be reconstructed, place dummy:
                q += "...\"" + str(filter.__defaults__[0]) +"\""
        return q.strip()




class SpanSet(list):
    def select(self,*args):
        raise QueryError("Got a span set for a non-span element")

    def partof(self, collection):
        for e in collection:
            if isinstance(e, SpanSet):
                if len(e) != len(self):
                    return False
                for c1,c2 in zip(e,self):
                    if c1 is not c2:
                        return False
        return False



class Selector(object):
    def __init__(self, Class, set=None,id=None, filter=None, nextselector=None, expansion = None):
        self.Class = Class
        self.set = set
        self.id = id
        self.filter = filter
        self.nextselector =  nextselector #selectors can be chained
        self.expansion = expansion #{min,max} occurrence interval, allowed only in Span and evaluated there instead of here


    def chain(self, targets):
        assert targets[0] is self
        selector = self
        selector.nextselector = None
        for target in targets[1:]:
            selector.nextselector = target
            selector = target

    @staticmethod
    def parse(q, i=0, allowexpansion=False):
        l = len(q)
        set = None
        id = None
        filter = None
        expansion = None

        if q[i] == "ID" and q[i+1]:
            id = q[i+1]
            Class = None
            i += 2
        else:
            if q[i] == "ALL":
                Class = "ALL"
            else:
                try:
                    Class = folia.XML2CLASS[q[i]]
                except:
                    raise SyntaxError("Expected element type, got " + str(q[i]) + " in: " + str(q))
            i += 1

        if q[i] and q[i][0] == "{" and q[i][-1] == "}":
            if not allowexpansion:
                raise SyntaxError("Expansion expressions not allowed at this point, got one at position " + str(i) + " in: " + str(q))
            expansion = q[i][1:-1]
            expansion = expansion.split(',')
            i += 1
            try:
                if len(expansion) == 1:
                    expansion = (int(expansion), int(expansion))
                elif len(expansion) == 2 and expansion[0] == "":
                    expansion = (0,int(expansion[1]))
                elif len(expansion) == 2 and expansion[1] == "":
                    expansion = (int(expansion[0]),MAXEXPANSION)
                elif len(expansion) == 2:
                    expansion = tuple(int(x) for x in expansion if x)
                else:
                    raise SyntaxError("Invalid expansion expression: " + ",".join(expansion))
            except ValueError:
                raise SyntaxError("Invalid expansion expression: " + ",".join(expansion))

        while i < l:
            if q.kw(i,"OF") and q[i+1]:
                set = q[i+1]
                i += 2
            elif q.kw(i,"ID") and q[i+1]:
                id = q[i+1]
                i += 2
            elif q.kw(i, "WHERE"):
                #ok, big filter coming up!
                filter, i = Filter.parse(q,i+1)
                break
            else:
                #something we don't handle
                break

        return Selector(Class,set,id,filter, None, expansion), i

    def __call__(self, query, contextselector, recurse=True, debug=False): #generator, lazy evaluation!
        if isinstance(contextselector,tuple) and len(contextselector) == 2:
            selection = contextselector[0](*contextselector[1])
        else:
            selection = contextselector

        count = 0

        for e in selection:
            selector = self
            while True: #will loop through the chain of selectors, only the first one is called explicitly
                if debug: print("[FQL EVALUATION DEBUG] Select - Running selector [", str(self), "] on ", repr(e),file=sys.stderr)

                if selector.id:
                    if debug: print("[FQL EVALUATION DEBUG] Select - Selecting ID " + selector.id,file=sys.stderr)
                    try:
                        candidate = query.doc[selector.id]
                        selector.Class = candidate.__class__
                        if not selector.filter or  selector.filter(query,candidate, debug):
                            if debug: print("[FQL EVALUATION DEBUG] Select - Yielding (by ID) ", repr(candidate),file=sys.stderr)
                            yield candidate, e
                    except KeyError:
                        if debug: print("[FQL EVALUATION DEBUG] Select - Selecting by ID failed for ID " + selector.id,file=sys.stderr)
                        pass #silently ignore ID mismatches
                elif selector.Class == "ALL":
                    for candidate in e:
                        if isinstance(candidate, folia.AbstractElement):
                            yield candidate, e
                elif selector.Class:
                    if debug: print("[FQL EVALUATION DEBUG] Select - Selecting Class " + selector.Class.XMLTAG + " with set " + str(selector.set),file=sys.stderr)
                    if selector.Class.XMLTAG in query.defaultsets:
                        selector.set = query.defaultsets[selector.Class.XMLTAG]
                    isspan = issubclass(selector.Class, folia.AbstractSpanAnnotation)
                    if isinstance(e, tuple): e = e[0]
                    if isspan and (isinstance(e, folia.Word) or isinstance(e, folia.Morpheme)):
                        for candidate in e.findspans(selector.Class, selector.set):
                            if not selector.filter or  selector.filter(query,candidate, debug):
                                if debug: print("[FQL EVALUATION DEBUG] Select - Yielding span, single reference: ", repr(candidate),file=sys.stderr)
                                yield candidate, e
                    elif isspan and isinstance(e, SpanSet):
                        #we take the first item of the span to find the candidates
                        for candidate in e[0].findspans(selector.Class, selector.set):
                            if not selector.filter or  selector.filter(query,candidate, debug):
                                #test if all the other elements in the span are in this candidate
                                matched = True
                                spanelements = list(candidate.wrefs())
                                for e2 in e[1:]:
                                    if e2 not in spanelements:
                                        matched = False
                                        break
                                if matched:
                                    if debug: print("[FQL EVALUATION DEBUG] Select - Yielding span, multiple references: ", repr(candidate),file=sys.stderr)
                                    yield candidate, e
                    elif isinstance(e, SpanSet):
                        yield e, e
                    else:
                        #print("DEBUG: doing select " + selector.Class.__name__ + " (recurse=" + str(recurse)+") on " + repr(e))
                        for candidate  in e.select(selector.Class, selector.set, recurse):
                            try:
                                if candidate.changedbyquery is query:
                                    #this candidate has been added/modified by the query, don't select it again
                                    continue
                            except AttributeError:
                                pass
                            if not selector.filter or  selector.filter(query,candidate, debug):
                                if debug: print("[FQL EVALUATION DEBUG] Select - Yielding ", repr(candidate), " in ", repr(e),file=sys.stderr)
                                yield candidate, e

                if selector.nextselector is None:
                    if debug: print("[FQL EVALUATION DEBUG] Select - End of chain",file=sys.stderr)
                    break # end of chain
                else:
                    if debug: print("[FQL EVALUATION DEBUG] Select - Selecting next in chain",file=sys.stderr)
                    selector = selector.nextselector


    def match(self, query, candidate, debug = False):
        if debug: print("[FQL EVALUATION DEBUG] Select - Matching selector [", str(self), "] on ", repr(candidate),file=sys.stderr)
        if self.id:
            if candidate.id != self.id:
                return False
        elif self.Class:
            if not isinstance(candidate,self.Class):
                return False
        if self.filter and not self.filter(query,candidate, debug):
            return False
        if debug: print("[FQL EVALUATION DEBUG] Select - Selector matches! ", repr(candidate),file=sys.stderr)
        return True

    def autodeclare(self,doc):
        if self.Class and self.set:
            if not doc.declared(self.Class, self.set):
                doc.declare(self.Class, self.set)
            if self.nextselector:
                self.nextselector.autodeclare()

    def __str__(self):
        s = ""
        if self.Class:
            s += self.Class.XMLTAG + " "
        if self.set:
            s += "OF " + self.set + " "
        if self.id:
            s += "ID " + self.id + " "
        if self.filter:
            s += "WHERE " + str(self.filter)

        if self.nextselector:
            s += str(self.nextselector)
        return s.strip()


class Span(object):
    def __init__(self, targets, intervals = []):
        self.targets = targets #Selector instances making up the span

    def __len__(self):
        return len(self.targets)

    @staticmethod
    def parse(q, i=0):
        targets = []
        l = len(q)
        while i < l:
            if q.kw(i,"ID") or q[i] in folia.XML2CLASS:
                target,i = Selector.parse(q,i, True)
                targets.append(target)
            elif q.kw(i,"&"):
                #we're gonna have more targets
                i += 1
            elif q.kw(i,"NONE"):
                #empty span
                return Span([]), i+1
            else:
                break

        if not targets:
            raise SyntaxError("Expected one or more span targets, got " + str(q[i]) + " in: " + str(q))

        return Span(targets), i

    def __call__(self, query, contextselector, recurse=True,debug=False): #returns a list of element in a span
        if debug: print("[FQL EVALUATION DEBUG] Span  - Building span from target selectors (" + str(len(self.targets)) + ")",file=sys.stderr)

        backtrack = []
        l = len(self.targets)
        if l == 0:
            #span is explicitly empty, this is allowed in RESPAN context
            if debug: print("[FQL EVALUATION DEBUG] Span  - Yielding explicitly empty SpanSet",file=sys.stderr)
            yield SpanSet()
        else:
            #find the first non-optional element, it will be our pivot:
            pivotindex = None
            for i, target in enumerate(self.targets):
                if self.targets[i].id or not self.targets[i].expansion or self.targets[i].expansion[0] > 0:
                    pivotindex = i
                    break
            if pivotindex is None:
                raise QueryError("All parts in the SPAN expression are optional, at least one non-optional component is required")


            #get first target
            for element, target in self.targets[pivotindex](query, contextselector, recurse,debug):
                if debug: print("[FQL EVALUATION DEBUG] Span  - First item of span found  (pivotindex=" + str(pivotindex) + ",l=" + str(l) + "," + str(repr(element)) + ")",file=sys.stderr)
                spanset = SpanSet() #elemnent is added later

                match = True #we attempt to disprove this


                #now see if consecutive elements match up


                #--- matching prior to pivot -------

                #match optional elements before pivotindex
                i = pivotindex
                currentelement = element
                while i > 0:
                    i -= 1
                    if i < 0: break
                    selector = self.targets[i]
                    minmatches = selector.expansion[0]
                    assert minmatches == 0 #everything before pivot has to have minmatches 0
                    maxmatches = selector.expansion[1]
                    done = False

                    matches = 0
                    while True:
                        prevelement = element
                        element = element.previous(selector.Class, None)
                        if not element or (target and target not in element.ancestors()):
                            if debug: print("[FQL EVALUATION DEBUG] Span  - Prior element not found or out of scope",file=sys.stderr)
                            done = True #no more elements left
                            break
                        elif element and not selector.match(query, element,debug):
                            if debug: print("[FQL EVALUATION DEBUG] Span  - Prior element does not match filter",file=sys.stderr)
                            element = prevelement #reset
                            break

                        if debug: print("[FQL EVALUATION DEBUG] Span  - Prior element matches",file=sys.stderr)
                        #we have a match
                        matches += 1
                        spanset.insert(0,element)
                        if matches >= maxmatches:
                            if debug: print("[FQL EVALUATION DEBUG] Span  - Maximum threshold reached for span selector " + str(i) + ", breaking", file=sys.stderr)
                            break

                    if done:
                        break

                #--- matching pivot and selectors after pivot  -------

                done = False #are we done with this selector?
                element = currentelement
                i = pivotindex - 1 #loop does +1 at the start of each iteration, we want to start with the pivotindex
                while i < l:
                    i += 1
                    if i == l:
                        if debug: print("[FQL EVALUATION DEBUG] Span  - No more selectors to try",i,l, file=sys.stderr)
                        break
                    selector = self.targets[i]
                    if selector.id: #selection by ID, don't care about consecutiveness
                        try:
                            element = query.doc[selector.id]
                            if debug: print("[FQL EVALUATION DEBUG] Span  - Obtained subsequent span item from ID: ", repr(element), file=sys.stderr)
                        except KeyError:
                            if debug: print("[FQL EVALUATION DEBUG] Span  - Obtained subsequent with specified ID does not exist ", file=sys.stderr)
                            match = False
                            break
                        if element and not selector.match(query, element,debug):
                            if debug: print("[FQL EVALUATION DEBUG] Span  - Subsequent element does not match filter",file=sys.stderr)
                        else:
                            spanset.append(element)

                    else: #element must be consecutive
                        if selector.expansion:
                            minmatches = selector.expansion[0]
                            maxmatches = selector.expansion[1]
                        else:
                            minmatches = maxmatches = 1

                        if debug: print("[FQL EVALUATION DEBUG] Span  - Preparing to match selector " + str(i) + " of span, expansion={" + str(minmatches) + "," + str(maxmatches) + "}", file=sys.stderr)
                        matches = 0

                        while True:
                            submatch = True #does the element currenty under consideration match? (the match variable is reserved for the entire match)
                            done = False #are we done with this span selector?
                            holdelement = False #do not go to next element

                            if debug: print("[FQL EVALUATION DEBUG] Span  - Processing element with span selector " + str(i) + ": ", repr(element), file=sys.stderr)

                            if not element or (target and target not in element.ancestors()):
                                if debug:
                                    if not element:
                                        print("[FQL EVALUATION DEBUG] Span  - Element not found",file=sys.stderr)
                                    elif target and not target in element.ancestors():
                                        print("[FQL EVALUATION DEBUG] Span  - Element out of scope",file=sys.stderr)
                                submatch = False
                            elif element and not selector.match(query, element,debug):
                                if debug: print("[FQL EVALUATION DEBUG] Span  - Element does not match filter",file=sys.stderr)
                                submatch = False

                            if submatch:
                                matches += 1
                                if debug: print("[FQL EVALUATION DEBUG] Span  - Element is a match, got " + str(matches) + " match(es) now", file=sys.stderr)

                                if matches > minmatches:
                                    #check if the next selector(s) match too, then we have a point where we might branch two ways
                                    #j = 1
                                    #while i+j < len(self.targets):
                                    #    nextselector = self.targets[i+j]
                                    #    if nextselector.match(query, element,debug):
                                    #        #save this point for backtracking, when we get stuck, we'll roll back to this point
                                    #       backtrack.append( (i+j, prevelement, copy(spanset) ) ) #using prevelement, nextelement will be recomputed after backtracking,   using different selector
                                    #   if not nextselector.expansion or nextselector.expansion[0] > 0:
                                    #        break
                                    #    j += 1
                                    #TODO: implement
                                    pass
                                elif matches < minmatches:
                                    if debug: print("[FQL EVALUATION DEBUG] Span  - Minimum threshold not reached yet for span selector " + str(i), file=sys.stderr)

                                spanset.append(element)
                                if matches >= maxmatches:
                                    if debug: print("[FQL EVALUATION DEBUG] Span  - Maximum threshold reached for span selector " + str(i) + ", breaking", file=sys.stderr)
                                    done = True #done with this selector
                            else:
                                if matches < minmatches:
                                    #can we backtrack?
                                    if backtrack: #(not reached currently)
                                        if debug: print("[FQL EVALUATION DEBUG] Span  - Backtracking",file=sys.stderr)
                                        index, element, spanset = backtrack.pop()
                                        i = index - 1 #next iteration will do +1 again
                                        match = True #default
                                        continue
                                    else:
                                        #nope, all is lost, we have no match
                                        if debug: print("[FQL EVALUATION DEBUG] Span  - Minimum threshold could not be attained for span selector " + str(i), file=sys.stderr)
                                        match = False
                                        break
                                else:
                                    if debug: print("[FQL EVALUATION DEBUG] Span  - No match for span selector " + str(i) + ", but no problem since matching threshold was already reached", file=sys.stderr)
                                    holdelement = True
                                    done = True
                                    break

                            if not holdelement:
                                prevelement = element
                                #get next element
                                element = element.next(selector.Class, None)
                                if debug: print("[FQL EVALUATION DEBUG] Span  - Selecting next element for next round", repr(element), file=sys.stderr)

                            if done or not match:
                                if debug: print("[FQL EVALUATION DEBUG] Span  - Done with span selector " + str(i), repr(element), file=sys.stderr)
                                break

                        if not match: break

                if match:
                    if debug: print("[FQL EVALUATION DEBUG] Span  - Span found, returning spanset (" + repr(spanset) + ")",file=sys.stderr)
                    yield spanset
                else:
                    if debug: print("[FQL EVALUATION DEBUG] Span  - Span not found",file=sys.stderr)




class Target(object): #FOR/IN... expression
    def __init__(self, targets, strict=False,nested = None, start=None, end=None,endinclusive=True,repeat=False):
        self.targets = targets #Selector instances
        self.strict = strict #True for IN
        self.nested = nested #in a nested another target
        self.start = start
        self.end = end
        self.endinclusive = endinclusive
        self.repeat = repeat


    @staticmethod
    def parse(q, i=0):
        if q.kw(i,'FOR'):
            strict = False
        elif q.kw(i,'IN'):
            strict = True
        else:
            raise SyntaxError("Expected target expression, got " + str(q[i]) + " in: " + str(q))
        i += 1

        targets = []
        nested = None
        start = end = None
        endinclusive = True
        repeat = False
        l = len(q)
        while i < l:
            if q.kw(i,'SPAN'):
                target,i = Span.parse(q,i+1)
                targets.append(target)
            elif q.kw(i,"ID") or q[i] in folia.XML2CLASS or q[i] == "ALL":
                target,i = Selector.parse(q,i)
                targets.append(target)
            elif q.kw(i,","):
                #we're gonna have more targets
                i += 1
            elif q.kw(i, ('FOR','IN')):
                nested,i = Selector.parse(q,i+1)
            elif q.kw(i,"START"):
                start,i = Selector.parse(q,i+1)
            elif q.kw(i,("END","ENDAFTER")): #inclusive
                end,i = Selector.parse(q,i+1)
                endinclusive = True
            elif q.kw(i,"ENDBEFORE"): #exclusive
                end,i = Selector.parse(q,i+1)
                endinclusive = False
            elif q.kw(i,"REPEAT"):
                repeat = True
                i += 1
            else:
                break

        if not targets:
            raise SyntaxError("Expected one or more targets, got " + str(q[i]) + " in: " + str(q))

        return Target(targets,strict,nested,start,end,endinclusive, repeat), i


    def __call__(self, query, contextselector, recurse, debug=False): #generator, lazy evaluation!
        if self.nested:
            if debug: print("[FQL EVALUATION DEBUG] Target - Deferring to nested target first",file=sys.stderr)
            contextselector = (self.nested, (query, contextselector, not self.strict))

        if debug: print("[FQL EVALUATION DEBUG] Target - Chaining and calling target selectors (" + str(len(self.targets)) + ")",file=sys.stderr)

        if self.targets:
            if isinstance(self.targets[0], Span):
                for span in self.targets:
                    if not isinstance(span, Span): raise QueryError("SPAN statement may not be mixed with non-span statements in a single selection")
                    if debug: print("[FQL EVALUATION DEBUG] Target - Evaluation span ",file=sys.stderr)
                    for spanset in span(query, contextselector, recurse, debug):
                        if debug: print("[FQL EVALUATION DEBUG] Target - Yielding spanset ",file=sys.stderr)
                        yield spanset
            else:
                selector = self.targets[0]
                selector.chain(self.targets)

                started = (self.start is None)
                dobreak = False

                for e,_ in selector(query, contextselector, recurse, debug):
                    if not started:
                        if self.start.match(query, e):
                            if debug: print("[FQL EVALUATION DEBUG] Target - Matched start! Starting from here...",e, file=sys.stderr)
                            started = True
                    if started:
                        if self.end:
                            if self.end.match(query, e):
                                if not self.endinclusive:
                                    if debug: print("[FQL EVALUATION DEBUG] Target - Matched end! Breaking before yielding...",e, file=sys.stderr)
                                    started = False
                                    if self.repeat:
                                        continue
                                    else:
                                        break
                                else:
                                    if debug: print("[FQL EVALUATION DEBUG] Target - Matched end! Breaking after yielding...",e, file=sys.stderr)
                                    started = False
                                    dobreak = True
                        if debug: print("[FQL EVALUATION DEBUG] Target - Yielding  ",repr(e), file=sys.stderr)
                        yield e
                        if dobreak and not self.repeat:
                            break






class Alternative(object):  #AS ALTERNATIVE ... expression
    def __init__(self, subassignments={},assignments={},filter=None, nextalternative=None):
        self.subassignments = subassignments
        self.assignments = assignments
        self.filter = filter
        self.nextalternative = nextalternative

    @staticmethod
    def parse(q,i=0):
        if q.kw(i,'AS') and q[i+1] == "ALTERNATIVE":
            i += 1

        subassignments = {}
        assignments = {}
        filter = None

        if q.kw(i,'ALTERNATIVE'):
            i += 1
            if not q.kw(i,'WITH'):
                i = getassignments(q, i, subassignments)
            if q.kw(i,'WITH'):
                i = getassignments(q, i+1,  assignments)
            if q.kw(i,'WHERE'):
                filter, i = Filter.parse(q, i+1)
        else:
            raise SyntaxError("Expected ALTERNATIVE, got " + str(q[i]) + " in: " + str(q))

        if q.kw(i,'ALTERNATIVE'):
            #we have another!
            nextalternative,i  = Alternative.parse(q,i)
        else:
            nextalternative = None

        return Alternative(subassignments, assignments, filter, nextalternative), i

    def __call__(self, query, action, focus, target,debug=False):
        """Action delegates to this function"""
        isspan = isinstance(action.focus.Class, folia.AbstractSpanAnnotation)

        subassignments = {} #make a copy
        for key, value in action.assignments.items():
            subassignments[key] = value
        for key, value in self.subassignments.items():
            subassignments[key] = value

        if action.action == "SELECT":
            if not focus: raise QueryError("SELECT requires a focus element")
            if not isspan:
                for alternative in focus.alternatives(action.focus.Class, focus.set):
                    if not self.filter or (self.filter and self.filter.match(query, alternative, debug)):
                        yield alternative
            else:
                raise NotImplementedError("Selecting alternative span not implemented yet")
        elif action.action == "EDIT" or action.action == "ADD":
            if not isspan:
                if focus:
                    parent = focus.ancestor(folia.AbstractStructureElement)
                    alternative = folia.Alternative( query.doc, action.focus.Class( query.doc , **subassignments), **self.assignments)
                    parent.append(alternative)
                    yield alternative
                else:
                    alternative = folia.Alternative( query.doc, action.focus.Class( query.doc , **subassignments), **self.assignments)
                    target.append(alternative)
                    yield alternative
            else:
                raise NotImplementedError("Editing alternative span not implemented yet")
        else:
            raise QueryError("Alternative does not handle action " + action.action)


    def autodeclare(self, doc):
        pass #nothing to declare

    def substitute(self, *args):
        raise QueryError("SUBSTITUTE not supported with AS ALTERNATIVE")

class Correction(object): #AS CORRECTION/SUGGESTION expression...
    def __init__(self, set,actionassignments={}, assignments={},filter=None,suggestions=[], bare=False):
        self.set = set
        self.actionassignments = actionassignments #the assignments in the action
        self.assignments = assignments #the assignments for the correction
        self.filter = filter
        self.suggestions = suggestions # [ (subassignments, suggestionassignments) ]
        self.bare = bare

    @staticmethod
    def parse(q,i, focus):
        if q.kw(i,'AS') and q.kw(i+1,'CORRECTION'):
            i += 1
            bare = False
        if q.kw(i,'AS') and q.kw(i+1,'BARE') and q.kw(i+2,'CORRECTION'):
            bare = True
            i += 2

        set = None
        actionassignments = {}
        assignments = {}
        filter = None
        suggestions = []

        if q.kw(i,'CORRECTION'):
            i += 1
            if q.kw(i,'OF') and q[i+1]:
                set = q[i+1]
                i += 2
            if not q.kw(i,'WITH'):
                i = getassignments(q, i, actionassignments, focus)
            if q.kw(i,'WHERE'):
                filter, i = Filter.parse(q, i+1)
            if q.kw(i,'WITH'):
                i = getassignments(q, i+1,  assignments)
        else:
            raise SyntaxError("Expected CORRECTION, got " + str(q[i]) + " in: " + str(q))

        l = len(q)
        while i < l:
            if q.kw(i,'SUGGESTION'):
                i+= 1
                suggestion = ( {}, {} ) #subassignments, suggestionassignments
                if isinstance(q[i], UnparsedQuery):
                    if not q[i].kw(0,'SUBSTITUTE') and not q[i].kw(0,'ADD'):
                        raise SyntaxError("Subexpression after SUGGESTION, expected ADD or SUBSTITUTE, got " + str(q[i]))
                    Correction.parsesubstitute(q[i],suggestion)
                    i += 1
                elif q.kw(i,'MERGE') or q.kw(i,'SPLIT'):
                    if q.kw(i,'MERGE'):
                        suggestion[1]['merge'] = True
                    else:
                        suggestion[1]['split'] = True
                    i+= 1
                    if q.kw(i,'DELETION'): #No need to do anything, DELETION is just to make things more explicit in the syntax, it will result in an empty suggestion
                        i+=1
                    elif isinstance(q[i], UnparsedQuery):
                        if not q[i].kw(0,'SUBSTITUTE') and not q[i].kw(0,'ADD'):
                            raise SyntaxError("Subexpression after SUGGESTION, expected ADD or SUBSTITUTE, got " + str(q[i]))
                        Correction.parsesubstitute(q[i],suggestion)
                        i += 1
                    elif not q.kw(i,'WITH'):
                        i = getassignments(q, i, suggestion[0], focus) #subassignments (the actual element in the suggestion)
                elif not q.kw(i,'WITH'):
                    i = getassignments(q, i, suggestion[0], focus) #subassignments (the actual element in the suggestion)
                if q.kw(i,'WITH'):
                    i = getassignments(q, i+1, suggestion[1]) #assignments for the suggestion
                suggestions.append(suggestion)
            else:
                raise SyntaxError("Expected SUGGESTION or end of AS clause, got " + str(q[i]) + " in: " + str(q))

        return Correction(set, actionassignments, assignments, filter, suggestions, bare), i

    @staticmethod
    def parsesubstitute(q,suggestion):
        suggestion[0]['substitute'],_ = Action.parse(q)

    def __call__(self, query, action, focus, target,debug=False):
        """Action delegates to this function"""
        if debug: print("[FQL EVALUATION DEBUG] Correction - Processing ", repr(focus),file=sys.stderr)

        isspan = isinstance(action.focus.Class, folia.AbstractSpanAnnotation)


        actionassignments = {} #make a copy
        for key, value in action.assignments.items():
            if key == 'class': key = 'cls'
            actionassignments[key] = value
        for key, value in self.actionassignments.items():
            if key == 'class': key = 'cls'
            actionassignments[key] = value

        if actionassignments:
            if (not 'set' in actionassignments or actionassignments['set'] is None) and action.focus.Class:
                try:
                    actionassignments['set'] = query.defaultsets[action.focus.Class.XMLTAG]
                except KeyError:
                    actionassignments['set'] = query.doc.defaultset(action.focus.Class)
            if action.focus.Class.REQUIRED_ATTRIBS and folia.Attrib.ID in action.focus.Class.REQUIRED_ATTRIBS:
                actionassignments['id'] = getrandomid(query, "corrected." + action.focus.Class.XMLTAG + ".")

        kwargs = {}
        if self.set:
            kwargs['set'] = self.set

        for key, value in self.assignments.items():
            if key == 'class': key = 'cls'
            kwargs[key] = value

        if action.action == "SELECT":
            if not focus: raise QueryError("SELECT requires a focus element")
            correction = focus.incorrection()
            if correction:
                if not self.filter or (self.filter and self.filter.match(query, correction, debug)):
                    yield correction
        elif action.action in ("EDIT","ADD","PREPEND","APPEND"):
            if focus:
                correction = focus.incorrection()
            else:
                correction = False

            inheritchildren = []
            if focus and not self.bare: #copy all data within
                inheritchildren = list(focus.copychildren(query.doc, True))
                if action.action == "EDIT" and action.span: #respan
                    #delete all word references from the copy first, we will add new ones
                    inheritchildren = [ c  for c in inheritchildren if not isinstance(c, folia.WordReference) ]
                    if not isinstance(focus, folia.AbstractSpanAnnotation): raise QueryError("Can only perform RESPAN on span annotation elements!")
                    contextselector = target if target else query.doc
                    spanset = next(action.span(query, contextselector, True, debug)) #there can be only one
                    for w in spanset:
                        inheritchildren.append(w)

            if actionassignments:
                kwargs['new'] = action.focus.Class(query.doc,*inheritchildren, **actionassignments)
                if focus and action.action not in ('PREPEND','APPEND'):
                    kwargs['original'] = focus
                #TODO: if not bare, fix all span annotation references to this element
            elif focus and action.action not in ('PREPEND','APPEND'):
                if isinstance(focus, folia.AbstractStructureElement):
                    kwargs['current'] = focus #current only needed for structure annotation
                if correction and (not 'set' in kwargs or correction.set == kwargs['set']) and (not 'cls' in kwargs or correction.cls == kwargs['cls']): #reuse the existing correction element
                    print("Reusing " + correction.id,file=sys.stderr)
                    kwargs['reuse'] = correction

            if action.action in ('PREPEND','APPEND'):
                #get parent relative to target
                parent = target.ancestor( (folia.AbstractStructureElement, folia.AbstractSpanAnnotation, folia.AbstractAnnotationLayer) )
            elif focus:
                if 'reuse' in kwargs and kwargs['reuse']:
                    parent = focus.ancestor( (folia.AbstractStructureElement, folia.AbstractSpanAnnotation, folia.AbstractAnnotationLayer) )
                else:
                    parent = focus.ancestor( (folia.AbstractStructureElement, folia.AbstractSpanAnnotation, folia.AbstractAnnotationLayer, folia.Correction) )
            else:
                parent = target

            if 'id' not in kwargs and 'reuse' not in kwargs:
                kwargs['id'] = parent.generate_id(folia.Correction)

            kwargs['suggestions'] = []
            for subassignments, suggestionassignments in self.suggestions:
                subassignments = copy(subassignments) #assignment for the element in the suggestion
                for key, value in action.assignments.items():
                    if not key in subassignments:
                        if key == 'class': key = 'cls'
                        subassignments[key] = value
                if (not 'set' in subassignments or subassignments['set'] is None) and action.focus.Class:
                    try:
                        subassignments['set'] = query.defaultsets[action.focus.Class.XMLTAG]
                    except KeyError:
                        subassignments['set'] = query.doc.defaultset(action.focus.Class)
                if focus and not self.bare: #copy all data within (we have to do this again for each suggestion as it will generate different ID suffixes)
                    inheritchildren = list(focus.copychildren(query.doc, True))
                if action.focus.Class.REQUIRED_ATTRIBS and folia.Attrib.ID in action.focus.Class.REQUIRED_ATTRIBS:
                    subassignments['id'] = getrandomid(query, "suggestion.")
                kwargs['suggestions'].append( folia.Suggestion(query.doc, action.focus.Class(query.doc, *inheritchildren,**subassignments), **suggestionassignments )   )

            if action.action == 'PREPEND':
                index = parent.getindex(target,True) #recursive
                if index == -1:
                    raise QueryError("Insertion point for PREPEND action not found")
                kwargs['insertindex'] = index
                kwargs['nooriginal'] = True
            elif action.action == 'APPEND':
                index = parent.getindex(target,True) #recursive
                if index == -1:
                    raise QueryError("Insertion point for APPEND action not found")
                kwargs['insertindex'] = index+1
                kwargs['insertindex_offset'] = 1 #used by correct if it needs to recompute the index
                kwargs['nooriginal'] = True

            yield parent.correct(**kwargs) #generator
        elif action.action == "DELETE":
            if debug: print("[FQL EVALUATION DEBUG] Correction - Deleting ", repr(focus), " (in " + repr(focus.parent) + ")",file=sys.stderr)
            if not focus: raise QueryError("DELETE AS CORRECTION did not find a focus to operate on")
            kwargs['original'] = focus
            kwargs['new'] = [] #empty new
            c = focus.parent.correct(**kwargs) #generator
            yield c
        else:
            raise QueryError("Correction does not handle action " + action.action)


    def autodeclare(self,doc):
        if self.set:
            if not doc.declared(folia.Correction, self.set):
                doc.declare(folia.Correction, self.set)

    def prepend(self, query, content, contextselector, debug):
        return self.insert(query, content, contextselector, 0, debug)

    def append(self, query, content, contextselector, debug):
        return self.insert(query, content, contextselector, 1, debug)

    def insert(self,  query, content, contextselector, offset, debug):
        kwargs = {}
        if self.set:
            kwargs['set'] = self.set
        for key, value in self.assignments.items():
            if key == 'class': key = 'cls'
            kwargs[key] = value
        self.autodeclare(query.doc)

        if not content:
            #suggestions only, no subtitution obtained from main action yet, we have to process it still
            if debug: print("[FQL EVALUATION DEBUG] Correction.insert - Initialising for suggestions only",file=sys.stderr)
            if isinstance(contextselector,tuple) and len(contextselector) == 2:
                contextselector = contextselector[0](*contextselector[1])
            target = list(contextselector)[0] #not a spanset


            insertindex = 0
            #find insertion index:
            if debug: print("[FQL EVALUATION DEBUG] Correction.insert - Finding insertion index for target ", repr(target), "  in ", repr(target.parent),file=sys.stderr)
            for i, e in enumerate(target.parent):
                if e is target:
                    if debug: print("[FQL EVALUATION DEBUG] Correction.insert - Target ", repr(target), " found in ", repr(target.parent), " at index ", i,file=sys.stderr)
                    insertindex = i
                    break

            content = {'parent': target.parent,'new':[]}
            kwargs['insertindex'] = insertindex + offset
        else:
            kwargs['insertindex'] = content['index'] + offset
            if debug: print("[FQL EVALUATION DEBUG] Correction.insert - Initialising correction",file=sys.stderr)
            kwargs['new'] = [] #stuff will be appended

        kwargs['nooriginal'] = True #this is an insertion, there is no original
        kwargs = self.assemblesuggestions(query,content,debug,kwargs)

        if debug: print("[FQL EVALUATION DEBUG] Correction.insert - Applying and returning correction ", repr(kwargs),file=sys.stderr)
        return content['parent'].correct(**kwargs)

    def substitute(self, query, substitution, contextselector, debug):
        kwargs = {}
        if self.set:
            kwargs['set'] = self.set
        for key, value in self.assignments.items():
            if key == 'class': key = 'cls'
            kwargs[key] = value
        self.autodeclare(query.doc)


        if not substitution:
            #suggestions only, no subtitution obtained from main action yet, we have to process it still
            if debug: print("[FQL EVALUATION DEBUG] Correction.substitute - Initialising for suggestions only",file=sys.stderr)
            if isinstance(contextselector,tuple) and len(contextselector) == 2:
                contextselector = contextselector[0](*contextselector[1])
            target = list(contextselector)[0]
            if not isinstance(target, SpanSet):
                raise QueryError("SUBSTITUTE expects target SPAN")

            prev = target[0].parent
            for e in target[1:]:
                if e.parent != prev:
                    raise QueryError("SUBSTITUTE can only be performed when the target items share the same parent. First parent is " + repr(prev) + ", parent of " + repr(e) + " is " + repr(e.parent))

            insertindex = 0
            #find insertion index:
            for i, e in enumerate(target[0].parent):
                if e is target[0]:
                    insertindex = i
                    break

            substitution = {'parent': target[0].parent,'new':[]}
            kwargs['insertindex'] = insertindex
            kwargs['current'] = target
        else:
            kwargs['insertindex'] = substitution['index']
            kwargs['original'] =  substitution['span']
            if debug: print("[FQL EVALUATION DEBUG] Correction.substitute - Initialising correction",file=sys.stderr)
            kwargs['new'] = [] #stuff will be appended

        kwargs = self.assemblesuggestions(query,substitution,debug,kwargs)

        if debug: print("[FQL EVALUATION DEBUG] Correction.substitute - Applying and returning correction",file=sys.stderr)
        return substitution['parent'].correct(**kwargs)


    def assemblesuggestions(self, query, substitution, debug, kwargs):
        if self.suggestions:
            kwargs['suggestions'] = [] #stuff will be appended

        for i, (Class, actionassignments, subactions) in enumerate(substitution['new']):
            if actionassignments:
                if (not 'set' in actionassignments or actionassignments['set'] is None):
                    try:
                        actionassignments['set'] = query.defaultsets[Class.XMLTAG]
                    except KeyError:
                        actionassignments['set'] = query.doc.defaultset(Class)
            actionassignments['id'] = "corrected.%08x" % random.getrandbits(32) #generate a random ID
            e = Class(query.doc, **actionassignments)
            if debug: print("[FQL EVALUATION DEBUG] Correction.assemblesuggestions - Adding to new",file=sys.stderr)
            kwargs['new'].append(e)
            for subaction in subactions:
                subaction.focus.autodeclare(query.doc)
                if debug: print("[FQL EVALUATION DEBUG] Correction.assemblesuggestions - Invoking subaction", subaction.action,file=sys.stderr)
                subaction(query, [e], debug ) #note: results of subactions will be silently discarded

        for subassignments, suggestionassignments in self.suggestions:
            suggestionchildren = []
            if 'substitute' in subassignments:
                #SUBTITUTE (or synonym ADD)
                action = subassignments['substitute']
                del subassignments['substitute']
            else:
                #we have a suggested deletion
                action = None
            if debug: print("[FQL EVALUATION DEBUG] Correction.assemblesuggestions - Adding suggestion",file=sys.stderr)
            while action:
                subassignments = copy(subassignments) #assignment for the element in the suggestion
                if isinstance(action.focus, tuple) and len(action.focus) == 2:
                    action.focus = action.focus[0]
                for key, value in action.assignments.items():
                    if key == 'class': key = 'cls'
                    subassignments[key] = value
                if (not 'set' in subassignments or subassignments['set'] is None) and action.focus.Class:
                    try:
                        subassignments['set'] = query.defaultsets[action.focus.Class.XMLTAG]
                    except KeyError:
                        subassignments['set'] = query.doc.defaultset(action.focus.Class)
                focus = action.focus
                focus.autodeclare(query.doc)
                if focus.Class.REQUIRED_ATTRIBS and folia.Attrib.ID in focus.Class.REQUIRED_ATTRIBS:
                    subassignments['id'] = getrandomid(query, "suggestion.")
                suggestionchildren.append( focus.Class(query.doc, **subassignments))
                action = action.nextaction

            if debug: print("[FQL EVALUATION DEBUG] Correction.assemblesuggestions - Suggestionchildren: ", len(suggestionchildren),file=sys.stderr)

            if 'split' in suggestionassignments and suggestionassignments['split']:
                nextitem = substitution['parent'].next(substitution['parent'].__class__, None)
                if nextitem:
                    suggestionassignments['split'] = nextitem.id
                else:
                    del suggestionassignments['split']
            if 'merge' in suggestionassignments and suggestionassignments['merge']:
                nextitem = substitution['parent'].next(substitution['parent'].__class__, None)
                if nextitem:
                    suggestionassignments['merge'] = nextitem.id
                else:
                    del suggestionassignments['merge']
            kwargs['suggestions'].append( folia.Suggestion(query.doc,*suggestionchildren, **suggestionassignments )   )

        return kwargs


def getassignments(q, i, assignments,  focus=None):
    l = len(q)
    while i < l:
        if q.kw(i, ('id','set','subset','annotator','class','n')):
            if q[i+1] == 'NONE':
                assignments[q[i]] = None
            else:
                assignments[q[i]] = q[i+1]
            i+=2
        elif q.kw(i,'confidence'):
            if q[i+1] == 'NONE':
                assignments[q[i]] = None
            else:
                try:
                    assignments[q[i]] = float(q[i+1])
                except:
                    raise SyntaxError("Invalid value for confidence: " + str(q[i+1]))
            i+=2
        elif q.kw(i,'annotatortype'):
            if q[i+1] == "auto":
                assignments[q[i]] = folia.AnnotatorType.AUTO
            elif q[i+1] == "manual":
                assignments[q[i]] = folia.AnnotatorType.MANUAL
            elif q[i+1] == "NONE":
                assignments[q[i]] = None
            else:
                raise SyntaxError("Invalid value for annotatortype: " + str(q[i+1]))
            i+=2
        elif q.kw(i,('text','value','phon')):
            if not focus is None and focus.Class in (folia.TextContent, folia.Description, folia.Comment):
                key = 'value'
            elif not focus is None and focus.Class is folia.PhonContent:
                key = 'phon'
            else:
                key = 'text'
            assignments[key] = q[i+1]
            i+=2
        elif q.kw(i, 'datetime'):
            if q[i+1] == "now":
                assignments[q[i]] = datetime.datetime.now()
            elif q[i+1] == "NONE":
                assignments[q[i]] = None
            elif q[i+1].isdigit():
                try:
                    assignments[q[i]] = datetime.datetime.fromtimestamp(q[i+1])
                except:
                    raise SyntaxError("Unable to parse datetime: " + str(q[i+1]))
            else:
                try:
                    assignments[q[i]] = datetime.strptime("%Y-%m-%dT%H:%M:%S")
                except:
                    raise SyntaxError("Unable to parse datetime: " + str(q[i+1]))
            i += 2
        else:
            if not assignments:
                raise SyntaxError("Expected assignments after WITH statement, but no valid attribute found, got  " + str(q[i]) + " at position " + str(i) + " in: " +  str(q))
            break
    return i

class Action(object): #Action expression
    def __init__(self, action, focus, assignments={}):
        self.action = action
        self.focus = focus #Selector
        self.assignments = assignments
        self.form = None
        self.subactions = []
        self.nextaction = None
        self.span = None #encodes an extra SPAN/RESPAN action


    @staticmethod
    def parse(q,i=0):
        if q.kw(i, ('SELECT','EDIT','DELETE','ADD','APPEND','PREPEND','SUBSTITUTE')):
            action = q[i]
        else:
            raise SyntaxError("Expected action, got " + str(q[i]) + " in: " + str(q))

        assignments = {}

        i += 1
        if (action in ('SUBSTITUTE','APPEND','PREPEND')) and (isinstance(q[i],UnparsedQuery)):
            focus = None   #We have a SUBSTITUTE/APPEND/PREPEND (AS CORRECTION) expression
        elif (action == 'SELECT') and q.kw(i,('FOR','IN')): #select statement  without focus, pure target
            focus = None
        else:
            focus, i = Selector.parse(q,i)

            if action == "ADD" and focus.filter:
                raise SyntaxError("Focus has WHERE statement but ADD action does not support this")

            if q.kw(i,"WITH"):
                if action in ("SELECT", "DELETE"):
                    raise SyntaxError("Focus has WITH statement but " + action + " does not support this: " +str(q))
                i += 1
                i = getassignments(q,i ,assignments, focus)

        #we have enough to set up the action now
        action = Action(action, focus, assignments)

        if action.action in ("EDIT","ADD", "APPEND","PREPEND") and q.kw(i,("RESPAN","SPAN")):
            action.span, i = Span.parse(q,i+1)
        if action.action == "DELETE" and q.kw(i,("RESTORE")):
            action.restore = q[i+1].upper()
            i += 2
        else:
            action.restore = None

        done = False
        while not done:
            if isinstance(q[i], UnparsedQuery):
                #we have a sub expression
                if q[i].kw(0, ('EDIT','DELETE','ADD')):
                    #It's a sub-action!
                    if action.action in ("DELETE"):
                        raise SyntaxError("Subactions are not allowed for action " + action.action + ", in: " + str(q))
                    subaction, _ = Action.parse(q[i])
                    action.subactions.append( subaction )
                elif q[i].kw(0, 'AS'):
                    if q[i].kw(1, "ALTERNATIVE"):
                        action.form,_ = Alternative.parse(q[i])
                    elif q[i].kw(1, "CORRECTION") or (q[i].kw(1,"BARE") and q[i].kw(2, "CORRECTION")):
                        action.form,_ = Correction.parse(q[i],0,action.focus)
                    else:
                        raise SyntaxError("Invalid keyword after AS: " + str(q[i][1]))
                i+=1
            else:
                done = True


        if q.kw(i, ('SELECT','EDIT','DELETE','ADD','APPEND','PREPEND','SUBSTITUTE')):
            #We have another action!
            action.nextaction, i = Action.parse(q,i)

        return action, i


    def __call__(self, query, contextselector, debug=False):
        """Returns a list focusselection after having performed the desired action on each element therein"""

        #contextselector is a two-tuple function recipe (f,args), so we can reobtain the generator which it returns

        #select all focuss, not lazy because we are going return them all by definition anyway


        if debug: print("[FQL EVALUATION DEBUG] Action - Preparing to evaluate action chain starting with ", self.action,file=sys.stderr)

        #handles all actions further in the chain, not just this one!!! This actual method is only called once
        actions = [self]
        a = self
        while a.nextaction:
            actions.append(a.nextaction)
            a = a.nextaction

        if len(actions) > 1:
            #multiple actions to perform, apply contextselector once and load in memory    (will be quicker at higher memory cost, proportionate to the target selection size)
            if isinstance(contextselector, tuple) and len(contextselector) == 2:
                contextselector = list(contextselector[0](*contextselector[1]))
            focusselection_all = []
            constrainedtargetselection_all = []

        for action in actions:
            if action.action != "SELECT" and action.focus:
                #check if set is declared, if not, auto-declare
                if debug: print("[FQL EVALUATION DEBUG] Action - Auto-declaring ",action.focus.Class.__name__, " of ", str(action.focus.set),file=sys.stderr)
                action.focus.autodeclare(query.doc)

            if action.form and isinstance(action.form, Correction) and action.focus:
                if debug: print("[FQL EVALUATION DEBUG] Action - Auto-declaring ",action.focus.Class.__name__, " of ", str(action.focus.set),file=sys.stderr)
                action.form.autodeclare(query.doc)


        substitution = {}
        if self.action == 'SUBSTITUTE' and not self.focus and self.form:
            #we have a SUBSTITUTE (AS CORRECTION) statement with no correction but only suggestions
            #defer substitute to form
            result = self.form.substitute(query, None, contextselector, debug)
            focusselection = [result]
            constrainedtargetselection = []
            #(no further chaining possible in this setup)
        elif self.action == 'PREPEND' and not self.focus and self.form:
            #we have a PREPEND (AS CORRECTION) statement with no correction but only suggestions
            #defer substitute to form
            result = self.form.prepend(query, None, contextselector, debug)
            focusselection = [result]
            constrainedtargetselection = []
            #(no further chaining possible in this setup)
        elif self.action == 'APPEND' and not self.focus and self.form:
            #we have a APPEND (AS CORRECTION) statement with no correction but only suggestions
            #defer substitute to form
            result = self.form.append(query, None, contextselector, debug)
            focusselection = [result]
            constrainedtargetselection = []
            #(no further chaining possible in this setup)
        else:

            for action in actions:
                if debug: print("[FQL EVALUATION DEBUG] Action - Evaluating action ", action.action,file=sys.stderr)
                focusselection = []
                constrainedtargetselection = [] #selecting focus elements constrains the target selection
                processed_form = []

                if substitution and action.action != "SUBSTITUTE":
                    raise QueryError("SUBSTITUTE can not be chained with " + action.action)

                if action.action == "SELECT" and not action.focus: #SELECT without focus, pure target-select
                    if isinstance(contextselector, tuple) and len(contextselector) == 2:
                        for e in contextselector[0](*contextselector[1]):
                            constrainedtargetselection.append(e)
                            focusselection.append(e)
                    else:
                        for e in contextselector:
                            constrainedtargetselection.append(e)
                            focusselection.append(e)

                elif action.action not in ("ADD","APPEND","PREPEND"): #only for actions that operate on an existing focus
                    if contextselector is query.doc and action.focus.Class in ('ALL',folia.Text):
                        focusselector = ( (x,x) for x in query.doc )  #Patch to make root-level SELECT ALL work as intended
                    else:
                        strict = query.targets and query.targets.strict
                        focusselector = action.focus(query,contextselector, not strict, debug)
                    if debug: print("[FQL EVALUATION DEBUG] Action - Obtaining focus...",file=sys.stderr)
                    for focus, target in focusselector:
                        if target and action.action != "SUBSTITUTE":
                            if isinstance(target, SpanSet):
                                if not target.partof(constrainedtargetselection):
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Got target result (spanset), adding ", repr(target),file=sys.stderr)
                                    constrainedtargetselection.append(target)
                            elif not any(x is target for x in constrainedtargetselection):
                                if debug: print("[FQL EVALUATION DEBUG] Action - Got target result, adding ", repr(target),file=sys.stderr)
                                constrainedtargetselection.append(target)


                        if action.form and action.action != "SUBSTITUTE":
                            #Delegate action to form (= correction or alternative)
                            if not any(x is focus for x in  processed_form):
                                if debug: print("[FQL EVALUATION DEBUG] Action - Got focus result, processing using form ", repr(focus),file=sys.stderr)
                                processed_form.append(focus)
                                focusselection += list(action.form(query, action,focus,target,debug))
                            else:
                                if debug: print("[FQL EVALUATION DEBUG] Action - Focus result already obtained, skipping... ", repr(focus),file=sys.stderr)
                                continue
                        else:
                            if isinstance(focus,SpanSet):
                                if not focus.partof(focusselection):
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Got focus result (spanset), adding ", repr(target),file=sys.stderr)
                                    focusselection.append(target)
                                else:
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Focus result (spanset) already obtained, skipping... ", repr(target),file=sys.stderr)
                                    continue
                            elif not any(x is focus for x in  focusselection):
                                if debug: print("[FQL EVALUATION DEBUG] Action - Got focus result, adding ", repr(focus),file=sys.stderr)
                                focusselection.append(focus)
                            else:
                                if debug: print("[FQL EVALUATION DEBUG] Action - Focus result already obtained, skipping... ", repr(focus),file=sys.stderr)
                                continue

                            if action.action == "EDIT":
                                if debug: print("[FQL EVALUATION DEBUG] Action - Applying EDIT to focus ", repr(focus),file=sys.stderr)
                                for attr, value in action.assignments.items():
                                    if attr in ("text","value","phon"):
                                        if isinstance(focus, (folia.Description, folia.Comment, folia.Content)):
                                            if debug: print("[FQL EVALUATION DEBUG] Action - setting value ("+ value+ ") on focus ", repr(focus),file=sys.stderr)
                                            focus.value = value
                                        elif isinstance(focus, (folia.PhonContent)):
                                            if debug: print("[FQL EVALUATION DEBUG] Action - setphon("+ value+ ") on focus ", repr(focus),file=sys.stderr)
                                            focus.setphon(value)
                                        else:
                                            if debug: print("[FQL EVALUATION DEBUG] Action - settext("+ value+ ") on focus ", repr(focus),file=sys.stderr)
                                            focus.settext(value)
                                    elif attr == "class":
                                        if debug: print("[FQL EVALUATION DEBUG] Action - " + attr +  " = " + value + " on focus ", repr(focus),file=sys.stderr)
                                        focus.cls = value
                                    else:
                                        if debug: print("[FQL EVALUATION DEBUG] Action - " + attr +  " = " + value + " on focus ", repr(focus),file=sys.stderr)
                                        setattr(focus, attr, value)
                                if action.span is not None: #respan
                                    if not isinstance(focus, folia.AbstractSpanAnnotation): raise QueryError("Can only perform RESPAN on span annotation elements!")
                                    spanset = next(action.span(query, contextselector, True, debug)) #there can be only one
                                    focus.setspan(*spanset)

                                query._touch(focus)
                            elif action.action == "DELETE":
                                if debug: print("[FQL EVALUATION DEBUG] Action - Applying DELETE to focus ", repr(focus),file=sys.stderr)
                                p = focus.parent
                                if action.restore == "ORIGINAL":
                                    index = p.getindex(focus, False, False)
                                    if not isinstance(focus, folia.Correction):
                                        raise QueryError("RESTORE ORIGINAL can only be performed when the focus is a correction")
                                    #restore original
                                    for original in reversed(focus.original()):
                                        if debug: print("[FQL EVALUATION DEBUG] Action - Restoring original: ", repr(original),file=sys.stderr)
                                        original.parent = p
                                        p.insert(index, original)
                                p.remove(focus)
                                #we set the parent back on the element we return, so return types like ancestor-focus work
                                focus.parent = p
                            elif action.action == "SUBSTITUTE":
                                if debug: print("[FQL EVALUATION DEBUG] Action - Applying SUBSTITUTE to target ", repr(focus),file=sys.stderr)
                                if not isinstance(target,SpanSet) or not target: raise QueryError("SUBSTITUTE requires a target SPAN")
                                focusselection.remove(focus)

                                if not substitution:
                                    #this is the first SUBSTITUTE in a chain
                                    prev = target[0].parent
                                    for e in target[1:]:
                                        if e.parent != prev:
                                            raise QueryError("SUBSTITUTE can only be performed when the target items share the same parent")

                                    substitution['parent'] = target[0].parent
                                    substitution['index'] = 0
                                    substitution['span'] = target
                                    substitution['new'] = []

                                    #find insertion index:
                                    for i, e in enumerate(target[0].parent):
                                        if e is target[0]:
                                            substitution['index'] = i
                                            break

                                substitution['new'].append( (action.focus.Class, action.assignments, action.subactions)  )



                if action.action in ("ADD","APPEND","PREPEND") or (action.action == "EDIT" and not focusselection):
                    if debug: print("[FQL EVALUATION DEBUG] Action - Applying " + action.action + " to targets",file=sys.stderr)
                    if not action.focus.Class:
                        raise QueryError("Focus of action has no class!")

                    isspan = issubclass(action.focus.Class, folia.AbstractSpanAnnotation)
                    isspanrole = issubclass(action.focus.Class, folia.AbstractSpanRole)
                    if 'set' not in action.assignments and action.focus.Class not in (folia.Description, folia.Comment, folia.Feature) and not isspanrole:
                        if action.focus.set and action.focus.set != "undefined":
                            action.assignments['set'] = action.focus.set
                        elif action.focus.Class.XMLTAG in query.defaultsets:
                            action.assignments['set'] = action.focus.set = query.defaultsets[action.focus.Class.XMLTAG]
                        else:
                            action.assignments['set'] = action.focus.set = query.doc.defaultset(action.focus.Class)


                    if isinstance(contextselector, tuple) and len(contextselector) == 2:
                        targetselection = contextselector[0](*contextselector[1])
                    else:
                        targetselection = contextselector

                    for target in targetselection:
                        if action.form:
                            #Delegate action to form (= correction or alternative)
                            focusselection += list( action.form(query, action,None,target,debug) )
                        else:
                            if isinstance(target, SpanSet):
                                if action.action == "ADD" or action.action == "EDIT":
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Applying " + action.action + " of " + action.focus.Class.__name__ + " to target spanset " + repr(target),file=sys.stderr)
                                    if action.span is not None and len(action.span) == 0:
                                        action.assignments['emptyspan'] = True
                                    focusselection.append( target[0].add(action.focus.Class, *target, **action.assignments) ) #handles span annotation too
                                    query._touch(focusselection[-1])
                            else:
                                if action.action == "ADD" or action.action == "EDIT":
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Applying " + action.action + " of " + action.focus.Class.__name__ + " to target " + repr(target),file=sys.stderr)
                                    focusselection.append( target.add(action.focus.Class, **action.assignments) ) #handles span annotation too
                                    query._touch(focusselection[-1])
                                elif action.action == "APPEND":
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Applying " + action.action + " of " + action.focus.Class.__name__ +" to target " + repr(target),file=sys.stderr)
                                    index = target.parent.getindex(target)
                                    if index == -1:
                                        raise QueryError("Insertion point for APPEND action not found")
                                    focusselection.append( target.parent.insert(index+1, action.focus.Class, **action.assignments) )
                                    query._touch(focusselection[-1])
                                elif action.action == "PREPEND":
                                    if debug: print("[FQL EVALUATION DEBUG] Action - Applying " + action.action + " of " + action.focus.Class.__name__ +" to target " + repr(target),file=sys.stderr)
                                    index = target.parent.getindex(target)
                                    if index == -1:
                                        raise QueryError("Insertion point for PREPEND action not found")
                                    focusselection.append( target.parent.insert(index, action.focus.Class, **action.assignments) )
                                    query._touch(focusselection[-1])

                        if isinstance(target, SpanSet):
                            if not target.partof(constrainedtargetselection):
                                constrainedtargetselection.append(target)
                        elif not any(x is target for x in constrainedtargetselection):
                            constrainedtargetselection.append(target)

                    if focusselection and action.span: #process SPAN keyword (ADD .. SPAN .. FOR .. rather than ADD ... FOR SPAN ..)
                        if not isspan: raise QueryError("Can only use SPAN with span annotation elements!")
                        for focus in focusselection:
                            spanset = next(action.span(query, contextselector, True, debug)) #there can be only one
                            focus.setspan(*spanset)

                if focusselection and action.subactions and not substitution:
                    for subaction in action.subactions:
                        #check if set is declared, if not, auto-declare
                        if debug: print("[FQL EVALUATION DEBUG] Action - Auto-declaring ",action.focus.Class.__name__, " of ", str(action.focus.set),file=sys.stderr)
                        subaction.focus.autodeclare(query.doc)
                        if debug: print("[FQL EVALUATION DEBUG] Action - Invoking subaction ", subaction.action,file=sys.stderr)
                        subaction(query, focusselection, debug ) #note: results of subactions will be silently discarded, they can never select anything

                if len(actions) > 1:
                    #consolidate results:
                    focusselection_all = []
                    for e in focusselection:
                        if isinstance(e, SpanSet):
                            if not e.partof(focusselection_all):
                                focusselection_all.append(e)
                        elif not any(x is e for x in focusselection_all):
                            focusselection_all.append(e)
                    constrainedtargetselection_all = []
                    for e in constrainedtargetselection:
                        if isinstance(e, SpanSet):
                            if not e.partof(constrainedtargetselection_all):
                                constrainedtargetselection_all.append(e)
                        elif not any(x is e for x in constrainedtargetselection_all):
                            constrainedtargetselection_all.append(e)

            if substitution:
                constrainedtargetselection_all = []
                constrainedtargetselection = []
                if action.form:
                    result = action.form.substitute(query, substitution, None, debug)
                    if len(actions) > 1:
                        focusselection_all.append(result)
                    else:
                        focusselection.append(result)
                else:
                    if debug: print("[FQL EVALUATION DEBUG] Action - Substitution - Removing target",file=sys.stderr)
                    for e in substitution['span']:
                        substitution['parent'].remove(e)

                    for i, (Class, assignments, subactions) in enumerate(substitution['new']):
                        if debug: print("[FQL EVALUATION DEBUG] Action - Substitution - Inserting substitution",file=sys.stderr)
                        e =  substitution['parent'].insert(substitution['index']+i, Class, **assignments)
                        for subaction in subactions:
                            subaction.focus.autodeclare(query.doc)
                            if debug: print("[FQL EVALUATION DEBUG] Action - Invoking subaction (in substitution) ", subaction.action,file=sys.stderr)
                            subaction(query, [e], debug ) #note: results of subactions will be silently discarded, they can never select anything

                        if len(actions) > 1:
                            focusselection_all.append(e)
                        else:
                            focusselection.append(e)

        if len(actions) > 1:
            return focusselection_all, constrainedtargetselection_all
        else:
            return focusselection, constrainedtargetselection



class Context(object):
    def __init__(self):
        self.format = "python"
        self.returntype = "focus"
        self.request = "all"
        self.defaults = {}
        self.defaultsets = {}

class Query(object):
    """This class represents an FQL query.

    Selecting a word with a particular text is done as follows, ``doc`` is an instance of :class:`pynlpl.formats.folia.Document`::

        query = fql.Query('SELECT w WHERE text = "house"')
        for word in query(doc):
            print(word)  #this will be an instance of folia.Word

    Regular expression matching can be done using the ``MATCHES`` operator::

        query = fql.Query('SELECT w WHERE text MATCHES "^house.*$"')
        for word in query(doc):
            print(word)

    The classes of other annotation types can be easily queried as follows::

        query = fql.Query('SELECT w WHERE :pos = "v"' AND :lemma = "be"')
        for word in query(doc):
            print(word)

    You can constrain your queries to a particular target selection using the ``FOR`` keyword::

        query = fql.Query('SELECT w WHERE text MATCHES "^house.*$" FOR s WHERE text CONTAINS "sell"')
        for word in query(doc):
            print(word)

    This construction also allows you to select the actual annotations. To select all people (a named entity) for words that are not John::

        query = fql.Query('SELECT entity WHERE class = "person" FOR w WHERE text != "John"')
        for entity in query(doc):
            print(entity) #this will be an instance of folia.Entity

    **FOR** statement may be chained, and Explicit IDs can be passed using the ``ID`` keyword::

        query = fql.Query('SELECT entity WHERE class = "person" FOR w WHERE text != "John" FOR div ID "section.21"')
        for entity in query(doc):
            print(entity)

    Sets are specified using the **OF** keyword, it can be omitted if there is only one for the annotation type, but will be required otherwise::

        query = fql.Query('SELECT su OF "http://some/syntax/set" WHERE class = "np"')
        for su in query(doc):
            print(su) #this will be an instance of folia.SyntacticUnit

    We have just covered just the **SELECT** keyword, FQL has other keywords for manipulating documents, such as **EDIT**, **ADD**, **APPEND** and **PREPEND**.

    Note:
        Consult the FQL documentation at https://github.com/proycon/foliadocserve/blob/master/README.rst for further documentation on the language.

    """
    def __init__(self, q, context=Context()):
        self.action = None
        self.targets = None
        self.declarations = []
        self.format = context.format
        self.returntype = context.returntype
        self.request = copy(context.request)
        self.defaults = copy(context.defaults)
        self.defaultsets = copy(context.defaultsets)
        self.parse(q)

    def parse(self, q, i=0):
        if not isinstance(q,UnparsedQuery):
            q = UnparsedQuery(q)

        l = len(q)
        if q.kw(i,"DECLARE"):
            try:
                Class = folia.XML2CLASS[q[i+1]]
            except:
                raise SyntaxError("DECLARE statement expects a FoLiA element, got: " + str(q[i+1]))

            if not Class.ANNOTATIONTYPE:
                raise SyntaxError("DECLARE statement for undeclarable element type: " + str(q[i+1]))

            i += 2

            defaults = {}
            decset = None
            if q.kw(i,"OF") and q[i+1]:
                i += 1
                decset = q[i]
                i += 1
                if q.kw(i,"WITH"):
                    i = getassignments(q,i+1,defaults)

            if not decset:
                raise SyntaxError("DECLARE statement must state a set")

            self.declarations.append( (Class, decset, defaults)  )

        if i < l:
            self.action,i = Action.parse(q,i)

            if q.kw(i,("FOR","IN")):
                self.targets, i = Target.parse(q,i)

            while i < l:
                if q.kw(i,"RETURN"):
                    self.returntype = q[i+1]
                    i+=2
                elif q.kw(i,"FORMAT"):
                    self.format = q[i+1]
                    i+=2
                elif q.kw(i,"REQUEST"):
                    self.request = q[i+1].split(",")
                    i+=2
                else:
                    raise SyntaxError("Unexpected " + str(q[i]) + " at position " + str(i) + " in: " + str(q))


        if i != l:
            raise SyntaxError("Expected end of query, got " + str(q[i]) + " in: " + str(q))

    def __call__(self, doc, wrap=True,debug=False):
        """Execute the query on the specified document"""

        self.doc = doc

        if debug: print("[FQL EVALUATION DEBUG] Query  - Starting on document ", doc.id,file=sys.stderr)

        if self.declarations:
            for Class, decset, defaults in self.declarations:
                if debug: print("[FQL EVALUATION DEBUG] Processing declaration for ", Class.__name__, "of",str(decset),file=sys.stderr)
                doc.declare(Class,decset,**defaults)

        if self.action:
            targetselector = doc
            if self.targets and not (isinstance(self.targets.targets[0], Selector) and self.targets.targets[0].Class in ("ALL", folia.Text)):
                targetselector = (self.targets, (self, targetselector, True, debug)) #function recipe to get the generator for the targets, (f, *args) (first is always recursive)

            focusselection, targetselection = self.action(self, targetselector, debug) #selecting focus elements further constrains the target selection (if any), return values will be lists

            if self.returntype == "nothing":
                return ""
            elif self.returntype == "focus":
                responseselection = focusselection
            elif self.returntype == "target" or self.returntype == "inner-target":
                responseselection = []
                for e in targetselection:
                    if not any(x is e for x in responseselection): #filter out duplicates
                        responseselection.append(e)
            elif self.returntype == "outer-target":
                raise NotImplementedError
            elif self.returntype == "ancestor" or self.returntype == "ancestor-focus":
                responseselection = []
                try:
                    responseselection.append( next(folia.commonancestors(folia.AbstractStructureElement,*focusselection)) )
                except StopIteration:
                    raise QueryError("No ancestors found for focus: " + str(repr(focusselection)))
            elif self.returntype == "ancestor-target":
                elems = []
                for e in targetselection:
                    if isinstance(e, SpanSet):
                        elems += e
                    else:
                        elems.append(e)
                responseselection = []
                try:
                    responseselection.append( next(folia.commonancestors(folia.AbstractStructureElement,*elems)) )
                except StopIteration:
                    raise QueryError("No ancestors found for targets: " + str(repr(targetselection)))
            else:
                raise QueryError("Invalid return type: " + self.returntype)

        else:
            responseselection = []

        if self.returntype == "nothing": #we're done
            return ""

        #convert response selection to proper format and return
        if self.format.startswith('single'):
            if len(responseselection) > 1:
                raise QueryError("A single response was expected, but multiple are returned")
            if self.format == "single-xml":
                if debug: print("[FQL EVALUATION DEBUG] Query  - Returning single-xml",file=sys.stderr)
                if not responseselection:
                    return ""
                else:
                    if isinstance(responseselection[0], SpanSet):
                            r = "<result>\n"
                            for e in responseselection[0]:
                                r += e.xmlstring(True)
                            r += "</result>\n"
                            return r
                    else:
                        return responseselection[0].xmlstring(True)
            elif self.format == "single-json":
                if debug: print("[FQL EVALUATION DEBUG] Query  - Returning single-json",file=sys.stderr)
                if not responseselection:
                    return "null"
                else:
                    return json.dumps(responseselection[0].json())
            elif self.format == "single-python":
                if debug: print("[FQL EVALUATION DEBUG] Query  - Returning single-python",file=sys.stderr)
                if not responseselection:
                    return None
                else:
                    return responseselection[0]
        else:
            if self.format == "xml":
                if debug: print("[FQL EVALUATION DEBUG] Query  - Returning xml",file=sys.stderr)
                if not responseselection:
                    if wrap:
                        return "<results></results>"
                    else:
                        return ""
                else:
                    if wrap:
                        r = "<results>\n"
                    else:
                        r = ""
                    for e in responseselection:
                        if isinstance(e, SpanSet):
                            r += "<result>\n"
                            for e2 in e:
                                r += "" + e2.xmlstring(True) + "\n"
                            r += "</result>\n"
                        else:
                            r += "<result>\n" + e.xmlstring(True) + "</result>\n"
                    if wrap:
                        r += "</results>\n"
                    return r
            elif self.format == "json":
                if debug: print("[FQL EVALUATION DEBUG] Query  - Returning json",file=sys.stderr)
                if not responseselection:
                    if wrap:
                        return "[]"
                    else:
                        return ""
                else:
                    if wrap:
                        s = "[ "
                    else:
                        s = ""
                    for e in responseselection:
                        if isinstance(e, SpanSet):
                            s += json.dumps([ e2.json() for e2 in e ] ) + ", "
                        else:
                            s += json.dumps(e.json()) + ", "
                    s = s.strip(", ")
                    if wrap:
                        s += "]"
                    return s
            else: #python and undefined formats
                if debug: print("[FQL EVALUATION DEBUG] Query  - Returning python",file=sys.stderr)
                return responseselection

        return QueryError("Invalid format: " + self.format)

    def _touch(self, *args):
        for e in args:
            if isinstance(e, folia.AbstractElement):
                e.changedbyquery = self
                self._touch(*e.data)