1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
|
#**************************************************************************
#* *
#* OCaml *
#* *
#* Gabriel Scherer, projet Parsifal, INRIA Saclay *
#* *
#* Copyright 2018 Institut National de Recherche en Informatique et *
#* en Automatique. *
#* *
#* All rights reserved. This file is distributed under the terms of *
#* the GNU Lesser General Public License version 2.1, with the *
#* special exception on linking described in the file LICENSE. *
#* *
#**************************************************************************
# The rules in this Makefile use Menhir to rebuild the OCaml compiler
# parser. They are included in the main Makefile, so should be invoked
# directly, for example 'make promote-menhir'. They must be called
# after any modification to parsing/parser.mly, for the modification
# to affect the parser linked in the produced compiler:
#
# - promote-menhir builds the parser from parser.mly and stores it in
# the boot/ directory, so that future builds of the compiler use the
# updated result. Use it to make permanent changes to the compiler
# parser.
#
# - demote-menhir undoes the effect of promote-menhir. The files in
# the boot/ directory that are affected by promote-menhir and are
# under version control are restored to their normal state (HEAD).
#
# - test-menhir builds the parser from parser.mly without storing it
# in the boot/ directory, and only checks that the generated parser
# builds correctly. Use it to quickly check if a parser.mly change
# breaks the build. If you want to test a compiler produced with
# the new parser, you must use promote-menhir instead.
# (Using this rule requires a partial compiler build as obtained
# by 'make core' or 'make world'.)
#
# - clean-menhir removes the files generated by Menhir from parsing/,
# keeping only the reference sources for the grammar.
#
# - depend-menhir updates the dependency information for the
# Menhir-generated parser, which is versioned in the OCaml repository
# like all other .depend files. It should be used when the dependencies
# (of the OCaml code in the grammar semantic actions) change.
# Menhir executable. The conditional assignment only applies when MENHIR
# has not already been given a value.
MENHIR ?= menhir

## Unused tokens

# COMMENT, DOCSTRING and EOL are only produced by special lexer modes
# whose consumers are not the parser.
# GREATERRBRACKET ">]" was added by symmetry with "[<" (which is used in
# polymorphic variants), but the grammar does not currently use it.
unused_tokens := \
  COMMENT \
  DOCSTRING \
  EOL \
  GREATERRBRACKET
## Menhir's flags.

# The basic flags influence the analysis of the grammar and the construction
# of the automaton. The complete set of flags includes extra flags that
# influence type inference and code production.
#
# Note: the last item of each list must NOT end with a backslash. A trailing
# backslash would splice whatever line follows into the variable's value.

# Flags shared by every Menhir invocation in this file.
MENHIRBASICFLAGS := \
  --lalr \
  --explain \
  --dump \
  --require-aliases \
  --strict \
  -lg 1 \
  -la 1 \
  $(addprefix --unused-token ,$(unused_tokens))

# Complete flag set used when actually generating the parser. CONVERT_PATH,
# CAMLC, OC_COMMON_COMPFLAGS and INCLUDES are defined outside this file.
MENHIRFLAGS := \
  $(MENHIRBASICFLAGS) \
  --infer \
  --ocamlc "$(call CONVERT_PATH, $(CAMLC)) $(OC_COMMON_COMPFLAGS) $(INCLUDES)" \
  --fixed-exception \
  --table \
  --strategy simplified
## promote-menhir

# Regenerate the parser from parsing/parser.mly and move the result into
# boot/menhir/. import-menhirLib is run first, through a recursive make.
.PHONY: promote-menhir
promote-menhir: parsing/parser.mly
	@ $(MAKE) import-menhirLib
	$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
# The generated parser.ml may contain lexer directives containing
# the absolute path to Menhir's standard library on the promoter's machine.
# This is benign but will generate pointless churn if another developer
# rebuilds the same grammar (from the same Menhir version).
# The loop stops at the first failure, so that a failed sed neither
# deletes the freshly generated file nor goes unnoticed.
	@ for f in $(addprefix parser.,ml mli) ; do \
	  sed \
	    's,^#\(.*\)"[^"]*/menhir/standard.mly",#\1"menhir/standard.mly",g' \
	    parsing/$$f \
	    > boot/menhir/$$f && \
	  rm parsing/$$f || exit 1 ; \
	done
# promote-menhir always runs import-menhirLib, so every update of the
# boot/ parser comes with a matching update of the imported menhirLib.
# Without this pairing it would be easy to keep a menhirLib that is
# incompatible with the generated parser, which would fail at
# compile-time.

# Create the destination directory on demand.
boot/menhir:
	@$(MKDIR) $@

# Copy menhirLib.ml and menhirLib.mli from the directory reported by
# 'menhir --suggest-menhirLib' into boot/menhir (the order-only
# prerequisite, hence $|).
.PHONY: import-menhirLib
import-menhirLib: | boot/menhir
	@cp $(addprefix $(shell $(MENHIR) --suggest-menhirLib)/,menhirLib.ml menhirLib.mli) $|
## demote-menhir

# Files under boot/menhir/ that demote-menhir restores from HEAD.
DEMOTE := \
  menhirLib.ml menhirLib.mli \
  parser.ml parser.mli

# Check the files listed in DEMOTE out of HEAD again.
.PHONY: demote-menhir
demote-menhir:
	git checkout HEAD -- $(DEMOTE:%=boot/menhir/%)
## test-menhir
# This rule assumes that the `parsing/` sources and its dependencies
# have already been compiled; 'make core' suffices to be in that
# state. We don't make 'core' an explicit dependency, as building
# 'test-menhir' repeatedly would rebuild the compiler each time
# (parser.ml has changed), without actually taking the changes from
# parser.mly into account ('core' uses the parser from boot/).
# The test-menhir target does not read or write the boot directory,
# it directly builds the parser in parsing/. In particular, it must
# duplicate the MenhirLib->CamlinternalMenhirLib renaming usually
# performed by the parsing/parser.ml import rule in the main
# Makefile.
# Generate the parser in parsing/, rename MenhirLib to
# CamlinternalMenhirLib in the generated files, then compile the result.
.PHONY: test-menhir
test-menhir: parsing/parser.mly
	$(MENHIR) $(MENHIRFLAGS) parsing/parser.mly
# Rewrite through a temporary file and abort on the first failure, so that
# a failed rewrite is reported instead of being masked by a later iteration.
	for f in $(addprefix parsing/parser.,ml mli) ; do \
	  sed "s/MenhirLib/CamlinternalMenhirLib/g" $$f > $$f.tmp && \
	  mv $$f.tmp $$f || exit 1 ; \
	done
	$(MAKE) parsing/parser.cmo
## clean-menhir

# Remove the files listed below from parsing/. The last argument of rm
# must not end with a backslash, otherwise the rule that follows would
# be swallowed into the command.
.PHONY: partialclean-menhir clean-menhir
partialclean-menhir::
	rm -f \
	  $(addprefix parsing/parser.,ml mli) \
	  $(addprefix parsing/camlinternalMenhirLib.,ml mli) \
	  $(addprefix parsing/parser.,automaton conflicts) \
	  $(addprefix parsing/parser.,auto.messages)

clean-menhir: partialclean-menhir
## depend-menhir
# The following rule depends on the OCAMLDEP_CMD variable defined in
# Makefile.common, so it can only be invoked from the main (root) Makefile
# Regenerate .depend.menhir from parsing/parser.mly.
# The output goes to a temporary file first and is only moved into place
# on success: .depend.menhir is included below, so a failed Menhir run
# must not leave it truncated.
.PHONY: depend-menhir
depend-menhir:
	$(MENHIR) --depend --ocamldep "$(OCAMLDEP_CMD)" \
	  parsing/parser.mly > .depend.menhir.tmp \
	  || { rm -f .depend.menhir.tmp ; exit 1 ; }
	mv .depend.menhir.tmp .depend.menhir

include .depend.menhir
## interpret-menhir
# This rule runs Menhir in interactive mode.
# The user can enter sentences, such as:
# implementation: TYPE LIDENT EQUAL LIDENT EOF
# and see how Menhir interprets them.
# Run Menhir on the grammar with --interpret, --interpret-show-cst and
# --trace. Declared phony like the other command targets in this file.
.PHONY: interpret-menhir
interpret-menhir:
	@ echo "Please wait, I am building the LALR automaton..."
	@ $(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	  --interpret \
	  --interpret-show-cst \
	  --trace
## list-parse-errors
# This rule runs Menhir's reachability analysis, which produces a list of all
# states where a syntax error can be detected (and a corresponding list of
# erroneous sentences). This data is stored in parsing/parser.auto.messages.
# This analysis requires about 3 minutes and 6GB of RAM.
# The analysis is performed on a copy of the grammar where every block
# of text comprised between the markers BEGIN AVOID and END AVOID has
# been removed. This allows us to avoid certain syntactic forms in the
# sentences that we produce. See parser.mly for more explanations.
# Because of this, we must run Menhir twice: once on a modified copy of the
# grammar to produce the sentences, and once on the original grammar to update
# the auto-comments (which would otherwise be incorrect).
# Step 1: run --list-errors on a copy of the grammar from which the
# BEGIN AVOID ... END AVOID blocks have been deleted, writing the result to
# parsing/parser.auto.messages. The temporary directory holding the copy
# is removed whether or not the commands succeed, and the recipe line
# exits with the status of the sed/Menhir chain.
# Step 2: run --update-errors on the original grammar, using a .bak copy
# of the messages file as input, then delete the .bak copy.
.PHONY: list-parse-errors
list-parse-errors:
	@ tmp=`mktemp -d /tmp/parser.XXXX` && \
	  { sed -e '/BEGIN AVOID/,/END AVOID/d' \
	      parsing/parser.mly > $$tmp/parser.mly && \
	    $(MENHIR) $(MENHIRBASICFLAGS) $$tmp/parser.mly \
	      --list-errors -la 2 \
	      > parsing/parser.auto.messages ; \
	    status=$$? ; \
	    rm -rf $$tmp ; \
	    exit $$status ; }
	@ cp parsing/parser.auto.messages parsing/parser.auto.messages.bak
	@ $(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	  --update-errors parsing/parser.auto.messages.bak \
	  > parsing/parser.auto.messages
	@ rm -f parsing/parser.auto.messages.bak
## generate-parse-errors
# This rule assumes that [make list-parse-errors] has been run first.
# This rule turns the error sentences stored in parsing/parser.auto.messages
# into one .ml file.
# (It would in principle be preferable to create one file per sentence, but
# that would be much slower. We abuse the ability of the OCaml toplevel to
# resynchronize after an error, and put all sentences into a single file.)
# This requires Menhir 20201214 or newer.
# Test directory, relative to testsuite/, that receives the generated test.
GPE_DIR := tests/generated-parse-errors
# Name of the generated .ml file that holds all the error sentences.
GPE_ML := errors.ml
# Reference file inside $(GPE_DIR); it is touched by generate-parse-errors
# and scanned by classify-parse-errors.
GPE_REF := errors.compilers.reference
# Start symbols for which generate-parse-errors prints a sentence count.
GPE_START := implementation use_file toplevel_phrase
# First recipe line: pipe the output of 'menhir --echo-errors-concrete'
# into a loop that reads the sentences in pairs (symbolic form, then
# concrete form) and writes testsuite/$(GPE_DIR)/$(GPE_ML), then print
# the number of sentences per start symbol.
# Second recipe line: ask whether the expected output should be
# regenerated; on 'y' or 'Y', promote the test and run
# classify-parse-errors.
#
# The recipe sticks to POSIX sh constructs: printf is used for the test
# header because the handling of "\n" by echo differs between shells, and
# the confirmation prompt uses a plain line-based 'read' (answer with
# y + Enter). Sub-makes are started through $(MAKE).
.PHONY: generate-parse-errors
generate-parse-errors:
	@ \
	mkdir -p testsuite/$(GPE_DIR) && \
	$(MENHIR) $(MENHIRBASICFLAGS) parsing/parser.mly \
	  --echo-errors-concrete parsing/parser.auto.messages 2>/dev/null | \
	(cd testsuite/$(GPE_DIR) && touch $(GPE_REF) && ( \
	  printf '(* TEST\n * toplevel\n*)\n' && \
	  while IFS= read -r symbolic ; do \
	    IFS= read -r concrete ; \
	    concrete=$${concrete#### Concrete syntax: } ; \
	    : '$$symbolic is the sentence in symbolic form' ; \
	    : '$$concrete is the sentence in concrete form' ; \
	    case "$$symbolic" in \
	    *": SEMISEMI"*) \
	      : 'If the sentence begins with SEMISEMI, ignore it. Our hack' ; \
	      : 'does not support these sentences, and there are only 6 of' ; \
	      : 'them anyway.' ; \
	      continue ;; \
	    *) \
	      case "$$symbolic" in \
	      *"EOF") \
	        : 'If the sentence ends with EOF, replace it on the fly' ; \
	        : 'with some other token (say, WHEN).' ; \
	        echo "#0 \"$${symbolic%%EOF}WHEN\"" ; \
	        echo "$$concrete when" ; \
	        echo ";;" ;; \
	      *) \
	        : 'Emit a # directive containing the symbolic sentence.' ; \
	        echo "#0 \"$$symbolic\"" ; \
	        : 'Emit the concrete sentence.' ; \
	        echo "$$concrete" ; \
	        : 'Emit a double semicolon to allow resynchronization.' ; \
	        echo ";;" ;; \
	      esac \
	    esac \
	  done) \
	  > $(GPE_ML) && \
	  : 'Count how many sentences we have emitted, per start symbol.' ; \
	  for symbol in $(GPE_START) ; do \
	    count=$$(grep -h -e "$$symbol:" $(GPE_ML) | wc -l) && \
	    echo "$$count sentences whose start symbol is $$symbol." ; \
	  done \
	)
	@ \
	printf "Re-generate the expected output for this test? " && \
	read -r REPLY && \
	case "$$REPLY" in \
	[Yy]) \
	  $(MAKE) -C testsuite promote DIR=$(GPE_DIR) >/dev/null 2>&1 && \
	  echo "Done." ; \
	  $(MAKE) classify-parse-errors ;; \
	*) \
	  echo "OK, stop." ;; \
	esac
# Summarize testsuite/$(GPE_DIR)/$(GPE_REF) by counting the lines that
# match each of four patterns, and print one line per count.
.PHONY: classify-parse-errors
classify-parse-errors:
	@ cd testsuite/$(GPE_DIR) && \
	  echo "The parser's output can be described as follows:" && \
	  total=$$(grep "^Error: Syntax error" $(GPE_REF) | wc -l) && \
	  echo "$${total} syntax errors reported." && \
	  bare=$$(grep "^Error: Syntax error$$" $(GPE_REF) | wc -l) && \
	  echo "$${bare} errors without an explanation." && \
	  hinted=$$(grep "^Error: Syntax" $(GPE_REF) | grep expected | wc -l) && \
	  echo "$${hinted} errors with an indication of what was expected." && \
	  unmatched=$$(grep "might be unmatched" $(GPE_REF) | wc -l) && \
	  echo "$${unmatched} errors with an indication of an unmatched delimiter."
|