File: FormatGen.h

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (597 lines) | stat: -rw-r--r-- 19,799 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
//===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains common classes for building custom assembly format parsers
// and generators.
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_
#define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_

#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <memory>
#include <string>
#include <utility>
#include <vector>

// Forward declaration: this header only refers to `llvm::SourceMgr` by
// reference, so the full class definition is not needed here.
namespace llvm {
class SourceMgr;
} // namespace llvm

namespace mlir {
namespace tblgen {

//===----------------------------------------------------------------------===//
// FormatToken
//===----------------------------------------------------------------------===//

/// This class represents a specific token in the input format. A token pairs a
/// kind with the text (spelling) that makes it up.
class FormatToken {
public:
  /// Basic token kinds.
  enum Kind {
    // Markers.
    eof,
    error,

    // Tokens with no info.
    l_paren,
    r_paren,
    caret,
    colon,
    comma,
    equal,
    less,
    greater,
    question,
    star,
    pipe,

    // Keywords. `keyword_start` and `keyword_end` are not keywords themselves;
    // they bracket the keyword kinds so that `isKeyword` can use a range
    // check. New keyword kinds must be added between the two.
    keyword_start,
    kw_attr_dict,
    kw_attr_dict_w_keyword,
    kw_prop_dict,
    kw_custom,
    kw_functional_type,
    kw_oilist,
    kw_operands,
    kw_params,
    kw_qualified,
    kw_ref,
    kw_regions,
    kw_results,
    kw_struct,
    kw_successors,
    kw_type,
    keyword_end,

    // String valued tokens.
    identifier,
    literal,
    variable,
    string,
  };

  /// Create a token of the given kind with the given spelling. The spelling is
  /// stored by reference and is not copied.
  FormatToken(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {}

  /// Return the bytes that make up this token.
  StringRef getSpelling() const { return spelling; }

  /// Return the kind of this token.
  Kind getKind() const { return kind; }

  /// Return a location for this token.
  SMLoc getLoc() const;

  /// Returns true if the token is of the given kind. This only inspects the
  /// token, so it can be called on const tokens as well.
  bool is(Kind kind) const { return getKind() == kind; }

  /// Return if this token is a keyword, i.e. its kind lies strictly between
  /// the `keyword_start` and `keyword_end` markers.
  bool isKeyword() const {
    return getKind() > Kind::keyword_start && getKind() < Kind::keyword_end;
  }

private:
  /// Discriminator that indicates the kind of token this is.
  Kind kind;

  /// A reference to the entire token contents; this is always a pointer into
  /// a memory buffer owned by the source manager.
  StringRef spelling;
};

//===----------------------------------------------------------------------===//
// FormatLexer
//===----------------------------------------------------------------------===//

/// A simple lexer that turns an operation assembly format string into a
/// sequence of `FormatToken`s.
class FormatLexer {
public:
  /// Create a lexer from a source manager and the location of the format
  /// string.
  FormatLexer(llvm::SourceMgr &mgr, SMLoc loc);

  /// Lex the next token and return it.
  FormatToken lexToken();

  /// Emit the error `msg` at `loc`. The location can be given either as an
  /// `SMLoc` or as a raw character pointer.
  /// NOTE(review): the returned token is presumably an `error` token; confirm
  /// against the out-of-line definition.
  FormatToken emitError(SMLoc loc, const Twine &msg);
  FormatToken emitError(const char *loc, const Twine &msg);

  /// Emit the error `msg` at `loc` together with the additional note `note`.
  FormatToken emitErrorAndNote(SMLoc loc, const Twine &msg, const Twine &note);

private:
  /// Return the next character in the stream.
  int getNextChar();

  /// Lex one of the string-valued tokens (identifier, literal, variable, or
  /// string) whose first character is at `tokStart`.
  FormatToken lexIdentifier(const char *tokStart);
  FormatToken lexLiteral(const char *tokStart);
  FormatToken lexVariable(const char *tokStart);
  FormatToken lexString(const char *tokStart);

  /// Build a token of the given kind whose spelling runs from `tokStart` up to
  /// (but not including) the current lexing position.
  FormatToken formToken(FormatToken::Kind kind, const char *tokStart) {
    StringRef tokenText(tokStart, curPtr - tokStart);
    return FormatToken(kind, tokenText);
  }

  /// The source manager containing the format string.
  llvm::SourceMgr &mgr;
  /// Location of the format string.
  SMLoc loc;
  /// Buffer containing the format string.
  StringRef curBuffer;
  /// Current pointer in the buffer.
  const char *curPtr;
};

//===----------------------------------------------------------------------===//
// FormatElement
//===----------------------------------------------------------------------===//

/// This class represents a single format element.
///
/// If you squint and take a close look, you can see the outline of a `Format`
/// dialect.
class FormatElement {
public:
  /// Virtual so that elements can be destroyed through a `FormatElement`
  /// pointer; defined out of line.
  virtual ~FormatElement();

  // The top-level kinds of format elements.
  enum Kind { Literal, String, Variable, Whitespace, Directive, Optional };

  /// Support LLVM-style RTTI. Every format element is a `FormatElement`, so
  /// this always succeeds.
  static bool classof(const FormatElement *el) { return true; }

  /// Get the element kind.
  Kind getKind() const { return kind; }

protected:
  /// Create a format element with the given kind. Only subclasses may
  /// construct elements. The constructor is explicit so that a bare `Kind` is
  /// never implicitly converted into an element.
  explicit FormatElement(Kind kind) : kind(kind) {}

private:
  /// The kind of the element.
  Kind kind;
};

/// Helper base for concrete format elements of one top-level kind. It passes
/// `ElementKind` to `FormatElement` and supplies the matching LLVM-style RTTI
/// hook.
template <FormatElement::Kind ElementKind>
class FormatElementBase : public FormatElement {
public:
  /// Support LLVM-style RTTI: an element belongs to this class exactly when
  /// its kind is `ElementKind`.
  static bool classof(const FormatElement *el) {
    return el->getKind() == ElementKind;
  }

protected:
  /// Create a format element whose kind is `ElementKind`.
  FormatElementBase() : FormatElement(ElementKind) {}
};

/// This class represents a literal element. A literal is either one of the
/// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g.
/// `literal`).
class LiteralElement : public FormatElementBase<FormatElement::Literal> {
public:
  /// Create a literal element with the given spelling.
  explicit LiteralElement(StringRef spelling) : spelling(spelling) {}

  /// Get the spelling of the literal.
  StringRef getSpelling() const { return spelling; }

private:
  /// The spelling of the literal, i.e. the string contained within the
  /// backticks. This is held as a `StringRef`, so the element does not own the
  /// characters.
  StringRef spelling;
};

/// This class represents a raw string that can contain arbitrary C++ code.
/// Unlike the other leaf elements, it owns a copy of its contents.
class StringElement : public FormatElementBase<FormatElement::String> {
public:
  /// Create a string element with the given contents. The string is taken by
  /// value and moved into the element.
  explicit StringElement(std::string value) : value(std::move(value)) {}

  /// Get the value of the string element. The returned reference points into
  /// this element's own storage.
  StringRef getValue() const { return value; }

private:
  /// The contents of the string.
  std::string value;
};

/// This class represents a variable element. A variable refers to some part of
/// the object being parsed, e.g. an attribute or operand on an operation or a
/// parameter on an attribute.
class VariableElement : public FormatElementBase<FormatElement::Variable> {
public:
  /// These are the kinds of variables.
  enum Kind {
    Attribute,
    Operand,
    Region,
    Result,
    Successor,
    Parameter,
    Property
  };

  /// Get the kind of variable. Note that this hides `FormatElement::getKind`:
  /// called on a `VariableElement` it returns the variable kind, not the
  /// top-level element kind.
  Kind getKind() const { return kind; }

protected:
  /// Create a variable with a kind. The constructor is explicit so that a bare
  /// `Kind` is never implicitly converted into a variable element.
  explicit VariableElement(Kind kind) : kind(kind) {}

private:
  /// The kind of variable.
  Kind kind;
};

/// Helper base for concrete variable elements of one variable kind. It passes
/// `VariableKind` to `VariableElement` and supplies the matching LLVM-style
/// RTTI hook.
template <VariableElement::Kind VariableKind>
class VariableElementBase : public VariableElement {
public:
  /// An element is of this class if it is a variable and has the same variable
  /// kind.
  static bool classof(const FormatElement *el) {
    const auto *variable = dyn_cast<VariableElement>(el);
    return variable && variable->getKind() == VariableKind;
  }

protected:
  /// Create a variable element whose variable kind is `VariableKind`.
  VariableElementBase() : VariableElement(VariableKind) {}
};

/// This class represents a whitespace element, e.g. a newline or space. It is a
/// literal that is printed but never parsed. When the value is empty, i.e. ``,
/// a space is elided where one would have been printed automatically.
class WhitespaceElement : public FormatElementBase<FormatElement::Whitespace> {
public:
  /// Create a whitespace element with the given value, which may be empty.
  explicit WhitespaceElement(StringRef value) : value(value) {}

  /// Get the whitespace value.
  StringRef getValue() const { return value; }

private:
  /// The value of the whitespace element. Can be empty. This is held as a
  /// `StringRef`, so the element does not own the characters.
  StringRef value;
};

/// This class represents a directive element. The specific directive is
/// identified by its `Kind`.
class DirectiveElement : public FormatElementBase<FormatElement::Directive> {
public:
  /// These are the kinds of directives.
  enum Kind {
    AttrDict,
    PropDict,
    Custom,
    FunctionalType,
    OIList,
    Operands,
    Ref,
    Regions,
    Results,
    Successors,
    Type,
    Params,
    Struct
  };

  /// Get the directive kind. Note that this hides `FormatElement::getKind`:
  /// called on a `DirectiveElement` it returns the directive kind, not the
  /// top-level element kind.
  Kind getKind() const { return kind; }

protected:
  /// Create a directive element with a kind. The constructor is explicit so
  /// that a bare `Kind` is never implicitly converted into a directive.
  explicit DirectiveElement(Kind kind) : kind(kind) {}

private:
  /// The directive kind.
  Kind kind;
};

/// Helper base for concrete directive elements of one directive kind. It
/// passes `DirectiveKind` to `DirectiveElement` and supplies the matching
/// LLVM-style RTTI hook.
template <DirectiveElement::Kind DirectiveKind>
class DirectiveElementBase : public DirectiveElement {
public:
  /// Create a directive element whose directive kind is `DirectiveKind`.
  DirectiveElementBase() : DirectiveElement(DirectiveKind) {}

  /// A format element is of this class if it is a directive element and has the
  /// same directive kind.
  static bool classof(const FormatElement *el) {
    const auto *directive = dyn_cast<DirectiveElement>(el);
    return directive && directive->getKind() == DirectiveKind;
  }
};

/// This class represents a custom format directive that is implemented by the
/// user in C++. The directive accepts a list of arguments that is passed to the
/// C++ function.
class CustomDirective : public DirectiveElementBase<DirectiveElement::Custom> {
public:
  /// Create a custom directive with a name and list of arguments. The argument
  /// list is moved into the directive; the pointed-to elements themselves are
  /// not owned by it.
  CustomDirective(StringRef name, std::vector<FormatElement *> &&arguments)
      : name(name), arguments(std::move(arguments)) {}

  /// Get the custom directive name.
  StringRef getName() const { return name; }

  /// Get the arguments to the custom directive. The returned view refers to
  /// this directive's own argument list.
  ArrayRef<FormatElement *> getArguments() const { return arguments; }

private:
  /// The name of the custom directive. The name is used to call two C++
  /// methods: `parse{name}` and `print{name}` with the given arguments.
  StringRef name;
  /// The arguments with which to call the custom functions. These are either
  /// variables (for which the functions are responsible for populating) or
  /// references to variables.
  std::vector<FormatElement *> arguments;
};

/// This class represents a reference directive. This directive can be used to
/// reference but not bind a previously bound variable or format object. Its
/// current only use is to pass variables as arguments to the custom directive.
class RefDirective : public DirectiveElementBase<DirectiveElement::Ref> {
public:
  /// Create a reference directive with the single referenced child. The
  /// constructor is explicit so that a `FormatElement *` is never silently
  /// converted into a `RefDirective`.
  explicit RefDirective(FormatElement *arg) : arg(arg) {}

  /// Get the reference argument.
  FormatElement *getArg() const { return arg; }

private:
  /// The referenced argument. This is a plain pointer; the directive does not
  /// own the referenced element.
  FormatElement *arg;
};

/// An optional group: one list of elements that is emitted when the group's
/// "anchor" element is present (the `then` elements) and a second list that is
/// emitted when it is not (the `else` elements).
class OptionalElement : public FormatElementBase<FormatElement::Optional> {
public:
  /// Create an optional group from its `then` and `else` children, the index
  /// of the first parsed element in each list, the anchor, and whether the
  /// group is inverted. Both lists are moved into the group.
  OptionalElement(std::vector<FormatElement *> &&thenElements,
                  std::vector<FormatElement *> &&elseElements,
                  unsigned thenParseStart, unsigned elseParseStart,
                  FormatElement *anchor, bool inverted)
      : thenElements(std::move(thenElements)),
        elseElements(std::move(elseElements)), thenParseStart(thenParseStart),
        elseParseStart(elseParseStart), anchor(anchor), inverted(inverted) {}

  /// Return the `then` elements of the optional group. When `parseable` is
  /// true, the leading `thenParseStart` whitespace elements are skipped.
  ArrayRef<FormatElement *> getThenElements(bool parseable = false) const {
    ArrayRef<FormatElement *> elements(thenElements);
    if (!parseable)
      return elements;
    return elements.drop_front(thenParseStart);
  }

  /// Return the `else` elements of the optional group. When `parseable` is
  /// true, the leading `elseParseStart` whitespace elements are skipped.
  ArrayRef<FormatElement *> getElseElements(bool parseable = false) const {
    ArrayRef<FormatElement *> elements(elseElements);
    if (!parseable)
      return elements;
    return elements.drop_front(elseParseStart);
  }

  /// Return the anchor of the optional group.
  FormatElement *getAnchor() const { return anchor; }

  /// Return true if the optional group is inverted.
  bool isInverted() const { return inverted; }

private:
  /// The child elements emitted when the anchor is present.
  std::vector<FormatElement *> thenElements;
  /// The child elements emitted when the anchor is not present.
  std::vector<FormatElement *> elseElements;
  /// The index of the first element that is parsed in `thenElements`. That is,
  /// the first non-whitespace element.
  unsigned thenParseStart;
  /// The index of the first element that is parsed in `elseElements`. That is,
  /// the first non-whitespace element.
  unsigned elseParseStart;
  /// The anchor element of the optional group.
  FormatElement *anchor;
  /// Whether the optional group condition is inverted and the anchor element is
  /// in the else group.
  bool inverted;
};

//===----------------------------------------------------------------------===//
// FormatParser
//===----------------------------------------------------------------------===//

/// Base class for a parser that implements an assembly format. This class
/// defines a common assembly format syntax and the creation of format elements.
/// Subclasses will need to implement parsing for the format elements they
/// support.
///
/// Elements made through `create` are owned by the parser, so raw
/// `FormatElement *` pointers to them must not be used after the parser has
/// been destroyed.
class FormatParser {
public:
  /// Vtable anchor.
  virtual ~FormatParser();

  /// Parse the assembly format.
  FailureOr<std::vector<FormatElement *>> parse();

protected:
  /// The current context of the parser when parsing an element.
  enum Context {
    /// The element is being parsed in a "top-level" context, i.e. at the top of
    /// the format or in an optional group.
    TopLevelContext,
    /// The element is being parsed as a custom directive child.
    CustomDirectiveContext,
    /// The element is being parsed as a type directive child.
    TypeDirectiveContext,
    /// The element is being parsed as a reference directive child.
    RefDirectiveContext,
    /// The element is being parsed as a struct directive child.
    StructDirectiveContext
  };

  /// Create a format parser with the given source manager and a location. The
  /// first token is lexed immediately so that `curToken` is always valid; this
  /// relies on `lexer` being declared before `curToken`.
  explicit FormatParser(llvm::SourceMgr &mgr, llvm::SMLoc loc)
      : lexer(mgr, loc), curToken(lexer.lexToken()) {}

  /// Allocate and construct a format element, forwarding `args` to its
  /// constructor. The element is owned by the parser; the returned pointer is
  /// a non-owning handle to it.
  template <typename FormatElementT, typename... Args>
  FormatElementT *create(Args &&...args) {
    auto mem = std::make_unique<FormatElementT>(std::forward<Args>(args)...);
    // Grab the raw pointer before ownership is moved into the element list.
    FormatElementT *ptr = mem.get();
    allocator.push_back(std::move(mem));
    return ptr;
  }

  //===--------------------------------------------------------------------===//
  // Element Parsing

  /// Parse a single element of any kind.
  FailureOr<FormatElement *> parseElement(Context ctx);
  /// Parse a literal.
  FailureOr<FormatElement *> parseLiteral(Context ctx);
  /// Parse a string.
  FailureOr<FormatElement *> parseString(Context ctx);
  /// Parse a variable.
  FailureOr<FormatElement *> parseVariable(Context ctx);
  /// Parse a directive.
  FailureOr<FormatElement *> parseDirective(Context ctx);
  /// Parse an optional group.
  FailureOr<FormatElement *> parseOptionalGroup(Context ctx);

  /// Parse a custom directive.
  FailureOr<FormatElement *> parseCustomDirective(llvm::SMLoc loc, Context ctx);

  /// Parse a format-specific variable kind.
  virtual FailureOr<FormatElement *>
  parseVariableImpl(llvm::SMLoc loc, StringRef name, Context ctx) = 0;
  /// Parse a format-specific directive kind.
  virtual FailureOr<FormatElement *>
  parseDirectiveImpl(llvm::SMLoc loc, FormatToken::Kind kind, Context ctx) = 0;

  //===--------------------------------------------------------------------===//
  // Format Verification

  /// Verify that the format is well-formed.
  virtual LogicalResult verify(llvm::SMLoc loc,
                               ArrayRef<FormatElement *> elements) = 0;
  /// Verify the arguments to a custom directive.
  virtual LogicalResult
  verifyCustomDirectiveArguments(llvm::SMLoc loc,
                                 ArrayRef<FormatElement *> arguments) = 0;
  /// Verify the elements of an optional group.
  virtual LogicalResult
  verifyOptionalGroupElements(llvm::SMLoc loc,
                              ArrayRef<FormatElement *> elements,
                              FormatElement *anchor) = 0;

  //===--------------------------------------------------------------------===//
  // Lexer Utilities

  /// Emit an error at the given location. Always returns failure, so callers
  /// can write `return emitError(...)`.
  LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) {
    lexer.emitError(loc, msg);
    return failure();
  }

  /// Emit an error and a note at the given location. Always returns failure.
  LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
                                 const Twine &note) {
    lexer.emitErrorAndNote(loc, msg, note);
    return failure();
  }

  /// Parse a single token of the expected kind. If the current token has a
  /// different kind, `msg` is reported at the current token and failure is
  /// returned without consuming it; otherwise the token is consumed and
  /// returned.
  FailureOr<FormatToken> parseToken(FormatToken::Kind kind, const Twine &msg) {
    if (!curToken.is(kind))
      return emitError(curToken.getLoc(), msg);
    FormatToken tok = curToken;
    consumeToken();
    return tok;
  }

  /// Advance the lexer to the next token. Must not be called when the current
  /// token is `eof` or `error`.
  void consumeToken() {
    assert(!curToken.is(FormatToken::eof) && !curToken.is(FormatToken::error) &&
           "shouldn't advance past EOF or errors");
    curToken = lexer.lexToken();
  }

  /// Get the current token without consuming it.
  FormatToken peekToken() const { return curToken; }

private:
  /// The format parser retains ownership of the format elements. Each element
  /// is held through a `std::unique_ptr`, so it is destroyed together with the
  /// parser.
  // FIXME: FormatElement with `std::vector` need to be converted to use
  // trailing objects.
  // llvm::BumpPtrAllocator allocator;
  std::vector<std::unique_ptr<FormatElement>> allocator;
  /// The format lexer to use.
  FormatLexer lexer;
  /// The current token in the lexer.
  FormatToken curToken;
};

//===----------------------------------------------------------------------===//
// Utility Functions
//===----------------------------------------------------------------------===//

/// Whether a space needs to be emitted before a literal. E.g., two keywords
/// back-to-back require a space separator, but a keyword followed by '<' does
/// not require a space.
bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation);

/// Returns true if the given string can be formatted as a keyword.
/// NOTE(review): `emitError` is optional (defaults to null); presumably it is
/// invoked with the reason for the failure, as documented for
/// `isValidLiteral` -- confirm against the definition.
bool canFormatStringAsKeyword(StringRef value,
                              function_ref<void(Twine)> emitError = nullptr);

/// Returns true if the given string is a valid format literal element.
/// If `emitError` is provided, it is invoked with the reason for the failure.
bool isValidLiteral(StringRef value,
                    function_ref<void(Twine)> emitError = nullptr);

/// Whether a failure in parsing the assembly format should be a fatal error.
/// This is a command-line option that is only declared here; it is defined in
/// another translation unit.
extern llvm::cl::opt<bool> formatErrorIsFatal;

} // namespace tblgen
} // namespace mlir

#endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_