File: ClangCommentHTMLNamedCharacterReferenceEmitter.cpp

package info (click to toggle)
flang 20181226-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 181,072 kB
  • sloc: cpp: 1,182,685; ansic: 598,652; objc: 103,775; f90: 57,054; python: 15,041; fortran: 13,601; lisp: 10,416; perl: 2,460; asm: 2,148; sh: 1,544; awk: 995; cs: 565; xml: 403; lex: 295; makefile: 225; pascal: 130
file content (85 lines) | stat: -rw-r--r-- 2,855 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits an fficient function to translate HTML named
// character references to UTF-8 sequences.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <vector>

using namespace llvm;

/// Convert a code point to the corresponding UTF-8 sequence represented
/// as a C string literal.
///
/// \returns true on success.
static bool translateCodePointToUTF8(unsigned CodePoint,
                                     SmallVectorImpl<char> &CLiteral) {
  char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
  char *TranslatedPtr = Translated;
  if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
    return false;

  StringRef UTF8(Translated, TranslatedPtr - Translated);

  raw_svector_ostream OS(CLiteral);
  OS << "\"";
  for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
    OS << "\\x";
    OS.write_hex(static_cast<unsigned char>(UTF8[i]));
  }
  OS << "\"";

  return true;
}

namespace clang {
void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
                                                  raw_ostream &OS) {
  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
  std::vector<StringMatcher::StringPair> NameToUTF8;
  SmallString<32> CLiteral;
  for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
       I != E; ++I) {
    Record &Tag = **I;
    std::string Spelling = Tag.getValueAsString("Spelling");
    uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
    CLiteral.clear();
    CLiteral.append("return ");
    if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
      SrcMgr.PrintMessage(Tag.getLoc().front(),
                          SourceMgr::DK_Error,
                          Twine("invalid code point"));
      continue;
    }
    CLiteral.append(";");

    StringMatcher::StringPair Match(Spelling, CLiteral.str());
    NameToUTF8.push_back(Match);
  }

  emitSourceFileHeader("HTML named character reference to UTF-8 "
                       "translation", OS);

  OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
        "                                             StringRef Name) {\n";
  StringMatcher("Name", NameToUTF8, OS).Emit();
  OS << "  return StringRef();\n"
     << "}\n\n";
}

} // end namespace clang