File: Comment.cc

package info (click to toggle)
rumur 2020.12.20-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 3,292 kB
  • sloc: cpp: 17,090; ansic: 2,537; objc: 1,542; python: 1,120; sh: 538; yacc: 536; lex: 229; lisp: 15; makefile: 5
file content (162 lines) | stat: -rw-r--r-- 3,668 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#include <cstddef>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <cstdio>
#include <iostream>
#include <rumur/Comment.h>
#include <sstream>
#include <string>
#include <vector>

namespace rumur {

namespace {

/// a file interface that supports lookahead
///
/// Wraps an input stream and keeps a buffer of characters that have been
/// pulled from the stream but not yet consumed. A `position` is advanced every
/// time a character is consumed through `getchar()`.
class File {

 private:
  /// position tracker, advanced by `getchar()` as characters are consumed
  position pos;

  /// underlying stream that characters are pulled from
  std::istream &in;

  /// characters already read from `in` but not yet consumed, oldest first
  std::string buffered;

 public:
  /// wrap the given stream; the stream must outlive this object as only a
  /// reference to it is stored
  ///
  /// NOTE(review): `position(nullptr, 1, 1)` presumably means "no filename,
  /// line 1, column 1" -- confirm against the declaration of `position`.
  explicit File(std::istream &in_): pos(nullptr, 1, 1), in(in_) { }

  /// read a new character from the file
  ///
  /// Precondition: at least one character is buffered, i.e. the caller has
  /// just established `!eof()` or successfully peeked. The consumed character
  /// is removed from the buffer and the tracked position is advanced by
  /// `lines()` for a newline or `columns()` for anything else.
  char getchar() {

    assert(!buffered.empty());

    char c = buffered[0];
    // drop the consumed character in place rather than building a new string
    buffered.erase(0, 1);

    if (c == '\n') {
      pos.lines();
    } else {
      pos.columns();
    }

    return c;
  }

  /// read the next `count` characters
  ///
  /// Returns fewer than `count` characters if the end of the input is reached
  /// first.
  std::string read(size_t count) {
    std::string result;
    for (size_t i = 0; !eof() && i < count; ++i)
      result += getchar();
    return result;
  }

  /// lookahead at the next `count` characters while retaining them
  ///
  /// Returns fewer than `count` characters if the stream cannot supply that
  /// many. Nothing is consumed; the position is unchanged.
  std::string peek(size_t count = 1) {

    // buffer enough to support our lookahead
    while (buffered.size() < count && in) {
      char c;
      if (in.read(&c, sizeof(c)))
        buffered += c;
    }

    return buffered.substr(0, count);
  }

  /// are the upcoming characters the given string?
  bool next_is(const std::string &expectation) {
    return peek(expectation.size()) == expectation;
  }

  /// have we reached the end of the file?
  ///
  /// Also returns true when the stream has failed for a reason other than
  /// end-of-file (e.g. it was never successfully opened), because no further
  /// characters can be obtained in that case either.
  bool eof() {

    // do we have known remaining characters?
    if (!buffered.empty())
      return false;

    // ensure buffer is populated or the stream has stopped yielding characters
    (void)peek();

    // peek() only gives up when the stream is no longer good, so an empty
    // buffer here means there is nothing left to read; do not additionally
    // require eofbit, or a failed stream would never be reported as finished
    return buffered.empty();
  }

  /// the current value of the position tracker
  position get_position() const {
    return pos;
  }
};

}

std::vector<Comment> parse_comments(std::istream &input) {

  std::vector<Comment> result;

  for (File in(input); !in.eof();) {

    // string?
    {
      bool is_quote = in.next_is("\"");
      bool is_smart_quote = in.next_is("“");
      if (is_quote || is_smart_quote) {
        // discard the quote starter
        (void)in.read(strlen(is_quote ? "\"" : "“"));
        // swallow the quote itself
        while (!in.eof()) {
          if (in.next_is("\\\"")) {
            (void)in.read(strlen("\\\""));
          } else if (in.next_is("\\”")) {
            (void)in.read(strlen("\\”"));
          } else if (in.next_is("\"")) {
            (void)in.read(strlen("\""));
            break;
          } else if (in.next_is("”")) {
            (void)in.read(strlen("”"));
            break;
          } else {
            (void)in.getchar();
          }
        }
        continue;
      }
    }

    // single line comment?
    if (in.next_is("--")) {
      position begin = in.get_position();
      // discard the comment starter
      (void)in.read(strlen("--"));
      // consume the comment body
      std::ostringstream content;
      while (!in.eof() && !in.next_is("\n"))
        content << in.getchar();
      result.push_back(Comment{content.str(), false, location(begin, in.get_position())});
      continue;

    // multiline comment?
    } else if (in.next_is("/*")) {
      position begin = in.get_position();
      // discard the comment starter
      (void)in.read(strlen("/*"));
      // consume the comment body;
      std::ostringstream content;
      while (!in.eof()) {
        if (in.next_is("*/")) {
          (void)in.read(strlen("*/"));
          break;
        }
        content << in.getchar();
      }
      result.push_back(Comment{content.str(), true, location(begin, in.get_position())});

    // otherwise, something irrelevant
    } else {
      (void)in.getchar();

    }
  }

  return result;
}

}