File: strings_tool.cpp

package info (click to toggle)
freespace2 24.2.0%2Brepack-3
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 43,740 kB
  • sloc: cpp: 595,005; ansic: 21,741; python: 1,174; sh: 457; makefile: 243; xml: 181
file content (292 lines) | stat: -rw-r--r-- 8,675 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <mutex>
#include <set>
#include <stdexcept>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>

#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/Frontend/CompilerInstance.h"
#include "clang/Frontend/CompilerInvocation.h"
// Declares clang::SyntaxOnlyAction.
#include "clang/Frontend/FrontendActions.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"
// Declares llvm::cl::extrahelp.
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"

// Select the regular expression implementation. The rest of the file reaches
// regex, regex_match and regex_error through the `regex` namespace alias, so
// the alias has to name the namespace that contains them.
#ifdef USE_BOOST_REGEX
#include <boost/regex.hpp>
namespace regex = boost;
#else
#include <regex>
// std::regex is a class, not a namespace; the alias must target `std` itself.
namespace regex = std;
#endif

using namespace clang;
using namespace clang::tooling;
using namespace clang::ast_matchers;
using namespace llvm;

// Replaces every non-overlapping occurrence of `from` in `str` with `to`, in place.
//
// The search resumes right behind each inserted replacement, so text that was
// introduced by `to` is never scanned again. This keeps the loop finite even
// when `to` itself contains `from`.
//
// str:  string to modify
// from: substring to look for; an empty pattern matches nothing and leaves `str` untouched
// to:   replacement text
void replaceAll(std::string& str, const std::string& from, const std::string& to) {
	if (from.empty()) {
		// find("") succeeds at every position, so the loop below would never terminate
		return;
	}

	size_t start_pos = 0;
	while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
		str.replace(start_pos, from.length(), to);
		start_pos += to.length(); // Skip the replacement so a 'to' that contains 'from' is not matched again
	}
}

// Apply a custom category to all command-line options so that they are the
// only ones displayed. The category is handed to CommonOptionsParser in main().
static llvm::cl::OptionCategory StringsToolCategory("strings_tool options");

// CommonOptionsParser declares HelpMessage with a description of the common
// command-line options related to the compilation database and input files.
// It's nice to have this help message in all tools.
static cl::extrahelp CommonHelp(CommonOptionsParser::HelpMessage);

// A help message for this specific tool can be added afterwards.
// This one describes what the tool extracts and the format it writes.
static cl::extrahelp MoreHelp("\nExtracts translatable strings (in the form of XSTR calls) from the source code and "
                              "writes them in the strings.tbl format.\n");

// Matches calls to a function named XSTR whose first argument is a string
// literal and whose second argument is an integer literal. The literals are
// bound as "text" and "id", the call expression itself as "xstrCall".
auto xstrMatcher = callExpr(callee(functionDecl(hasName("XSTR"))), hasArgument(0, stringLiteral().bind("text")),
                            hasArgument(1, integerLiteral().bind("id")))
                       .bind("xstrCall");

// Matches initializer lists whose type is the record ::UI_XSTR and which have
// both a string literal child (looked up through parentheses and implicit
// casts) and an integer literal child. The literals are bound as "text" and
// "id", the initializer list itself as "uiXstr".
auto uiXstrMatcher =
    initListExpr(hasType(cxxRecordDecl(hasName("::UI_XSTR"))), has(ignoringParenImpCasts(stringLiteral().bind("text"))),
                 has(integerLiteral().bind("id")))
        .bind("uiXstr");

// Returns the content of a string literal with newline, tab and double quote
// characters replaced by their backslash escape sequences.
//
// lit: literal to read; it is dereferenced unconditionally
// Returns the escaped text, or an empty string if the literal is neither ASCII
// nor UTF-8 encoded.
std::string getEscapedStringLiteralContent(const clang::StringLiteral* lit) {
	const bool supportedEncoding = lit->isAscii() || lit->isUTF8();
	if (!supportedEncoding) {
		return "";
	}

	// Raw character -> escape sequence, applied in exactly this order
	static const char* const replacements[][2] = {
	    {"\n", "\\n"},
	    {"\t", "\\t"},
	    {"\"", "\\\""},
	};

	std::string escaped = lit->getString().str();
	for (const auto& entry : replacements) {
		replaceAll(escaped, entry[0], entry[1]);
	}
	return escaped;
}

// Converts the value of an integer literal to a signed 64-bit integer.
//
// Throws std::runtime_error if the value cannot be represented as a signed
// 64-bit integer.
int64_t getIntValue(const clang::IntegerLiteral* lit) {
	const auto value = lit->getValue();
	if (value.isSignedIntN(64)) {
		return value.getSExtValue();
	}

	// Anything wider than a signed 64-bit integer is rejected
	throw std::runtime_error("Only up to 64-bit integers are supported!");
}

// Returns the name of the file that contains the given location.
//
// A location inside a macro expansion is first mapped to the start of its
// expansion range, so the file where the macro is used is reported.
std::string getSourceFileName(SourceLocation loc, SourceManager* manager) {
	const SourceLocation fileLoc = loc.isMacroID() ? manager->getExpansionRange(loc).first : loc;
	return manager->getFilename(fileLoc).str();
}

// Collects the XSTR usages reported by the match callbacks, keyed by XSTR id.
//
// The first usage of an id is the one that gets recorded. A later usage of the
// same id with a different text is reported as a warning diagnostic; a later
// usage with the same text is ignored silently.
//
// addXstr() serializes access to the mapping with a mutex. getStrings() does
// not take that lock, so it should only be called once no addXstr() call can
// be running anymore.
class XstrManager {

  public:
	// One recorded XSTR usage
	struct XstrInfo {
		int64_t id;             // XSTR id
		clang::SourceRange def; // Source range of the statement that introduced the id
		std::string text;       // Text as passed to addXstr()
		std::string file;       // Name of the file containing the statement

		// Orders entries by file name first and by id second
		bool operator<(const XstrInfo& other) const {
			return std::tie(file, id) < std::tie(other.file, other.id);
		}
	};

  public:
	// Records the usage of an XSTR id.
	//
	// id:         XSTR id; negative ids are ignored
	// text:       text belonging to the id
	// ctx:        AST context used for the file name lookup and for reporting diagnostics
	// sourceStmt: statement in which the id was found
	void addXstr(int64_t id, const std::string& text, ASTContext* ctx, const clang::Stmt* sourceStmt) {
		if (id < 0) {
			// Ignore invalid ids
			return;
		}

		std::lock_guard<std::mutex> guard(_mapping_lock);

		auto iter = _mapping.find(id);
		if (iter == _mapping.end()) {
			// First usage of this id. The entry is only built here so that duplicate
			// usages do not pay for the file name lookup.
			XstrInfo info;
			info.id   = id;
			info.def  = sourceStmt->getSourceRange();
			info.text = text;
			info.file = getSourceFileName(sourceStmt->getLocStart(), &ctx->getSourceManager());

			_mapping.emplace(id, std::move(info));
			return;
		}

		if (iter->second.text != text) {
			// Same id but conflicting text: keep the first entry and warn at the new usage
			ctx->getDiagnostics().Report(sourceStmt->getLocStart(),
			                             ctx->getDiagnostics().getDiagnosticIDs()->getCustomDiagID(
			                                 DiagnosticIDs::Warning,
			                                 "Found duplicate XSTR id usage with "
			                                 "different text! Previous text was: "
			                                 "\"%0\" but new text is \"%1\". Previous text was used in \"%2\"."))
			    << iter->second.text << text << iter->second.file;
		}
	}

	// Returns a copy of all recorded entries, sorted by file name and then by id.
	const std::vector<XstrInfo> getStrings() const {
		std::vector<XstrInfo> out;
		out.reserve(_mapping.size());
		for (const auto& entry : _mapping) {
			out.push_back(entry.second);
		}
		std::sort(out.begin(), out.end());
		return out;
	}

  private:
	std::mutex _mapping_lock;                       // Guards _mapping in addXstr()
	std::unordered_map<int64_t, XstrInfo> _mapping; // XSTR id -> first recorded usage
};

// Match callback that forwards "uiXstr" initializer list matches to an XstrManager.
//
// The escaped text is passed on even when it is empty. A match whose id cannot
// be converted by getIntValue() is dropped.
class UiXstrPrinter : public MatchFinder::MatchCallback {
	XstrManager* _manager = nullptr;

  public:
	UiXstrPrinter(XstrManager* manager) : _manager(manager) {}

	void run(const MatchFinder::MatchResult& Result) override {
		const auto& bound     = Result.Nodes;
		const auto* init_list = bound.getNodeAs<clang::InitListExpr>("uiXstr");

		const std::string escaped = getEscapedStringLiteralContent(bound.getNodeAs<clang::StringLiteral>("text"));

		int64_t xstr_id;
		try {
			xstr_id = getIntValue(bound.getNodeAs<clang::IntegerLiteral>("id"));
		} catch (const std::runtime_error&) {
			// The id does not fit into 64 bits, nothing to record
			return;
		}

		_manager->addXstr(xstr_id, escaped, Result.Context, init_list);
	}
};

// Match callback that forwards "xstrCall" call expression matches to an XstrManager.
//
// A match is dropped when its escaped text is empty or when its id cannot be
// converted by getIntValue().
class XstrPrinter : public MatchFinder::MatchCallback {
	XstrManager* _manager = nullptr;

  public:
	XstrPrinter(XstrManager* manager) : _manager(manager) {}

	void run(const MatchFinder::MatchResult& Result) override {
		const auto& bound = Result.Nodes;
		const auto* call  = bound.getNodeAs<clang::CallExpr>("xstrCall");

		const std::string escaped = getEscapedStringLiteralContent(bound.getNodeAs<clang::StringLiteral>("text"));
		if (escaped.empty()) {
			// We only support strings with ASCII/Utf-8 encoding
			return;
		}

		int64_t xstr_id;
		try {
			xstr_id = getIntValue(bound.getNodeAs<clang::IntegerLiteral>("id"));
		} catch (const std::runtime_error&) {
			// The id does not fit into 64 bits, nothing to record
			return;
		}

		_manager->addXstr(xstr_id, escaped, Result.Context, call);
	}
};

// Source file callback that prints the name of every frontend input to stderr
// when processing of a source begins.
class DiagnosticLogger : public SourceFileCallbacks {
  public:
	// Always returns true after logging the inputs.
	bool handleBeginSource(CompilerInstance& CI) override {
		const auto& inputs = CI.getFrontendOpts().Inputs;
		for (const auto& source : inputs) {
			const std::string path = source.getFile().str();
			std::cerr << "Processing \"" << path << "\"\n";
		}
		return true;
	}
};

std::vector<std::string> resolveRegexes(const CompilationDatabase& compilations,
                                        const std::vector<std::string>& regexes) {
	std::set<std::string> allFiles;
	auto compilationFiles = compilations.getAllFiles();

	for (auto& regex : regexes) {
		try {
			regex::regex file_regex(regex);

			for (auto& file : compilationFiles) {
				if (regex::regex_match(file, file_regex)) {
					allFiles.emplace(file);
				}
			}
		} catch (regex::regex_error& e) {
			// Syntax error in the regular expression
			std::cerr << "Skipping bad regex '" << regex << "': " << e.what() << std::endl;
		}
	}

	std::vector<std::string> outVec;
	std::copy(allFiles.begin(), allFiles.end(), std::back_inserter(outVec));
	return outVec;
}

// Writes the given entries to `out` as a "#English" section.
//
// Whenever the file of an entry differs from the file of the previous entry, a
// comment banner with the file's base name is written first. The comparison
// starts with an empty file name, so leading entries without a file name get
// no banner. Each entry is written as `<id>, "<text>"`; the text is written
// exactly as stored.
void write_in_table_syntax(std::ostream& out, const std::vector<XstrManager::XstrInfo>& strings) {
	static const char* const separator = ";-------------------------------------------------\n";

	out << "#English";

	std::string current_file;
	for (const auto& entry : strings) {
		if (entry.file != current_file) {
			current_file = entry.file;

			out << "\n"
			    << separator << "; File: " << llvm::sys::path::filename(current_file).str() << "\n"
			    << separator << "\n";
		}

		out << entry.id << ", \"" << entry.text << "\"\n";
	}
}

int main(int argc, const char** argv) {
	CommonOptionsParser OptionsParser(argc, argv, StringsToolCategory, llvm::cl::OneOrMore);

	std::vector<std::string> resolved_files =
	    resolveRegexes(OptionsParser.getCompilations(), OptionsParser.getSourcePathList());

	ClangTool Tool(OptionsParser.getCompilations(), resolved_files);

	XstrManager xstrManager;

	XstrPrinter Printer(&xstrManager);
	UiXstrPrinter uiPrinter(&xstrManager);

	MatchFinder Finder;
	Finder.addMatcher(xstrMatcher, &Printer);
	Finder.addMatcher(uiXstrMatcher, &uiPrinter);

	DiagnosticLogger logger;
	auto res = Tool.run(newFrontendActionFactory(&Finder, &logger).get());

	write_in_table_syntax(std::cout, xstrManager.getStrings());

	return res;
}