1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
|
//===--- Format.cpp -----------------------------------------*- C++-*------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Format.h"
#include "support/Logger.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Unicode.h"
namespace clang {
namespace clangd {
namespace {
/// Append closing brackets )]} to \p Code to make it well-formed.
/// Clang-format conservatively refuses to format files with unmatched brackets
/// as it isn't sure where the errors are and so can't correct.
/// When editing, it's reasonable to assume code before the cursor is complete.
void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
SourceManagerForFile FileSM("mock_file.cpp", Code);
auto &SM = FileSM.get();
FileID FID = SM.getMainFileID();
Lexer Lex(FID, SM.getBufferOrFake(FID), SM,
format::getFormattingLangOpts(Style));
Token Tok;
std::vector<char> Brackets;
while (!Lex.LexFromRawLexer(Tok)) {
switch(Tok.getKind()) {
case tok::l_paren:
Brackets.push_back(')');
break;
case tok::l_brace:
Brackets.push_back('}');
break;
case tok::l_square:
Brackets.push_back(']');
break;
case tok::r_paren:
if (!Brackets.empty() && Brackets.back() == ')')
Brackets.pop_back();
break;
case tok::r_brace:
if (!Brackets.empty() && Brackets.back() == '}')
Brackets.pop_back();
break;
case tok::r_square:
if (!Brackets.empty() && Brackets.back() == ']')
Brackets.pop_back();
break;
default:
continue;
}
}
// Attempt to end any open comments first.
Code.append("\n// */\n");
Code.append(Brackets.rbegin(), Brackets.rend());
}
static StringRef commentMarker(llvm::StringRef Line) {
for (StringRef Marker : {"///", "//"}){
auto I = Line.rfind(Marker);
if (I != StringRef::npos)
return Line.substr(I, Marker.size());
}
return "";
}
llvm::StringRef firstLine(llvm::StringRef Code) {
return Code.take_until([](char C) { return C == '\n'; });
}
llvm::StringRef lastLine(llvm::StringRef Code) {
llvm::StringRef Rest = Code;
while (!Rest.empty() && Rest.back() != '\n')
Rest = Rest.drop_back();
return Code.substr(Rest.size());
}
// Filename is needed for tooling::Replacement and some overloads of reformat().
// Its value should not affect the outcome. We use the default from reformat().
llvm::StringRef Filename = "<stdin>";
// tooling::Replacement from overlapping StringRefs: From must be part of Code.
tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
llvm::StringRef To) {
assert(From.begin() >= Code.begin() && From.end() <= Code.end());
// The filename is required but ignored.
return tooling::Replacement(Filename, From.data() - Code.data(),
From.size(), To);
}
// High-level representation of incremental formatting changes.
// The changes are made in two steps.
// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
// comment markers when splitting a line comment with a newline).
// 2) a selective clang-format run:
// - the "source code" passed to clang format is the code up to the cursor,
// a placeholder for the cursor, and some closing brackets
// - the formatting is restricted to the cursor and (possibly) other ranges
// (e.g. the old line when inserting a newline).
// - changes before the cursor are applied, those after are discarded.
struct IncrementalChanges {
// Changes that should be applied before running clang-format.
tooling::Replacements Changes;
// Ranges of the original source code that should be clang-formatted.
// The CursorProxyText will also be formatted.
std::vector<tooling::Range> FormatRanges;
// The source code that should stand in for the cursor when clang-formatting.
// e.g. after inserting a newline, a line-comment at the cursor is used to
// ensure that the newline is preserved.
std::string CursorPlaceholder;
};
// After a newline:
// - we continue any line-comment that was split
// - we format the old line in addition to the cursor
// - we represent the cursor with a line comment to preserve the newline
IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
unsigned Cursor) {
IncrementalChanges Result;
// Before newline, code looked like:
// leading^trailing
// After newline, code looks like:
// leading
// indentation^trailing
// Where indentation was added by the editor.
StringRef Trailing = firstLine(Code.substr(Cursor));
StringRef Indentation = lastLine(Code.take_front(Cursor));
if (Indentation.data() == Code.data()) {
vlog("Typed a newline, but we're still on the first line!");
return Result;
}
StringRef Leading =
lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
// Strip leading whitespace on trailing line.
StringRef TrailingTrim = Trailing.ltrim();
if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
cantFail(Result.Changes.add(
replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
// If we split a comment, replace indentation with a comment marker.
// If the editor made the new line a comment, also respect that.
StringRef CommentMarker = commentMarker(Leading);
bool NewLineIsComment = !commentMarker(Indentation).empty();
if (!CommentMarker.empty() &&
(NewLineIsComment || !commentMarker(NextLine).empty() ||
(!TrailingTrim.empty() && !TrailingTrim.startswith("//")))) {
using llvm::sys::unicode::columnWidthUTF8;
// We indent the new comment to match the previous one.
StringRef PreComment =
Leading.take_front(CommentMarker.data() - Leading.data());
std::string IndentAndComment =
(std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ")
.str();
cantFail(
Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
} else {
// Remove any indentation and let clang-format re-add it.
// This prevents the cursor marker dragging e.g. an aligned comment with it.
cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
}
// If we put a the newline inside a {} pair, put } on its own line...
if (CommentMarker.empty() && Leading.endswith("{") &&
Trailing.startswith("}")) {
cantFail(
Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
// ...and format it.
Result.FormatRanges.push_back(
tooling::Range(Trailing.data() - Code.data() + 1, 1));
}
// Format the whole leading line.
Result.FormatRanges.push_back(
tooling::Range(Leading.data() - Code.data(), Leading.size()));
// We use a comment to represent the cursor, to preserve the newline.
// A trailing identifier improves parsing of e.g. for without braces.
// Exception: if the previous line has a trailing comment, we can't use one
// as the cursor (they will be aligned). But in this case we don't need to.
Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
return Result;
}
IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
llvm::StringRef InsertedText) {
IncrementalChanges Result;
if (InsertedText == "\n")
return getIncrementalChangesAfterNewline(Code, Cursor);
Result.CursorPlaceholder = " /**/";
return Result;
}
// Returns equivalent replacements that preserve the correspondence between
// OldCursor and NewCursor. If OldCursor lies in a replaced region, that
// replacement will be split.
std::vector<tooling::Replacement>
split(const tooling::Replacements &Replacements, unsigned OldCursor,
unsigned NewCursor) {
std::vector<tooling::Replacement> Result;
int LengthChange = 0;
for (const tooling::Replacement &R : Replacements) {
if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
Result.push_back(R);
LengthChange += R.getReplacementText().size() - R.getLength();
} else if (R.getOffset() < OldCursor) { // overlaps cursor
int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
assert(ReplacementSplit >= 0 &&
ReplacementSplit <= int(R.getReplacementText().size()) &&
"NewCursor incompatible with OldCursor!");
Result.push_back(tooling::Replacement(
R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
R.getReplacementText().take_front(ReplacementSplit)));
Result.push_back(tooling::Replacement(
R.getFilePath(), OldCursor,
R.getLength() - (OldCursor - R.getOffset()),
R.getReplacementText().drop_front(ReplacementSplit)));
} else if (R.getOffset() >= OldCursor) { // after cursor
Result.push_back(R);
}
}
return Result;
}
} // namespace
// We're simulating the following sequence of changes:
// - apply the pre-formatting edits (see getIncrementalChanges)
// - insert a placeholder for the cursor
// - format some of the resulting code
// - remove the cursor placeholder again
// The replacements we return are produced by composing these.
//
// The text we actually pass to clang-format is slightly different from this,
// e.g. we have to close brackets. We ensure these differences are *after*
// all the regions we want to format, and discard changes in them.
std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
llvm::StringRef InsertedText, format::FormatStyle Style) {
IncrementalChanges Incremental =
getIncrementalChanges(OriginalCode, OriginalCursor, InsertedText);
// Never *remove* lines in response to pressing enter! This annoys users.
if (InsertedText == "\n") {
Style.MaxEmptyLinesToKeep = 1000;
Style.KeepEmptyLinesAtTheStartOfBlocks = true;
}
// Compute the code we want to format:
// 1) Start with code after the pre-formatting edits.
std::string CodeToFormat = cantFail(
tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
// 2) Truncate code after the last interesting range.
unsigned FormatLimit = Cursor;
for (tooling::Range &R : Incremental.FormatRanges)
FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
CodeToFormat.resize(FormatLimit);
// 3) Insert a placeholder for the cursor.
CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
// 4) Append brackets after FormatLimit so the code is well-formed.
closeBrackets(CodeToFormat, Style);
// Determine the ranges to format:
std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
// Ranges after the cursor need to be adjusted for the placeholder.
for (auto &R : RangesToFormat) {
if (R.getOffset() > Cursor)
R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
R.getLength());
}
// We also format the cursor.
RangesToFormat.push_back(
tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
// Also update FormatLimit for the placeholder, we'll use this later.
FormatLimit += Incremental.CursorPlaceholder.size();
// Run clang-format, and truncate changes at FormatLimit.
tooling::Replacements FormattingChanges;
format::FormattingAttemptStatus Status;
for (const tooling::Replacement &R : format::reformat(
Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
cantFail(FormattingChanges.add(R));
else if(R.getOffset() < FormatLimit) { // Overlaps limit.
if (R.getReplacementText().empty()) // Deletions are easy to handle.
cantFail(FormattingChanges.add(tooling::Replacement(Filename,
R.getOffset(), FormatLimit - R.getOffset(), "")));
else
// Hopefully won't happen in practice?
elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
Cursor, CodeToFormat);
}
}
if (!Status.FormatComplete)
vlog("Incremental format incomplete at line {0}", Status.Line);
// Now we are ready to compose the changes relative to OriginalCode.
// edits -> insert placeholder -> format -> remove placeholder.
// We must express insert/remove as Replacements.
tooling::Replacements InsertCursorPlaceholder(
tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
unsigned FormattedCursorStart =
FormattingChanges.getShiftedCodePosition(Cursor),
FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
Cursor + Incremental.CursorPlaceholder.size());
tooling::Replacements RemoveCursorPlaceholder(
tooling::Replacement(Filename, FormattedCursorStart,
FormattedCursorEnd - FormattedCursorStart, ""));
// We can't simply merge() and return: tooling::Replacements will combine
// adjacent edits left and right of the cursor. This gives the right source
// code, but loses information about where the cursor is!
// Fortunately, none of the individual passes lose information, so:
// - we use merge() to compute the final Replacements
// - we chain getShiftedCodePosition() to compute final cursor position
// - we split the final Replacements at the cursor position, so that
// each Replacement lies either before or after the cursor.
tooling::Replacements Final;
unsigned FinalCursor = OriginalCursor;
#ifndef NDEBUG
std::string FinalCode = std::string(OriginalCode);
dlog("Initial code: {0}", FinalCode);
#endif
for (auto Pass :
std::vector<std::pair<const char *, const tooling::Replacements *>>{
{"Pre-formatting changes", &Incremental.Changes},
{"Insert placeholder", &InsertCursorPlaceholder},
{"clang-format", &FormattingChanges},
{"Remove placeholder", &RemoveCursorPlaceholder}}) {
Final = Final.merge(*Pass.second);
FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
#ifndef NDEBUG
FinalCode =
cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
dlog("After {0}:\n{1}^{2}", Pass.first,
StringRef(FinalCode).take_front(FinalCursor),
StringRef(FinalCode).drop_front(FinalCursor));
#endif
}
return split(Final, OriginalCursor, FinalCursor);
}
unsigned
transformCursorPosition(unsigned Offset,
const std::vector<tooling::Replacement> &Replacements) {
unsigned OriginalOffset = Offset;
for (const auto &R : Replacements) {
if (R.getOffset() + R.getLength() <= OriginalOffset) {
// Replacement is before cursor.
Offset += R.getReplacementText().size();
Offset -= R.getLength();
} else if (R.getOffset() < OriginalOffset) {
// Replacement overlaps cursor.
// Preserve position within replacement text, as far as possible.
unsigned PositionWithinReplacement = Offset - R.getOffset();
if (PositionWithinReplacement > R.getReplacementText().size()) {
Offset += R.getReplacementText().size();
Offset -= PositionWithinReplacement;
}
} else {
// Replacement after cursor.
break; // Replacements are sorted, the rest are also after the cursor.
}
}
return Offset;
}
} // namespace clangd
} // namespace clang
|