// File: recovery_lpm_fuzzer.cc
// Origin: Debian source package chromium 135.0.7049.95-1~deb12u1
//   (area: main; suite: bookworm). File content: 409 lines, 14,634 bytes.

// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifdef UNSAFE_BUFFERS_BUILD
// TODO(crbug.com/40285824): Remove this and convert code to safer constructs.
#pragma allow_unsafe_buffers
#endif

// This fuzzer constructs a DB from fuzzer-derived SQL statements and then
// mutates the file with fuzzer-derived XOR masks before exercising recovery.

#include <fuzzer/FuzzedDataProvider.h>

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <ios>
#include <iostream>
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "base/check.h"
#include "base/check_op.h"
#include "base/command_line.h"
#include "base/containers/span.h"
#include "base/files/file.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/files/scoped_temp_dir.h"
#include "base/logging.h"
#include "base/strings/cstring_view.h"
#include "base/strings/strcat.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
#include "base/strings/string_util.h"
#include "base/test/bind.h"
#include "base/test/scoped_logging_settings.h"
#include "build/buildflag.h"
#include "sql/database.h"
#include "sql/fuzzers/sql_disk_corruption.pb.h"
#include "sql/recovery.h"
#include "sql/statement.h"
#include "sql/test/test_helpers.h"
#include "testing/libfuzzer/proto/lpm_interface.h"
#include "third_party/sqlite/fuzz/sql_query_grammar.pb.h"
#include "third_party/sqlite/fuzz/sql_query_proto_to_string.h"

namespace {

// usage: LPM_ADDITIONAL_ARGS="..." sql_recovery_lpm_fuzzer testcases...
//
// Positional args:
//   testcases                  One or more testcase files to run.
//
// Optional additional args (passed in through the LPM_ADDITIONAL_ARGS
// environment variable):
//   --dump_input               Prints the testcase to the console in a
//                              human-readable format.
//   --out_db_path <dir path>   Copies the database, after it has been
//                              mutated, to `db.sqlite` inside the given
//                              directory.

// Builds a `base::CommandLine` from the contents of the LPM_ADDITIONAL_ARGS
// environment variable, split on spaces with empty pieces dropped. Returns
// `std::nullopt` when the variable is not set.
std::optional<base::CommandLine> GetCommandLine() {
  const char* const raw_args = std::getenv("LPM_ADDITIONAL_ARGS");
  if (!raw_args) {
    return std::nullopt;
  }

  const std::vector<std::string> args = base::SplitString(
      raw_args, " ", base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
#if BUILDFLAG(IS_WIN)
  // On Windows the arguments are handed over as wide strings; widen each
  // argument one `char` at a time.
  std::vector<std::wstring> wide_args;
  wide_args.reserve(args.size());
  for (const std::string& arg : args) {
    wide_args.emplace_back(arg.begin(), arg.end());
  }
  return base::CommandLine::FromArgvWithoutProgram(wide_args);
#else
  return base::CommandLine::FromArgvWithoutProgram(args);
#endif
}

// Initializes and manages state shared between fuzzer iterations. Use this to
// interact with global variables, environment variables, the filesystem, etc.
class Environment {
 public:
  Environment()
      : temp_dir_(MakeTempDir()),
        db_path_(GetTempFilePath("db.sqlite")),
        should_dump_input_(std::getenv("LPM_DUMP_NATIVE_INPUT") != nullptr) {
    auto command_line = GetCommandLine();
    if (command_line) {
      should_dump_input_ =
          should_dump_input_ || command_line->HasSwitch("dump_input");
      if (command_line->HasSwitch("out_db_path")) {
        out_db_path_ = MakeAbsoluteFilePath(
                           command_line->GetSwitchValuePath("out_db_path"))
                           .AppendASCII("db")
                           .AddExtensionASCII("sqlite");
      }
    }

    // Logging must be initialized before `ScopedLoggingSettings`. See
    // <https://crbug.com/331909454>.
    logging::InitLogging(logging::LoggingSettings{
        // The default logging destination on Windows is `LOG_TO_FILE`, which
        // would require us to set `LoggingSettings::log_file_path`.
        .logging_dest =
            logging::LOG_TO_SYSTEM_DEBUG_LOG | logging::LOG_TO_STDERR,
    });
    logging::SetMinLogLevel(logging::LOGGING_ERROR);
  }

  ~Environment() { AssertTempDirIsEmpty(); }

  // By convention, the LPM_DUMP_NATIVE_INPUT environment variable indicates
  // that the fuzzer should print its input in a readable format.
  bool should_dump_input() const { return should_dump_input_; }

  // The path to the database's backing file.
  const base::FilePath& db_path() const { return db_path_; }

  // The path the database is copied to after it's been mutated.
  const base::FilePath& out_db_path() const { return out_db_path_; }

  // Deletes the backing file and related journal files.
  void DeleteDbFiles() const {
    CHECK(base::DeleteFile(GetTempFilePath("db.sqlite")));
    CHECK(base::DeleteFile(GetTempFilePath("db.sqlite-journal")));
    CHECK(base::DeleteFile(GetTempFilePath("db.sqlite-wal")));
  }

  void AssertTempDirIsEmpty() const {
    if (base::IsDirectoryEmpty(temp_dir_.GetPath())) {
      return;
    }

    base::FileEnumerator files(temp_dir_.GetPath(), /*recursive=*/true,
                               base::FileEnumerator::FileType::FILES |
                                   base::FileEnumerator::FileType::DIRECTORIES);
    LOG(ERROR) << "Unexpected files or directories in temp dir:";
    files.ForEach(
        [](const base::FilePath& path) { LOG(ERROR) << "  " << path; });
    LOG(FATAL) << "Expected temp dir to be empty: " << temp_dir_.GetPath();
  }

 private:
  static base::ScopedTempDir MakeTempDir() {
#if BUILDFLAG(IS_POSIX) || BUILDFLAG(IS_FUCHSIA)
    base::CommandLine::Init(0, nullptr);
    base::FilePath shmem_temp_dir;
    CHECK(base::GetShmemTempDir(false, &shmem_temp_dir));
    base::ScopedTempDir temp_dir;
    CHECK(temp_dir.CreateUniqueTempDirUnderPath(shmem_temp_dir));
    return temp_dir;
#else
    base::ScopedTempDir temp_dir;
    CHECK(temp_dir.CreateUniqueTempDir());
    return temp_dir;
#endif
  }

  base::FilePath GetTempFilePath(std::string_view name) const {
    return temp_dir_.GetPath().AppendASCII(name);
  }

  base::ScopedTempDir temp_dir_;
  base::FilePath db_path_;
  bool should_dump_input_ = false;
  base::FilePath out_db_path_;
};

// A wrapper around the fuzzer's input proto. Does some preprocessing to map the
// input to a higher-level test case.
class TestCase {
 public:
  // A single mutation instruction.
  struct Mutation {
    // Position taken verbatim from the input proto. It is not normalized here,
    // so the consumer must map it into the file before using it.
    int64_t pos;
    // Mask to XOR into the file contents. Never zero for mutations built by
    // `TestCase`'s constructor.
    uint64_t xor_mask;
  };

  // Converts the raw proto `input` into the fields exposed by the accessors
  // below. Both SQL query lists are rendered to strings up front.
  explicit TestCase(const sql_fuzzers::RecoveryFuzzerTestCase& input)
      : strategy_(RecoveryStrategyFromInt(input.recovery_strategy())),
        wal_mode_(input.wal_mode()),
        sql_statement_(sql_fuzzer::SQLQueriesToString(input.queries())),
        sql_statement_after_open_(
            sql_fuzzer::SQLQueriesToString(input.queries_after_open())) {
    // Parse the input's `mutations` map as `Mutation` structs.
    mutations_.reserve(input.mutations_size());
    for (const auto& [pos, xor_mask] : input.mutations()) {
      // Zero is XOR's identity value, so a zero mask would not change the
      // file. Substitute 1 so that every mutation flips at least one bit.
      mutations_.emplace_back(pos, xor_mask ? xor_mask : 1);
    }
  }

  sql::Recovery::Strategy strategy() const { return strategy_; }
  bool wal_mode() const { return wal_mode_; }
  base::span<const Mutation> mutations() const { return mutations_; }
  base::cstring_view sql_statement() const { return sql_statement_; }
  base::cstring_view sql_statement_after_open() const {
    return sql_statement_after_open_;
  }

  // Print as a human-readable string. Returns `os` to allow chaining. The
  // stream's format flags are left exactly as the caller had them.
  std::ostream& Print(std::ostream& os) const {
    // Remember the caller's formatting so that switching to hex below does not
    // leak out of this function (and so that a caller who was not using
    // decimal output is not silently reset to it).
    const std::ios_base::fmtflags saved_flags = os.flags();

    os << "Test Case:" << '\n';
    os << "- strategy: " << DebugFormat(strategy_) << '\n';
    os << "- wal_mode: " << (wal_mode_ ? "true" : "false") << '\n';
    os << "- mutations: " << '\n';
    os << std::hex;
    for (const Mutation& mutation : mutations()) {
      os << "    {pos=0x" << mutation.pos << ", xor_mask=0x"
         << mutation.xor_mask << "}," << '\n';
    }
    os.flags(saved_flags);
    os << "- sql_statement: " << DebugFormat(sql_statement()) << '\n';
    os << "- sql_statement_after_open: "
       << DebugFormat(sql_statement_after_open()) << '\n';
    // Flush once at the end so that the dump is not lost in a buffer if the
    // code that runs after printing crashes.
    os << std::flush;
    return os;
  }

 private:
  // Converts an arbitrary int to a valid enum value.
  static sql::Recovery::Strategy RecoveryStrategyFromInt(int input);
  // Converts arbitrary bytes in `s` to a human-readable ASCII string.
  // Non-printable characters are hex-escaped.
  static std::string DebugFormat(std::string_view s);
  // Converts the value of `strategy`, which must be a valid enum value, to a
  // human-readable string.
  static constexpr const char* DebugFormat(sql::Recovery::Strategy strategy);

  // Fields parsed from the fuzzer input:
  const sql::Recovery::Strategy strategy_ =
      sql::Recovery::Strategy::kRecoverOrRaze;
  const bool wal_mode_ = false;
  std::vector<Mutation> mutations_;
  const std::string sql_statement_;
  const std::string sql_statement_after_open_;
};

// Streams `test_case` in the human-readable form produced by
// `TestCase::Print()`.
std::ostream& operator<<(std::ostream& os, const TestCase& test_case) {
  test_case.Print(os);
  return os;
}

}  // namespace

// Fuzzer entry point. Each iteration:
//   1. builds a database from the input's SQL queries,
//   2. XORs fuzzer-chosen masks into the backing file,
//   3. reopens the database with an error callback that attempts recovery,
//   4. runs the input's follow-up queries if the reopen succeeded, and
//   5. deletes the database files again.
DEFINE_PROTO_FUZZER(const sql_fuzzers::RecoveryFuzzerTestCase& fuzzer_input) {
  static Environment env;

  // Ignore this input if it includes any "ATTACH DATABASE" queries. These
  // queries may cause SQLite to create files like `file::memory:` in the
  // current working directory, which is undesirable. (See how `AttachDatabase`
  // is handled in //third_party/sqlite/fuzz/sql_query_proto_to_string.cc.)
  //
  // TODO: A slight improvement would be to filter out individual "ATTACH
  // DATABASE" queries rather than throwing away the whole test case.
  if (std::ranges::any_of(fuzzer_input.queries().extra_queries(),
                          &sql_query_grammar::SQLQuery::has_attach_db) ||
      std::ranges::any_of(fuzzer_input.queries_after_open().extra_queries(),
                          &sql_query_grammar::SQLQuery::has_attach_db)) {
    return;
  }

  // The purpose of this fuzzer is to throw *corrupted* database files at the
  // recovery module. If there are no mutations, this test case is out of scope.
  if (fuzzer_input.mutations().empty()) {
    return;
  }

  TestCase test_case(fuzzer_input);

  if (env.should_dump_input()) {
    std::cout << test_case;
  }

  sql::Database database(
      sql::DatabaseOptions().set_wal_mode(test_case.wal_mode()),
      sql::test::kTestTag);
  CHECK(database.Open(env.db_path()));

  // Bootstrap the database with SQL queries derived from `fuzzer_input`.
  {
    // SQLite may warn us about errors in these queries, e.g. "unknown database
    // foo". Temporarily silence those warnings.
    logging::ScopedLoggingSettings scoped_logging;
    logging::SetMinLogLevel(logging::LOGGING_FATAL);
    std::ignore = database.Execute(test_case.sql_statement());
  }
  database.Close();

  // Mutate the backing file. Skip the expensive file operations when there are
  // no bytes to mutate.
  const std::optional<int64_t> file_length = base::GetFileSize(env.db_path());
  // The database was opened successfully above, so failing to query the size
  // of its backing file is unexpected. Fail loudly instead of dereferencing an
  // empty optional.
  CHECK(file_length.has_value());
  const int64_t db_size = *file_length;
  if (db_size > 0) {
    base::File file(env.db_path(), base::File::FLAG_OPEN |
                                       base::File::FLAG_READ |
                                       base::File::FLAG_WRITE);
    CHECK(file.IsValid());
    // Apply each mutation without sorting by file position. These random-access
    // file operations could be a performance bottleneck if the temp directory
    // is on a physical disk.
    for (TestCase::Mutation mutation : test_case.mutations()) {
      // File read/write operations expect positions to point within the file.
      // `%` keeps the sign of the dividend, so a negative position leaves a
      // remainder in (-db_size, 0). Shift it up by `db_size` so that negative
      // positions are spread over the file rather than all landing on
      // offset 0.
      mutation.pos %= db_size;
      if (mutation.pos < 0) {
        mutation.pos += db_size;
      }

      // Read up to eight bytes; fewer are returned near the end of the file.
      uint64_t buf = 0;
      const int num_read =
          file.Read(mutation.pos, reinterpret_cast<char*>(&buf), sizeof(buf));
      CHECK_NE(num_read, -1);
      if (num_read == 0) {
        continue;
      }

      buf ^= mutation.xor_mask;

      // Write `buf` back to the file, being careful not to add bytes to the
      // file that did not exist before.
      CHECK_NE(
          file.Write(mutation.pos, reinterpret_cast<char*>(&buf), num_read),
          -1);
    }
    CHECK_EQ(db_size, file.GetLength());
  }

  // Optionally keep a copy of the mutated database for offline inspection.
  // This is a debugging aid, so a failed copy is reported but does not abort
  // the iteration.
  if (!env.out_db_path().empty() &&
      !base::CopyFile(env.db_path(), env.out_db_path())) {
    LOG(ERROR) << "Failed to copy mutated database to " << env.out_db_path();
  }

  // Attempt recovery until one attempt reports success; after that, later
  // errors are left alone.
  bool attempted_recovery = false;
  auto error_callback =
      base::BindLambdaForTesting([&](int extended_error, sql::Statement*) {
        if (!attempted_recovery) {
          attempted_recovery = sql::Recovery::RecoverIfPossible(
              &database, extended_error, test_case.strategy());
        }
      });
  database.set_error_callback(std::move(error_callback));

  // Reopen the database after potentially corrupting the file. This may run
  // the error callback.
  const bool opened = database.Open(env.db_path());
  if (opened) {
    // As above, silence non-fatal logging while running fuzzer-derived SQL.
    logging::ScopedLoggingSettings scoped_logging;
    logging::SetMinLogLevel(logging::LOGGING_FATAL);
    std::ignore = database.Execute(test_case.sql_statement_after_open());

    database.Close();
  }

  // Delete the backing file to prepare for the next iteration.
  env.DeleteDbFiles();
  // Ensure that no unexpected files were created in the temp directory.
  env.AssertTempDirIsEmpty();
}

namespace {

// Maps the arbitrary integer `input` onto a valid `sql::Recovery::Strategy`.
// Values that match an enumerator are returned as that enumerator; anything
// else falls back to `kRecoverOrRaze`.
sql::Recovery::Strategy TestCase::RecoveryStrategyFromInt(int input) {
  using Strategy = sql::Recovery::Strategy;
  static_assert(
      std::is_same_v<std::underlying_type_t<Strategy>, decltype(input)>,
      "sql::Recovery::Strategy's underlying type must match the input");

  // Every strategy is listed explicitly so that adding a new one is a reminder
  // to update the fuzzer.
  switch (const auto candidate = static_cast<Strategy>(input); candidate) {
    case Strategy::kRecoverOrRaze:
    case Strategy::kRecoverWithMetaVersionOrRaze:
      return candidate;
  }

  // `input` did not name a known strategy; use the default.
  return Strategy::kRecoverOrRaze;
}

// Renders the bytes of `s` as a double-quoted ASCII string. Printable ASCII
// characters are copied through unchanged; every other byte is written as
// `\x` followed by its hex encoding.
std::string TestCase::DebugFormat(std::string_view s) {
  std::string formatted;
  // Room for each input byte plus the two surrounding quotes. Escaped bytes
  // take more space, so the string may still grow beyond this.
  formatted.reserve(s.size() + 2);

  formatted += '"';
  for (const char ch : s) {
    if (!base::IsAsciiPrintable(ch)) {
      formatted += "\\x";
      base::AppendHexEncodedByte(static_cast<uint8_t>(ch),
                                 /*output=*/formatted);
      continue;
    }
    formatted += ch;
  }
  formatted += '"';
  return formatted;
}

// Returns the enumerator name of `strategy` as a string literal. `strategy`
// is expected to be a valid enum value; any other value yields a placeholder
// string.
constexpr const char* TestCase::DebugFormat(sql::Recovery::Strategy strategy) {
  // Every strategy is listed explicitly, without a `default:` label, so that
  // a newly added strategy is not silently lumped in with the fallback below.
  switch (strategy) {
    case sql::Recovery::Strategy::kRecoverOrRaze:
      return "kRecoverOrRaze";
    case sql::Recovery::Strategy::kRecoverWithMetaVersionOrRaze:
      return "kRecoverWithMetaVersionOrRaze";
  }
  // Flowing off the end of a non-void function is undefined behaviour, so
  // return a placeholder for values that match no enumerator.
  return "<invalid sql::Recovery::Strategy>";
}

}  // namespace