File: CheckedIndex.swift

package info (click to toggle)
swiftlang 6.1.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,791,604 kB
  • sloc: cpp: 9,901,740; ansic: 2,201,431; asm: 1,091,827; python: 308,252; objc: 82,166; f90: 80,126; lisp: 38,358; pascal: 25,559; sh: 20,429; ml: 5,058; perl: 4,745; makefile: 4,484; awk: 3,535; javascript: 3,018; xml: 918; fortran: 664; cs: 573; ruby: 396
file content (520 lines) | stat: -rw-r--r-- 21,426 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2020 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#if compiler(>=6)
import Foundation
@preconcurrency package import IndexStoreDB
package import LanguageServerProtocol
import SKLogging
import SwiftExtensions
#else
import Foundation
@preconcurrency import IndexStoreDB
import LanguageServerProtocol
import SKLogging
import SwiftExtensions
#endif

/// Essentially a `DocumentManager` from the `SourceKitLSP` module.
///
/// Protocol is needed because the `SemanticIndex` module is lower-level than the `SourceKitLSP` module.
///
/// An instance of this protocol is carried by `IndexCheckLevel.inMemoryModifiedFiles` so that index results can be
/// checked against edits that have not been saved to disk yet.
package protocol InMemoryDocumentManager {
  /// Returns true if the file at the given URI has a different content in the document manager than on-disk. This is
  /// the case if the user made edits to the file but didn't save them yet.
  ///
  /// - Parameter uri: The document to check for unsaved edits.
  /// - Returns: `true` if the in-memory content differs from the on-disk content, `false` otherwise.
  func fileHasInMemoryModifications(_ uri: DocumentURI) -> Bool
}

/// Describes which kinds of changes to a source file cause index results for that file to be considered out-of-date.
///
/// The cases are ordered from least strict (`deletedFiles`) to most strict (`inMemoryModifiedFiles`).
package enum IndexCheckLevel {
  /// Consider the index out-of-date only if the source file has been deleted on disk.
  ///
  /// This is usually a good default because: When a file gets modified, it's likely that some of the line:column
  /// locations in it are still correct – e.g. if only one line is modified, or if lines are inserted/deleted, all
  /// locations above the edit are still correct.
  /// For locations that are out of date, showing stale results is one of the best ways of communicating to the user
  /// that the index is out-of-date and that they need to rebuild. We might want to reconsider this default when we have
  /// background indexing.
  case deletedFiles

  /// Consider the index out-of-date if the source file has been deleted or modified on disk.
  case modifiedFiles

  /// Consider the index out-of-date if the source file has been deleted or modified on disk or if there are
  /// in-memory modifications in the given `InMemoryDocumentManager`.
  case inMemoryModifiedFiles(InMemoryDocumentManager)
}

/// A wrapper around `IndexStoreDB` that checks if returned symbol occurrences are up-to-date with regard to a
/// `IndexCheckLevel`.
///
/// Occurrences that are not up-to-date according to the check level are filtered out of the results instead of being
/// reported to the caller.
///
/// - SeeAlso: Comment on `IndexOutOfDateChecker`
package final class CheckedIndex {
  /// Performs (and caches) the up-to-date checks for this index.
  private var checker: IndexOutOfDateChecker

  /// The raw index that all queries are forwarded to.
  private let index: IndexStoreDB

  /// Maps the USR of a symbol to its name and the name of all its containers, from outermost to innermost.
  ///
  /// It is important that we cache this because we might find a lot of symbols in the same container for e.g. workspace
  /// symbols (e.g. consider many symbols in the same C++ namespace). If we didn't cache this value, then we would need
  /// to perform a `primaryDefinitionOrDeclarationOccurrence` lookup for all of these containers, which is expensive.
  ///
  /// Since we don't expect `CheckedIndex` to outlive a single request it is acceptable to cache these results
  /// without having any invalidation logic (similar to how we don't invalidate results cached in
  /// `IndexOutOfDateChecker`).
  ///
  /// ### Examples
  /// If we have
  /// ```swift
  /// struct Foo {}
  /// ``` then
  /// `containerNamesCache[<usr of Foo>]` will be `["Foo"]`.
  ///
  /// If we have
  /// ```swift
  /// struct Bar {
  ///   struct Foo {}
  /// }
  /// ```, then
  /// `containerNamesCache[<usr of Foo>]` will be `["Bar", "Foo"]`.
  private var containerNamesCache: [String: [String]] = [:]

  /// Creates a checked view onto `index` that validates results according to `checkLevel`.
  ///
  /// Only reachable from within this file; use `UncheckedIndex.checked(for:)` to obtain an instance.
  fileprivate init(index: IndexStoreDB, checkLevel: IndexCheckLevel) {
    self.index = index
    self.checker = IndexOutOfDateChecker(checkLevel: checkLevel)
  }

  /// An `UncheckedIndex` wrapping the same underlying `IndexStoreDB`.
  package var unchecked: UncheckedIndex {
    return UncheckedIndex(index)
  }

  /// Calls `body` for every up-to-date occurrence of the symbol with the given USR that has one of `roles`.
  ///
  /// Occurrences that are out-of-date are skipped without invoking `body`. `body` returns `true` to continue the
  /// iteration and `false` to stop it. The result of the underlying `IndexStoreDB` iteration is returned unchanged.
  @discardableResult
  package func forEachSymbolOccurrence(
    byUSR usr: String,
    roles: SymbolRole,
    _ body: (SymbolOccurrence) -> Bool
  ) -> Bool {
    index.forEachSymbolOccurrence(byUSR: usr, roles: roles) { occurrence in
      guard self.checker.isUpToDate(occurrence.location) else {
        return true  // continue
      }
      return body(occurrence)
    }
  }

  /// Returns all up-to-date occurrences of the symbol with the given USR that have one of `roles`.
  package func occurrences(ofUSR usr: String, roles: SymbolRole) -> [SymbolOccurrence] {
    return index.occurrences(ofUSR: usr, roles: roles).filter { checker.isUpToDate($0.location) }
  }

  /// Returns all up-to-date occurrences that are related to the symbol with the given USR by one of `roles`.
  package func occurrences(relatedToUSR usr: String, roles: SymbolRole) -> [SymbolOccurrence] {
    return index.occurrences(relatedToUSR: usr, roles: roles).filter { checker.isUpToDate($0.location) }
  }

  /// Calls `body` for every up-to-date canonical symbol occurrence whose name matches `pattern`.
  ///
  /// The matching options are forwarded to `IndexStoreDB` unchanged. Out-of-date occurrences are skipped without
  /// invoking `body`. `body` returns `true` to continue the iteration and `false` to stop it.
  @discardableResult package func forEachCanonicalSymbolOccurrence(
    containing pattern: String,
    anchorStart: Bool,
    anchorEnd: Bool,
    subsequence: Bool,
    ignoreCase: Bool,
    body: (SymbolOccurrence) -> Bool
  ) -> Bool {
    index.forEachCanonicalSymbolOccurrence(
      containing: pattern,
      anchorStart: anchorStart,
      anchorEnd: anchorEnd,
      subsequence: subsequence,
      ignoreCase: ignoreCase
    ) { occurrence in
      guard self.checker.isUpToDate(occurrence.location) else {
        return true  // continue
      }
      return body(occurrence)
    }
  }

  /// Returns the symbols the index knows about in the file at `path`, or an empty array if the index does not have an
  /// up-to-date unit for that file.
  package func symbols(inFilePath path: String) -> [Symbol] {
    guard self.hasUpToDateUnit(for: DocumentURI(filePath: path, isDirectory: false)) else {
      return []
    }
    return index.symbols(inFilePath: path)
  }

  /// Returns all unit test symbol in unit files that reference one of the main files in `mainFilePaths`.
  package func unitTests(referencedByMainFiles mainFilePaths: [String]) -> [SymbolOccurrence] {
    return index.unitTests(referencedByMainFiles: mainFilePaths).filter { checker.isUpToDate($0.location) }
  }

  /// Returns all the files that (transitively) include the header file at the given path.
  ///
  /// Main files for which the index does not have an up-to-date unit are not returned.
  ///
  /// If `crossLanguage` is set to `true`, Swift files that import a header through a module will also be reported.
  package func mainFilesContainingFile(uri: DocumentURI, crossLanguage: Bool = false) -> [DocumentURI] {
    return index.mainFilesContainingFile(path: uri.pseudoPath, crossLanguage: crossLanguage).compactMap {
      let uri = DocumentURI(filePath: $0, isDirectory: false)
      guard checker.indexHasUpToDateUnit(for: uri, mainFile: nil, index: self.index) else {
        return nil
      }
      return uri
    }
  }

  /// Returns all unit test symbols in the index.
  package func unitTests() -> [SymbolOccurrence] {
    return index.unitTests().filter { checker.isUpToDate($0.location) }
  }

  /// Return `true` if a unit file has been indexed for the given file path after its last modification date.
  ///
  /// This means that at least a single build configuration of this file has been indexed since its last modification.
  ///
  /// If `mainFile` is passed, then `uri` is a header file that won't have a unit associated with it. `mainFile` is
  /// assumed to be a file that imports `uri`. To check that `uri` has an up-to-date unit, check that the latest unit
  /// for `mainFile` is newer than the mtime of the header file at `uri`.
  package func hasUpToDateUnit(for uri: DocumentURI, mainFile: DocumentURI? = nil) -> Bool {
    return checker.indexHasUpToDateUnit(for: uri, mainFile: mainFile, index: index)
  }

  /// Returns true if the file at the given URI has a different content in the document manager than on-disk. This is
  /// the case if the user made edits to the file but didn't save them yet.
  ///
  /// - Important: This must only be called on a `CheckedIndex` with a `checkLevel` of `inMemoryModifiedFiles`
  package func fileHasInMemoryModifications(_ uri: DocumentURI) -> Bool {
    return checker.fileHasInMemoryModifications(uri)
  }

  /// If there are any definition occurrences of the given USR, return these.
  /// Otherwise return declaration occurrences.
  package func definitionOrDeclarationOccurrences(ofUSR usr: String) -> [SymbolOccurrence] {
    let definitions = occurrences(ofUSR: usr, roles: [.definition])
    if !definitions.isEmpty {
      return definitions
    }
    return occurrences(ofUSR: usr, roles: [.declaration])
  }

  /// Find a `SymbolOccurrence` that is considered the primary definition of the symbol with the given USR.
  ///
  /// If the USR has an ambiguous definition, the most important role of this function is to deterministically return
  /// the same result every time.
  ///
  /// - Returns: The first definition (or, failing that, declaration) occurrence in sort order, or `nil` if the index
  ///   has no up-to-date definition or declaration for `usr`, in which case an error is logged.
  package func primaryDefinitionOrDeclarationOccurrence(ofUSR usr: String) -> SymbolOccurrence? {
    let result = definitionOrDeclarationOccurrences(ofUSR: usr).sorted().first
    if result == nil {
      logger.error("Failed to find definition of \(usr) in index")
    }
    return result
  }

  /// The names of all containers the symbol is contained in, from outermost to innermost.
  ///
  /// Members of an extension are reported with the name of the extended type as their container.
  ///
  /// ### Examples
  /// In the following, the container names of `test` are `["Foo"]`.
  /// ```swift
  /// struct Foo {
  ///   func test() {}
  /// }
  /// ```
  ///
  /// In the following, the container names of `test` are `["Bar", "Foo"]`.
  /// ```swift
  /// struct Bar {
  ///   struct Foo {
  ///     func test() {}
  ///   }
  /// }
  /// ```
  package func containerNames(of symbol: SymbolOccurrence) -> [String] {
    // The container name of accessors is the container of the surrounding variable.
    let accessorOf = symbol.relations.filter { $0.roles.contains(.accessorOf) }
    if let primaryVariable = accessorOf.sorted().first {
      if accessorOf.count > 1 {
        logger.fault("Expected an occurrence to an accessor of at most one symbol, not multiple")
      }
      if let primaryVariable = primaryDefinitionOrDeclarationOccurrence(ofUSR: primaryVariable.symbol.usr) {
        return containerNames(of: primaryVariable)
      }
    }

    let containers = symbol.relations.filter { $0.roles.contains(.childOf) }
    if containers.count > 1 {
      logger.fault("Expected an occurrence to a child of at most one symbol, not multiple")
    }
    // Only type-like and namespace-like symbols count as containers. Sort before picking the first one so that the
    // result is deterministic even if the index unexpectedly reports multiple parents.
    let container = containers.filter {
      switch $0.symbol.kind {
      case .module, .namespace, .enum, .struct, .class, .protocol, .extension, .union:
        return true
      case .unknown, .namespaceAlias, .macro, .typealias, .function, .variable, .field, .enumConstant,
        .instanceMethod, .classMethod, .staticMethod, .instanceProperty, .classProperty, .staticProperty, .constructor,
        .destructor, .conversionFunction, .parameter, .using, .concept, .commentTag:
        return false
      }
    }.sorted().first

    guard let directContainer = container?.symbol else {
      return []
    }
    if let cached = containerNamesCache[directContainer.usr] {
      return cached
    }

    // For extensions, report the extended type instead of the extension itself.
    var containerSymbol = directContainer
    if directContainer.kind == .extension,
      let extendedSymbol = self.occurrences(relatedToUSR: directContainer.usr, roles: .extendedBy).first?.symbol
    {
      containerSymbol = extendedSymbol
    }
    let result: [String]

    // Use `forEachSymbolOccurrence` instead of `primaryDefinitionOrDeclarationOccurrence` to get a symbol occurrence
    // for the container because it can be significantly faster: E.g. when searching for a C++ namespace (such as
    // `llvm`), it may be declared in many files. Finding the canonical definition means that we would need to scan
    // through all of these files. But we expect all of these declarations to have the same parent container names and
    // we don't care about locations here.
    var containerDefinition: SymbolOccurrence?
    forEachSymbolOccurrence(byUSR: containerSymbol.usr, roles: [.definition, .declaration]) { occurrence in
      containerDefinition = occurrence
      return false  // stop iteration
    }
    if let containerDefinition {
      result = self.containerNames(of: containerDefinition) + [containerSymbol.name]
    } else {
      result = [containerSymbol.name]
    }
    // Cache under the resolved container's USR and under the USR that was used for the lookup above. These differ when
    // the direct container is an extension; without the second entry, members of extensions would never hit the cache.
    containerNamesCache[containerSymbol.usr] = result
    containerNamesCache[directContainer.usr] = result
    return result
  }
}

/// Thin, `Sendable` handle to an `IndexStoreDB` that forces callers to state how they want to access the index.
///
/// Callers either request a `CheckedIndex` for a specific check level via `checked(for:)`, or they explicitly reach
/// for the raw database through `underlyingIndexStoreDB`. This prevents accidentally querying the raw `IndexStoreDB`
/// when a `CheckedIndex` was intended.
package struct UncheckedIndex: Sendable {
  /// The raw index database. Accessing it bypasses all up-to-date checks.
  package let underlyingIndexStoreDB: IndexStoreDB

  /// Wraps `index`, or fails with `nil` if no index was provided.
  package init?(_ index: IndexStoreDB?) {
    if let index {
      underlyingIndexStoreDB = index
    } else {
      return nil
    }
  }

  /// Wraps `index`.
  package init(_ index: IndexStoreDB) {
    underlyingIndexStoreDB = index
  }

  /// Creates a `CheckedIndex` on top of the wrapped database that validates results according to `checkLevel`.
  package func checked(for checkLevel: IndexCheckLevel) -> CheckedIndex {
    CheckedIndex(index: underlyingIndexStoreDB, checkLevel: checkLevel)
  }

  /// Wait for IndexStoreDB to be updated based on new unit files written to disk.
  package func pollForUnitChangesAndWait() {
    underlyingIndexStoreDB.pollForUnitChangesAndWait()
  }
}

/// Helper type to check if symbols from the index are up-to-date or if the source file has been modified after it was
/// indexed. Modifications include both changes to the file on disk as well as modifications to the file that have not
/// been saved to disk (ie. changes that only live in `DocumentManager`).
///
/// The checker caches mod dates of source files. It should thus not be long lived. Its intended lifespan is the
/// evaluation of a single request.
private struct IndexOutOfDateChecker {
  /// Determines which kinds of modifications cause a file to be considered out-of-date.
  private let checkLevel: IndexCheckLevel

  /// The last modification time of a file. Can also represent the fact that the file does not exist.
  private enum ModificationTime {
    case fileDoesNotExist
    case date(Date)
  }

  private enum Error: Swift.Error, CustomStringConvertible {
    case fileAttributesDontHaveModificationDate

    var description: String {
      switch self {
      case .fileAttributesDontHaveModificationDate:
        return "File attributes don't contain a modification date"
      }
    }
  }

  /// The maximum number of symlinks that are followed when computing the modification date of a file.
  ///
  /// The limit exists so that cyclic symlinks (eg. `a -> b -> a`) cannot cause an endless loop. The value is in the
  /// same ballpark as the limits operating systems apply when resolving paths.
  private static let maxSymlinkHops = 40

  /// Caches whether a document has modifications in `documentManager` that haven't been saved to disk yet.
  private var fileHasInMemoryModificationsCache: [DocumentURI: Bool] = [:]

  /// Document URIs to modification times that have already been computed.
  private var modTimeCache: [DocumentURI: ModificationTime] = [:]

  /// Document URIs to whether they exist on the file system
  private var fileExistsCache: [DocumentURI: Bool] = [:]

  init(checkLevel: IndexCheckLevel) {
    self.checkLevel = checkLevel
  }

  // MARK: - Public interface

  /// Returns `true` if the source file for the given symbol location exists and has not been modified after it has been
  /// indexed.
  ///
  /// What counts as a modification depends on the check level:
  ///  - `deletedFiles`: Only checks that the file exists.
  ///  - `modifiedFiles`: Additionally requires the file's modification date to be no later than the timestamp of
  ///    `symbolLocation`.
  ///  - `inMemoryModifiedFiles`: Additionally requires the file to have no in-memory modifications.
  ///
  /// If the modification date cannot be determined, a fault is logged and the location is assumed to be up-to-date.
  mutating func isUpToDate(_ symbolLocation: SymbolLocation) -> Bool {
    let uri = DocumentURI(filePath: symbolLocation.path, isDirectory: false)
    switch checkLevel {
    case .inMemoryModifiedFiles(let documentManager):
      if fileHasInMemoryModifications(uri, documentManager: documentManager) {
        return false
      }
      // No in-memory modifications, continue with the on-disk checks.
      fallthrough
    case .modifiedFiles:
      do {
        let sourceFileModificationDate = try modificationDate(of: uri)
        switch sourceFileModificationDate {
        case .fileDoesNotExist:
          return false
        case .date(let sourceFileModificationDate):
          return sourceFileModificationDate <= symbolLocation.timestamp
        }
      } catch {
        logger.fault("Unable to determine if SymbolLocation is up-to-date: \(error.forLogging)")
        return true
      }
    case .deletedFiles:
      return fileExists(at: uri)
    }
  }

  /// Return `true` if a unit file has been indexed for the given file path after its last modification date.
  ///
  /// This means that at least a single build configuration of this file has been indexed since its last modification.
  ///
  /// If `mainFile` is passed, then `filePath` is a header file that won't have a unit associated with it. `mainFile` is
  /// assumed to be a file that imports `filePath`. To check that `filePath` has an up-to-date unit, check that the
  /// latest unit for `mainFile` is newer than the mtime of the header file at `filePath`.
  ///
  /// With a check level of `deletedFiles`, this always returns `true`.
  mutating func indexHasUpToDateUnit(for filePath: DocumentURI, mainFile: DocumentURI?, index: IndexStoreDB) -> Bool {
    switch checkLevel {
    case .inMemoryModifiedFiles(let documentManager):
      if fileHasInMemoryModifications(filePath, documentManager: documentManager) {
        // If there are in-memory modifications to the file, we can't have an up-to-date unit since we only index files
        // on disk.
        return false
      }
      // If there are no in-memory modifications check if there are on-disk modifications.
      fallthrough
    case .modifiedFiles:
      // The unit is looked up for the main file if there is one, while the modification date below is always that of
      // `filePath` itself.
      guard
        let filePathStr = orLog("Realpath for up-to-date", { try (mainFile ?? filePath).fileURL?.realpath.filePath }),
        let lastUnitDate = index.dateOfLatestUnitFor(filePath: filePathStr)
      else {
        return false
      }
      do {
        let sourceModificationDate = try modificationDate(of: filePath)
        switch sourceModificationDate {
        case .fileDoesNotExist:
          return false
        case .date(let sourceModificationDate):
          return sourceModificationDate <= lastUnitDate
        }
      } catch {
        logger.fault("Unable to determine if source file has up-to-date unit: \(error.forLogging)")
        return true
      }
    case .deletedFiles:
      // If we are asked if the index has an up-to-date unit for a source file, we can reasonably assume that this
      // source file exists (otherwise, why are we doing the query at all). Thus, there's nothing to check here.
      return true
    }
  }

  // MARK: - Cached check primitives

  /// Cached version of `InMemoryDocumentManager.fileHasInMemoryModifications`.
  ///
  /// `documentManager` must always be the same between calls to `fileHasInMemoryModifications` since it is not part of
  /// the cache key. This is fine because we always assume the `documentManager` to come from the associated value of
  /// `IndexCheckLevel.inMemoryModifiedFiles`, which is constant.
  private mutating func fileHasInMemoryModifications(
    _ uri: DocumentURI,
    documentManager: InMemoryDocumentManager
  ) -> Bool {
    if let cached = fileHasInMemoryModificationsCache[uri] {
      return cached
    }
    let hasInMemoryModifications = documentManager.fileHasInMemoryModifications(uri)
    fileHasInMemoryModificationsCache[uri] = hasInMemoryModifications
    return hasInMemoryModifications
  }

  /// Returns true if the file at the given URI has a different content in the document manager than on-disk. This is
  /// the case if the user made edits to the file but didn't save them yet.
  ///
  /// If the check level is not `inMemoryModifiedFiles`, a fault is logged and `false` is returned.
  ///
  /// - Important: This must only be called on an `IndexOutOfDateChecker` with a `checkLevel` of `inMemoryModifiedFiles`
  mutating func fileHasInMemoryModifications(_ uri: DocumentURI) -> Bool {
    switch checkLevel {
    case .inMemoryModifiedFiles(let documentManager):
      return fileHasInMemoryModifications(uri, documentManager: documentManager)
    case .modifiedFiles, .deletedFiles:
      logger.fault(
        "fileHasInMemoryModifications(at:) must only be called on an `IndexOutOfDateChecker` with check level .inMemoryModifiedFiles"
      )
      return false
    }
  }

  /// Returns the modification date stored in the file attributes of the item at `path`.
  ///
  /// - Throws: Any error thrown by `FileManager.attributesOfItem(atPath:)`, or
  ///   `Error.fileAttributesDontHaveModificationDate` if the attributes don't contain a modification date.
  private static func modificationDate(atPath path: String) throws -> Date {
    let attributes = try FileManager.default.attributesOfItem(atPath: path)
    guard let modificationDate = attributes[FileAttributeKey.modificationDate] as? Date else {
      throw Error.fileAttributesDontHaveModificationDate
    }
    return modificationDate
  }

  /// Computes the modification time of the file at `uri` without consulting `modTimeCache`.
  ///
  /// Returns `.fileDoesNotExist` if `uri` is not a file URL or if a file in the symlink chain does not exist. Other
  /// errors are thrown to the caller.
  private func modificationDateUncached(of uri: DocumentURI) throws -> ModificationTime {
    do {
      guard var fileURL = uri.fileURL else {
        return .fileDoesNotExist
      }
      var modificationDate = try Self.modificationDate(atPath: fileURL.filePath)

      // Get the maximum mtime in the symlink chain as the modification date of the URI. That way if either the symlink
      // is changed to point to a different file or if the underlying file is modified, the modification time is
      // updated.
      var followedSymlinks = 0
      while let relativeSymlinkDestination = try? FileManager.default.destinationOfSymbolicLink(
        atPath: fileURL.filePath
      ),
        let symlinkDestination = URL(string: relativeSymlinkDestination, relativeTo: fileURL)
      {
        // Reading the attributes of a symlink doesn't traverse it, so a cyclic symlink chain never produces an error
        // on its own. Bound the number of hops to guarantee termination and use the newest mtime found so far.
        guard followedSymlinks < Self.maxSymlinkHops else {
          logger.error("Too many levels of symbolic links while determining modification date of \(uri.pseudoPath)")
          break
        }
        followedSymlinks += 1
        fileURL = symlinkDestination
        modificationDate = max(modificationDate, try Self.modificationDate(atPath: fileURL.filePath))
      }

      return .date(modificationDate)
    } catch let error as NSError where error.domain == NSCocoaErrorDomain && error.code == NSFileReadNoSuchFileError {
      return .fileDoesNotExist
    }
  }

  /// Cached version of `modificationDateUncached(of:)`. Only successful results are cached; errors are rethrown and
  /// recomputed on the next call.
  private mutating func modificationDate(of uri: DocumentURI) throws -> ModificationTime {
    if let cached = modTimeCache[uri] {
      return cached
    }
    let modTime = try modificationDateUncached(of: uri)
    modTimeCache[uri] = modTime
    return modTime
  }

  /// Returns whether a file exists at `uri`, caching the answer. URIs that are not file URLs are reported as
  /// non-existent.
  private mutating func fileExists(at uri: DocumentURI) -> Bool {
    if let cached = fileExistsCache[uri] {
      return cached
    }
    let fileExists =
      if let fileUrl = uri.fileURL {
        FileManager.default.fileExists(at: fileUrl)
      } else {
        false
      }
    fileExistsCache[uri] = fileExists
    return fileExists
  }
}