1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2024 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
import Foundation
/// Turns symbol reference frequencies into `Popularity` bonuses and penalties for completions.
///
/// The index stores one score component per symbol and one per module. `popularity(of:)` combines
/// the two into a `Popularity` value.
package struct PopularityIndex {
  /// Identifies where a symbol lives: a module plus an optional container inside that module.
  ///
  /// Examples
  ///  * `Swift.Array.append(:)` lives in `Scope(container: "Array", module: "Swift")`
  ///  * `Swift.Array` lives in `Scope(container: nil, module: "Swift")`
  ///
  /// The spelling of `container` is neither validated nor normalized by this type. Clients pick the
  /// level of precision (`[String]`, `Array<String>` or plain `Array`) and may model inner types with
  /// strings such as `List.Node`. All that matters is that a given type is always spelled the same way.
  package struct Scope: Hashable {
    package var container: String?
    package var module: String

    package init(container: String?, module: String) {
      self.container = container
      self.module = module
    }
  }

  /// A name paired with the scope it is declared in.
  ///
  /// Examples
  ///  * `Swift.Array.append(:)` is
  ///    `Symbol(name: "append(:)", scope: Scope(container: "Array", module: "Swift"))`
  ///  * `Swift.Array` is
  ///    `Symbol(name: "Array", scope: Scope(container: nil, module: "Swift"))`
  ///
  /// The spelling of `name` is up to the client as long as it is consistent. Including parameter
  /// types in the name tracks overloads separately, while using only the function base name merges
  /// all related methods into one entry.
  package struct Symbol: Hashable {
    package var name: String
    package var scope: Scope

    package init(name: String, scope: Scope) {
      self.scope = scope
      self.name = name
    }
  }

  /// Score components recorded for individual symbols.
  package private(set) var symbolPopularity: [Symbol: PopularityScoreComponent] = [:]

  /// Score components recorded for whole modules, keyed by module name.
  package private(set) var modulePopularity: [String: PopularityScoreComponent] = [:]

  /// The scopes that appeared as keys in the reference percentages passed to the initializer.
  private var knownScopes = Set<Scope>()

  /// Reports whether `scope` was part of the reference data this index was built from.
  ///
  /// Clients can use this to find a relevant `Scope` with which to construct a `Symbol` to pass to
  /// `popularity(of:)`.
  package func isKnownScope(_ scope: Scope) -> Bool {
    knownScopes.contains(scope)
  }

  /// Builds an index from reference data and manually curated lists.
  ///
  /// - Parameters:
  ///   - `symbolReferencePercentages`: Symbol reference percentages per scope.
  ///     For example, if the data that produced the symbol reference percentages had 1 call to
  ///     `Array.append(:)`, 3 calls to `Array.count`, and 1 call to `String.append(:)`, the table
  ///     would be:
  ///     ```
  ///     [
  ///       "Swift.Array" : [
  ///         "append(:)" : 0.25,
  ///         "count" : 0.75
  ///       ],
  ///       "Swift.String" : [
  ///         "append(:)" : 1.0
  ///       ]
  ///     ]
  ///     ```
  ///   - `notoriousSymbols`: Symbols whose score is capped at the component used for deprecated
  ///     availability.
  ///   - `popularModules`: Modules whose score is raised to at least the `.index` component for
  ///     probability `0.0`.
  ///   - `notoriousModules`: Modules whose score is capped at the component used for deprecated
  ///     availability.
  package init(
    symbolReferencePercentages: [Scope: [String: Double]],
    notoriousSymbols: [Symbol],
    popularModules: [String],
    notoriousModules: [String]
  ) {
    knownScopes = Set(symbolReferencePercentages.keys)

    // Bonuses first ...
    raisePopularities(symbolReferencePercentages: symbolReferencePercentages)
    raisePopularities(popularModules: popularModules)

    // ... penalties last, so that a manual penalty always overrides whatever the data suggested.
    lowerPopularities(notoriousModules: notoriousModules)
    lowerPopularities(notoriousSymbols: notoriousSymbols)
  }

  /// Creates an empty index. Deserialization in this file starts from this and fills in the tables.
  fileprivate init() {}

  /// Raises the score of every referenced symbol according to how often it is used relative to the
  /// most referenced symbol of the same scope.
  private mutating func raisePopularities(symbolReferencePercentages: [Scope: [String: Double]]) {
    for (scope, percentagesByName) in symbolReferencePercentages {
      // A scope without any entries has no maximum and contributes nothing.
      guard let scopeMaximum = percentagesByName.values.max() else {
        continue
      }
      for (name, percentage) in percentagesByName {
        // Relative to the scope's most referenced symbol; expected to be in 0...1.
        // NOTE(review): a scope whose largest percentage is 0 makes this a division by zero —
        // confirm that callers only pass positive percentages.
        let relativeFrequency = percentage / scopeMaximum
        // The fourth root flattens the curve so the top symbol doesn't take all.
        let probability = pow(relativeFrequency, 0.25)
        symbolPopularity.raise(
          Symbol(name: name, scope: scope),
          toAtLeast: Popularity.scoreComponent(probability: probability, category: .index)
        )
      }
    }
  }

  /// Caps the score of each listed symbol at the component used for deprecated availability.
  private mutating func lowerPopularities(notoriousSymbols: [Symbol]) {
    let ceiling = Availability.deprecated.scoreComponent
    symbolPopularity.lower(notoriousSymbols, toAtMost: ceiling)
  }

  /// Caps the score of each listed module at the component used for deprecated availability.
  private mutating func lowerPopularities(notoriousModules: [String]) {
    let ceiling = Availability.deprecated.scoreComponent
    modulePopularity.lower(notoriousModules, toAtMost: ceiling)
  }

  /// Raises the score of each listed module to at least the `.index` component for probability `0.0`.
  private mutating func raisePopularities(popularModules: [String]) {
    let floor = Popularity.scoreComponent(probability: 0.0, category: .index)
    modulePopularity.raise(popularModules, toAtLeast: floor)
  }

  /// Looks up the popularity of `symbol`.
  ///
  /// The result combines the component stored for the symbol itself with the component stored for
  /// its module. Either one falls back to `.none` when the index has no entry for it.
  package func popularity(of symbol: Symbol) -> Popularity {
    let symbolComponent = symbolPopularity[symbol] ?? .none
    let moduleComponent = modulePopularity[symbol.scope.module] ?? .none
    return Popularity(symbolComponent: symbolComponent.value, moduleComponent: moduleComponent.value)
  }
}
/// Helpers for clamping the score components stored in a popularity table.
fileprivate extension Dictionary where Value == PopularityScoreComponent {
  /// Ensures the component stored for `key` is at least `minimum`.
  ///
  /// A missing entry is treated as negative infinity, so it is created unless `minimum` does not
  /// compare greater than that.
  mutating func raise(_ key: Key, toAtLeast minimum: Double) {
    let current = self[key]?.value ?? -Double.infinity
    guard current < minimum else {
      return
    }
    self[key] = PopularityScoreComponent(value: minimum)
  }

  /// Ensures the component stored for `key` is at most `maximum`.
  ///
  /// A missing entry is treated as positive infinity, so it is created unless `maximum` does not
  /// compare less than that.
  mutating func lower(_ key: Key, toAtMost maximum: Double) {
    let current = self[key]?.value ?? Double.infinity
    guard current > maximum else {
      return
    }
    self[key] = PopularityScoreComponent(value: maximum)
  }

  /// Applies `raise(_:toAtLeast:)` to every key in `keys`.
  mutating func raise(_ keys: [Key], toAtLeast minimum: Double) {
    for candidate in keys {
      raise(candidate, toAtLeast: minimum)
    }
  }

  /// Applies `lower(_:toAtMost:)` to every key in `keys`.
  mutating func lower(_ keys: [Key], toAtMost maximum: Double) {
    for candidate in keys {
      lower(candidate, toAtMost: maximum)
    }
  }
}
/// Implements coding with `BinaryCodable` without signing up `PopularityIndex` or its nested
/// package types for the conformance.
extension PopularityIndex {
  /// Identifies the layout of the serialized bytes.
  package enum SerializationVersion: Int {
    case initial
  }

  /// Private wrapper that carries the `BinaryCodable` conformance on behalf of `Symbol`.
  private struct SerializableSymbol: Hashable, BinaryCodable {
    var symbol: Symbol

    init(symbol: Symbol) {
      self.symbol = symbol
    }

    /// Reads the fields in the same order `encode(_:)` writes them: name, container, module.
    init(_ decoder: inout BinaryDecoder) throws {
      let decodedName = try String(&decoder)
      let decodedContainer = try String?(&decoder)
      let decodedModule = try String(&decoder)
      let decodedScope = Scope(container: decodedContainer, module: decodedModule)
      self.symbol = Symbol(name: decodedName, scope: decodedScope)
    }

    /// Writes name, container and module, in that order.
    func encode(_ encoder: inout BinaryEncoder) {
      let scope = symbol.scope
      encoder.write(symbol.name)
      encoder.write(scope.container)
      encoder.write(scope.module)
    }
  }

  /// Encodes the symbol table followed by the module table, tagged with `version`.
  package func serialize(version: SerializationVersion) -> [UInt8] {
    return BinaryEncoder.encode(contentVersion: version.rawValue) { encoder in
      let serializableSymbols = symbolPopularity.mapKeys(
        overwritingDuplicates: .affirmative,
        SerializableSymbol.init(symbol:)
      )
      encoder.write(serializableSymbols)
      encoder.write(modulePopularity)
    }
  }

  /// Rebuilds an index from bytes produced by `serialize(version:)`.
  ///
  /// - Throws: A `GenericError` when the content version is not a known `SerializationVersion`,
  ///   or whatever error decoding the tables throws.
  package static func deserialize(data serialization: [UInt8]) throws -> Self {
    return try BinaryDecoder.decode(bytes: serialization) { decoder in
      guard let version = SerializationVersion(rawValue: decoder.contentVersion) else {
        throw GenericError("Unknown \(String(describing: self)) serialization format")
      }
      switch version {
      case .initial:
        // Tables are read back in the order they were written: symbols first, then modules.
        var restored = Self()
        restored.symbolPopularity = try [SerializableSymbol: PopularityScoreComponent](&decoder).mapKeys(
          overwritingDuplicates: .affirmative,
          \.symbol
        )
        restored.modulePopularity = try [String: PopularityScoreComponent](&decoder)
        return restored
      }
    }
  }
}
|