File: WordCount.swift

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (277 lines) | stat: -rw-r--r-- 9,241 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
//===--- WordCount.swift --------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import TestsUtils

//
// Mini benchmark counting words in a longer string.
// Measures performance of
//  - Iterating over the Characters in a String
//  - Extracting short substrings as Strings
//  - Set<Character> lookup performance
//  - Set<String> init from sequence of short Strings, with duplicates
//  - Uniquing initializer for Dictionary with short string keys
//

// Registration table for the benchmarks defined in this file. Each entry binds
// a benchmark name to its run function and tags. Every entry passes
// `buildWorkload()` as its set-up function, which touches the shared workload
// globals (presumably so their one-time initialization is not part of the
// timed run — confirm against the TestsUtils driver).
// NOTE(review): `legacyFactor` is interpreted by TestsUtils, not by this file.
// The WordCountUnique* run functions loop `10*n` times while the others loop
// `n` times; check how the factor is applied before changing either.
public let benchmarks = [
  BenchmarkInfo(
    name: "WordSplitASCII",
    runFunction: run_WordSplitASCII,
    tags: [.validation, .api, .String, .algorithm, .unstable],
    setUpFunction: { buildWorkload() },
    legacyFactor: 10
  ),
  BenchmarkInfo(
    name: "WordSplitUTF16",
    runFunction: run_WordSplitUTF16,
    tags: [.validation, .api, .String, .algorithm, .unstable],
    setUpFunction: { buildWorkload() },
    legacyFactor: 10
  ),
  BenchmarkInfo(
    name: "WordCountUniqueASCII",
    runFunction: run_WordCountUniqueASCII,
    tags: [.validation, .api, .String, .Dictionary, .algorithm],
    setUpFunction: { buildWorkload() },
    legacyFactor: 10
  ),
  BenchmarkInfo(
    name: "WordCountUniqueUTF16",
    runFunction: run_WordCountUniqueUTF16,
    tags: [.validation, .api, .String, .Dictionary, .algorithm],
    setUpFunction: { buildWorkload() },
    legacyFactor: 10
  ),
  BenchmarkInfo(
    name: "WordCountHistogramASCII",
    runFunction: run_WordCountHistogramASCII,
    tags: [.validation, .api, .String, .Dictionary, .algorithm],
    setUpFunction: { buildWorkload() },
    legacyFactor: 100
  ),
  BenchmarkInfo(
    name: "WordCountHistogramUTF16",
    runFunction: run_WordCountHistogramUTF16,
    tags: [.validation, .api, .String, .Dictionary, .algorithm],
    setUpFunction: { buildWorkload() },
    legacyFactor: 100
  ),
]

// Workload text for the *ASCII benchmarks. The run functions in this file
// hard-code facts about it: it must split into 280 words, 168 of them
// distinct, with "and" being the most frequent (15 occurrences). Any edit to
// the text has to keep those checks in sync.
let asciiText = """
**Welcome to Swift!**

Swift is a high-performance system programming language.  It has a clean and
modern syntax, offers seamless access to existing C and Objective-C code and
frameworks, and is memory safe by default.

Although inspired by Objective-C and many other languages, Swift is not itself a
C-derived language. As a complete and independent language, Swift packages core
features like flow control, data structures, and functions, with high-level
constructs like objects, protocols, closures, and generics. Swift embraces
modules, eliminating the need for headers and the code duplication they entail.

To learn more about the programming language, visit swift.org.

## Contributing to Swift

Contributions to Swift are welcomed and encouraged! Please see the
Contributing to Swift guide.

To be a truly great community, Swift.org needs to welcome developers from all
walks of life, with different backgrounds, and with a wide range of
experience. A diverse and friendly community will have more great ideas, more
unique perspectives, and produce more great code. We will work diligently to
make the Swift community welcoming to everyone.

To give clarity of what is expected of our members, Swift has adopted the code
of conduct defined by the Contributor Covenant. This document is used across
many open source communities, and we think it articulates our values well. For
more, see the Code of Conduct.

## Getting Started

These instructions give the most direct path to a working Swift development
environment. To build from source you will need 2 GB of disk space for the
source code and over 20 GB of disk space for the build artifacts. A clean build
can take multiple hours, but incremental builds will finish much faster.
"""

// Workload text for the *UTF16 benchmarks: a variant of the `asciiText` prose
// with emoji and other symbols, accented letters, and letters followed by a
// combining mark written as a `\u{30A}` escape. The run functions assert the
// same figures as for `asciiText` (280 words, 168 distinct, "and" x 15), so
// every non-ASCII letter used here has to remain a member of
// `someAlphanumerics` and the symbols have to remain separators.
let utf16Text = """
✨🌟 Welcome tö Swift! ⭐️✨

Swift is a high-performance system programming language.  It has a clean and
modern syntax, offers seamless access tö existing C and Objective-C code and
frameworks, and is memory safe by default.

Although inspired by Objective-C and many othér languages, Swift is not itself a
C-derived language. As a complete and independent language, Swift packages core
features li\u{30A}ke flow control, data structures, and functions, with
high-level constructs li\u{30A}ke objects, protöcols, closures, and
generics. Swift embraces modules, eliminating thé need for headers and thé code
duplication théy entail.

Tö learn more about thé programming language, visit swift.org.

☞ Contributing tö Swift

Contributions tö Swift are welcomed and encouraged! Please see thé
Contributing tö Swift guide.

Tö be a truly great community, Swift.org needs tö welcome developers from all
walks of life, with different backgrounds, and with a wide range of
experience. A diverse and friendly community will have more great ideas, more
unique perspectives, and produce more great code. We will work diligently tö
make thé Swift community welcoming tö everyone.

Tö give clarity of what is expected of our members, Swift has adopted thé code
of conduct defined by thé Contributör Covenant. This document is used across
many open source communities, and we think it articulates our values well. For
more, see thé Code of Conduct.

☞ Getting Started

Thése instructions give thé most direct path tö a working Swift development
environment. Tö build from source you will need 2 GB of disk space for thé
source code and over 20 GB of disk space for thé build artifacts. A clean build
can take multiple hours, but incremental builds will finish much faster.
"""

// Set-up hook used by every entry in `benchmarks`. It reads the three shared
// workload globals and hands each one to `blackHole` (from TestsUtils), so
// they are evaluated here instead of on first use inside a run function
// (presumably to keep their initialization out of the measurement — confirm).
@inline(never)
func buildWorkload() {
  blackHole(someAlphanumerics)
  blackHole(asciiWords)
  blackHole(utf16Words)
}

// The "word character" set used by this benchmark. It is a deliberately small
// slice of Unicode's alphanumerics: the 52 ASCII letters, every ASCII letter
// followed by exactly one combining mark from U+0300..<U+0310, and the ten
// ASCII digits.
let someAlphanumerics: Set<Character> = {
  let letters =
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".unicodeScalars
  // Every value in this range is a valid scalar, so nothing is dropped.
  let marks: [Unicode.Scalar] =
    (UInt32(0x300)..<UInt32(0x310)).compactMap { Unicode.Scalar($0) }

  var result: Set<Character> = Set("0123456789")
  for letter in letters {
    // The bare letter...
    let plain = Character(letter)
    result.insert(plain)
    // ...and the letter decorated with each mark. Base + combining mark forms
    // a single grapheme cluster, so each pair is exactly one Character.
    for mark in marks {
      var decorated = String(plain)
      decorated.unicodeScalars.append(mark)
      result.insert(Character(decorated))
    }
  }
  return result
}()

extension Character {
  /// `true` when this character is a member of `someAlphanumerics`, the
  /// limited alphanumeric set this benchmark uses to delimit words.
  var isAlphanumeric: Bool { someAlphanumerics.contains(self) }
}

/// Splits `text` into words. A word is a maximal run of characters for which
/// `Character.isAlphanumeric` (declared in this file) is true; every other
/// character is treated as a separator and skipped.
///
/// The type is both the sequence and its own iterator, so iteration state
/// (`nextIndex`) lives directly in the value.
struct Words: IteratorProtocol, Sequence {
  public typealias Iterator = Words

  /// The string being split.
  let text: String
  /// Position in `text` at which the next call to `next()` resumes scanning.
  var nextIndex: String.Index

  /// Creates a splitter positioned at the start of `text`.
  init(_ text: String) {
    self.text = text
    self.nextIndex = text.startIndex
  }

  /// Returns the next word as a new `String`, or `nil` when no word remains
  /// between `nextIndex` and the end of `text`.
  mutating func next() -> String? {
    // Phase 1: skip any separators in front of the next word.
    while nextIndex != text.endIndex && !text[nextIndex].isAlphanumeric {
      text.formIndex(after: &nextIndex)
    }
    let start = nextIndex
    // Phase 2: advance over the word itself.
    while nextIndex != text.endIndex && text[nextIndex].isAlphanumeric {
      text.formIndex(after: &nextIndex)
    }
    // An empty range means the end of the text was reached without a word.
    guard start < nextIndex else { return nil }
    // Copy the slice into a standalone String; the file header lists this
    // substring extraction as one of the things the benchmark measures.
    return String(text[start..<nextIndex])
  }
}

/// Benchmark body: `n` times, splits `asciiText` into words with `Words`,
/// collects them into an array and checks that 280 words were found.
///
/// - Parameter n: Iteration count; must be at least 1, because the closed
///   range `1...n` traps when `n` is smaller than its lower bound.
@inline(never)
public func run_WordSplitASCII(_ n: Int) {
  for _ in 1...n {
    // `identity`, `check` and `blackHole` come from TestsUtils; presumably
    // `identity`/`blackHole` stop the optimizer from hoisting or discarding
    // the work — confirm there.
    let words = Array(Words(identity(asciiText)))
    check(words.count == 280)
    blackHole(words)
  }
}

/// Benchmark body: `n` times, splits `utf16Text` into words with `Words`,
/// collects them into an array and checks that 280 words were found (the same
/// count as for the ASCII text).
///
/// - Parameter n: Iteration count; must be at least 1, because the closed
///   range `1...n` traps when `n` is smaller than its lower bound.
@inline(never)
public func run_WordSplitUTF16(_ n: Int) {
  for _ in 1...n {
    // `identity`, `check` and `blackHole` come from TestsUtils; presumably
    // `identity`/`blackHole` stop the optimizer from hoisting or discarding
    // the work — confirm there.
    let words = Array(Words(identity(utf16Text)))
    check(words.count == 280)
    blackHole(words)
  }
}

// Pre-split word lists (duplicates included) used as input by the
// WordCountUnique* and WordCountHistogram* benchmarks, so that those run
// functions do not perform the splitting themselves. `buildWorkload()` reads
// both of them during set-up.
let asciiWords = Array(Words(asciiText))
let utf16Words = Array(Words(utf16Text))

/// Benchmark body: builds a `Set` from `asciiWords` and checks that it holds
/// 168 distinct words. Note the loop runs `10*n` times, not `n`.
///
/// - Parameter n: Iteration scale; must be at least 1, because the closed
///   range `1...10*n` traps when its upper bound is below 1.
@inline(never)
public func run_WordCountUniqueASCII(_ n: Int) {
  for _ in 1...10*n {
    let words = Set(identity(asciiWords))
    check(words.count == 168)
    blackHole(words)
  }
}

/// Benchmark body: builds a `Set` from `utf16Words` and checks that it holds
/// 168 distinct words. Note the loop runs `10*n` times, not `n`.
///
/// - Parameter n: Iteration scale; must be at least 1, because the closed
///   range `1...10*n` traps when its upper bound is below 1.
@inline(never)
public func run_WordCountUniqueUTF16(_ n: Int) {
  for _ in 1...10*n {
    let words = Set(identity(utf16Words))
    check(words.count == 168)
    blackHole(words)
  }
}

/// Returns every distinct word in the supplied sequence of words, paired with
/// its number of occurrences.
///
/// The result is sorted by decreasing count; words with the same count are
/// ordered by ascending `String` comparison. Words are compared
/// case-sensitively, exactly as they appear in `words`.
///
/// - Parameter words: The words to count; duplicates are expected.
/// - Returns: `(word, count)` pairs, most frequent first.
@inline(never)
func histogram<S: Sequence>(for words: S) -> [(String, Int)]
where S.Element == String {
  // Seed every word with a count of 1 and let the uniquing closure add the
  // counts together whenever a key repeats.
  let histogram = Dictionary<String, Int>(
    words.lazy.map { ($0, 1) },
    uniquingKeysWith: +)
  // Negating the count makes the ascending tuple comparison sort by
  // descending count first, then by ascending word.
  return histogram.sorted { (-$0.1, $0.0) < (-$1.1, $1.0) }
}

/// Benchmark body: `n` times, computes the word histogram of `asciiWords` and
/// checks that it has 168 entries with ("and", 15) in first place.
///
/// - Parameter n: Iteration count; must be at least 1, because the closed
///   range `1...n` traps when `n` is smaller than its lower bound.
@inline(never)
public func run_WordCountHistogramASCII(_ n: Int) {
  for _ in 1...n {
    let words = histogram(for: identity(asciiWords))
    check(words.count == 168)
    // `histogram` sorts by decreasing count, so index 0 is the top word.
    check(words[0] == ("and", 15))
    blackHole(words)
  }
}

/// Benchmark body: `n` times, computes the word histogram of `utf16Words` and
/// checks that it has 168 entries with ("and", 15) in first place (the same
/// expectations as for the ASCII word list).
///
/// - Parameter n: Iteration count; must be at least 1, because the closed
///   range `1...n` traps when `n` is smaller than its lower bound.
@inline(never)
public func run_WordCountHistogramUTF16(_ n: Int) {
  for _ in 1...n {
    let words = histogram(for: identity(utf16Words))
    check(words.count == 168)
    // `histogram` sorts by decreasing count, so index 0 is the top word.
    check(words[0] == ("and", 15))
    blackHole(words)
  }
}