File: name_mangler.go

package info (click to toggle)
golang-github-go-openapi-swag 1%3A0.25.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,200 kB
  • sloc: sh: 30; makefile: 3
file content (381 lines) | stat: -rw-r--r-- 11,291 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
// Copyright 2015 go-swagger maintainers
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mangling

import (
	"strings"
	"unicode"
)

// NameMangler knows how to transform sentences or words into
// identifiers that are a better fit in contexts such as:
//
//   - unexported or exported go variable identifiers
//   - file names
//   - camel cased identifiers
//   - ...
//
// The [NameMangler] is safe for concurrent use, save for its [NameMangler.AddInitialisms] method,
// which is not.
//
// # Known limitations
//
// At this moment, the [NameMangler] doesn't play well with "all caps" text.
//
// Unless every single upper-cased word is declared as an initialism, capitalized words would generally
// not be transformed with the expected result, e.g.
//
//	ToFileName("THIS_IS_ALL_CAPS")
//
// yields the weird outcome
//
//	"t_h_i_s_i_s_a_l_l_c_a_p_s"
type NameMangler struct {
	// options is the embedded configuration, resolved by optionsWithDefaults in NewNameMangler.
	options

	// index holds the initialisms known to this mangler. It is fed by addInitialisms.
	index *indexOfInitialisms

	// splitter serves the methods that only need words as plain strings (see split), whereas
	// splitterWithPostSplit is additionally built with withPostSplitInitialismCheck and serves
	// the methods that inspect lexemes (ToHumanNameLower, ToHumanNameTitle, goIdentifier).
	splitter              splitter
	splitterWithPostSplit splitter

	// NOTE(review): blank field, presumably here to discourage unkeyed composite literals — confirm.
	_ struct{}
}

// NewNameMangler returns a [NameMangler] that is ready to convert strings.
//
// Options are resolved against the defaults: without any option, the mangler is configured
// with the default common initialisms and all default settings.
func NewNameMangler(opts ...Option) NameMangler {
	var mangler NameMangler

	mangler.options = optionsWithDefaults(opts)
	mangler.index = newIndexOfInitialisms()
	mangler.addInitialisms(mangler.commonInitialisms...)

	// The plain splitter: its lexemes are only ever consumed as ready-to-assemble strings.
	mangler.splitter = newSplitter(
		withInitialismsCache(&mangler.index.initialismsCache),
		withReplaceFunc(mangler.replaceFunc),
	)

	// Same settings, plus the post-split initialism check: its lexemes are kept for post-processing.
	mangler.splitterWithPostSplit = newSplitter(
		withInitialismsCache(&mangler.index.initialismsCache),
		withReplaceFunc(mangler.replaceFunc),
		withPostSplitInitialismCheck,
	)

	return mangler
}

// AddInitialisms registers extra initialisms with the mangler.
//
// Initialisms are words that are neither camel-cased nor title-cased. The extra words come
// on top of the common initialisms the mangler already knows about (such as ID, HTTP...).
//
// Words must start with a (unicode) letter: those that don't are ignored.
// Words are either fully capitalized or mixed-cased. Lower-case only words are considered capitalized.
//
// This method is typically called right after the [NameMangler] has been created.
// Whenever all initialisms are known upfront, prefer [NewNameMangler] with the
// [WithAdditionalInitialisms] option.
//
// AddInitialisms mutates the mangler: it must not run concurrently with any other call to the mangler.
func (m *NameMangler) AddInitialisms(words ...string) {
	// register the words, then refresh the cache derived from the index
	idx := m.index
	idx.add(words...)
	idx.buildCache()
}

// Initialisms renders the list of initialisms supported by this mangler.
//
// The returned slice is a copy of the mangler's internal list: callers may sort or modify it
// without affecting the mangler.
//
// It returns nil when the mangler holds no index (e.g. a zero-value [NameMangler]) or when
// the internal list itself is nil.
func (m *NameMangler) Initialisms() []string {
	if m.index == nil {
		// a mangler that was not built by NewNameMangler has nothing to report
		return nil
	}

	known := m.index.initialisms
	if known == nil {
		return nil
	}

	// hand out a copy, so the internal state is never aliased by callers
	list := make([]string, len(known))
	copy(list, known)

	return list
}

// Camelize upper-cases the first rune of a single word and lower-cases all the following ones.
//
// An empty word yields an empty string.
//
// Example:
//
//   - "HELLO" and "hello" become "Hello".
func (m NameMangler) Camelize(word string) string {
	runes := []rune(word)
	if len(runes) == 0 {
		return ""
	}

	if len(runes) == 1 {
		return string(unicode.ToUpper(runes[0]))
	}

	// the buffer is sized in bytes, like the input word
	buf := poolOfBuffers.BorrowBuffer(len(word))
	buf.Grow(len(word))
	defer poolOfBuffers.RedeemBuffer(buf)

	buf.WriteRune(unicode.ToUpper(runes[0]))
	for i := 1; i < len(runes); i++ {
		buf.WriteRune(unicode.ToLower(runes[i]))
	}

	return buf.String()
}

// ToFileName builds a snake-case file name out of a sentence.
//
// Every word is lower-cased, and words are joined with an underscore (_).
//
// Examples:
//
//   - "Hello, Swagger" becomes "hello_swagger"
//   - "HelloSwagger" becomes "hello_swagger"
func (m NameMangler) ToFileName(name string) string {
	words := m.split(name)

	parts := make([]string, len(*words))
	for i, word := range *words {
		parts[i] = lower(word)
	}

	// the words have been transformed: the borrowed slice may go back to its pool
	poolOfStrings.RedeemStrings(words)

	return strings.Join(parts, "_")
}

// ToCommandName builds a CLI command name out of a sentence.
//
// Every word is lower-cased, and words are joined with a dash (-).
//
// Examples:
//
//   - "Hello, Swagger" becomes "hello-swagger"
//   - "HelloSwagger" becomes "hello-swagger"
func (m NameMangler) ToCommandName(name string) string {
	words := m.split(name)

	parts := make([]string, len(*words))
	for i, word := range *words {
		parts[i] = lower(word)
	}

	// the words have been transformed: the borrowed slice may go back to its pool
	poolOfStrings.RedeemStrings(words)

	return strings.Join(parts, "-")
}

// ToHumanNameLower renders a code name as a human-readable series of lower-cased words.
//
// Words are separated by a blank space.
//
// NOTE: parts recognized as initialisms are not lower-cased and keep their original casing.
//
// Examples:
//
//   - "Hello, Swagger" becomes "hello swagger"
//   - "HelloSwagger" or "Hello-Swagger" become "hello swagger"
func (m NameMangler) ToHumanNameLower(name string) string {
	sp := m.splitterWithPostSplit
	lexemes := sp.split(name)

	words := make([]string, len(*lexemes))
	for i, lexeme := range *lexemes {
		if lexeme.IsInitialism() {
			// initialisms are only trimmed
			words[i] = trim(lexeme.GetOriginal())

			continue
		}

		words[i] = lower(lexeme.GetOriginal())
	}

	poolOfLexems.RedeemLexems(lexemes)

	return strings.Join(words, " ")
}

// ToHumanNameTitle renders a code name as a human-readable series of titleized words.
//
// Every word is camelized, save for the parts recognized as initialisms, which are kept as they are.
// Words are separated by a blank space.
//
// Examples:
//
//   - "hello, Swagger" becomes "Hello Swagger"
//   - "helloSwagger" becomes "Hello Swagger"
func (m NameMangler) ToHumanNameTitle(name string) string {
	sp := m.splitterWithPostSplit
	lexemes := sp.split(name)

	titled := make([]string, len(*lexemes))
	for i, lexeme := range *lexemes {
		word := trim(lexeme.GetOriginal())
		if lexeme.IsInitialism() {
			titled[i] = word
		} else {
			titled[i] = m.Camelize(word)
		}
	}

	poolOfLexems.RedeemLexems(lexemes)

	return strings.Join(titled, " ")
}

// ToJSONName builds a camelized, single-word version of a sentence.
//
// The first word is lower-cased. Every following word is camelized. All words are
// then assembled without any separator.
//
// Example:
//
//   - "Hello_swagger" becomes "helloSwagger"
func (m NameMangler) ToJSONName(name string) string {
	words := m.split(name)

	parts := make([]string, len(*words))
	for i, word := range *words {
		switch i {
		case 0:
			parts[i] = lower(word)
		default:
			parts[i] = m.Camelize(trim(word))
		}
	}

	poolOfStrings.RedeemStrings(words)

	return strings.Join(parts, "")
}

// ToVarName builds a legit unexported go variable name out of a sentence.
//
// The generated name plays well with linters (see also [NameMangler.ToGoName]).
//
// Examples:
//
//   - "Hello_swagger" becomes "helloSwagger"
//   - "Http_server" becomes "httpServer"
//
// The rules are the same as for [NameMangler.ToGoName] (legit exported variable), save for the
// capitalization of the initial rune.
//
// Special case: when the initial part is recognized as an initialism (like in the example above),
// that part is lower-cased in full.
func (m NameMangler) ToVarName(name string) string {
	const exported = false

	return m.goIdentifier(name, exported)
}

// ToGoName builds a legit exported go variable name out of a sentence.
//
// The generated name plays well with most linters, and abides by the go rule for "exported"
// symbols: it starts with an upper-case letter.
//
// Examples:
//
//   - "hello_swagger" becomes "HelloSwagger"
//   - "Http_server" becomes "HTTPServer"
//
// # Edge cases
//
// Whenever the first rune cannot be upper-cased, a special prefix is prepended to the resulting name.
// This prefix is "X" by default, and may be customized with the [WithGoNamePrefixFunc] option.
//
// This is the case when the first rune is not a letter, e.g. a digit, or a symbol for which there is
// no word transliteration (see also [WithReplaceFunc] about symbol transliterations).
// It is also the case for most East Asian or Devanagari runes, which have no concept of upper-case.
//
// # Linting
//
// [revive], the successor of golint, is the reference linter.
//
// This means that [NameMangler.ToGoName] supports the initialisms checked by revive
// (see also [DefaultInitialisms]).
//
// At this moment, there is no attempt to transliterate unicode into ascii: some linters
// (e.g. asciicheck, gosmopolitan) may croak on go identifiers generated from unicode input.
//
// [revive]: https://github.com/mgechev/revive
func (m NameMangler) ToGoName(name string) string {
	const exported = true

	return m.goIdentifier(name, exported)
}

// goIdentifier assembles a go identifier from the lexemes found in name.
//
// The first lexeme is titleized when exported is true, and lower-cased otherwise.
// Every following lexeme is titleized when possible, and written as it is otherwise.
//
// When the first lexeme cannot be written that way, the outcome of the configured prefix function
// is written first (lower-cased for unexported identifiers), followed by the lexeme as it is.
//
// A name that yields no lexeme results in an empty string.
func (m NameMangler) goIdentifier(name string, exported bool) string {
	sp := m.splitterWithPostSplit
	parts := sp.split(name)
	defer poolOfLexems.RedeemLexems(parts)

	tokens := *parts
	if len(tokens) == 0 {
		return ""
	}

	buf := poolOfBuffers.BorrowBuffer(len(name))
	defer poolOfBuffers.RedeemBuffer(buf)

	head := tokens[0]

	var written bool
	if exported {
		written = head.WriteTitleized(buf, true)
	} else {
		// NOTE: an initialism as the first part is lower-cased: no longer generates stuff like hTTPxyz.
		written = head.WriteLower(buf, true)
	}

	if !written {
		// "Repairs" a first lexeme that cannot start a legit go name. The strategy is crude:
		// a prefix (e.g. "X") is prepended. For instance, "1_sesame_street" would be split into
		// lexemes ["1", "sesame", "street"], and the first one ("1") would result in something
		// like "X1" (with the default prefix function).
		//
		// The same prefix rule applies to exported and unexported identifiers, so that names remain
		// consistent. The prefix is just lower-cased in the unexported case.
		//
		// NOTE: no longer forcing the first part to be fully upper-cased.
		prefix := m.prefixFunc()(name)
		if !exported {
			prefix = strings.ToLower(prefix)
		}

		buf.WriteString(prefix)
		buf.WriteString(tokens[0].GetOriginal())
	}

	for _, token := range tokens[1:] {
		// NOTE: no longer forcing initialism parts to be fully upper-cased:
		// * pluralized initialisms preserve their trailing "s"
		// * mixed-cased initialisms, such as IPv4, are preserved
		if token.WriteTitleized(buf, false) {
			continue
		}

		// not titleized: perhaps it's too short, perhaps the first rune is not a letter. Write anyway.
		buf.WriteString(token.GetOriginal())
	}

	return buf.String()
}

// addInitialisms registers words with the index of initialisms, then rebuilds the cache of that index.
func (m *NameMangler) addInitialisms(words ...string) {
	idx := m.index

	idx.add(words...)
	idx.buildCache()
}

// split runs the plain splitter on str and collects the original text of every lexeme.
//
// The result is borrowed from poolOfStrings: the callers in this file hand it back
// with RedeemStrings once they are done with it.
func (m NameMangler) split(str string) *[]string {
	sp := m.splitter
	lexemes := sp.split(str)
	words := poolOfStrings.BorrowStrings()

	collected := *words
	for _, lexeme := range *lexemes {
		collected = append(collected, lexeme.GetOriginal())
	}
	*words = collected

	// only the strings are kept: the lexemes may go back to their pool
	poolOfLexems.RedeemLexems(lexemes)

	return words
}