File: title.go

package info (click to toggle)
golang-github-jdkato-prose 1.1.0%2Bgit20171031.e27abfd-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 12,848 kB
  • sloc: python: 115; makefile: 55; sh: 21
file content (108 lines) | stat: -rw-r--r-- 3,423 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package transform

import (
	"regexp"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/jdkato/prose/internal/util"
)

// An IgnoreFunc is a TitleConverter callback that decides whether or not the
// string word should be capitalized. It returns true when word should be
// left in lowercase instead of being capitalized. word is passed in its
// lowercased form, and firstOrLast indicates whether or not word is the first
// or last word in the given string.
type IgnoreFunc func(word string, firstOrLast bool) bool

// A TitleConverter converts a string to title case according to its style.
type TitleConverter struct {
	// ignore is the style callback consulted for every word; a true result
	// marks the word as a candidate for staying lowercase.
	ignore IgnoreFunc
}

var (
	// APStyle states to:
	// 1. Capitalize the principal words, including prepositions and
	//    conjunctions of four or more letters.
	// 2. Capitalize an article – the, a, an – or words of fewer than four
	//    letters if it is the first or last word in a title.
	//
	// It is implemented by optionsAP.
	APStyle IgnoreFunc = optionsAP

	// ChicagoStyle states to lowercase articles (a, an, the), coordinating
	// conjunctions (and, but, or, for, nor), and prepositions, regardless of
	// length, unless they are the first or last word of the title.
	//
	// It is implemented by optionsChicago.
	ChicagoStyle IgnoreFunc = optionsChicago
)

// NewTitleConverter builds a TitleConverter whose capitalization decisions
// are delegated to the given style callback (for example APStyle or
// ChicagoStyle).
func NewTitleConverter(style IgnoreFunc) *TitleConverter {
	converter := new(TitleConverter)
	converter.ignore = style
	return converter
}

// Title returns a copy of the string s in title case format.
//
// Every word matched by splitRE is either returned in lowercase (when the
// converter's IgnoreFunc asks for it and the neighboring punctuation permits
// it) or has its first rune mapped to title case. Characters outside of the
// matched words are copied from s unchanged.
func (tc *TitleConverter) Title(s string) string {
	// t is s with typographic quotes, dashes and ellipses folded to ASCII so
	// that the byte-wise neighbor checks below treat every dash as '-'.
	t := sanitizer.Replace(s)
	end := len(t)

	// idx is the byte offset in t just past the previously processed word.
	idx := 0
	return splitRE.ReplaceAllStringFunc(s, func(m string) string {
		sm := strings.ToLower(m)

		// m comes from s, but positions are tracked in t: sanitize the match
		// before searching so it is found even when it contains characters
		// the sanitizer rewrites.
		tm := sanitizer.Replace(m)
		pos := idx
		if off := strings.Index(t[idx:], tm); off >= 0 {
			pos += off
		}
		prev := charAt(t, pos-1)

		// pos and idx are byte offsets, so advance by the byte length of the
		// sanitized match rather than by its rune count.
		ext := len(tm)
		idx = pos + ext

		// Keep the word lowercase only if the style requests it, it directly
		// follows a space, '-' or '/', the byte two positions back is neither
		// ':' nor '-', and it is not immediately followed by '-' (unless it
		// is also preceded by one).
		if tc.ignore(sm, pos == 0 || idx == end) &&
			(prev == ' ' || prev == '-' || prev == '/') &&
			charAt(t, pos-2) != ':' && charAt(t, pos-2) != '-' &&
			(charAt(t, pos+ext) != '-' || prev == '-') {
			return sm
		}
		return toTitle(m, prev)
	})
}

// optionsAP is the IgnoreFunc behind APStyle: a word is left lowercase only
// when it is not the first or last word and appears in smallWords.
func optionsAP(word string, bounding bool) bool {
	if bounding {
		return false
	}
	return util.StringInSlice(word, smallWords)
}

// optionsChicago is the IgnoreFunc behind ChicagoStyle: a word is left
// lowercase only when it is not the first or last word and appears in either
// smallWords or prepositions.
func optionsChicago(word string, bounding bool) bool {
	if bounding {
		return false
	}
	if util.StringInSlice(word, smallWords) {
		return true
	}
	return util.StringInSlice(word, prepositions)
}

// smallWords lists the lowercase words that both optionsAP and
// optionsChicago leave uncapitalized when they are not the first or last
// word of a title.
var smallWords = []string{
	"a", "an", "and", "as", "at", "but", "by", "en", "for", "if", "in", "nor",
	"of", "on", "or", "per", "the", "to", "vs", "vs.", "via", "v", "v."}

// prepositions lists the additional lowercase words that optionsChicago
// (but not optionsAP) leaves uncapitalized when they are not the first or
// last word of a title.
var prepositions = []string{
	"with", "from", "into", "during", "including", "until", "against", "among",
	"throughout", "despite", "towards", "upon", "concerning", "about", "over",
	"through", "before", "between", "after", "since", "without", "under",
	"within", "along", "following", "across", "beyond", "around", "down",
	"near", "above"}

// splitRE matches a single word for Title: one or more Unicode letters or
// numbers followed by any run of characters that are not whitespace, '-' or
// '/'.
var splitRE = regexp.MustCompile(`[\p{N}\p{L}]+[^\s-/]*`)

// sanitizer replaces a set of Unicode characters with ASCII equivalents.
// Title uses the sanitized text only to inspect the bytes around each word;
// the replacements are not written to its result.
var sanitizer = strings.NewReplacer(
	"\u201c", `"`, // left double quotation mark
	"\u201d", `"`, // right double quotation mark
	"\u2018", "'", // left single quotation mark
	"\u2019", "'", // right single quotation mark
	"\u2013", "-", // en dash
	"\u2014", "-", // em dash
	"\u2026", "...") // horizontal ellipsis

// charAt returns the byte at index i of s, if it exists. Otherwise, it
// returns the first byte of s, or 0 when s is empty (so that an empty string
// never causes an out-of-range panic).
func charAt(s string, i int) byte {
	switch {
	case len(s) == 0:
		return 0
	case i >= 0 && i < len(s):
		return s[i]
	default:
		return s[0]
	}
}

// toTitle returns a copy of the string m with its first rune mapped to its
// title case. If m is empty or does not start with a valid UTF-8 sequence it
// is returned unchanged. prev is accepted for the caller's convenience and is
// not used.
func toTitle(m string, prev byte) string {
	r, size := utf8.DecodeRuneInString(m)
	// DecodeRuneInString reports (RuneError, 0) for an empty string and
	// (RuneError, 1) for an invalid encoding; a genuine U+FFFD is 3 bytes
	// wide. Leave m alone in the error cases instead of injecting U+FFFD.
	if r == utf8.RuneError && size <= 1 {
		return m
	}
	return string(unicode.ToTitle(r)) + m[size:]
}