File: whitespace.go

package info (click to toggle)
golang-github-johanneskaufmann-html-to-markdown 2.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,084 kB
  • sloc: makefile: 3
file content (79 lines) | stat: -rw-r--r-- 2,138 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
package collapse

import (
	"unsafe"
)

// byteSliceToString reinterprets b as a string without copying its bytes.
//
// The returned string shares b's backing array, so b must not be modified
// after the call.
func byteSliceToString(b []byte) string {
	/* #nosec G103 */
	asString := (*string)(unsafe.Pointer(&b))
	return *asString
}

// replaceAnyWhitespaceWithSpace rewrites every run of ASCII whitespace
// (space, tab, carriage return, line feed) in source as a single space.
//
// A run that is already exactly one space is left alone. When no run needs
// rewriting, source itself is returned and no buffer is allocated.
func replaceAnyWhitespaceWithSpace(source string) string {
	if len(source) == 0 {
		return source
	}

	isBlank := func(c byte) bool {
		switch c {
		case ' ', '\t', '\r', '\n':
			return true
		}
		return false
	}

	// out stays nil until the first run that actually has to be rewritten,
	// so unchanged input never pays for a buffer.
	var out []byte
	// copied is the index of the first byte of source not yet written to out.
	copied := 0

	for pos := 0; pos < len(source); {
		if !isBlank(source[pos]) {
			pos++
			continue
		}

		// Consume the whole whitespace run source[runStart:pos].
		runStart := pos
		for pos < len(source) && isBlank(source[pos]) {
			pos++
		}

		if pos-runStart == 1 && source[runStart] == ' ' {
			// A lone space is already in its final form; it gets copied
			// later together with the surrounding unchanged text.
			continue
		}

		if out == nil {
			out = make([]byte, 0, len(source))
		}
		// Flush the pending unchanged text in one append, then the
		// single replacement space.
		out = append(out, source[copied:runStart]...)
		out = append(out, ' ')
		copied = pos
	}

	if out == nil {
		// Nothing had to be rewritten: hand back the original string.
		return source
	}

	// Whatever follows the last rewritten run is unchanged text.
	out = append(out, source[copied:]...)
	return byteSliceToString(out)
}