File: consecutive_newlines.go

package info (click to toggle)
golang-github-johanneskaufmann-html-to-markdown 2.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,084 kB
  • sloc: makefile: 3
file content (139 lines) | stat: -rw-r--r-- 3,299 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
package textutils

import (
	"bytes"
	"unicode/utf8"
)

// TrimUnnecessaryHardLineBreaks rewrites a line that ends in two spaces and is
// followed by an empty line, a two-space line, or a one-space line into a
// plain blank line ("\n\n").
//
// The result is always a copy produced by bytes.ReplaceAll; content itself is
// not modified.
func TrimUnnecessaryHardLineBreaks(content []byte) []byte {
	blankLine := []byte("\n\n")

	// The patterns are applied one after another in exactly this order. Each
	// pass is a single non-overlapping scan, so a match created by an earlier
	// pass is only picked up by the passes that come after it.
	for _, pattern := range []string{
		"  \n\n",
		"  \n  \n",
		"  \n \n",
	} {
		content = bytes.ReplaceAll(content, []byte(pattern), blankLine)
	}
	// Note: the sequence "\n  \n" (no trailing spaces on the first line) is
	// currently not rewritten.

	return content
}

// TrimConsecutiveNewlines limits every run of newlines to at most two.
//
// Spaces do not end a run: only a rune other than ' ' or '\n' starts a new
// one. Spaces that directly precede a kept newline or any other rune are
// preserved, spaces that precede a dropped newline are discarded, and spaces
// at the very end of the input are kept.
//
// The output is built in a fresh slice (nil for empty input); input is only
// read.
func TrimConsecutiveNewlines(input []byte) []byte {
	var (
		out     []byte // bytes emitted so far
		pending []byte // spaces seen since the last emitted byte
		run     int    // newlines seen since the last rune that is neither ' ' nor '\n'
	)

	for pos := 0; pos < len(input); {
		r, width := utf8.DecodeRune(input[pos:])
		chunk := input[pos : pos+width]
		pos += width

		switch r {
		case ' ':
			// Hold spaces back until we know what follows them.
			pending = append(pending, chunk...)
			continue
		case '\n':
			run++
			if run <= 2 {
				// Within the limit: emit the held-back spaces and the newline.
				out = append(out, pending...)
				out = append(out, '\n')
			}
			// Beyond the limit the newline and its preceding spaces are dropped.
		default:
			// Any other rune ends the current run of newlines.
			run = 0
			out = append(out, pending...)
			out = append(out, chunk...)
		}

		// The held-back spaces were either emitted or dropped above.
		pending = pending[:0]
	}

	// Spaces at the very end of the input are kept as they are.
	return append(out, pending...)
}

/*
func TrimConsecutiveNewlines(source []byte) []byte {
	// Some performance optimizations:
	// - If no replacement was done, we return the original slice and don't allocate.
	// - We batch appends

	var ret []byte

	startNormal := 0
	startMatch := -1

	count := 0
	// for i, b := range source {
	for i := 0; i < len(source); i++ {
		r, size := utf8.DecodeRune(source[i:])
		_ = size

		isNewline := r == '\n' // || r == marker.MarkerLineBreak
		if isNewline {
			count += 1
		}

		if startMatch == -1 && isNewline {
			// Start of newlines
			startMatch = i
			i = i + size - 1
			continue
		} else if startMatch != -1 && isNewline {
			// Middle of newlines
			i = i + size - 1
			continue
		} else if startMatch != -1 {
			// Character after the last newline character

			if count > 2 {
				if ret == nil {
					ret = make([]byte, 0, len(source))
				}

				ret = append(ret, source[startNormal:startMatch]...)
				ret = append(ret, '\n', '\n')
				startNormal = i
			}

			startMatch = -1
			count = 0
		}
	}

	getStartEnd := func() (int, int, bool, bool) {
		if startMatch == -1 && startNormal == 0 {
			// a) no changes need to be done
			return -1, -1, false, false
		}

		if count <= 2 {
			// b) Only the normal characters still need to be added
			return startNormal, len(source), true, false
		}

	// c) The match still needs to be replaced (and possibly the previous normal characters be added)
		return startNormal, startMatch, true, true
	}

	start, end, isKeepNeeded, isReplaceNeeded := getStartEnd()
	if isKeepNeeded {
		if ret == nil {
			ret = make([]byte, 0, len(source))
		}

		ret = append(ret, source[start:end]...)
		if isReplaceNeeded {
			ret = append(ret, '\n', '\n')
		}
	}

	if ret == nil {
		// Hurray, we did not do any allocations with make()
		// and instead just return the original slice.
		return source
	}
	return ret
}
*/