File: escape.go

package info (click to toggle)
golang-github-johanneskaufmann-html-to-markdown 2.3.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,080 kB
  • sloc: makefile: 3
file content (89 lines) | stat: -rw-r--r-- 1,917 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
package converter

import (
	"unicode/utf8"

	"github.com/JohannesKaufmann/html-to-markdown/v2/marker"
)

// Decisions recorded per placeholder position while un-escaping.
const (
	// actionKeep is the zero value: the character after the placeholder is
	// left as-is and the placeholder itself is simply removed.
	actionKeep = iota
	// actionEscape marks a placeholder that is turned into a backslash.
	actionEscape
)

// placeholderByte is the byte that escapeContent writes in front of every
// character that might need escaping and that unEscapeContent later either
// drops or replaces with a backslash.
//
// IMPORTANT: only the first byte of marker.BytesMarkerEscaping is used, i.e.
// this file assumes internally that the escaping marker is a single byte.
var placeholderByte byte = marker.BytesMarkerEscaping[0]

// escapeContent returns a copy of chars in which every character that
// checkIsEscapedChar reports as a potential markdown character is preceded
// by placeholderByte. Whether such a placeholder ends up as a real escape is
// decided later by unEscapeContent.
//
// NUL bytes are replaced with the UTF-8 encoding of U+FFFD. When escaping is
// disabled, chars is returned unchanged (the same slice, not a copy).
func (conv *Converter) escapeContent(chars []byte) []byte {
	if conv.escapeMode == EscapeModeDisabled {
		return chars
	}

	escaped := make([]byte, 0, len(chars))
	for i, b := range chars {
		if b == 0 {
			// For security reasons, U+0000 must be replaced with the
			// REPLACEMENT CHARACTER (U+FFFD).
			escaped = append(escaped, "\ufffd"...)
			continue
		}

		// The rune is decoded at every byte offset, including offsets that
		// fall inside a multi-byte sequence; the byte itself is always
		// copied through unchanged.
		if r, _ := utf8.DecodeRune(chars[i:]); conv.checkIsEscapedChar(r) {
			escaped = append(escaped, placeholderByte)
		}
		escaped = append(escaped, b)
	}

	return escaped
}

// unEscapeContent resolves the placeholders inserted by escapeContent.
//
// It works in two passes over chars:
//  1. For every placeholder that is followed by at least one more byte, the
//     un-escape handlers are asked in order about the position right after
//     it. The first handler that returns something other than -1 marks the
//     placeholder for escaping; the returned value is used to advance the
//     scan.
//  2. The output is built: ordinary bytes are copied, marked placeholders
//     become a backslash, and all other placeholders are dropped.
//
// When escaping is disabled, chars is returned unchanged (the same slice).
func (conv *Converter) unEscapeContent(chars []byte) []byte {
	if conv.escapeMode == EscapeModeDisabled {
		return chars
	}

	total := len(chars)

	// Pass 1: decide per placeholder. A placeholder in the very last
	// position has nothing after it, so the scan stops one byte early.
	decisions := make([]uint8, total)
	for pos := 0; pos+1 < total; pos++ {
		if chars[pos] != placeholderByte {
			continue
		}

		width := -1
		for _, handler := range conv.getUnEscapeHandlers() {
			if width = handler.Value(chars, pos+1); width != -1 {
				break
			}
		}
		if width == -1 {
			// No handler claimed this position: leave the default decision.
			continue
		}

		decisions[pos] = actionEscape
		// Together with the loop's own increment this advances the scan by
		// exactly the amount the handler reported.
		pos += width - 1
	}

	// Pass 2: emit the result according to the recorded decisions.
	out := make([]byte, 0, total)
	for pos, b := range chars {
		switch {
		case b != placeholderByte:
			out = append(out, b)
		case decisions[pos] == actionEscape:
			out = append(out, '\\')
		}
	}
	return out
}