File: strikethrough.go

package info (click to toggle)
golang-github-johanneskaufmann-html-to-markdown 2.3.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,080 kB
  • sloc: makefile: 3
file content (106 lines) | stat: -rw-r--r-- 2,836 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
package strikethrough

import (
	"bytes"
	"unicode"

	"github.com/JohannesKaufmann/dom"
	"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
	"github.com/JohannesKaufmann/html-to-markdown/v2/internal/domutils"
	"github.com/JohannesKaufmann/html-to-markdown/v2/internal/escape"
	"github.com/JohannesKaufmann/html-to-markdown/v2/internal/textutils"
	"golang.org/x/net/html"
)

// option is a functional option that configures a strikethroughPlugin
// while it is being constructed.
type option func(p *strikethroughPlugin)

// WithDelimiter returns an option that sets the delimiter of the plugin
// to the given string.
//
// NewStrikethroughPlugin falls back to "~~" when the delimiter is empty.
func WithDelimiter(delimiter string) option {
	setDelimiter := func(plugin *strikethroughPlugin) {
		plugin.delimiter = delimiter
	}

	return setDelimiter
}

// strikethroughPlugin holds the configuration of the strikethrough plugin.
type strikethroughPlugin struct {
	// delimiter is the marker handed to textutils.DelimiterForEveryLine
	// together with the rendered content of a strikethrough element.
	delimiter string
}

// NewStrikethroughPlugin returns a plugin that converts `<strike>`, `<s>`,
// and `<del>` elements.
//
// The options are applied in the order they are given; nil options are
// skipped. If the delimiter is still empty afterwards, it defaults to "~~".
func NewStrikethroughPlugin(opts ...option) converter.Plugin {
	plugin := &strikethroughPlugin{}
	for _, opt := range opts {
		if opt == nil {
			// Tolerate nil entries (e.g. from a conditionally built option
			// list) instead of panicking on a nil function call.
			continue
		}
		opt(plugin)
	}

	if plugin.delimiter == "" {
		plugin.delimiter = "~~"
	}

	return plugin
}

// Name returns the identifier of this plugin.
func (s *strikethroughPlugin) Name() string {
	const pluginName = "strikethrough"

	return pluginName
}
// Init hooks the plugin into conv by registering its pre-render handler,
// the '~' character together with the matching un-escaper, and its renderer.
// All handlers are registered with converter.PriorityStandard.
//
// It always returns nil.
func (s *strikethroughPlugin) Init(conv *converter.Converter) error {
	// Runs on the document before rendering, see handlePreRender.
	conv.Register.PreRenderer(s.handlePreRender, converter.PriorityStandard)

	// NOTE(review): presumably this makes the converter treat '~' as a
	// character that may need escaping, with handleUnEscapers deciding per
	// occurrence — confirm against the converter package.
	conv.Register.EscapedChar('~')
	conv.Register.UnEscaper(s.handleUnEscapers, converter.PriorityStandard)

	// Renders the strikethrough elements, see handleRender.
	conv.Register.Renderer(s.handleRender, converter.PriorityStandard)

	return nil
}

// handlePreRender is the pre-render hook registered in Init. It passes doc
// through two domutils helpers, using the strikethrough name predicates to
// select the affected elements. ctx is not used.
func (s *strikethroughPlugin) handlePreRender(ctx converter.Context, doc *html.Node) {
	// NOTE(review): judging by the helper name, this drops a strikethrough
	// element that is redundant relative to another strikethrough element
	// (e.g. nested ones) — confirm against domutils.
	domutils.RemoveRedundant(doc, nameIsBothStrikethough)
	// NOTE(review): judging by the helper name, this joins neighbouring
	// strikethrough elements into one — confirm against domutils.
	domutils.MergeAdjacent(doc, nameIsStrikethough)
}

// handleUnEscapers is the un-escaper registered in Init for the '~' character.
//
// It returns -1 if the byte at index is not '~', or if the rune following it
// is Unicode whitespace or the zero rune. In every other case it returns 1.
//
// NOTE(review): the meaning of the two return values is defined by the
// converter's un-escaper contract, which is not visible here.
func (s *strikethroughPlugin) handleUnEscapers(chars []byte, index int) int {
	if chars[index] != '~' {
		return -1
	}

	switch following := escape.GetNextAsRune(chars, index); {
	case following == 0, unicode.IsSpace(following):
		// The delimiter must be "not followed by Unicode whitespace".
		// NOTE(review): the zero rune presumably signals that there is no
		// next character — confirm against escape.GetNextAsRune.
		return -1
	default:
		return 1
	}
}

// nameIsStrikethough reports whether the name of node, as returned by
// dom.NodeName, is one of the strikethrough elements "del", "s" or "strike".
func nameIsStrikethough(node *html.Node) bool {
	switch dom.NodeName(node) {
	case "del", "s", "strike":
		return true
	default:
		return false
	}
}
// nameIsBothStrikethough reports whether a and b are both strikethrough
// elements according to nameIsStrikethough.
func nameIsBothStrikethough(a *html.Node, b *html.Node) bool {
	if !nameIsStrikethough(a) {
		return false
	}

	return nameIsStrikethough(b)
}

// handleRender is the renderer registered in Init. It renders n if it is a
// strikethrough element (see nameIsStrikethough); for any other node it
// returns converter.RenderTryNext without writing anything.
//
// It uses a pointer receiver to stay consistent with Name, Init,
// handlePreRender and handleUnEscapers.
func (s *strikethroughPlugin) handleRender(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	if !nameIsStrikethough(n) {
		// Not a strikethrough element: leave it to another renderer.
		return converter.RenderTryNext
	}

	return s.renderStrikethrough(ctx, w, n)
}
// renderStrikethrough renders the child nodes of n into a temporary buffer,
// passes the result together with the configured delimiter through
// textutils.DelimiterForEveryLine and writes the outcome to w.
//
// It always returns converter.RenderSuccess.
//
// It uses a pointer receiver to stay consistent with Name, Init,
// handlePreRender and handleUnEscapers.
func (s *strikethroughPlugin) renderStrikethrough(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	// Render the children separately first, so that the delimiters can be
	// placed around the finished content.
	var buf bytes.Buffer
	ctx.RenderChildNodes(ctx, &buf, n)

	// If there is a newline character between the start and end delimiter
	// the delimiters won't be recognized. Either we remove all newline characters
	// OR on _every_ line we put start & end delimiters.
	content := textutils.DelimiterForEveryLine(buf.Bytes(), []byte(s.delimiter))

	w.Write(content)

	return converter.RenderSuccess
}