1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
|
package strikethrough
import (
"bytes"
"unicode"
"github.com/JohannesKaufmann/dom"
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/domutils"
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/escape"
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/textutils"
"golang.org/x/net/html"
)
// option is a functional option that mutates a strikethroughPlugin.
// NewStrikethroughPlugin applies each supplied option to the plugin it builds.
type option func(p *strikethroughPlugin)
// WithDelimiter returns an option that stores delimiter on the plugin as the
// marker used when rendering strikethrough content.
//
// Passing an empty string has no lasting effect: NewStrikethroughPlugin
// replaces an empty delimiter with "~~".
func WithDelimiter(delimiter string) option {
	setDelimiter := func(target *strikethroughPlugin) {
		target.delimiter = delimiter
	}
	return setDelimiter
}
// strikethroughPlugin holds the configuration of the strikethrough plugin.
type strikethroughPlugin struct {
// delimiter is the marker handed to textutils.DelimiterForEveryLine when a
// strikethrough element is rendered. NewStrikethroughPlugin sets it to "~~"
// when it is left empty.
delimiter string
}
// NewStrikethroughPlugin builds the plugin that converts `<strike>`, `<s>`,
// and `<del>` elements.
//
// The given options are applied in order. If the delimiter is still empty
// afterwards, it defaults to "~~".
func NewStrikethroughPlugin(opts ...option) converter.Plugin {
	const defaultDelimiter = "~~"

	var p strikethroughPlugin
	for i := range opts {
		opts[i](&p)
	}

	// Fall back to the default when no option set a non-empty delimiter.
	if p.delimiter == "" {
		p.delimiter = defaultDelimiter
	}

	return &p
}
// Name returns the identifier of this plugin, "strikethrough".
func (s *strikethroughPlugin) Name() string {
	const pluginName = "strikethrough"
	return pluginName
}
// Init registers the plugin's hooks on conv: a pre-renderer, '~' as an
// escaped character, an un-escaper, and a renderer. All handlers are
// registered with converter.PriorityStandard. It always returns nil.
func (s *strikethroughPlugin) Init(conv *converter.Converter) error {
conv.Register.PreRenderer(s.handlePreRender, converter.PriorityStandard)
// NOTE(review): '~' is registered regardless of the configured delimiter;
// confirm this is intended when a custom delimiter is set via WithDelimiter.
conv.Register.EscapedChar('~')
conv.Register.UnEscaper(s.handleUnEscapers, converter.PriorityStandard)
conv.Register.Renderer(s.handleRender, converter.PriorityStandard)
return nil
}
// handlePreRender prepares doc before rendering by calling
// domutils.RemoveRedundant and then domutils.MergeAdjacent with the
// strikethrough name predicates. ctx is not used.
//
// NOTE(review): presumably this collapses nested and neighbouring
// <del>/<s>/<strike> elements so that delimiters are not emitted twice;
// confirm against the domutils implementation.
func (s *strikethroughPlugin) handlePreRender(ctx converter.Context, doc *html.Node) {
domutils.RemoveRedundant(doc, nameIsBothStrikethough)
domutils.MergeAdjacent(doc, nameIsStrikethough)
}
// handleUnEscapers is the un-escaper callback registered for this plugin.
//
// It returns -1 when chars[index] is not '~', or when the rune reported by
// escape.GetNextAsRune is Unicode whitespace or 0. In every other case it
// returns 1.
//
// NOTE(review): presumably -1 means "leave the escape in place" and a
// positive value means the escape can be dropped; confirm against the
// converter's UnEscaper contract.
func (s *strikethroughPlugin) handleUnEscapers(chars []byte, index int) int {
	if chars[index] != '~' {
		return -1
	}

	following := escape.GetNextAsRune(chars, index)
	switch {
	case following == 0, unicode.IsSpace(following):
		// "not followed by Unicode whitespace"
		return -1
	default:
		return 1
	}
}
// nameIsStrikethough reports whether dom.NodeName(node) is one of the
// strikethrough element names: "del", "s", or "strike".
func nameIsStrikethough(node *html.Node) bool {
	switch dom.NodeName(node) {
	case "del", "s", "strike":
		return true
	default:
		return false
	}
}
// nameIsBothStrikethough reports whether both a and b are strikethrough
// elements according to nameIsStrikethough. b is only inspected when a
// already matched.
func nameIsBothStrikethough(a *html.Node, b *html.Node) bool {
	if !nameIsStrikethough(a) {
		return false
	}
	return nameIsStrikethough(b)
}
// handleRender is the renderer callback registered in Init.
//
// For nodes that nameIsStrikethough accepts it delegates to
// renderStrikethrough and returns its status. For every other node it returns
// converter.RenderTryNext and writes nothing.
//
// The receiver is a pointer, matching the other methods of
// strikethroughPlugin, so the plugin is not copied on every call.
func (s *strikethroughPlugin) handleRender(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	if !nameIsStrikethough(n) {
		return converter.RenderTryNext
	}
	return s.renderStrikethrough(ctx, w, n)
}
// renderStrikethrough renders the child nodes of n into a temporary buffer,
// passes the result together with the configured delimiter through
// textutils.DelimiterForEveryLine, and writes the outcome to w.
// It always returns converter.RenderSuccess.
//
// The receiver is a pointer, matching the other methods of
// strikethroughPlugin, so the plugin is not copied on every call.
func (s *strikethroughPlugin) renderStrikethrough(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	var inner bytes.Buffer
	ctx.RenderChildNodes(ctx, &inner, n)

	// If there is a newline character between the start and end delimiter,
	// the delimiters won't be recognized. Rather than removing the newlines,
	// the content is handed to DelimiterForEveryLine together with the delimiter.
	wrapped := textutils.DelimiterForEveryLine(inner.Bytes(), []byte(s.delimiter))
	w.Write(wrapped)

	return converter.RenderSuccess
}
|