1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
|
package commonmark
import (
"bytes"
"strings"
"unicode/utf8"
"github.com/JohannesKaufmann/dom"
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"github.com/JohannesKaufmann/html-to-markdown/v2/internal/textutils"
"github.com/JohannesKaufmann/html-to-markdown/v2/marker"
"golang.org/x/net/html"
)
// renderInlineCode writes the text content of n to w as a markdown inline
// code span.
//
// The text is collected with getCodeWithoutTags; the info string it returns
// is not used for inline code. Content that is empty or consists only of
// whitespace is written verbatim between single fence characters. All other
// content is passed through textutils.CollapseInlineCodeContent, wrapped in a
// fence one character longer than the value reported by
// textutils.CalculateCodeFenceOccurrences, and padded with a space on each
// side where it would otherwise touch the fence with a fence character.
//
// It always returns converter.RenderSuccess.
func (c *commonmark) renderInlineCode(_ converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	// TODO: configure delimiter in options?
	fenceChar := '`'
	fenceStr := string(fenceChar)

	codeContent, _ := getCodeWithoutTags(n)

	// TODO: debug flag? An empty inline code is expected to have been removed
	// before rendering; it is currently rendered as an empty span.

	// TODO: configurable function to decide if inline or block? Content that
	// contains newlines is currently still rendered as inline code.

	// Test emptiness by length rather than comparing against nil: whether
	// bytes.TrimSpace returns nil or an empty non-nil slice for all-whitespace
	// input is not part of its documented contract.
	if len(bytes.TrimSpace(codeContent)) == 0 {
		// No stripping occurs if the code span contains _only_ spaces:
		w.WriteRune(fenceChar)
		w.Write(codeContent)
		w.WriteRune(fenceChar)
		return converter.RenderSuccess
	}

	// Newlines in the text aren't great, since this is inline code and not a code block.
	// Newlines will be stripped anyway in the browser, but it won't be recognized as code
	// from the markdown parser when there is more than one newline.
	codeContent = textutils.CollapseInlineCodeContent(codeContent)
	code := string(codeContent)

	// Use one more fence character than reported for the content
	// (presumably the longest run of fenceChar inside it; verify against
	// textutils) so the content cannot close the span early.
	maxCount := textutils.CalculateCodeFenceOccurrences(fenceChar, code) + 1
	fence := strings.Repeat(fenceStr, maxCount)

	// Content starting with a fence character would merge with the opening
	// fence, so separate them with a space.
	if strings.HasPrefix(code, fenceStr) {
		code = " " + code
	}
	// Likewise for content ending with a fence character and the closing fence.
	if strings.HasSuffix(code, fenceStr) {
		code = code + " "
	}

	w.WriteString(fence)
	w.WriteString(code)
	w.WriteString(fence)

	return converter.RenderSuccess
}
// renderBlockCode writes the text content of n to w as a fenced markdown
// code block.
//
// The text and info string come from getCodeWithoutTags. One trailing newline
// is removed from the text, the fence is derived from the first rune of
// c.CodeBlockFence via textutils.CalculateCodeFence, and the block is
// surrounded by blank lines. It always returns converter.RenderSuccess.
func (c *commonmark) renderBlockCode(_ converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	content, lang := getCodeWithoutTags(n)

	// Drop a single trailing newline; the closing fence is put on its own
	// line further down.
	content = bytes.TrimSuffix(content, []byte("\n"))

	// The fence is computed from the content before its newlines are swapped
	// for markers.
	fenceRune, _ := utf8.DecodeRuneInString(c.CodeBlockFence)
	fence := textutils.CalculateCodeFence(fenceRune, string(content))

	// The content of a code block has to stay untouched. Since runs of
	// newlines would be trimmed, every newline is temporarily replaced with a
	// dedicated marker.
	content = bytes.ReplaceAll(content, []byte("\n"), marker.BytesMarkerCodeBlockNewline)

	// Opening fence line, directly followed by the info string.
	w.WriteString("\n\n")
	w.WriteString(fence)
	w.WriteString(lang)
	w.WriteRune('\n')

	// Body.
	w.Write(content)

	// Closing fence line.
	w.WriteRune('\n')
	w.WriteString(fence)
	w.WriteString("\n\n")

	return converter.RenderSuccess
}
// getCodeLanguage extracts a language name from the class attribute of n.
//
// The attribute is split into its whitespace-separated tokens. The first
// token containing "language-" or "lang-" is selected, the first occurrence
// of each of those markers is removed from it, and the remainder is returned
// (for example "language-go" yields "go"). It returns "" when n has no class
// attribute or no token matches.
func getCodeLanguage(n *html.Node) string {
	class := dom.GetAttributeOr(n, "class", "")

	// HTML class lists are separated by any ASCII whitespace (space, tab,
	// line feed, form feed, carriage return), not only by a single space.
	// Splitting on all of them keeps a neighbouring class name from leaking
	// into the result when the attribute uses tabs or newlines.
	isASCIIWhitespace := func(r rune) bool {
		switch r {
		case ' ', '\t', '\n', '\f', '\r':
			return true
		}
		return false
	}

	for _, token := range strings.FieldsFunc(class, isASCIIWhitespace) {
		if !strings.Contains(token, "language-") && !strings.Contains(token, "lang-") {
			continue
		}

		token = strings.Replace(token, "language-", "", 1)
		token = strings.Replace(token, "lang-", "", 1)

		return token
	}

	return ""
}
// getCodeWithoutTags walks the tree rooted at startNode in document order
// and returns the concatenated text of its text nodes together with an info
// string.
//
// While walking:
//   - the info string is taken from getCodeLanguage on "code" and "pre"
//     elements, as long as it is still empty,
//   - "style", "script" and "textarea" elements are skipped entirely,
//   - "br" and "div" elements contribute a newline before their children.
func getCodeWithoutTags(startNode *html.Node) ([]byte, string) {
	var (
		out  bytes.Buffer
		lang string
	)

	var walk func(node *html.Node)
	walk = func(node *html.Node) {
		// Text nodes are appended verbatim and are not descended into.
		if node.Type == html.TextNode {
			out.WriteString(node.Data)
			return
		}

		if node.Type == html.ElementNode {
			switch node.Data {
			case "code", "pre":
				// TODO: what if multiple elements have an info string?
				if lang == "" {
					lang = getCodeLanguage(node)
				}
			case "style", "script", "textarea":
				// Neither these elements nor their descendants are rendered.
				return
			case "br", "div":
				out.WriteString("\n")
			}
		}

		for child := node.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}
	walk(startNode)

	return out.Bytes(), lang
}
|