1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
|
package converter
import (
"bytes"
"context"
"errors"
"io"
"slices"
"strings"
"golang.org/x/net/html"
)
type (
	// convertOption collects the per-call settings that the
	// ConvertOptionFunc values passed to a Convert* method fill in.
	convertOption struct {
		// domain is the base domain set through WithDomain.
		domain string
		// context is the base context set through WithContext.
		// It stays nil when no context option was supplied.
		context context.Context
	}

	// ConvertOptionFunc is a functional option that adjusts the settings
	// of a single conversion call.
	ConvertOptionFunc func(o *convertOption)
)
// WithContext returns an option that stores `ctx` as the base context
// of a conversion call.
//
// When no context (or a nil context) is supplied, `ConvertNode` falls
// back to `context.Background()`.
func WithContext(ctx context.Context) ConvertOptionFunc {
	applyContext := func(opt *convertOption) {
		opt.context = ctx
	}
	return applyContext
}
// WithDomain returns an option that hands a base `domain` to the
// converter and to the `AssembleAbsoluteURL` function.
//
// Whenever a *relative* url is encountered (in an image or link), the
// `domain` is used to turn it into an *absolute* url.
func WithDomain(domain string) ConvertOptionFunc {
	applyDomain := func(opt *convertOption) {
		opt.domain = domain
	}
	return applyDomain
}
// setError stores err on the converter while holding the write lock.
// A later call overwrites whatever error was stored before.
func (conv *Converter) setError(err error) {
	conv.m.Lock()
	conv.err = err
	conv.m.Unlock()
}
// getError returns the error currently stored on the converter,
// reading it while holding the read lock. It returns nil when no
// error has been stored.
func (conv *Converter) getError() error {
	conv.m.RLock()
	stored := conv.err
	conv.m.RUnlock()
	return stored
}
var (
	// errNoRenderHandlers is returned by ConvertNode when the converter
	// has no render handlers at all.
	errNoRenderHandlers = errors.New(`no render handlers are registered. did you forget to register the "commonmark" and "base" plugins?`)

	// errBasePluginMissing is returned by ConvertNode when the
	// "commonmark" plugin is registered without the "base" plugin.
	errBasePluginMissing = errors.New(`you registered the "commonmark" plugin but the "base" plugin is also required`)
)
// ConvertNode renders an already parsed `*html.Node` tree as markdown
// and returns the result as a byte slice.
//
// Use this when the HTML was parsed beforehand with the `html.Parse()`
// function from the "golang.org/x/net/html" package: the resulting node
// can be handed to the converter as-is.
//
// A nil slice and a non-nil error are returned when an error is stored
// on the converter, when no render handlers are registered, or when the
// "commonmark" plugin is registered without the "base" plugin.
func (conv *Converter) ConvertNode(doc *html.Node, opts ...ConvertOptionFunc) ([]byte, error) {
	// Calling `Init` on the plugins can fail (e.g. validation errors).
	// This is the first point at which such an error can be reported.
	if initErr := conv.getError(); initErr != nil {
		return nil, initErr
	}

	// The options are applied while holding the converter's write lock.
	cfg := new(convertOption)
	conv.m.Lock()
	for i := range opts {
		opts[i](cfg)
	}
	conv.m.Unlock()

	// A converter without any render handler is usually a user error,
	// since the Commonmark Plugin is wanted in 99% of cases.
	if handlers := conv.getRenderHandlers(); len(handlers) == 0 {
		return nil, errNoRenderHandlers
	}

	hasCommonmark := slices.Contains(conv.registeredPlugins, "commonmark")
	hasBase := slices.Contains(conv.registeredPlugins, "base")
	if hasCommonmark && !hasBase {
		return nil, errBasePluginMissing
	}

	// - - - - - - - - - - - - - - - - - - - //

	gs := newGlobalState()

	// Start from the caller's context, or from the background context
	// when none was supplied, and attach the per-call values to it.
	baseCtx := cfg.context
	if baseCtx == nil {
		baseCtx = context.Background()
	}
	baseCtx = provideDomain(baseCtx, cfg.domain)
	baseCtx = provideAssembleAbsoluteURL(baseCtx, defaultAssembleAbsoluteURL)
	baseCtx = gs.provideGlobalState(baseCtx)

	convCtx := newConverterContext(baseCtx, conv)

	// - - - - - - - - - - - - - - - - - - - //

	// Pre-Render: every handler is called with the document node.
	for _, pre := range conv.getPreRenderHandlers() {
		pre.Value(convCtx, doc)
	}

	// Render: the output is collected in an in-memory buffer.
	rendered := new(bytes.Buffer)
	conv.handleRenderNode(convCtx, rendered, doc)

	// Post-Render: each handler receives the previous handler's output.
	markdown := rendered.Bytes()
	for _, post := range conv.getPostRenderHandlers() {
		markdown = post.Value(convCtx, markdown)
	}

	return markdown, nil
}
// ConvertReader reads html from `r` and converts it to markdown.
//
// The input is parsed with `html.Parse()`; a parse error is returned
// unchanged, otherwise the parsed document is passed to `ConvertNode`
// together with the given options.
func (conv *Converter) ConvertReader(r io.Reader, opts ...ConvertOptionFunc) ([]byte, error) {
	root, parseErr := html.Parse(r)
	if parseErr != nil {
		return nil, parseErr
	}

	return conv.ConvertNode(root, opts...)
}
// ConvertString takes html as a string and returns the markdown as a
// string.
//
// The input is wrapped in a reader and passed to `ConvertReader`, which
// parses it with `html.Parse()`. On failure the empty string is returned
// together with the error.
func (conv *Converter) ConvertString(htmlInput string, opts ...ConvertOptionFunc) (string, error) {
	markdown, err := conv.ConvertReader(strings.NewReader(htmlInput), opts...)
	if err != nil {
		return "", err
	}

	return string(markdown), nil
}
|