// Package table implements the "table" plugin for the html-to-markdown converter.
package table
import (
"fmt"
"sync"
"github.com/JohannesKaufmann/dom"
"github.com/JohannesKaufmann/html-to-markdown/v2/converter"
"golang.org/x/net/html"
)
// option configures a tablePlugin while it is being constructed by
// NewTablePlugin. A non-nil error reports that the option could not be applied.
type option func(p *tablePlugin) error
// SpanCellBehavior selects how cells affected by colspan/rowspan attributes
// are rendered. The accepted values are the SpanBehavior* constants.
type SpanCellBehavior string
const (
// SpanBehaviorEmpty renders an empty cell.
SpanBehaviorEmpty SpanCellBehavior = "empty"
// SpanBehaviorMirror renders the same content as the original cell.
SpanBehaviorMirror SpanCellBehavior = "mirror"
)
// WithSpanCellBehavior selects what is rendered in the cells that a
// colspan/rowspan cell extends over: either nothing (SpanBehaviorEmpty) or a
// copy of the spanning cell's content (SpanBehaviorMirror).
//
// An empty behavior is accepted and leaves the plugin's current setting
// untouched. Any other unrecognized value makes the option return an error.
func WithSpanCellBehavior(behavior SpanCellBehavior) option {
	return func(plugin *tablePlugin) error {
		if behavior == "" {
			// TODO: should we allow empty string?
			return nil
		}

		recognized := behavior == SpanBehaviorEmpty || behavior == SpanBehaviorMirror
		if !recognized {
			return fmt.Errorf("unknown value %q for span cell behavior", behavior)
		}

		plugin.spanCellBehavior = behavior
		return nil
	}
}
// WithSkipEmptyRows controls whether empty rows are dropped from the output.
// A row counts as empty when every one of its cells has no content or only
// whitespace. Pass true to omit such rows; with false (the default) every row
// is kept.
func WithSkipEmptyRows(skip bool) option {
	apply := func(plugin *tablePlugin) error {
		plugin.skipEmptyRows = skip
		return nil
	}
	return apply
}
// WithHeaderPromotion controls what happens to a table that has no explicit
// header row (e.g. no <th> elements). Pass true to turn the first row into a
// header row followed by separator dashes; with false (the default) all rows
// are treated as regular content.
func WithHeaderPromotion(promote bool) option {
	return option(func(plugin *tablePlugin) error {
		plugin.promoteFirstRowToHeader = promote
		return nil
	})
}
// WithPresentationTables controls the handling of tables marked with
// role="presentation". Pass true to convert them like regular tables; with
// false (the default) they are skipped, since such tables typically represent
// layout rather than semantic content.
func WithPresentationTables(convert bool) option {
	var apply option = func(plugin *tablePlugin) error {
		plugin.convertPresentationTables = convert
		return nil
	}
	return apply
}
// tablePlugin holds the configuration of the table plugin together with any
// error produced while applying the construction options.
type tablePlugin struct {
// m guards err (see setError and getError).
m sync.RWMutex
// err is the error recorded by NewTablePlugin when an option fails;
// Init returns it.
err error
// spanCellBehavior is set by WithSpanCellBehavior; it stays empty when
// that option is not used.
spanCellBehavior SpanCellBehavior
// skipEmptyRows is set by WithSkipEmptyRows.
skipEmptyRows bool
// promoteFirstRowToHeader is set by WithHeaderPromotion.
promoteFirstRowToHeader bool
// convertPresentationTables is set by WithPresentationTables.
convertPresentationTables bool
}
// setError stores err on the plugin while holding the write lock.
func (p *tablePlugin) setError(err error) {
	p.m.Lock()
	p.err = err
	p.m.Unlock()
}
// getError returns the error stored on the plugin, reading it under the
// read lock. It returns nil when no error has been stored.
func (p *tablePlugin) getError() error {
	p.m.RLock()
	stored := p.err
	p.m.RUnlock()
	return stored
}
// NewTablePlugin creates the table plugin and applies opts in order.
//
// Nil options are skipped. The first option that returns an error stops
// processing of the remaining options; that error is stored on the plugin and
// is returned by Init rather than by this constructor.
func NewTablePlugin(opts ...option) converter.Plugin {
	plugin := &tablePlugin{}
	for _, opt := range opts {
		if opt == nil {
			// Calling a nil function value panics; treat it as "no option".
			continue
		}
		if err := opt(plugin); err != nil {
			plugin.setError(err)
			break
		}
	}
	return plugin
}
// Name returns the identifier of this plugin, "table".
func (p *tablePlugin) Name() string {
	const pluginName = "table"
	return pluginName
}
// Init sets the plugin up on conv. If an option passed to NewTablePlugin
// failed, that error is returned and nothing is registered. Otherwise '|' is
// registered as an escaped character and handleRender is registered as a
// renderer with standard priority.
func (p *tablePlugin) Init(conv *converter.Converter) error {
	optErr := p.getError()
	if optErr != nil {
		// Any error raised from the option func
		return optErr
	}

	conv.Register.EscapedChar('|')
	conv.Register.Renderer(p.handleRender, converter.PriorityStandard)

	return nil
}
// handleRender dispatches on the node name: "table" nodes go to renderTable
// and "tr" nodes go to renderFallbackRow. For every other node it returns
// converter.RenderTryNext.
func (p *tablePlugin) handleRender(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	tag := dom.NodeName(n)

	if tag == "table" {
		return p.renderTable(ctx, w, n)
	}
	if tag == "tr" {
		// Normally, when the "table" gets rendered we do NOT go into this case.
		// But as a fallback we separate the rows through newlines.
		return p.renderFallbackRow(ctx, w, n)
	}

	return converter.RenderTryNext
}
// renderFallbackRow renders the children of n surrounded by two newlines on
// each side and reports success.
func (p *tablePlugin) renderFallbackRow(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
	const rowBreak = "\n\n"

	w.WriteString(rowBreak)
	ctx.RenderChildNodes(ctx, w, n)
	w.WriteString(rowBreak)

	return converter.RenderSuccess
}
|