File: list_items.go

package info (click to toggle)
golang-github-johanneskaufmann-html-to-markdown 2.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,084 kB
  • sloc: makefile: 3
file content (53 lines) | stat: -rw-r--r-- 1,415 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
package domutils

import (
	"context"
	"strings"

	"github.com/JohannesKaufmann/dom"
	"golang.org/x/net/html"
	"golang.org/x/net/html/atom"
)

// MoveListItems walks the tree rooted at n and, for every "ol" or "ul"
// element it encounters, makes sure that stray direct children (anything that
// is not an "li" element) end up inside a list item.
//
// A stray child is appended to the closest "li" sibling that precedes it.
// If no "li" precedes it, the child is wrapped in a newly created "li" and
// that wrapper collects any stray children that follow. Text nodes that
// consist solely of whitespace are left untouched.
//
// ctx is only forwarded to the recursive calls.
func MoveListItems(ctx context.Context, n *html.Node) {
	isList := n.Type == html.ElementNode && (n.Data == "ol" || n.Data == "ul")

	if isList {
		// The list item that stray nodes are currently collected into.
		var target *html.Node

		// Iterate over a snapshot of the children, because the loop body
		// re-parents nodes and would otherwise disturb the sibling chain.
		for _, node := range dom.AllChildNodes(n) {
			switch {
			case node.Type == html.ElementNode && node.Data == "li":
				// A regular list item: following stray nodes go in here.
				target = node

			case node.Type == html.TextNode && strings.TrimSpace(node.Data) == "":
				// Whitespace-only text, most likely source formatting. Ignore.

			case target == nil:
				// A stray node without any preceding "li": give it its own
				// "li" wrapper. Whatever dom.WrapNode returns is used as the
				// target for subsequent stray nodes.
				wrapper := &html.Node{
					Type:     html.ElementNode,
					DataAtom: atom.Li,
					Data:     "li",
				}
				target = dom.WrapNode(node, wrapper)

			default:
				// A stray node following an "li": detach it from the list
				// first, then attach it as the last child of that "li".
				n.RemoveChild(node)
				target.AppendChild(node)
			}
		}
	}

	// Descend into the (possibly rearranged) children to handle nested lists.
	child := n.FirstChild
	for child != nil {
		MoveListItems(ctx, child)
		child = child.NextSibling
	}
}