File: markdown.go

package info (click to toggle)
golang-github-docker-cli-docs-tool 0.8.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 424 kB
  • sloc: makefile: 4
file content (100 lines) | stat: -rw-r--r-- 4,057 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Copyright 2017 cli-docs-tool authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package clidocstool

import (
	"regexp"
	"strings"
	"unicode"
)

var (
	// mdHeading matches MarkDown H1..h6 headings. Note that this regex may produce
	// false positives for (e.g.) comments in code-blocks (# this is a comment),
	// so should not be used as a generic regex for other purposes.
	mdHeading = regexp.MustCompile(`^([#]{1,6})\s(.*)$`)
	// htmlAnchor matches inline HTML anchors. This is intended to only match anchors
	// for our use-case; DO NOT consider using this as a generic regex, or at least
	// not before reading https://stackoverflow.com/a/1732454/1811501.
	htmlAnchor = regexp.MustCompile(`<a\s+(?:name|id)="?([^"]+)"?\s*></a>\s*`)
	// relativeLink matches parts of internal links between .md documents
	// e.g. "](buildx_build.md)"
	relativeLink = regexp.MustCompile(`\]\((\.\/)?[a-z-_]+\.md(#.*)?\)`)
)

// getSections returns all H2 sections by title (lowercase)
func getSections(mdString string) map[string]string {
	parsedContent := strings.Split("\n"+mdString, "\n## ")
	sections := make(map[string]string, len(parsedContent))
	for _, s := range parsedContent {
		if strings.HasPrefix(s, "#") {
			// not a H2 Section
			continue
		}
		parts := strings.SplitN(s, "\n", 2)
		if len(parts) == 2 {
			sections[strings.ToLower(parts[0])] = parts[1]
		}
	}
	return sections
}

// cleanupMarkDown cleans up the MarkDown passed in mdString for inclusion in
// YAML. It removes trailing whitespace and substitutes tabs for four spaces
// to prevent YAML switching to use "compact" form; ("line1  \nline\t2\n")
// which, although equivalent, is hard to read.
func cleanupMarkDown(mdString string) (md string, anchors []string) {
	// remove leading/trailing whitespace, and replace tabs in the whole content
	mdString = strings.TrimSpace(mdString)
	mdString = strings.ReplaceAll(mdString, "\t", "    ")
	mdString = strings.ReplaceAll(mdString, "https://docs.docker.com", "")

	// Rewrite internal links, replacing relative paths with absolute path
	// e.g. from [docker buildx build](buildx_build.md#build-arg)
	// to [docker buildx build](/reference/cli/docker/buildx/build/#build-arg)
	mdString = relativeLink.ReplaceAllStringFunc(mdString, func(link string) string {
		link = strings.TrimLeft(link, "](./")
		link = strings.ReplaceAll(link, "_", "/")
		link = strings.ReplaceAll(link, ".md", "/")
		return "](/reference/cli/docker/" + link
	})

	var id string
	// replace trailing whitespace per line, and handle custom anchors
	lines := strings.Split(mdString, "\n")
	for i := 0; i < len(lines); i++ {
		lines[i] = strings.TrimRightFunc(lines[i], unicode.IsSpace)
		lines[i], id = convertHTMLAnchor(lines[i])
		if id != "" {
			anchors = append(anchors, id)
		}
	}
	return strings.Join(lines, "\n"), anchors
}

// convertHTMLAnchor converts inline anchor-tags in headings (<a name=myanchor></a>)
// to an extended-markdown property ({#myanchor}). Extended Markdown properties
// are not supported in GitHub Flavored Markdown, but are supported by Jekyll,
// and lead to cleaner HTML in our docs, and prevents duplicate anchors.
// It returns the converted MarkDown heading and the custom ID (if present)
func convertHTMLAnchor(mdLine string) (md string, customID string) {
	if m := mdHeading.FindStringSubmatch(mdLine); len(m) > 0 {
		if a := htmlAnchor.FindStringSubmatch(m[2]); len(a) > 0 {
			customID = a[1]
			mdLine = m[1] + " " + htmlAnchor.ReplaceAllString(m[2], "") + " {#" + customID + "}"
		}
	}
	return mdLine, customID
}