1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
|
package cmd
import (
"crypto/sha256"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/bmatcuk/doublestar/v4"
)
// outputType identifies the destination that converted markdown is written to.
type outputType string
const (
// outputTypeStdout: the markdown is printed to the CLI's stdout writer.
// determineOutputType selects it when no output path was given.
outputTypeStdout outputType = "stdout"
// outputTypeDirectory: each result is written as a file inside the output
// directory. determineOutputType selects it when the output path ends with
// a path separator.
outputTypeDirectory outputType = "directory"
// outputTypeFile: the result is written to the output path itself.
outputTypeFile outputType = "file"
)
// hasFolderSuffix reports whether outputPath ends with a path separator,
// which is how the user signals that the path is meant to be a directory.
func hasFolderSuffix(outputPath string) bool {
	// The forward slash is accepted on every platform.
	if strings.HasSuffix(outputPath, "/") {
		return true
	}

	// Additionally accept the platform's native separator (e.g. "\" on windows).
	nativeSeparator := string(os.PathSeparator)
	return strings.HasSuffix(outputPath, nativeSeparator)
}
// determineOutputType decides where the converted markdown should go,
// based on the number of inputs and the --output path.
//
//   - An empty outputPath means stdout, which is only allowed for a single input.
//   - An outputPath ending with a path separator means a directory.
//   - Anything else is treated as a file, which is only allowed for a single
//     input and only if the path is not an existing directory.
//
// The _inputPath parameter is currently unused.
// On failure the returned outputType is empty and the error is a CLI error.
func determineOutputType(_inputPath string, countInputs int, outputPath string) (outputType, error) {
	multipleInputs := countInputs > 1

	switch {
	case outputPath == "" && multipleInputs:
		// Several results cannot be sensibly written to stdout.
		return "", NewCLIError(
			fmt.Errorf("when processing multiple input files --output needs to be a directory"),
			Paragraph("Here is how you can use a glob to match multiple files:"),
			CodeBlock(`html2markdown --input "src/*.html" --output "dist/"`),
		)

	case outputPath == "":
		return outputTypeStdout, nil

	case hasFolderSuffix(outputPath):
		return outputTypeDirectory, nil

	case multipleInputs:
		// From here on the output path looks like a file. But with multiple
		// inputs (the input MUST have been a glob or directory) the output
		// MUST be a directory.
		name := filepath.Base(outputPath)
		return "", NewCLIError(
			fmt.Errorf(`when processing multiple input files, --output "%s" must end with "%s" to indicate a directory`, name, name+"/"),
		)
	}

	// TODO: The glob can also be a folder with just one file...
	//       So we should check if the path contains any glob characters.

	// An existing directory without the trailing slash is most likely a typo.
	// A failing Stat (e.g. the path does not exist yet) is not an error here.
	info, statErr := os.Stat(outputPath)
	if statErr == nil && info.IsDir() {
		name := filepath.Base(outputPath)
		return "", NewCLIError(
			fmt.Errorf(`path "%s" exists and is a directory, did you mean "%s"?`, name, name+"/"),
			Paragraph(`The --output must end with "/" to indicate a directory`),
		)
	}

	// Every remaining case is a file for the single input: an existing
	// non-directory, a not-yet-existing path with a file extension, or a
	// not-yet-existing path without one.
	return outputTypeFile, nil
}
// calculateOutputPaths assigns an output file name to every input.
//
// The first input with a given basename (file name without extension) gets
// "<basename>.md". Every later input with the same basename gets
// "<basename>.<hash>.md", where <hash> is the first 10 hex characters of the
// hash of the input's path relative to the static base of the glob in
// inputFilepath.
//
// The result is stored in each input's outputFullFilepath field; note that
// only a file name (no directory part) is stored there. An error is returned
// if a relative path cannot be computed.
func calculateOutputPaths(inputFilepath string, inputs []*input) error {
	normalizedPattern := filepath.ToSlash(filepath.Clean(inputFilepath))
	// Only the static base directory is needed, not the remaining pattern.
	globBase, _ := doublestar.SplitPattern(normalizedPattern)

	// How often each basename has been encountered so far.
	seen := make(map[string]int)

	for _, in := range inputs {
		name := fileNameWithoutExtension(filepath.Base(in.inputFullFilepath))

		if seen[name] != 0 {
			// -> The filename for duplicates
			//
			// We hash the path relative to the globBase, since the globBase
			// is *the same* for all files. Bonus: It makes testing easier as
			// the temporary folder does not matter.
			rel, err := filepath.Rel(globBase, in.inputFullFilepath)
			if err != nil {
				return err
			}
			in.outputFullFilepath = name + "." + hashFilepath(rel)[:10] + ".md"
		} else {
			// -> The standard filename
			in.outputFullFilepath = name + ".md"
		}

		seen[name]++
	}

	return nil
}
// hashFilepath returns the SHA-256 digest of path as a lowercase hex string.
//
// The path is converted to forward slashes before hashing, so that the same
// path yields the same hash regardless of operating system
// (bonus: easier testing as we always hash the same).
func hashFilepath(path string) string {
	normalized := filepath.ToSlash(path)
	digest := sha256.Sum256([]byte(normalized))
	return fmt.Sprintf("%x", digest[:])
}
// ensureOutputDirectories creates the directories required before writing.
//
// For outputTypeDirectory the output path itself is created, for
// outputTypeFile the parent directory of the output path is created.
// For every other output type nothing is done and nil is returned.
// Directories (including missing parents) are created with os.ModePerm.
func ensureOutputDirectories(outputType outputType, outputFilepath string) error {
	var dir string

	switch outputType {
	case outputTypeDirectory:
		dir = outputFilepath
	case outputTypeFile:
		dir = filepath.Dir(outputFilepath)
	default:
		return nil
	}

	return os.MkdirAll(dir, os.ModePerm)
}
// writeOutput writes the converted markdown to the destination selected by
// outputType.
//
//   - outputTypeDirectory: writes to filename inside cli.config.outputFilepath.
//   - outputTypeFile: writes to cli.config.outputFilepath; filename is ignored.
//   - anything else (i.e. stdout): prints the markdown followed by a newline
//     to cli.Stdout; filename is ignored.
//
// Existing files are only replaced if cli.config.outputOverwrite is set.
// Otherwise an error naming the file that already exists is returned.
func (cli *CLI) writeOutput(outputType outputType, filename string, markdown []byte) error {
	switch outputType {
	case outputTypeDirectory:
		target := filepath.Join(cli.config.outputFilepath, filename)

		err := WriteFile(target, markdown, cli.config.outputOverwrite)
		if err != nil {
			if errors.Is(err, os.ErrExist) {
				// Report the file that collided, not the output directory:
				// the directory is expected to exist, it is the file inside
				// of it that the user has to deal with.
				return fmt.Errorf("output path %q already exists. Use --output-overwrite to replace existing files", target)
			}
			return fmt.Errorf("error while writing the file into the directory: %w", err)
		}
		return nil

	case outputTypeFile:
		target := cli.config.outputFilepath

		err := WriteFile(target, markdown, cli.config.outputOverwrite)
		if err != nil {
			if errors.Is(err, os.ErrExist) {
				return fmt.Errorf("output path %q already exists. Use --output-overwrite to replace existing files", target)
			}
			return fmt.Errorf("error while writing the file: %w", err)
		}
		return nil

	default:
		fmt.Fprintln(cli.Stdout, string(markdown))
		return nil
	}
}
// WriteFile writes data to the file named filename, creating it with
// permissions 0644 if it does not exist yet.
//
// The override parameter controls what happens if the file already exists:
//   - false: nothing is written and the error from opening the file is
//     returned (it satisfies errors.Is(err, os.ErrExist)).
//   - true: the existing contents are truncated before writing.
//
// A write error takes precedence over an error from closing the file.
func WriteFile(filename string, data []byte, override bool) error {
	// O_EXCL = if used with O_CREATE, causes error if file already exists
	existingFileMode := os.O_EXCL
	if override {
		// O_TRUNC = the existing contents are truncated to zero length
		existingFileMode = os.O_TRUNC
	}

	// O_WRONLY = write to the file, not read
	// O_CREATE = create the file if it doesn't exist
	file, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|existingFileMode, 0644)
	if err != nil {
		return err
	}

	// Always close the file, even if the write failed.
	_, writeErr := file.Write(data)
	closeErr := file.Close()

	if writeErr != nil {
		return writeErr
	}
	return closeErr
}
|