1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
|
#!/bin/sh
# Convert every *.html file of a source directory into a .md file made of a
# YAML front-matter header followed by the tidied XHTML body, in which
# headings and permalink anchors are rewritten as Markdown headings.
#
# Usage: <script> SRCDIR DESTDIR
#   SRCDIR   directory scanned (non-recursively) for *.html files
#   DESTDIR  directory receiving one NAME.md per SRCDIR/NAME.html
# Environment:
#   TIDY5_BIN  command used to run HTML Tidy (default: tidy5)
set -e

if [ "$#" -lt 2 ]; then
  printf 'Usage: %s SRCDIR DESTDIR\n' "${0##*/}" >&2
  exit 2
fi

srcdir="$1"
destdir="$2"
tidy_bin=${TIDY5_BIN:-"tidy5"}

#######################################
# Filter: rewrite tidied XHTML (stdin) into Markdown-flavoured text (stdout).
#   - <hN ...> lines become "#... title {#id}" headings.
#   - <dt id="..."> lines are printed without the id, which is remembered.
#   - permalink anchors become a heading one level below the last <hN>,
#     anchored on the most recently remembered id.
#   - every other line gets brace escaping and a few layout fix-ups.
# Arguments: none
#######################################
html_to_markdown() {
  awk '
    # Heading line: derive the Markdown level and the anchor id.
    /<h[1-6]/ {
      # Lowest heading number present on the line wins (same precedence as
      # an h1..h6 if/else chain); the rule pattern guarantees one matches.
      level = "";
      for (n = 1; n <= 6; n++) {
        level = level "#";
        if ($0 ~ ("<h" n)) {
          break;
        }
      }
      # id: text between the last id=" or name=" and the following quote.
      id = $0;
      sub(/.*(id|name)="/, "", id);
      sub(/".*/, "", id);
      # title: text between the last ">" before the closing tag and that tag.
      title = $0;
      sub(/ *<\/.*/, "", title);
      sub(/.*> */, "", title);
      print level, title, "{#" id "}";
      next;
    }
    # Definition term: remember its id for the permalink that follows and
    # print the line with the id attribute removed.
    /dt id="/ {
      id = $0;
      sub(/.*(id|name)="/, "", id);
      sub(/".*/, "", id);
      line = $0;
      sub(/id="[^"]*"/, "", line);
      print line;
      next;
    }
    # Permalink anchor: emit a sub-heading of the current heading level.
    /a class="permalink"/ {
      title = $0;
      sub(/ *<a [^>]*>/, "", title);
      sub(/<\/a>/, "", title);
      sub(/<br[^>]*>/, "", title);
      # Escape a lone "*" element body so it is not read as emphasis.
      gsub(/>\*</, ">\\*<", title);
      print level "#", title, "{#" id "}";
      next;
    }
    # Any other line: escape braces and normalise a few tags.
    {
      line = $0;
      # Bracket form keeps the brace literal in every awk regex dialect.
      gsub(/[{]/, "\\{", line);
      gsub(/<li>/, "<li>\n", line);
      gsub(/<\/li>/, "\n</li>", line);
      gsub(/<\/ul>/, "</ul>\n", line);
      # A slash needs no escaping inside an awk string literal.
      gsub(/<br[^>]*>/, "<br/>", line);
      gsub(/<\/div>]/, "</div>\n]", line);
      gsub(/style="[^"]*"/, "", line);
      print line;
      next;
    }
  '
}

for src in "$srcdir"/*.html; do
  # An unmatched glob is left as the literal pattern; skip it instead of
  # creating a bogus "*.md" file.
  [ -e "$src" ] || continue

  name=$(basename "$src" .html)
  dest="$destdir/$name.md"
  printf 'src=%s dest=%s name=%s\n' "$src" "$dest" "$name"

  # Front matter and converted body share ONE redirection so the body is
  # written after the header instead of truncating it.
  {
    cat <<EOF
---
title: $name
---
EOF
    # The pipeline status is that of the awk filter, so a non-zero tidy
    # status does not abort the loop under "set -e".
    # $tidy_bin is intentionally unquoted so TIDY5_BIN may carry extra
    # options. NOTE(review): confirm callers rely on that.
    # shellcheck disable=SC2086
    $tidy_bin -i --wrap 0 \
      --asxhtml \
      --show-body-only yes \
      --drop-empty-elements yes \
      --drop-empty-paras yes \
      --enclose-block-text yes \
      --enclose-text yes "$src" |
      html_to_markdown
  } > "$dest"
done
|