1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
|
# Purpose: sed script that rewrites an HTML page as wiki-style markup.
# Everything up to the "Introduction" heading is discarded; introduction
# lines are kept with selected keywords wrapped in [[...]]; from the
# "...verview" heading to the end of input, headings and list items are
# rewritten as == / === / * / ** markup and remaining tags are stripped.
# Statement order matters: the script relies on labels, branches and the
# substitution flag tested by `t`.
# skip scripts
/<script.*>/,/<\/script>/ d
# skip lines not intended for description
# (each range runs from the marker line through the next empty line)
/<table id=nodescr/,/^$/ d
/<p id=nodescr>/,/^$/ d
# define ranges to be extracted and handled
# drop everything from line 1 through the "Introduction" heading line
1,/^<h.>Introduction/ d
# from the "...verview" heading to the last line, use the overview branch;
# lines in between fall through into the intro section below
/^<h.>.*verview/,$ b overview
: intro
# intro: drop empty lines, strip the first <p>, drop lines starting with a tag
/^$/ d
s/<p>//
/^[ ]*</ d
# emphasize the product name when it starts the line
s,^Mined is,'''Mined''' is,
# NOTE(review): the line following `i\` is taken as the text to insert; as
# shown that is the comment line below, so a blank text line looks to be
# missing here -- confirm against the original script.
i\
# embed Wiki keywords
s,\(Unicode\),[[\1]],g
s,\(CJK\),[[\1]],g
s,\(Han\),[[\1]],g
s,\(text terminal\),[[\1]],g
s,\(xterm\),[[\1]],g
s,\(rxvt\),[[\1]],g
# branch to end of script: the intro line is done, skip the overview rules
b
: overview
# the overview heading line itself goes through ovbegin first;
# every other line goes straight to ovcont
/^<h.>.*verview/ b ovbegin
b ovcont
: ovbegin
# NOTE(review): as shown, the second `i\` and the `: ovcont` label line
# would be read as continued insert text of the first `i\`, which would
# leave `b ovcont` without a target; presumably each `i\` was followed by
# a blank text line -- confirm against the original script.
i\
i\
: ovcont
# suppress HTML only lines
# (a lone <p> is emptied; a line holding just one tag is deleted)
s,^<p>$,,
/^[ ]*<[^>]*>$/ d
# each rewrite below is followed by `t lines`, which skips the remaining
# rewrites once a substitution has succeeded
# headers
s,^[ ]*<h3>\([^<]*\).*,== \1 ==,
t lines
# sections
s,[ ]*<h[^>]*>\([^<]*\).*,=== \1 ===,
t lines
# items
s,[ ]*<li>,* ,
t lines
# subitems
s,[ ]*<dt>,** ,
t lines
# continuation lines
# (leading blanks, possibly none, become exactly one space)
s,[ ]*, ,
:lines
# remove embedded HTML
# (every remaining tag plus the spaces after it; blank-only lines are emptied)
s,<[^>]*> *,,g
s,^ *$,,
# remove empty lines
/^$/ d
# NOTE(review): the two `i\` commands below have no blank text line after
# them as shown, so the following lines would be consumed as insert text;
# presumably a blank line was meant to be inserted -- confirm.
# prepend space to headings
/^==[^=]/ i\
/^=/ i\
# remove leading space
s,^ *,,
|