File: unhtml.vim

package info (click to toggle)
txt2tags 2.5-2
  • links: PTS, VCS
  • area: main
  • in suites: squeeze
  • size: 2,264 kB
  • ctags: 471
  • sloc: python: 2,461; lisp: 414; ruby: 347; xml: 96; php: 95; sh: 83; makefile: 26
file content (94 lines) | stat: -rw-r--r-- 1,984 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
" unhtml.vim - by Aurelio Jargas
" - Converts HTML tags into txt2tags marks
" - Part of the txt2tags <http://txt2tags.sf.net> software
"
" INSTRUCTIONS
"   1. Open the HTML file in Vim and execute
"        :so /path/to/unhtml.vim
"
"   2. A new <yourfile>.html.t2t will be saved.
"
"   3. Check the new .t2t file and fix by hand whatever is left.
"

""" [ preparing ]
" ignore case
set ignorecase
" The option above makes every pattern below match tags regardless of
" letter case; nothing in this script resets it afterwards.
"
" Tags broken across lines: when a line ends in a dangling '<a', '<p' or
" '<img', or in an opening <a ...> tag whose text has not been closed yet,
" join it with the following line so the one-line patterns below can match.
global/<\s*\([ap]\|img\)\s*$/join
global/<\s*a\s[^>]*>[^<]*$/join


""" [ do it! ]
" link
" <a ... href=URL ...>TEXT</a> becomes [TEXT URL].
" The double-quoted href form is handled first, then the unquoted form
" (value runs up to the next space or '>').
%s,<\s*a\s[^>]*href="\(.\{-}\)"[^>]*>\(.\{-}\)<\/a>,[\2 \1],ge
%s,<\s*a\s[^>]*href=\([^ >]\+\)[^>]*>\(.\{-}\)<\/a>,[\2 \1],ge
" images
" <img ... src=URL ...> becomes [URL]; quoted src first, then unquoted.
%s,<\s*img\s[^>]*src="\(.\{-}\)"[^>]*>,[\1],ge
%s,<\s*img\s[^>]*src=\([^ >]\+\)[^>]*>,[\1],ge
" anchor
" A line that starts with <a name=...>TEXT</a> becomes '== TEXT =='.
%s,^<\s*a\s\+name=.\{-}>\(.*\)<\/a>,== \1 ==,ge
" comments
" A comment that opens and closes on the same line is replaced by '% '
" followed by the comment text; whitespace before '<!--' is dropped.
%s,\s*<!--\(.*\)-->,\% \1,ge

" Multi-line comment: every line from the next '<!--' to the following
" '-->' gets a '% ' prefix.
" NOTE(review): the range is resolved once, starting from the cursor
" position, so presumably only one multi-line comment is handled, and the
" 'e' flag does not cover a failed range search - confirm.
/<!--/,/-->/s,^,\% ,e
" paragraph
" NOTE: every line break in a replacement below is written as \r (see
" :help sub-replace-special). A raw carriage-return character in that
" position is easily turned into a real newline when the file is copied or
" converted, which cuts the :s command in two and breaks the script.
"
" Each opening <p ...> tag starts a new line.
%s,<\s*p\(\s[^>]*\)\=\s*>,\r,ge
" bar
%s,<\s*hr[^>]*>,-------------------------------------------------,ge
" title
" Opening and closing heading tags both become the same run of '='.
" h6 reuses the five-'=' mark of h5.
%s,</\=\s*h1\s*>,=,ge
%s,</\=\s*h2\s*>,==,ge
%s,</\=\s*h3\s*>,===,ge
%s,</\=\s*h4\s*>,====,ge
%s,</\=\s*h5\s*>,=====,ge
%s,</\=\s*h6\s*>,=====,ge
" beautifiers
" Opening and closing tags map to the same mark:
" code -> ``   b/strong -> **   i/em -> //   u -> __
%s,</\=\s*code\s*>,``,ge
%s,</\=\s*\(b\|strong\)\s*>,**,ge
%s,</\=\s*\(i\|em\)\s*>,//,ge
%s,</\=\s*u\s*>,__,ge
" pre
" <pre> and </pre> each become a ``` mark on a line of its own.
%s,</\=\s*pre\s*>,\r```\r,ge
" bullet/numbered list
" Items of both <ul> and <ol> get the '- ' mark; closing </li> and opening
" list tags are dropped, and the end of a list leaves two line breaks.
%s,<\s*li\s*>,- ,ge
%s,</\s*li\s*>,,ge
%s,<\s*[uo]l\s*>,,ge
%s,</\s*[uo]l\s*>,\r\r,ge
" definition list
" <dt> becomes ': ' and </dt> ends the line; <dd> and </dd> are dropped.
" The end of the list leaves two line breaks.
%s,<\s*dl\s*>,,ge
%s,</\s*dl\s*>,\r\r,ge
%s,<\s*dt\s*>,: ,ge
%s,</\s*dt\s*>,\r,ge
%s,</\=\s*dd\s*>,,ge
" BR becomes a plain line break
%s,<\s*br\s*/*>,\r,ge
" trash
" Drop tags that have no txt2tags counterpart: closing font/p/a tags,
" blink, leftover <a name=...> anchors and the html/body/head/title
" document-structure tags.
%s,</\s*font[^>]*\s*>,,ge
%s,</\s*p\s*>,,ge
%s,</\s*a\s*>,,ge
%s,</\=\s*blink\s*>,,ge
%s,<\s*a\s\+name=[^>]*>,,ge
%s,</\=\s*\(html\|body\|head\|title\)\(\s[^>]*\)\=\s*>,,ge
" mmmmm, dangerous! it removes all remaining HTML tags
" (must run before the entities are decoded, so that an escaped '&lt;...&gt;'
" in the text is not mistaken for a tag)
%s,<[^>]*>,,ge
" empty out lines that contain only whitespace
" ('e' flag: no "pattern not found" error when there is no such line)
%s,^\s*$,,e
" special entities
" '&amp;' is decoded LAST: decoding it earlier would turn an escaped entity
" such as '&amp;lt;' into '&lt;' and then, wrongly, into '<'.
%s,&quot;,",ge
%s,&gt;,>,ge
%s,&lt;,<,ge
%s,&nbsp;, ,ge
%s,&amp;,\&,ge

" save new .t2t file and turn on syntax
" '%' expands to the current file name, so the buffer is written as
" <current file>.t2t ('!' overwrites an existing file); the buffer's
" filetype is then set to txt2tags.
saveas! %.t2t | set filetype=txt2tags