File: HtmlConverter.php

package info (click to toggle)
phpwiki 1.3.12p3-5etch1
  • links: PTS
  • area: main
  • in suites: etch
  • size: 16,956 kB
  • ctags: 21,608
  • sloc: php: 82,335; xml: 3,840; sh: 1,522; sql: 1,198; perl: 625; makefile: 562; awk: 28
file content (196 lines) | stat: -rw-r--r-- 7,277 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
<?php // -*-php-*-
/*
 Copyright 2005 Wincor Nixdorf International GmbH

 This file is part of PhpWiki.

 PhpWiki is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 PhpWiki is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with PhpWiki; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */

/**
 * HtmlConverter:  Convert HTML tags to Wiki markup as far as possible
 *          and eliminate all other HTML markup, so the output can be
 *          copied and pasted into a wiki page.
 *          Credit to an unknown programmer, who provided the first
 *          version (0.01) at http://www.gpgstudy.com/striphtml.phps
 * Usage:   <?plugin HtmlConverter ?>
 * Author:  HendrikScheider <hendrik.scheider@wincor-nixdorf.com>
 */

class WikiPlugin_HtmlConverter extends WikiPlugin
{

    /**
     * @return string The plugin name.
     */
    function getName () {
        return "HtmlConverter";
    }

    /**
     * @return string Translated one-line description of the plugin.
     */
    function getDescription () {
        return _("Convert HTML markup into wiki markup. (Version 0.5)");
    }

    /**
     * @return array Empty: the plugin does not take any arguments yet.
     */
    function getDefaultArguments() {
        return array();
    }

    /**
     * Render the upload form and, when an HTML file was uploaded with the
     * request, the converted wiki markup below it.
     *
     * @param mixed  $dbi      Not used by this plugin.
     * @param string $argstr   Not used: the plugin has no arguments yet.
     * @param object $request  Current request; supplies the post URL and
     *                         the uploaded file.
     * @param mixed  $basepage Not used by this plugin.
     * @return object HTML container holding the form and the result message.
     */
    function run($dbi, $argstr, &$request, $basepage) {

        // The plugin has no arguments yet. Once it gets some, parse them
        // here with $this->getArgs($argstr, $request) as UpLoad does.

        $form = HTML::form(array('action' => $request->getPostURL(),
                                 'enctype' => 'multipart/form-data',
                                 'method' => 'post'));
        $contents = HTML::div(array('class' => 'wikiaction'));
        $contents->pushContent(HTML::input(array('type' => 'hidden',
                                                 'name' => 'MAX_FILE_SIZE',
                                                 'value' => MAX_UPLOAD_SIZE)));
        $contents->pushContent(HTML::input(array('name' => 'userfile',
                                                 'type' => 'file',
                                                 'size' => '50')));
        $contents->pushContent(HTML::raw(" "));
        $contents->pushContent(HTML::input(array('value' => _("Upload"),
                                                 'type' => 'submit')));
        $form->pushContent($contents);

        $message = HTML();
        $userfile = $request->getUploadedFile('userfile');
        if ($userfile) {
            // basename() drops any directory part sent by the client.
            $userfile_name = basename($userfile->getName());
            $userfile_tmpname = $userfile->getTmpName();

            if ( !preg_match("/(\.html|\.htm)$/i", $userfile_name)) {
                $message->pushContent(_("Only files with extension HTML are allowed"),
                                      HTML::br(), HTML::br());
            } else {
                // Translate the constant format string and insert the file
                // name afterwards; a msgid with the name already
                // interpolated can never be found in a message catalog.
                $message->pushContent(sprintf(_("Processed %s"), $userfile_name),
                                      HTML::br(), HTML::br());
                $message->pushContent(_("Copy the output below and paste it into your Wiki page."),
                                      HTML::br());
                $message->pushContent($this->_process($userfile_tmpname));
            }
        } else {
            $message->pushContent(HTML::br(), HTML::br());
        }

        $result = HTML();
        $result->pushContent($form);
        $result->pushContent($message);
        return $result;
    }

    /**
     * Rewrite <a href="target">text</a> as the wiki link [ text | target ].
     *
     * Done in two passes: the opening tag first becomes {{target}}, then
     * {{target}}text</a> becomes the final link. Anchors whose target
     * contains characters outside the accepted set, or whose text contains
     * further tags, are left alone.
     *
     * @param string $file HTML text, modified in place.
     */
    function _processA(&$file) {

        // Characters accepted in a link target: the same set the former
        // POSIX expression accepted (a backslash was literal inside its
        // bracket expression, so it is kept here), matched
        // case-insensitively. \x80-\xFF lets non-ASCII bytes through.
        $target = '[-/.a-z0-9_~#@%$?&=:\x80-\xFF()\\\\[:space:]]+';

        $file = preg_replace(
            '!<a[[:space:]]+href[[:space:]]*=[[:space:]]*"(' . $target . ')"[^>]*>!i',
            '{{\\1}}',
            $file);

        $file = preg_replace(
            '!\{\{(' . $target . ')\}\}([^<]+)</a>!i',
            '[ \\2 | \\1 ]',
            $file);
    }

    /**
     * Rewrite <img src="target" ...> as [Upload:target] on its own paragraph.
     *
     * Only tags whose first attribute is src are recognised.
     *
     * @param string $file HTML text, modified in place.
     */
    function _processIMG(&$file) {

        // Case-insensitive, like the anchor and list handling.
        $img_regexp = "_<img\s+src\s*=\s*\"([-/.a-zA-Z0-9\_~#@%$?&=:\200-\377\(\)\s]+)\"[^>]*>_i";

        $file = preg_replace($img_regexp, "\n\n[Upload:\\1]", $file);
    }

    /**
     * Prepare lists: put every <li> on its own line, indent it by one tab
     * per enclosing <ul>, and replace each <ul>...</ul> pair by a <p>.
     *
     * @param string $file HTML text, modified in place.
     */
    function _processUL( &$file) {

        // Put every <li> tag on a new line so it can be indented, and strip
        // white space (including new-lines) following a plain <li>.
        // \b keeps other tags that merely start with "li" (<link>) untouched.
        $file = preg_replace('~<li\b~i', "\n" . '$0', $file);
        $file = preg_replace('~(<li>)\s*~i', '\\1', $file);

        // The greedy leading (.*) makes this find the LAST <ul> in the text
        // together with its nearest </ul>, so nested lists are resolved from
        // the inside out and inner items collect one tab per level.
        $enclosing_regexp = '_(.*)<ul\s?[^>]*>((?U).*)</ul>(.*)_is';
        $parts = array();
        while (preg_match($enclosing_regexp, $file, $parts)) {
            $indented = preg_replace('~<li\b~i', "\t" . '$0', $parts[2]);
            // String the text together again with the indented part in the
            // middle. A <p> replaces the erased <ul> tags so that a
            // paragraph is generated for the list at the end of _process().
            $file = $parts[1] . "<p>" . $indented . $parts[3];
        }
    }

    /**
     * Convert the HTML file $file_name to wiki markup and wrap the result
     * in a textarea for copy and paste.
     *
     * @param string $file_name Path of the HTML file to read.
     * @return object HTML container holding the textarea, or an error
     *                message when the file cannot be read.
     */
    function _process( $file_name) {
        $result = HTML();

        $file = file_get_contents($file_name);
        if ($file === false) {
            $result->pushContent(_("The uploaded file could not be read."), HTML::br());
            return $result;
        }
        $file = html_entity_decode($file);

        $this->_processA($file);
        $this->_processIMG($file);
        $this->_processUL($file);

        $file = str_replace("\r\n", "\n", $file);

        // Tag => wiki markup, applied in this order. preg_replace() is used
        // because the POSIX ereg functions no longer exist in current PHP.
        $tag_markup = array(
            '~<h1[[:space:]]?[^>]*>~i' => "\n\n!!!!",
            '~<h2[[:space:]]?[^>]*>~i' => "\n\n!!!",
            '~<h3[[:space:]]?[^>]*>~i' => "\n\n!!",
            '~<h4[[:space:]]?[^>]*>~i' => "\n\n!",
            '~<h5[[:space:]]?[^>]*>~i' => "\n\n__",
            '~</h1>~i'                 => "\n\n",
            '~</h2>~i'                 => "\n\n",
            '~</h3>~i'                 => "\n\n",
            '~</h4>~i'                 => "\n\n",
            '~</h5>~i'                 => "__\n\n",
            '~<hr[[:space:]]?[^>]*>~i' => "\n----\n",
            // \b: match <li> and <li attr...>, but not <link ...>.
            '~<li\b[^>]*>~i'           => "* ",
        );
        $file = preg_replace(array_keys($tag_markup), array_values($tag_markup), $file);

        // strip all tags, except for <pre>, which is supported by wiki
        // and <p>'s which will be converted after compression.
        $file = strip_tags($file, "<pre><p>");
        // strip </p> end tags with trailing white space
        $file = preg_replace("_</p>\s*_i", "", $file);

        // get rid of all blank lines
        $file = preg_replace("/\n\s*\n/", "\n", $file);

        // finally only add paragraphs where defined by inserting double new-lines
        // be sure to only catch <p> or <p[space]...> and not <pre>!
        // Actually <p> tags together with the white space and at most one
        // new-line before and after them are replaced
        $file = preg_replace("_\n?[^\S\n]*<p(\s[^>]*|)>[^\S\n]*\n?_i", "\n\n", $file);

        // strip attributes from <pre>-Tags and add a new-line before
        $file = preg_replace("_<pre(\s[^>]*|)>_iU", "\n<pre>", $file);

        $outputArea = HTML::textarea(array(
            'rows' => '30',
            'cols' => '80',
            'wrap' => 'virtual')
        );

        // The converted text is user content, not a message id, so it must
        // not be passed through gettext.
        $outputArea->pushContent($file);
        $result->pushContent($outputArea);
        return $result;
    }
}
?>