File: Text.php

package info (click to toggle)
horde2 2.2.8-1sarge3
  • links: PTS
  • area: main
  • in suites: sarge
  • size: 3,832 kB
  • ctags: 2,897
  • sloc: php: 12,784; sh: 954; sql: 149; makefile: 104; perl: 97; xml: 24; pascal: 6
file content (458 lines) | stat: -rw-r--r-- 16,662 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
<?php
/*
 * $Horde: horde/lib/Text.php,v 1.6.2.18 2004/08/23 12:31:56 jan Exp $
 *
 * Copyright 1999-2003 Jon Parise <jon@horde.org>
 *
 * See the enclosed file COPYING for license information (LGPL). If you
 * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 */

define('TEXT_HTML_PASSTHRU', 0);
define('TEXT_HTML_SYNTAX', 1);
define('TEXT_HTML_REDUCED', 2);
define('TEXT_HTML_MICRO', 3);
define('TEXT_HTML_NOHTML', 4);
define('TEXT_HTML_NOHTML_NOBREAK', 5);

/**
 * The Text:: class provides common methods for manipulating text.
 *
 * @author  Jon Parise <jon@horde.org>
 * @version $Revision: 1.6.2.18 $
 * @since   Horde 1.3
 * @package horde
 */
class Text {

    /**
     * Filter the given text based on the words found in $words.
     *
     * @param string $text         The text to filter.
     * @param string $words_file   Filename containing the words to replace.
     * @param string $replacement  The replacement string.
     *
     * @return string  The filtered version of $text.
     */
    function filter($text, $words_file, $replacement)
    {
        if (@is_readable($words_file)) {
            /* Read the file and iterate through the lines. */
            $lines = file($words_file);
            foreach ($lines as $line) {
                /* Strip whitespace and comments. */
                $line = trim($line);
                $line = preg_replace('|#.*$|', '', $line);

                /* Filter the text. */
                if (!empty($line)) {
                    $text = preg_replace("/(\b(\w*)$line\b|\b$line(\w*)\b)/i",
                                         $replacement, $text);
                }
            }
        }

        return $text;
    }

    /**
     * Fixes incorrect wrappings which split double-byte gb2312 characters
     *
     * @param string $text          String containing wrapped gb2312 characters
     * @param $break_char          Character used to break lines.
     *
     * @return string                  String containing fixed text.
     *
     * @since Horde 2.2
     */
    function trim_gb2312($str, $break_char = "\n")
    {
        $lines = explode($break_char, $str);

        for ($i = 0; $i < count($lines) - 1; $i++) {
                $line = $lines[$i];
                $len = strlen($line);

                /* parse double-byte gb2312 characters */
                for ($c = 0; $c < $len - 1; $c++) {
                        if (ord($line{$c}) & 128) {
                                if (ord($line{$c + 1}) & 128) $c++;
                        }
                }

                /* If the last character of the current line is the first byte
                   of a double-byte character, move it to the start of the
                   next line. */
                if (($c == $len - 1) && (ord($line[$c]) & 128)) {
                        $lines[$i] = substr($line, 0, -1);
                        $lines[$i + 1] = $line[$c] . $lines[$i + 1];
                }
        }
        return implode($break_char, $lines);
    }

    /**
     * Wraps the text of a message.
     *
     * @param string $text        String containing the text to wrap.
     * @param integer $length     Wrap $text at this number of characters.
     * @param string $break_char  Character to use when breaking lines.
     *
     * @return string  String containing the wrapped text.
     */
    function wrap($text, $length = 80, $break_char = "\n", $charset = "")
    {
        $paragraphs = preg_split("/\r?\n/", $text);

        $charset = strtolower($charset);
        switch ($charset) {
            case "gb2312":
                for ($i = 0; $i < count($paragraphs); $i++) {
                    $paragraphs[$i] = wordwrap($paragraphs[$i], $length, $break_char, 1);
                    $paragraphs[$i] = Text::trim_gb2312($paragraphs[$i], $break_char);
                }
                break;
            default:
                for ($i = 0; $i < count($paragraphs); $i++) {
                    /* We need to handle the Usenet-style signature line
                     * separately; since the space after the two dashes is
                     * REQUIRED, we don't want to trim the line. */
                    if ($paragraphs[$i] != '-- ') {
                        $paragraphs[$i] = rtrim($paragraphs[$i]);
                    }
                    $paragraphs[$i] = wordwrap($paragraphs[$i], $length, $break_char);
                }
                break;
        }
        return implode($break_char, $paragraphs);
    }

    /**
     * Turns all URLs in the text into hyperlinks.
     *
     * @param string $text               The text to be transformed.
     * @param optional boolean $capital  Sometimes it's useful to generate <A>
     *                                   and </A> so you can know which tags
     *                                   you just generated.
     * @param optional string $class     The CSS class the links should be
     *                                   displayed with.
     *
     * @return string  The linked text.
     */
    function linkUrls($text, $capital = false, $class = '')
    {
        if ($capital) {
            $a = 'A';
            $text = str_replace('</A>', '</a>', $text); // make sure that the original message doesn't contain any capital </A> tags, so we can assume we generated them
            $text = str_replace('<A', '<a', $text);     // ditto for open <A> tags
        } else {
            $a = 'a';
        }
        if (!empty($class)) {
            $class = ' class="' . $class . '"';
        }

        /* Get all possible URLs and store their position in the text. */
        preg_match_all('|(\w+)://([^\s"<]*[\w+#?/&=])|', $text, $matches, PREG_SET_ORDER);

        /* Loop through the text replacing all the matched URLs. */
        $offset = 0;
        foreach ($matches as $match) {
            $offset = strpos($text, $match[0], $offset);
            $url = Horde::addParameter(Horde::url($GLOBALS['registry']->getWebroot('horde') . '/util/go.php', false, -1), 'url=' . urlencode($match[0]));
            $new = '<' . $a . ' href="' . $url . '" target="_blank"' . $class . '>' . $match[0] . '</' . $a . '>';
            /* Replace URL with link using match offset. */
            $text = substr_replace($text, $new, $offset, strlen($match[0]));

            /* Increase offset to compensate for more characters in link. */
            $offset += (strlen($new) - strlen($match[0]));
        }

        return $text;
    }

    /**
     * Re-convert links generated by Text::linkUrls() to working
     * hrefs, after htmlspecialchars() has been called on the
     * text. This is an awkward chain, but necessary to filter out
     * other HTML.
     *
     * @param string $text             The text to convert.
     * @param optional string $target  The link target.
     *
     * @return string  The converted text.
     */
    function enableCapitalLinks($text, $target = '_blank')
    {
        $text = str_replace('&lt;A href=&quot;', '<a class="fixed" href="', $text);
        $text = str_replace('&quot; target=&quot;_blank&quot;&gt;', '" target="' . $target . '">', $text);
        $text = str_replace('&quot;&gt;','">', $text);
        $text = str_replace('&lt;/A&gt;', '</a>', $text); // only reconvert capital /A tags - the ones we generated

        return $text;
    }

    /**
     * Replace occurences of %VAR% with VAR, if VAR exists in the
     * webserver's environment. Ignores all text after a # character
     * (shell-style comments).
     *
     * @param string $text  The text to expand.
     *
     * @return string  The expanded text.
     */
    function expandEnvironment($text)
    {
        if (preg_match("|([^#]*)#.*|", $text, $regs)) {
            $text = $regs[1];

            if (strlen($text) > 0) {
                $text = $text . "\n";
            }
        }

        while (preg_match("|%([A-Za-z_]+)%|", $text, $regs)) {
            $text = preg_replace("|%([A-Za-z_]+)%|", getenv($regs[1]), $text);
        }
        return $text;
    }

    /**
     * Convert a line of text to display properly in HTML.
     *
     * @param string $text  The string of text to convert.
     *
     * @return string  The HTML-compliant converted text.
     */
    function htmlSpaces($text = '')
    {
        $text = htmlspecialchars($text);
        $text = str_replace("\t", '&nbsp; &nbsp; &nbsp; &nbsp; ', $text);
        $text = str_replace('  ', '&nbsp; ', $text);
        $text = str_replace('  ', ' &nbsp;', $text);

        return $text;
    }

    /**
     * Same as htmlSpaces() but converts all spaces to &nbsp;
     * @see htmlSpaces()
     *
     * @param string $text  The string of text to convert.
     *
     * @return string  The HTML-compliant converted text.
     */
    function htmlAllSpaces($text = '')
    {
        $text = Text::htmlSpaces($text);
        $text = str_replace(' ', '&nbsp;', $text);

        return $text;
    }

    /**
     * Removes some common entities and high-ascii or otherwise
     * nonstandard characters common in text pasted from Microsoft
     * Word into a browser.
     *
     * @param string $text  The text to be cleaned.
     *
     * @return string  The cleaned text.
     */
    function cleanEntities($text)
    {
        /* The '' entry may look wrong, depending on your editor,
           but it's not - that's not really a single quote. */
        $from = array('', '', '', '', '', '', '', '', '', '', chr(167), '&#61479;', '&#61572;', '&#61594;', '&#61640;', '&#61623;', '&#61607;', '&#61558;', '&#9658;');
        $to   = array('...',     "'", "'",    '"',    '"',    '*',    '-',    '-',    '*', '*',      '*',        '.',        '*',        '*',        '-',        '-',        '*',        '*',       '>');

        return str_replace($from, $to, $text);
    }

    /**
     * Turn text into HTML with varying levels of parsing.
     *
     * @access public
     *
     * @param string $input            An url-decoded string, \n-separated for
     *                                 lines.
     * @param int $parselevel
     *  TEXT_HTML_PASSTHRU        =  No action. Pass-through. Included for
     *                               completeness.
     *  TEXT_HTML_SYNTAX          =  Allow full html, also do line-breaks,
     *                               in-lining, syntax-parsing.
     *  TEXT_HTML_REDUCED         =  Reduced html (bold, links, etc. by syntax
     *                               array).
     *  TEXT_HTML_MICRO           =  Micro html (only line-breaks, in-line
     *                               linking).
     *  TEXT_HTML_NOHTML          =  No html (all stripped, only line-breaks)
     *  TEXT_HTML_NOHTML_NOBREAK  =  No html whatsoever, no line breaks added.
     *                               Included for completeness.
     * For no html whatsoever, use htmlspecialchars()
     *
     * @access public
     *
     * @return string  The converted HTML.
     *
     * @since Horde 2.2
     */
    function toHTML($text, $parselevel)
    {
        $syntax = array('B' => '<b>',
                        '/B' => '</b>',
                        'I' => '<i>',
                        '/I' => '</i>',
                        'U' => '<u>',
                        '/U' => '</u>',
                        'Q'   => '<blockquote>',
                        '/Q' => '</blockquote>',
                        'LIST' => '<ul>',
                        '/LIST' => '</ul>',
                        '*' => '<li>');

        /* Abort out on simple cases. */
        if ($parselevel == TEXT_HTML_PASSTHRU) {
            return $text;
        }
        if ($parselevel == TEXT_HTML_NOHTML_NOBREAK) {
            return htmlspecialchars($text);
        }

        /* Tack on spaces so that we can count on whitespace coming before
           and after URLs and email addresses. */
        $text = ' ' . $text . ' ';

        /* Find tags we recognize with this parselevel and subst them to
           <tag> ==> [tag]
           and then subst the rest < --> &lt; > --> &gt; */
        if ($parselevel == TEXT_HTML_REDUCED) {
            foreach($syntax as $k => $val) {
                $text = str_replace('<' . $k . '>', '[' . $k . ']', $text);
                $k = strtolower($k);
                $text = str_replace('<' . $k . '>', '[' . $k . ']', $text);
            }
            $input = htmlspecialchars($input);
        }

        /* Interpret tags for parse levels TEXT_HTML_SYNTAX and
           TEXT_HTML_REDUCED. */
        if ($parselevel <= TEXT_HTML_REDUCED) {
            foreach($syntax as $k => $v) {
                $text = str_replace('[' . $k . ']', $v, $text);
                $text = str_replace('<' . $k . '>', $v, $text);
                $k = strtolower($k);
                $text = str_replace('[' . $k . ']', $v, $text);
                $text = str_replace('<' . $k . '>', $v, $text);
            }
        }

        /* For level TEXT_HTML_MICRO, TEXT_HTML_NOHTML, start with
           htmlspecialchars(). */
        if ($parselevel >= TEXT_HTML_MICRO) {
            $text = htmlspecialchars($text);
        }

        /* Do in-lining of http://xxx.xxx to link, xxx@xxx.xxx to email,
           part two. */
        if ($parselevel < TEXT_HTML_NOHTML) {
            // mailto
            $text = preg_replace('|(\s+)([\w\.\-]+\@[\w\-]+\.[\.\w]+)([^\.\w])|', '\1<a href="mailto:\2">\2</a>\3', $text);

            // urls
            $text = preg_replace('|(\s+)(\w+)://([^\s"<]*)([\w#?/&=])|', '\1<a href="\2://\3\4">\2://\3\4</a>', $text);
        }

        /* Do the blank-line ---> <br /> substitution.
           Everybody gets this; if you don't want even that, just save
           the htmlspecialchars() version of the input. */
        $text = nl2br($text);

        return trim($text);
    }

    /**
     * Highlights quoted messages with different colors for the different
     * quoting levels. CSS class names called "quoted1" .. "quoted$level"
     * must be present.
     *
     * @since Horde 2.2
     *
     * @access public
     *
     * @param string $text             The text to be highlighted.
     * @param optional integer $level  The maximum numbers of different
     *                                 colors.
     *
     * @return string  The highlighted text.
     */
    function highlightQuotes($text, $level = 5)
    {
        /* Use a global var since the class is called statically. */
        $GLOBALS['_tmp_maxQuoteChars'] = 0;

        preg_replace_callback("/^\s*((&gt;\s?)+)/m", array('Text', '_countQuoteChars'), $text);

        /* Go through each level of quote block and put the
           appropriate style around it. Important to work downwards so
           blocks with fewer quote chars aren't matched until their
           turn. */
        for ($i = $GLOBALS['_tmp_maxQuoteChars']; $i > 0; $i--) {
            $text = preg_replace(
                /* Finds a quote block across multiple newlines. */
                "/(\n)( *(&gt;\s?)\{$i}(?! ?&gt;).*?)(\n|$)(?! *(&gt; ?)\{$i})/s",
                '\1<span class="quoted' . ((($i - 1) % $level) + 1) . '">\2</span>\4',
                $text
            );
        }

        /* Unset the global variable. */
        unset($GLOBALS['_tmp_maxQuoteChars']);

        /* Remove the leading newline we added above. */
        return substr($text, 1);
    }

    /**
     * Called by the preg_replace_callback function in
     * highlightQuotes(). This method finds the maximum number of
     * quote characters in all of the quote blocks.
     *
     * @since Horde 2.2.4
     *
     * @access private
     *
     * @param array $matches  The matches from the regexp.
     */
    function _countQuoteChars($matches)
    {
        $num = count(preg_split('/&gt;\s?/', $matches[1])) - 1;
        if ($num > $GLOBALS['_tmp_maxQuoteChars']) {
            $GLOBALS['_tmp_maxQuoteChars'] = $num;
        }
    }

    /**
     * Displays message signatures marked by a '-- ' in the style of the CSS
     * class "signature". Class names inside the signature are prefixed with
     * "signature-".
     *
     * @param string $text  The text to be changed.
     *
     * @return string  The changed text.
     *
     * @since Horde 2.2
     */
    function dimSignature($text)
    {
        $parts = preg_split('|(\n--\s*(<br />)?\n)|', $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $text = array_shift($parts);
        if (count($parts)) {
            $text .= '<span class="signature">' . $parts[0];
            $text .= preg_replace('|class="([^"]+)"|', 'class="signature-\1"', $parts[2]);
            $text .= '</span>';
        }

        return $text;
    }

}