1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
|
#!/usr/bin/php
<?php
/*
+----------------------------------------------------------------------+
| Copyright (c) 1997-2023 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| https://www.php.net/license/3_01.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net, so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Jakub Vrana <vrana@php.net> |
+----------------------------------------------------------------------+
*/
/*
See en.pws for the list of ignored words.
*/
// Validate the command line: exactly two arguments, the first one being the
// mode ("escape" or "unescape"), the second one the directory to process.
//
// Usage errors are written to stderr and terminate with a non-zero status so
// that calling scripts can detect the failure; exit() with a string argument
// would print to stdout and report success (status 0).
if ($_SERVER["argc"] != 3 || !in_array($_SERVER["argv"][1], array("escape", "unescape"), true)) {
    fwrite(STDERR, "Purpose: Escape or unescape all *.xml files for use in aspell.\n"
        . "Usage: aspell.php escape|unescape <directory>\n"
    );
    exit(1);
}
// A path that is not a directory would otherwise be processed as an empty
// tree and silently "succeed" without touching any file.
if (!is_dir($_SERVER["argv"][2])) {
    fwrite(STDERR, "aspell.php: not a directory: " . $_SERVER["argv"][2] . "\n");
    exit(1);
}
// TODO: &xxx.xx; -> &xxx-xx;
// Tags whose contents are always considered correctly spelled. The names are
// joined with "|" so the list can be dropped into a regex alternation.
$GOOD_TAGS = implode("|", array(
    "type",
    "parameter",
    "function",
    "refname",
    "literal",
    "methodname",
    "abbrev",
    "acronym",
    "constant",
    "varname",
    "replaceable",
    "filename",
    "userinput",
    "command",
    "structname",
    "structfield",
));

// Operating mode taken from the first command-line argument.
$MODE = $_SERVER["argv"][1];
/**
 * preg_replace_callback() handler for comments and CDATA sections.
 *
 * Depending on the global $MODE, the text between the delimiters is either
 * converted to HTML entities ("escape") or decoded back (any other mode).
 * The opening and closing delimiters are returned unchanged.
 *
 * @param array $matches [1] opening delimiter, [2] enclosed text, [3] closing delimiter
 * @return string the delimiters with the converted text between them
 */
function callback_htmlentities($matches) {
    list(, $open, $inner, $close) = $matches;

    if ($GLOBALS["MODE"] == "escape") {
        $inner = htmlentities($inner);
    } else {
        $inner = html_entity_decode($inner);
    }

    return $open . $inner . $close;
}
/**
 * preg_replace_callback() handler that hides the contents of an always-good
 * tag from the spell checker: the element is rewritten as an empty element
 * carrying its entity-encoded contents in an aspell="..." attribute.
 *
 * @param array $matches [1] tag name, [2] existing attributes (with leading space, may be empty), [3] element contents
 * @return string the self-closing replacement element
 */
function callback_make_value($matches) {
    $tag        = $matches[1];
    $attributes = $matches[2];
    $encoded    = htmlentities($matches[3]);

    return sprintf('<%s%s aspell="%s"/>', $tag, $attributes, $encoded);
}
/**
 * preg_replace_callback() handler that restores an always-good tag: the
 * value of the aspell="..." attribute is entity-decoded and becomes the
 * contents of a regular open/close element again.
 *
 * @param array $matches [1] tag name, [2] other attributes (with leading space, may be empty), [3] value of the aspell attribute
 * @return string the element with its contents restored
 */
function callback_make_contents($matches) {
    $tag        = $matches[1];
    $attributes = $matches[2];
    $contents   = html_entity_decode($matches[3]);

    return "<{$tag}{$attributes}>{$contents}</{$tag}>";
}
/**
 * Escape or unescape, in place, every *.xml file found below a directory.
 *
 * For each file the text inside CDATA sections and comments is passed through
 * callback_htmlentities(). Then, depending on the global $MODE:
 *  - "escape":   contents of the tags listed in $GOOD_TAGS are moved into an
 *                aspell="..." attribute (callback_make_value());
 *  - otherwise:  carriage returns are removed and the aspell="..." attributes
 *                are turned back into element contents
 *                (callback_make_contents()).
 *
 * A file is only rewritten when it was read and transformed successfully; on
 * any failure a message is written to stderr and the file is left untouched,
 * so a read error or a PCRE failure can never truncate a document.
 *
 * @param string $dir directory to process; subdirectories are visited recursively
 * @return void
 */
function recurse($dir) {
    echo "$dir\n";

    // glob() returns false on error (and on some systems for an empty
    // directory); there is nothing to iterate in either case.
    $entries = glob("$dir/*");
    if ($entries === false) {
        return;
    }

    foreach ($entries as $filename) {
        if (is_dir($filename)) {
            recurse($filename);
            continue;
        }
        if (!preg_match('/\\.xml$/i', $filename)) {
            continue;
        }

        $contents = file_get_contents($filename);
        if ($contents === false) {
            fwrite(STDERR, "Cannot read $filename, file skipped\n");
            continue;
        }

        // preg_replace_callback() returns null on a PCRE error (for example
        // when the backtrack limit is hit on a large file), so every step is
        // only run while the previous one succeeded.
        $file = preg_replace_callback('~(<!\\[CDATA\\[)(.*)(\\]\\]>)~sU', "callback_htmlentities", $contents);
        if ($file !== null) {
            // Comments are handled in a separate pass: a single combined
            // pattern could pair a CDATA opener with a comment closer.
            $file = preg_replace_callback('~(<!--)(.*)(-->)~sU', "callback_htmlentities", $file);
        }
        if ($file !== null) {
            if ($GLOBALS["MODE"] == "escape") {
                $file = preg_replace_callback('~<(' . $GLOBALS['GOOD_TAGS'] . ')( [^>]*)?>(.*)</\\1>~sU', "callback_make_value", $file);
            } else { // "unescape"
                $file = str_replace("\r", "", $file); // for Windows version of Aspell
                $file = preg_replace_callback('~<(' . $GLOBALS['GOOD_TAGS'] . ')( [^>]*)? aspell="(.*)"/>~sU', "callback_make_contents", $file);
            }
        }
        if ($file === null) {
            fwrite(STDERR, "PCRE error " . preg_last_error() . " while processing $filename, file left unchanged\n");
            continue;
        }

        if (file_put_contents($filename, $file) === false) {
            fwrite(STDERR, "Cannot write $filename\n");
        }
    }
}
// Start processing at the directory given as the second command-line argument.
recurse($_SERVER["argv"][2]);
?>
|