1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
#!/usr/bin/php -q
<?php
// Print the usage text and stop when too many arguments are given or when
// help is requested explicitly.
// $argv[1] only exists if an argument was passed, so the lookup is guarded:
// a plain run without arguments must not trigger an undefined-key warning.
if ($argc > 2 || (isset($argv[1]) && in_array($argv[1], array('--help', '-help', '-h', '-?'), true))) {
?>
Process XML files for used DocBook tags
and give statistics
Usage:
<?=$argv[0]?> [<language-code>]
<language-code> can be a valid language code
used in the repository, or 'all' for all
languages. Defaults to en.
The script will generate a tag_usage.txt
file, containing the tags used and the numbers.
Written by Gabor Hojtsy <goba@php.net>, 2001-12-28
<?php
    exit;
}
// CONFIG SECTION
// Root of the PHP documentation checkout, relative to this script
// (one dir up in cvs)
$docdir = "../";

/*********************************************************************/
/* Nothing to modify below this line */
/*********************************************************************/

// Scanning whole documentation trees can take long, so lift the time limit
set_time_limit(0);

// Tag name => number of occurrences, filled in while the files are scanned
$used_tags = array();

// Decide which language trees to scan: a single argument selects either
// that one language or, with 'all', the complete list; otherwise use "en"
if ($argc == 2 && $argv[1] === 'all') {
    $langcodes = array("ar", "cs", "de", "en", "es", "fr",
                       "hk", "hu", "it", "ja", "kr", "nl",
                       "pl", "pt_BR", "ru", "tr", "tw");
} elseif ($argc == 2) {
    $langcodes = array($argv[1]);
} else {
    $langcodes = array("en");
}
/*********************************************************************/
/* Here starts the functions part */
/*********************************************************************/
/**
 * Recursively scans a directory of phpdoc XML files and tallies the tags used.
 *
 * Every entry whose name contains ".xml" is handed to check_file(), in
 * sorted order. After the files, each subdirectory (except "CVS", "." and
 * "..") is scanned the same way, also in sorted order.
 *
 * @param string $dir       Directory to scan; entry names are appended to it
 *                          directly, so it has to end with a slash
 * @param array  $used_tags Tag name => count map, updated in place
 */
function check_dir($dir, &$used_tags)
{
    // Collect files and directories in these arrays
    $directories = array();
    $files = array();

    // A missing or unreadable directory is skipped silently (best effort).
    // Bail out here instead of passing a false handle to readdir()/closedir().
    $handle = @opendir($dir);
    if ($handle === false) {
        return;
    }

    // Compare with false explicitly: an entry named "0" is falsy and would
    // otherwise end the traversal before all entries were seen
    while (false !== ($file = readdir($handle))) {
        // Collect directories and XML files
        if ($file !== 'CVS' && $file !== '.' &&
            $file !== '..' && is_dir($dir.$file)) {
            $directories[] = $file;
        } elseif (strstr($file, ".xml")) {
            $files[] = $file;
        }
    }
    closedir($handle);

    // Sort files and directories
    sort($directories);
    sort($files);

    // Files first...
    foreach ($files as $file) {
        check_file($dir.$file, $used_tags);
    }

    // ...then the subdirectories
    foreach ($directories as $file) {
        check_dir($dir.$file."/", $used_tags);
    }
} // check_dir() function end
/**
 * Counts the tags opened in one XML file and adds them to the tally.
 *
 * CDATA sections and comments are removed before matching. Closing tags
 * (</name>) are not counted; opening and self-closing tags (<name ...>,
 * <name/>) are. Anything else that starts with "<" followed by a name-like
 * token and whitespace (for example a DOCTYPE declaration) is counted under
 * that token as well.
 *
 * @param string $filename  Path of the file to examine
 * @param array  $used_tags Tag name => count map, updated in place
 */
function check_file($filename, &$used_tags)
{
    // Read in file contents; an unreadable file is skipped (best effort)
    $lines = @file($filename);
    if ($lines === false) {
        return;
    }

    // Put everything on one line so "." in the patterns below can span what
    // used to be several lines. Line breaks become spaces rather than being
    // deleted, otherwise "<tag\nattr=..." would fuse into one bogus name.
    $contents = preg_replace("/[\r\n]/", " ", join("", $lines));

    // Drop out CDATA sections, they do not contain any DocBook tags
    $contents = preg_replace("/<!\\[CDATA\\[.+\\]\\]>/U", "", $contents);

    // Drop out comments, they do not contain any DocBook tags
    $contents = preg_replace("/<!--.+-->/U", "", $contents);

    // Find all tags in this file. The name may be terminated by whitespace,
    // ">" or "/", the latter so that self-closing tags are counted too.
    preg_match_all("!<([^\\s>/]+)[\\s>/]!U", $contents, $tags_found);

    // Increase the counters, starting new tags at 1 instead of incrementing
    // an entry that does not exist yet
    foreach ($tags_found[1] as $tag_name) {
        if (isset($used_tags[$tag_name])) {
            $used_tags[$tag_name]++;
        } else {
            $used_tags[$tag_name] = 1;
        }
    }
} // check_file() function end
/*********************************************************************/
/* Here starts the program */
/*********************************************************************/
// Check every requested language tree.
// $tested_trees lists the language codes that were actually scanned; it is
// initialised here so the report can still be written when none is valid.
$tested_trees = array();
foreach ($langcodes as $langcode) {
    // Skip language codes that have no directory under $docdir
    if (!@is_dir($docdir . $langcode)) {
        print("The $langcode language code is not valid\n");
        continue;
    }
    $tested_trees[] = $langcode;

    // If directory is OK, start with the header
    echo "Searching in $docdir$langcode ...\n";

    // Check the requested directory
    check_dir("$docdir$langcode/", $used_tags);
}

echo "Generating tag_usage.txt ...\n";

// Stop with a message if the report cannot be created, instead of handing
// a false handle to fwrite()/fclose()
$fp = @fopen("tag_usage.txt", "w");
if ($fp === false) {
    echo "Could not open tag_usage.txt for writing\n";
    exit(1);
}

fwrite($fp, "TAG USAGE STATISCTICS
=========================================================
In this file you can find tag usage stats compiled
from the following tree[s] at phpdoc:\n" .
join(", ", $tested_trees) . ".
You may find some rarely used tags here, and find out
what tags others use to write documentation.
=========================================================
");

// Most frequently used tags first
arsort($used_tags);
foreach ($used_tags as $tag_name => $number) {
    fwrite($fp, sprintf("%-30s %d", $tag_name, $number). "\n");
}
fclose($fp);
echo "Done!\n";
?>
|