1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
|
#!/usr/bin/php -q
<?php
/*
+----------------------------------------------------------------------+
| PHP Version 4 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2010 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.0 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_0.txt. |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Gabor Hojtsy <goba@php.net> |
+----------------------------------------------------------------------+
$Id: dbtags.php 293138 2010-01-05 10:21:11Z rquadling $
*/
// Print the usage text and stop when more than one argument is given,
// or when the single argument is one of the recognised help switches.
if ($argc > 2 || (isset($argv[1]) && in_array($argv[1], array('--help', '-help', '-h', '-?')))) {
?>
Process XML files for used DocBook tags
and give statistics
Usage:
<?php echo $argv[0];?> [<language-code>]
<language-code> can be a valid language code
used in the repository, or 'all' for all
languages. Defaults to en.
The script will generate a tag_usage.txt
file, containing the tags used and the numbers.
<?php
// Nothing is scanned once the help text has been shown
exit;
}
// CONFIG SECTION
// Main directory of the PHP documentation, given as a relative path
// (one directory up)
$docdir = "../";

/*********************************************************************/
/* Nothing to modify below this line                                 */
/*********************************************************************/

// The scan may run for a long time, so lift the execution time limit
set_time_limit(0);

// Tag name => number of occurrences, filled in while scanning
$used_tags = array();

// Language trees to scan: English when no argument is given, every
// known tree for 'all', otherwise the single code from the command line
if ($argc != 2) {
    $langcodes = array("en");
} elseif ($argv[1] === 'all') {
    $langcodes = array(
        "ar", "cs", "de", "en", "es", "fr",
        "hk", "hu", "it", "ja", "kr", "nl",
        "pl", "pt_BR", "ru", "tr", "tw"
    );
} else {
    $langcodes = array($argv[1]);
}
/*********************************************************************/
/* Here starts the functions part */
/*********************************************************************/
/**
 * Recursively scans a directory of phpdoc XML files for DocBook tags.
 *
 * Every entry of $dir whose name contains ".xml" is handed to check_file()
 * in sorted order; afterwards each subdirectory (except "CVS", "." and
 * "..") is scanned the same way, also in sorted order.
 *
 * @param string $dir       Directory to scan, including the trailing slash
 * @param array  $used_tags Tag name => occurrence count, updated in place
 * @return void
 */
function check_dir($dir, &$used_tags)
{
    // Collect files and directories in these arrays
    $directories = array();
    $files = array();

    // A directory that cannot be opened is skipped; calling readdir()
    // on a failed handle would abort the whole run
    $handle = @opendir($dir);
    if ($handle === false) {
        return;
    }

    // Compare against false explicitly: an entry named "0" is falsy and
    // would otherwise end the traversal early
    while (($file = readdir($handle)) !== false) {
        // Collect directories and XML files
        if ($file !== 'CVS' && $file !== '.' &&
            $file !== '..' && is_dir($dir.$file)) {
            $directories[] = $file;
        }
        elseif (strstr($file, ".xml")) {
            $files[] = $file;
        }
    }
    closedir($handle);

    // Sort files and directories so the traversal order is stable
    sort($directories);
    sort($files);

    // Files first...
    foreach ($files as $file) {
        check_file($dir.$file, $used_tags);
    }

    // ...then the subdirectories
    foreach ($directories as $file) {
        check_dir($dir.$file."/", $used_tags);
    }
} // check_dir() function end
/**
 * Counts the tags opened in one XML file.
 *
 * CDATA sections and comments are removed first, then every opening or
 * self-closing tag name that remains is added to the running totals.
 * Closing tags are not counted. A file that cannot be read is skipped.
 *
 * @param string $filename  Path of the XML file to scan
 * @param array  $used_tags Tag name => occurrence count, updated in place
 * @return void
 */
function check_file ($filename, &$used_tags)
{
    // Read in file contents; skip the file if it cannot be read
    $contents = file_get_contents($filename);
    if ($contents === false) {
        return;
    }

    // Drop CDATA sections first, then comments: neither contains DocBook
    // tags. The "s" modifier lets "." span line breaks, so line breaks can
    // stay in place and keep separating a tag name from its attributes;
    // ".*?" also copes with empty sections such as "<!---->".
    $contents = preg_replace(
        array('/<!\[CDATA\[.*?\]\]>/s', '/<!--.*?-->/s'),
        '',
        $contents
    );
    if ($contents === null) {
        // The regex engine gave up on this file; nothing reliable to count
        return;
    }

    // Find all tags in this file. The name ends at whitespace, ">" or
    // "/", so self-closing tags like <void/> are counted as well; a
    // leading "/" cannot start a name, which excludes closing tags.
    preg_match_all('!<([^\s>/]+)[\s>/]!', $contents, $tags_found);

    // No tags found
    if (count($tags_found[1]) == 0) { return; }

    // New occurrences found, so increase the number
    foreach ($tags_found[1] as $tag_name) {
        if (isset($used_tags[$tag_name])) {
            $used_tags[$tag_name]++;
        } else {
            $used_tags[$tag_name] = 1;
        }
    }
} // check_file() function end
/*********************************************************************/
/* Here starts the program */
/*********************************************************************/
// Checking all languages
// Language trees that were actually scanned. Initialised up front so the
// report header can be built even when every requested code is invalid.
$tested_trees = array();

foreach ($langcodes as $langcode) {
    // Check for directory validity
    if (!@is_dir($docdir . $langcode)) {
        print("The $langcode language code is not valid\n");
        continue;
    }
    $tested_trees[] = $langcode;

    // If directory is OK, start with the header
    echo "Searching in $docdir$langcode ...\n";

    // Check the requested directory
    check_dir("$docdir$langcode/", $used_tags);
}

echo "Generating tag_usage.txt ...\n";

// Stop with a clear message instead of writing to a failed handle
$fp = fopen("tag_usage.txt", "w");
if ($fp === false) {
    echo "Could not open tag_usage.txt for writing\n";
    exit(1);
}

fwrite($fp, "TAG USAGE STATISTICS
=========================================================
In this file you can find tag usage stats compiled
from the following tree[s] at phpdoc:\n" .
join(", ", $tested_trees) . ".
You may find some rarely used tags here, and find out
what tags others use to write documentation.
=========================================================
");

// Most frequently used tags first
arsort($used_tags);
foreach ($used_tags as $tag_name => $number) {
    fwrite($fp, sprintf("%-30s %d", $tag_name, $number). "\n");
}
fclose($fp);
echo "Done!\n";
?>
|