1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
|
<?php
/**
* Convert data from Perl's TextCat LM format to PHP format
* used by this tool.
*/
require_once __DIR__.'/TextCat.php';
// Exactly two arguments are required: the directory holding the Perl LM
// files and the directory that receives the converted files.
if($argc != 3) {
    // Report usage errors on STDERR with a non-zero status; die() with a
    // string argument prints to STDOUT and exits with status 0.
    fwrite(STDERR, "Use $argv[0] INPUTDIR OUTPUTDIR\n");
    exit(1);
}

$inputDir = $argv[1];
$outputDir = $argv[2];

// Fail early with a readable message instead of letting DirectoryIterator
// throw an uncaught exception on a missing input directory.
if(!is_dir($inputDir)) {
    fwrite(STDERR, "Input directory not found: $inputDir\n");
    exit(1);
}

// Create the output directory when needed. The second is_dir() check covers
// the case where mkdir() fails only because the directory appeared meanwhile.
if(!is_dir($outputDir) && !mkdir($outputDir, 0755, true) && !is_dir($outputDir)) {
    fwrite(STDERR, "Cannot create output directory: $outputDir\n");
    exit(1);
}

$cat = new TextCat($outputDir);

foreach(new DirectoryIterator($inputDir) as $file) {
    // Only regular files are converted; directories and dot entries are skipped.
    if(!$file->isFile()) {
        continue;
    }

    $lines = file($file->getPathname(), FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
    if($lines === false) {
        // file() returns false when the file cannot be read.
        fwrite(STDERR, "Cannot read " . $file->getPathname() . ", skipping\n");
        continue;
    }

    // Map of n-gram => integer score for the current input file.
    $ngrams = array();
    foreach($lines as $line) {
        // Each line is expected to be "<ngram><TAB><SPACE><score>".
        $parts = explode("\t ", $line, 2);
        if(count($parts) !== 2) {
            // Without the separator there is no score to read; skip the line
            // rather than recording a bogus n-gram with score 0.
            fwrite(STDERR, "Skipping malformed line in " . $file->getPathname() . ": $line\n");
            continue;
        }
        $ngrams[$parts[0]] = intval($parts[1]);
    }

    // The converted file keeps the base name of the input file.
    $cat->writeLanguageFile($ngrams, $outputDir . "/" . $file->getBasename());
}
exit(0);
|