File: lm2php.php

package info (click to toggle)
mediawiki 1:1.27.4-1~bpo8+1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 152,836 kB
  • sloc: php: 426,557; sql: 8,668; python: 3,484; xml: 2,201; pascal: 1,567; ruby: 1,017; perl: 789; sh: 612; makefile: 284
file content (27 lines) | stat: -rw-r--r-- 664 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
<?php
/**
 * Convert data from Perl's TextCat LM format to PHP format
 * used by this tool.
 */
require_once __DIR__.'/TextCat.php';

// Command-line entry point: expects exactly two arguments after the script
// name, the input directory and the output directory.
if($argc != 3) {
	// Report usage on STDERR and exit non-zero: die() with a string argument
	// prints to STDOUT and exits with status 0, which hides the error from
	// calling scripts.
	fwrite(STDERR, "Use $argv[0] INPUTDIR OUTPUTDIR\n");
	exit(1);
}

// Make sure the output location is a directory. is_dir() is used instead of
// file_exists() so that a regular file with the same name is not mistaken for
// the directory; the trailing is_dir() tolerates the directory being created
// by someone else between the check and the mkdir() call.
if(!is_dir($argv[2]) && !mkdir($argv[2], 0755, true) && !is_dir($argv[2])) {
	fwrite(STDERR, "Cannot create output directory $argv[2]\n");
	exit(1);
}

// TextCat comes from TextCat.php; it is constructed with the output directory
// and used below only for writeLanguageFile().
$cat = new TextCat($argv[2]);

// Exit status of the whole run; set to 1 when an input file cannot be read.
$status = 0;

foreach(new DirectoryIterator($argv[1]) as $file) {
	// Only regular files are converted; "." / ".." and subdirectories are skipped.
	if(!$file->isFile()) {
		continue;
	}

	// Read the file as a list of lines without line endings, dropping empty lines.
	$lines = file($file->getPathname(), FILE_IGNORE_NEW_LINES|FILE_SKIP_EMPTY_LINES);
	if($lines === false) {
		fwrite(STDERR, "Cannot read " . $file->getPathname() . ", skipping\n");
		$status = 1;
		continue;
	}

	// Map of n-gram => integer score for this input file.
	$ngrams = array();
	foreach($lines as $line) {
		// Each line is split on the first TAB + SPACE sequence into the
		// n-gram and its score.
		$parts = explode("\t ", $line, 2);
		if(count($parts) !== 2) {
			// Without the separator there is no score to read; storing the
			// whole line with a score of 0 would pollute the output.
			fwrite(STDERR, "Skipping malformed line in " . $file->getPathname() . ": $line\n");
			continue;
		}
		$ngrams[$parts[0]] = intval($parts[1]);
	}

	// The output file keeps the base name of the input file and is placed in
	// the output directory.
	$cat->writeLanguageFile($ngrams, $argv[2] . "/" . $file->getBasename());
}
exit($status);