File: split.pl

package info (click to toggle)
libcds 2.3.3-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,632 kB
  • sloc: cpp: 135,002; ansic: 7,234; perl: 243; sh: 237; makefile: 6
file content (42 lines) | stat: -rw-r--r-- 874 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/perl
#
# split.pl - build a test dictionary of words and phrases from a text file.
#
# Usage: split.pl INPUT_FILE OUTPUT_FILE
#
# The input text is split on every character that is not a word character,
# an apostrophe or a hyphen.  Every window of 1 .. 29 consecutive split
# fields becomes one dictionary entry: empty fields inside the window are
# ignored and the remaining tokens are joined by single spaces.
#
# The output file holds the number of distinct entries on its first line,
# followed by one entry per line, in the order returned by keys().

use strict;
use warnings;

# Widest window, counted in split fields, that is turned into an entry.
my $MAX_PHRASE_FIELDS = 29;

my $input_file  = shift;
my $output_file = shift;
die "Usage: $0 INPUT_FILE OUTPUT_FILE\n"
    unless defined $input_file && defined $output_file;

# Three-argument open: the file name is never interpreted as a mode or pipe.
open( my $f, '<', $input_file )
    or die "Cannot open input file $input_file: $!";
binmode $f;
my $text = do { local $/; <$f> };    # slurp the whole file at once
close $f;
$text = '' unless defined $text;

my @a = split /[^\w'-]/, $text;

my %words;
for my $j ( 1 .. $MAX_PHRASE_FIELDS ) {
    last if $j > @a;                 # no window of this width fits any more
    for my $i ( 0 .. @a - $j ) {
        # Consecutive separators yield empty fields; drop them so the entry
        # never contains doubled or leading/trailing blanks.  Testing the
        # length (not the truth value) keeps a token such as "0".
        my $s = join ' ', grep { length $_ } @a[ $i .. $i + $j - 1 ];

        # A window made only of empty fields contributes nothing.  Building
        # the entry directly, instead of trimming it with a regex and reading
        # $1, also avoids picking up the capture of an earlier match when
        # the trim pattern does not match.
        next unless length $s;

        $words{$s} += 1;
    }
}

open( my $dst, '>', $output_file )
    or die "Cannot open output file $output_file: $!";
binmode $dst;

print {$dst} scalar( keys %words ), "\n";

print "Generate test dictionary $output_file ...\n";
print {$dst} $_, "\n" for keys %words;

# Buffered write errors only show up when the handle is closed.
close $dst or die "Cannot write output file $output_file: $!";