File: external_text.pl

package info (click to toggle)
cg3 1.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,684 kB
  • sloc: cpp: 26,476; xml: 6,139; perl: 1,398; lisp: 1,091; ansic: 178; sh: 47; python: 26; makefile: 14
file content (138 lines) | stat: -rwxr-xr-x 3,246 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
BEGIN {
   $| = 1;
};
use warnings;
use strict;
use utf8;
#use feature 'unicode_strings';

use IO::Handle;
autoflush STDOUT 1;
autoflush STDERR 1;

use FindBin;
use lib $FindBin::Bin.'/';
use CG3_External qw(check_protocol read_window write_window write_null_response);

#my @af_cmd = ('sed', '-u', 's/w/WXU/g; s/ma/am/g;');
my @af_cmd = ('cat'); # Change to something meaningful

use IPC::Run qw(start pump finish timeout);
my $af_in;
my $af_out;
my $af_err;
my $af_h;
my $af_started = 0;

sub initSubChain {
	if ($ENV{'DEBUG'}) { print STDERR "$0 initSubChain enter\n"; }
	$af_h = start \@af_cmd, \$af_in, \$af_out, \$af_err;
	$af_started = 1;
	if ($ENV{'DEBUG'}) { print STDERR "$0 initSubChain exit\n"; }
}

sub callSubChain {
	if (!$af_started) {
		initSubChain();
	}
	my ($input) = @_;
	if ($ENV{'DEBUG'}) { print STDERR "$0 callSubChain input: $input\n"; }
	utf8::encode($input);
	$af_in .= $input;
	$af_in .= "\n\n<STREAMCMD:FLUSH>\n\n";
	pump $af_h until $af_out =~ /<STREAMCMD:FLUSH>/g;

	my $out = $af_out;
	$af_out = '';

	utf8::decode($out);
	$out =~ s@</s>@@g;
	$out =~ s/<STREAMCMD:FLUSH>//g;
	$out =~ s@^\s+@@g;
	$out =~ s@\s+$@@g;
	if ($ENV{'DEBUG'}) { print STDERR "$0 callSubChain output: $out\n"; }

	return $out;
}

binmode(STDIN);
binmode(STDOUT);

if (!check_protocol(*STDIN)) {
   die("Out of date protocol!\n");
}

while (my $w = read_window(*STDIN)) {
   my $out = '';
   foreach my $c (@{$w->{'cohorts'}}) {
      $out .= $c->{'wordform'}."\n";
      foreach my $r (@{$c->{'readings'}}) {
         $out .= "\t".$r->{'baseform'};
         foreach my $t (@{$r->{'tags'}}) {
            $out .= ' '.$t;
         }
         $out .= "\n";
      }
   }

   my $in = callSubChain($out);

	$out =~ s@^\s+@@g;
	$out =~ s@\s+$@@g;

   if ($in eq $out) { # No change, so just skip the rest.
      write_null_response(*STDOUT);
      next;
   }

   my @out = split /\n/, $out;

   my @in = split /\n/, $in;

   my $lout = @out;
   my $lin = @in;
   if ($lout != $lin) {
      print STDERR "Mismatch in number of lines!\n";
      write_null_response(*STDOUT);
      next;
   }

   my $cc = 0;
   for (my $i = 0 ; $i<$lin ; $i++) {
      if ($in[$i] !~ /\t/) { # Found a cohort line, start looking for readings
         $cc++;
         my $c = @{$w->{'cohorts'}}[$cc-1];

         $in[$i] =~ s/^\s+//g;
         $in[$i] =~ s/\s+$//g;
         if ($in[$i] ne $out[$i]) { # Wordform changed
            $c->{'wordform'} = $in[$i];
         }

         my $rc = 0;
         my $j;
         for ($j = $i+1 ; $j<$lin ; $j++) {
            if ($in[$j] !~ /\t/) { # Found a cohort line, so stop looking for readings
               last;
            }
            $rc++;
            if ($in[$j] eq $out[$j]) { # Skip if the reading has no changes
               next;
            }
            my $r = @{$c->{'readings'}}[$rc-1];
            $r->{'flags'} |= (1 << 0);

            $in[$j] =~ s/^\s+//g;
            $in[$j] =~ s/\s+$//g;
            my @tags = split /\s+/, $in[$j];
            $r->{'baseform'} = shift(@tags);
            @{$r->{'tags'}} = @tags;
         }
         $i = $j-1;
      }
   }

   write_window(*STDOUT, $w);
}