File: elixir-column.pl

package info (click to toggle)
libelixirfm-perl 1.1.976-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 348 kB
  • ctags: 125
  • sloc: perl: 2,260; makefile: 14
file content (55 lines) | stat: -rw-r--r-- 2,203 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#! perl -w

use ElixirFM;

use strict qw(vars subs);                   # catch undeclared variables and stray barewords
use warnings;

# Prints the analyses found in the output of `elixir resolve --lists` as
# tab-separated columns, one row per solution token sequence:
#
#   input word, token forms, token tags, lexeme ids, translation
#
# A word for which nothing beyond its form was recovered is printed alone on
# its own line, and a newline is printed after every line of the text.
#
# NOTE(review): strict 'refs' is left off on purpose. The exact shape of what
# ElixirFM::unpretty returns is not visible from this script, and enabling it
# could turn an entry that is silently skipped today into a fatal error.

local $/ = "\n:::: ";                       # provide the data of `elixir resolve --lists`;
                                            # input is read in chunks ending with this marker

while (my $record = <>) {                   # process the data efficiently word by word

    # Rebuild the framing around the chunk: drop the five characters ":::: "
    # of the trailing marker (its newline stays), make sure the chunk starts
    # with ":::: ", and terminate it with one more newline.
    $record = substr $record, 0, -5 if $record =~ /\n:::: $/;
    $record = ":::: " . $record unless $record =~ /^:::: /;
    $record .= "\n";

    # Explicit parentheses keep this a plain function call regardless of
    # whether the sub is already known when this line is compiled.
    my @lines = ElixirFM::unpretty($record);            # recover structured information

    foreach my $line (@lines) {                         # the text is processed by lines

        foreach my $word (@{$line}) {                   # words delimited by whitespace

            # First element: array reference holding the pieces of the form.
            # Remaining elements: one entry per way of tokenizing the word.
            my ($form_parts, @tokenizations) = @{$word};
            my $form = join "", @{$form_parts};         # remember the form for reuse

            print $form . "\n" unless @tokenizations;   # mention unrecognized words

            foreach my $tokenization (@tokenizations) { # level of word tokenization

                # The first element of a tokenization is not used here.
                my (undef, @lexeme_groups) = @{$tokenization};

                foreach my $group (@lexeme_groups) {    # grouped by lexeme sequences

                    # First element: array reference of lexeme entries, each
                    # read below as [0] for the id and [1] for the translation.
                    my ($lexeme_refs, @token_sequences) = @{$group};
                    my @lexemes = @{$lexeme_refs};      # remember lexemes for reuse

                    foreach my $sequence (@token_sequences) {   # solution token sequences

                        # First element: array reference of token forms.
                        # Remaining elements: the solutions, whose [0] is
                        # printed as the tag.
                        my ($token_forms, @solutions) = @{$sequence};

                        my @columns = (
                            $form,                                      # input word
                            ( join "",  @{$token_forms} ),              # token forms
                            ( join " ", map { $_->[0] } @solutions ),   # token tags
                            ( join " ", map { $_->[0] } @lexemes ),     # lexeme ids
                            ( join " ", map { $_->[1] } @lexemes ),     # translation
                        );

                        print join( "\t", @columns ) . "\n";
                    }
                }
            }

            # Slot for a separator between input words. The string is empty,
            # so with the default output separators nothing is emitted here.
            print "";                                   # separate input words
        }

        print "\n";                                     # separate input lines
    }
}