File: sl_salad.t

package info (click to toggle)
libmarpa-r2-perl 12.000000-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 6,660 kB
sloc: perl: 42,628; ansic: 23,387; sh: 4,363; makefile: 157
file content (206 lines) | stat: -rw-r--r-- 6,555 bytes
#!/usr/bin/env perl
# Copyright 2022 Jeffrey Kegler
# This file is part of Marpa::R2.  Marpa::R2 is free software: you can
# redistribute it and/or modify it under the terms of the GNU Lesser
# General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
#
# Marpa::R2 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser
# General Public License along with Marpa::R2.  If not, see
# http://www.gnu.org/licenses/.

# This example searches for recursively nested braces --
# curly, square and round -- in a "salad" of other things.
# It's to show general BNF search -- sort of a grep or an ack,
# but for general BNF, instead of regexes.  The term
# "salad" I picked up from Michael Roberts, to suggest
# that the targets occur in a sort of "lexeme salad".
# In the literature, this is called a supersequence
# search.

use 5.010001;
use strict;
use warnings;
use Marpa::R2 2.097_002;
use Data::Dumper;
use Test::More tests => 3;
use Getopt::Long ();

my $verbose;
die if not Getopt::Long::GetOptions( verbose => \$verbose );

my $grammar = << '=== GRAMMAR ===';
:default ::= action => [ name, value ]
lexeme default = action => [ name, value ] latm => 1 # to add token names to ast

<prefixed target> ::= prefix target
prefix ::= <prefix lexeme>*

target ::= balanced
event target = completed target

balanced ::= 
    lparen contents rparen
  | lcurly contents rcurly
  | lsquare contents rsquare

contents ::= <content item>*
<content item> ::= balanced | filler

<prefix lexeme> ~ <deep filler>
filler ~ <deep filler>
# x5b is left square bracket
# x5d is right square bracket
<deep filler> ~ [^(){}\x5B\x5D]+

<prefix lexeme> ~ <deep lparen>
lparen ~ <deep lparen>
<deep lparen> ~ '('

<prefix lexeme> ~ <deep rparen>
rparen ~ <deep rparen>
<deep rparen> ~ ')'

<prefix lexeme> ~ <deep lcurly>
lcurly ~ <deep lcurly>
<deep lcurly> ~ '{'

<prefix lexeme> ~ <deep rcurly>
rcurly ~ <deep rcurly>
<deep rcurly> ~ '}'

<prefix lexeme> ~ <deep lsquare>
lsquare ~ <deep lsquare>
<deep lsquare> ~ '['

<prefix lexeme> ~ <deep rsquare>
rsquare ~ <deep rsquare>
<deep rsquare> ~ ']'

=== GRAMMAR ===

my $g = Marpa::R2::Scanless::G->new( { source => \($grammar) } );

my @tests = (
    [ 'z}ab)({[]})))(([]))zz', ( join "\n", '({[]})', '(([]))', '' ) ],
    [   '9\090]{[][][9]89]8[][]90]{[]\{}{}09[]}[',
        join "\n", '[]', '[]', '[9]', '[]', '[]', '{[]\{}{}09[]}', ''
    ],
    [ '([]([])([]([]', join "\n", '[]', '([])', '[]', '[]', '' ],
);

for my $test (@tests) {
    my ( $string, $expected_result ) = @{$test};
    my $actual_result = test( $g, $string );
    diag("Input: $string") if $verbose;
    Test::More::is( $actual_result, $expected_result,
        qq{Result of "$string"} );
} ## end for my $test (@tests)

sub test {
    my ($g, $string) = @_;
    my @found = ();
    diag( "Input: $string" ) if $verbose;
    my $input_length = length $string;
    my $target_start = 0;

    # my $recce_debug_args = { trace_terminals => 1, trace_values => 1 };
    state $recce_debug_args = {};

    # One pass through this loop for every target found,
    # until we reach end of string without finding a target

    TARGET: while ( $target_start < $input_length ) {

        # First we find the "shortest span" -- the one which ends earliest.
        # This tells us where the prefix should end.
        # No prefix should go beyond the first location of the shortest span.

# Marpa::R2::Display
# name: SLIF exhaustion recognizer setting synopsis

        my @shortest_span = ();
        my $recce         = Marpa::R2::Scanless::R->new(
            {   grammar    => $g,
                exhaustion => 'event',
            },
            $recce_debug_args
        );
        my $pos = $recce->read( \$string, $target_start );

        EVENT:
        for my $event ( @{ $recce->events() } ) {
            my ($name) = @{$event};
            if ( $name eq 'target' ) {
                @shortest_span = $recce->last_completed_span('target');
                diag(
                    "Preliminary target at $pos: ",
                    $recce->literal(@shortest_span)
                ) if $verbose;
                next EVENT;
            } ## end if ( $name eq 'target' )
                # Not all exhaustion has an exhaustion event,
                # so we look for exhaustion explicitly below.
            next EVENT if $name eq q('exhausted);
            die join q{ }, "Spurious event at position $pos: '$name'";
        } ## end EVENT: for my $event ( @{ $recce->events() } )

# Marpa::R2::Display::End

        last TARGET if not scalar @shortest_span;

        # We now have found the longest allowed prefix.
        # Our "longest match" will begin at the end of this prefix,
        # or before it.

        # We just run until exhausted, the  look for the last
        # completed <target>.  This will be our longest match.

        diag( join q{ }, @shortest_span ) if $verbose;
        my $prefix_end = $shortest_span[0];
        $recce = Marpa::R2::Scanless::R->new(
            {   grammar    => $g,
                exhaustion => 'event',
                rejection => 'event',
            },
            $recce_debug_args
        );
        $recce->activate( 'target', 0 );
        $recce->read( \$string, $target_start, $prefix_end - $target_start );

# Marpa::R2::Display
# name: SLIF recognizer lexeme_priority_set() synopsis

        $recce->lexeme_priority_set( 'prefix lexeme', -1 );

# Marpa::R2::Display::End

        $pos = $recce->resume($prefix_end);

# Marpa::R2::Display
# name: SLIF recognizer last_completed_span() synopsis

        my @longest_span = $recce->last_completed_span('target');
        diag( "Actual target at $pos: ", $recce->literal(@longest_span) ) if $verbose;

# Marpa::R2::Display::End

        last TARGET if not scalar @longest_span;
        push @found, $recce->literal(@longest_span);
        diag( "Found target at $pos: ", $recce->literal(@longest_span) ) if $verbose;

        # Move the search location forward,
        # in preparation for looking for the next target

        $target_start = $longest_span[0] + $longest_span[1];

    } ## end TARGET: while ( $target_start < $input_length )
    return join "\n", @found, q{};
} ## end sub test

# vim: expandtab shiftwidth=4: