File: Text.pm

package info (click to toggle)
po4a 0.74-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 23,312 kB
sloc: perl: 15,767; sh: 405; xml: 293; makefile: 25
file content (1072 lines) | stat: -rw-r--r-- 37,044 bytes
#!/usr/bin/perl -w

# Po4a::Text.pm
#
# extract and translate translatable strings from a text documents
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
########################################################################

=encoding UTF-8

=head1 NAME

Locale::Po4a::Text - convert text documents from/to PO files

=head1 DESCRIPTION

The po4a (PO for anything) project goal is to ease translations (and more
interestingly, the maintenance of translations) using gettext tools on
areas where they were not expected like documentation.

Locale::Po4a::Text is a module to help the translation of text documents into
other [human] languages.

Paragraphs are split on empty lines (or lines containing only spaces or
tabulations).

If a paragraph contains a line starting by a space (or tabulation), this
paragraph won't be rewrapped.

=cut

package Locale::Po4a::Text;

use 5.16.0;
use strict;
use warnings;

use parent qw(Locale::Po4a::TransTractor);

use Locale::Po4a::Common qw(wrap_mod dgettext);
use YAML::Tiny;
use Syntax::Keyword::Try;

=head1 OPTIONS ACCEPTED BY THIS MODULE

These are this module's particular options:

=over

=item B<keyvalue>

Treat paragraphs that look like a colon-separated key-value pair as verbatim
(with the C<no-wrap> flag in the PO file). A key-value pair string is a string
like C<key: value>, containing one or more non-colon and non-space characters 
followed by a colon followed by at least one non-space character before the
end of the line.

=cut

my $keyvalue = 0;

=item B<nobullets>

Deactivate the detection of bullets.

By default, when a bullet is detected, the bullet paragraph is not considered
as a verbatim paragraph (with the C<no-wrap> flag in the PO file). Instead, the
corresponding paragraph is rewrapped in the translation.

=cut

my $bullets = 1;

=item B<tabs=>I<mode>

Specify how tabulations shall be handled. The I<mode> can be any of:

=over

=item B<split>

Lines with tabulations introduce breaks in the current paragraph.

=item B<verbatim>

Paragraph containing tabulations will not be re-wrapped.

=back

By default, tabulations are considered as spaces.

=cut

my $tabs = "";

=item B<breaks=>I<regex>

A regular expression matching lines which introduce breaks.
The regular expression will be anchored so that the whole line must match.

=cut

my $breaks;

=item B<debianchangelog>

Handle the header and footer of
released versions, which only contain non translatable information.

=cut

my $debianchangelog = 0;

=item B<fortunes>

Handle the fortunes format, which separate fortunes with a line which
consists in '%' or '%%', and use '%%' as the beginning of a comment.

=cut

my $fortunes = 0;

=item B<markdown>

Handle some special markup in Markdown-formatted texts.

=cut

my $markdown = 0;

=item B<yfm_keys> (markdown-only)

Comma-separated list of keys to process for translation in the YAML Front Matter
section. All other keys are skipped. Keys are matched with a case-sensitive
match. If B<yfm_paths> and B<yfm_keys> are used together, values are included if
they are matched by at least one of the options. Array values are always translated,
unless the B<yfm_skip_array> option is provided.

=cut

my %yfm_keys = ();

=item B<yfm_lenient> (markdown only)

Allow the YAML Front Matter parser to fail on malformated headers. This is
particularly helpful when your file starts with a horizontal ruler instead
of a YAML Front Matter, but you insist on using three dashes only for your
ruler.

=cut

my $yfm_lenient = 0;

=item B<yfm_paths> (markdown only)

=item B<yfm_paths>

Comma-separated list of hash paths to process for extraction in the YAML
Front Matter section, all other paths are skipped. Paths are matched with a
case-sensitive match. If B<yfm_paths> and B<yfm_keys> are used together,
values are included if they are matched by at least one of the options.
Arrays values are always returned unless the B<yfm_skip_array> option is
provided.

=cut

my %yfm_paths = ();

=item B<yfm_skip_array> (markdown-only)

Do not translate array values in the YAML Front Matter section.

=cut

my $yfm_skip_array = 0;

=item B<control>[B<=>I<field_list>]

Handle Debian's control files.
A comma-separated list of fields to be translated can be provided.

=cut

my %control = ();

=item B<neverwrap>

Prevent po4a from wrapping any lines. This means that every content is handled verbatim, even simple paragraphs.

=cut

my $defaultwrap = 1;

my $parse_func = \&parse_fallback;

my @comments = ();

=back

=cut

sub initialize {
    my $self    = shift;
    my %options = @_;

    $self->{options}{'control'}         = "";
    $self->{options}{'breaks'}          = 1;
    $self->{options}{'debianchangelog'} = 1;
    $self->{options}{'debug'}           = 1;
    $self->{options}{'fortunes'}        = 1;
    $self->{options}{'markdown'}        = 1;
    $self->{options}{'yfm_keys'}        = '';
    $self->{options}{'yfm_lenient'}     = 0;
    $self->{options}{'yfm_paths'}       = '';
    $self->{options}{'yfm_skip_array'}  = 0;
    $self->{options}{'nobullets'}       = 0;
    $self->{options}{'keyvalue'}        = 1;
    $self->{options}{'tabs'}            = 1;
    $self->{options}{'verbose'}         = 1;
    $self->{options}{'neverwrap'}       = 1;

    foreach my $opt ( keys %options ) {
        die wrap_mod( "po4a::text", dgettext( "po4a", "Unknown option: %s" ), $opt )
          unless exists $self->{options}{$opt};
        $self->{options}{$opt} = $options{$opt};
    }

    $keyvalue    = 1                  if ( defined $options{'keyvalue'} );
    $bullets     = 0                  if ( defined $options{'nobullets'} );
    $tabs        = $options{'tabs'}   if ( defined $options{'tabs'} );
    $breaks      = $options{'breaks'} if ( defined $options{'breaks'} );
    $defaultwrap = 0                  if ( defined $options{'neverwrap'} );

    $parse_func = \&parse_debianchangelog if ( defined $options{'debianchangelog'} );
    $parse_func = \&parse_fortunes        if ( defined $options{'fortunes'} );

    if ( defined $options{'markdown'} ) {
        $parse_func = \&parse_markdown;
        $markdown   = 1;
        map {
            $_ =~ s/^\s+|\s+$//g;    # Trim the keys before using them
            $yfm_keys{$_} = 1
        } ( split( ',', $self->{options}{'yfm_keys'} ) );
        map {
            $_ =~ s/^\s+|\s+$//g;    # Trim the keys before using them
            $yfm_paths{$_} = 1
        } ( split( ',', $self->{options}{'yfm_paths'} ) );

        #        map { print STDERR "key $_\n"; } (keys %yfm_keys);
        $yfm_skip_array = $self->{options}{'yfm_skip_array'};
        $yfm_lenient    = $self->{options}{'yfm_lenient'};
    } else {
        foreach my $opt (qw(yfm_keys yfm_lenient yfm_skip_array)) {
            die wrap_mod( "po4a::text", dgettext( "po4a", "Option %s is only valid when parsing markdown files." ),
                $opt )
              if exists $options{$opt};
        }
    }

    if ( defined $options{'control'} ) {
        $parse_func = \&parse_control;
        if ( $options{'control'} eq "1" ) {
            $control{''} = 1;
        } else {
            foreach my $tag ( split( ',', $options{'control'} ) ) {
                $control{$tag} = 1;
            }
        }
    }
}

sub parse_fallback {
    my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
    if (
        ( $line =~ /^\s*$/ )
        or ( defined $breaks
            and $line =~ m/^$breaks$/ )
      )
    {
        # Break paragraphs on lines containing only spaces
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "";
        $wrapped_mode = $defaultwrap unless defined( $self->{verbatim} );
        $self->pushline( $line . "\n" );
        undef $self->{controlkey};
    } elsif ( $line =~ /^-- $/ ) {

        # Break paragraphs on email signature hint
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "";
        $wrapped_mode = $defaultwrap;
        $self->pushline( $line . "\n" );
    } elsif ( $line =~ /^=+$/
        or $line =~ /^_+$/
        or $line =~ /^-+$/ )
    {
        $wrapped_mode = 0;
        $paragraph .= $line . "\n";
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "";
        $wrapped_mode = $defaultwrap;
    } elsif ( $tabs eq "split" and $line =~ m/\t/ and $paragraph !~ m/\t/s ) {
        $wrapped_mode = 0;
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "$line\n";
        $wrapped_mode = 0;
    } elsif ( $tabs eq "split" and $line !~ m/\t/ and $paragraph =~ m/\t/s ) {
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "$line\n";
        $wrapped_mode = $defaultwrap;
    } else {
        if ( $line =~ /^\s/ ) {

            # A line starting by a space indicates a non-wrap
            # paragraph
            $wrapped_mode = 0;
        }
        if (
            $markdown
            and (
                $line =~ /\S  $/       # explicit newline
                or $line =~ /"""$/
            )
          )
        {                              # """ textblock inside macro begin
                                       # Markdown markup needing separation _after_ this line
            $end_of_paragraph = 1;
        } else {
            undef $self->{bullet};
            undef $self->{indent};
        }

        # TODO: comments
        $paragraph .= $line . "\n";
    }
    return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
}

sub parse_debianchangelog {
    my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
    if (
            $expect_header
        and $line =~ /^(\w[-+0-9a-z.]*)\ \(([^\(\) \t]+)\) # src, version
                   \s+([-+0-9a-z.]+);                 # distribution
                   \s*urgency\s*\=\s*(.*\S)\s*$/ix
      )
    {    #
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph = "";
        $self->pushline("$line\n");
        $expect_header = 0;
    } elsif ( $line =~
        m/^ \-\- (.*) <(.*)>  ((\w+\,\s*)?\d{1,2}\s+\w+\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}(\s+\([^\\\(\)]+\)))$/ )
    {
        # Found trailer
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph = "";
        $self->pushline("$line\n");
        $expect_header = 1;
    } else {
        return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
    }
    return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
}

sub parse_fortunes {
    my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;

    # Always include paragraphs in no-wrap mode,
    # because the formatting of the fortunes
    # is usually hand-crafted and matters.
    $wrapped_mode = 0;

    # Check if there are more lines in the file.
    my $last_line_of_file = 0;
    my ( $nextline, $nextref ) = $self->shiftline();
    if ( defined $nextline ) {

        # There is a next line, put it back.
        $self->unshiftline( $nextline, $nextref );
    } else {

        # Nope, no more lines available.
        $last_line_of_file = 1;
    }

    # Is the line the end of a fortune or the last line of the file?
    if ( $line =~ m/^%%?\s*$/ or $last_line_of_file ) {

        # Add the last line to the paragraph
        if ($last_line_of_file) {
            $paragraph .= $line;
        }

        # Remove the last newline for the translation.
        chomp($paragraph);
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph = "";

        # Add the last newline again for the output.
        $self->pushline("\n");

        # Also add the separator line, if this is not the end of the file.
        if ( !$last_line_of_file ) {
            $self->pushline("$line\n");
        }
    } else {
        $paragraph .= $line . "\n";
    }
    return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
}

sub parse_control {
    my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
    if ( $line =~ m/^([^ :]*): *(.*)$/ ) {
        warn wrap_mod( "po4a::text", dgettext( "po4a", "Unrecognized section: %s" ), $paragraph )
          unless $paragraph eq "";
        my $tag = $1;
        my $val = $2;
        my $t;
        if ( $control{''} or $control{$tag} ) {
            $t = $self->translate(
                $val, $self->{ref},
                $tag . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ),
                "wrap" => 0
            );
        } else {
            $t = $val;
        }
        if ( not defined $self->{controlkey} ) {
            $self->{controlkey} = "$tag: $val";
        }
        $self->pushline("$tag: $t\n");
        $paragraph      = "";
        $wrapped_mode   = $defaultwrap;
        $self->{bullet} = "";
        $self->{indent} = " ";
    } elsif ( $line eq " ." ) {
        do_paragraph( $self, $paragraph, $wrapped_mode,
            "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ) );
        $paragraph = "";
        $self->pushline( $line . "\n" );
        $self->{bullet} = "";
        $self->{indent} = " ";
    } elsif ( $line =~ m/^ Link: +(.*)$/ ) {
        do_paragraph( $self, $paragraph, $wrapped_mode,
            "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ) );
        my $link = $1;
        my $t1   = $self->translate( "Link: ", $self->{ref}, "Link", "wrap" => 0 );
        my $t2   = $self->translate(
            $link, $self->{ref},
            "Link" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" ),
            "wrap" => 0
        );
        $self->pushline(" $t1$t2\n");
        $paragraph = "";
    } elsif ( defined $self->{indent}
        and $line =~ m/^$self->{indent}\S/ )
    {
        $paragraph .= $line . "\n";
        $self->{type} = "Long Description" . ( defined $self->{controlkey} ? ", " . $self->{controlkey} : "" );
    } else {
        return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
    }
    return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
}

# Support pandoc's format of specifying bibliographic information.
#
# If the first line starts with a percent sign, the following
# is considered to be title, author, and date.
#
# If the information spans multiple lines, the following
# lines must be indented with space.
# If information is omitted, it's just a percent sign
# and a blank line.
#
# Examples with missing title resp. missing authors:
#
# %
# % Author
#
# % My title
# %
# % June 14, 2018
sub parse_markdown_bibliographic_information {
    my ( $self, $line, $ref ) = @_;
    my ( $nextline, $nextref );

    # The first match is always the title or an empty string (no title).
    if ( $line =~ /^%(.*)$/ ) {
        my $title = $1;

        # Remove leading and trailing whitespace
        $title =~ s/^\s+|\s+$//g;

        # If there's some text, look for continuation lines
        if ( length($title) ) {
            ( $nextline, $nextref ) = $self->shiftline();
            while ( $nextline =~ /^\s+(.+)$/ ) {
                $nextline = $1;
                $nextline =~ s/^\s+|\s+$//g;
                $title .= " " . $nextline;
                ( $nextline, $nextref ) = $self->shiftline();
            }

            # Now the title should be complete, give it to translation.
            my $t = $self->translate( $title, $ref, "Pandoc title block", "wrap" => $defaultwrap );
            $t = Locale::Po4a::Po::wrap($t);
            my $first_line = 1;
            foreach my $translated_line ( split /\n/, $t ) {
                if ($first_line) {
                    $first_line = 0;
                    $self->pushline( "% " . $translated_line . "\n" );
                } else {
                    $self->pushline( "  " . $translated_line . "\n" );
                }
            }
        } else {

            # Title has been empty, fetch the next line
            # if that are the authors.
            $self->pushline("%\n");
            ( $nextline, $nextref ) = $self->shiftline();
        }

        # The next line can contain the author or an empty string.
        if ( $nextline =~ /^%(.*)$/ ) {
            my $author_ref = $nextref;
            my $authors    = $1;

            # If there's some text, look for continuation lines
            if ( length($authors) ) {
                ( $nextline, $nextref ) = $self->shiftline();
                while ( $nextline =~ /^\s+(.+)$/ ) {
                    $nextline = $1;
                    $authors .= ";" . $nextline;
                    ( $nextline, $nextref ) = $self->shiftline();
                }

                # Now the authors should be complete, split them by semicolon
                my $first_line = 1;
                foreach my $author ( split /;/, $authors ) {
                    $author =~ s/^\s+|\s+$//g;

                    # Skip empty authors
                    next unless length($author);
                    my $t = $self->translate( $author, $author_ref, "Pandoc title block" );
                    if ($first_line) {
                        $first_line = 0;
                        $self->pushline( "% " . $t . "\n" );
                    } else {
                        $self->pushline( "  " . $t . "\n" );
                    }
                }
            } else {

                # Authors has been empty, fetch the next line
                # if that is the date.
                $self->pushline("%\n");
                ( $nextline, $nextref ) = $self->shiftline();
            }

            # The next line can contain the date.
            if ( $nextline =~ /^%(.*)$/ ) {
                my $date = $1;

                # Remove leading and trailing whitespace
                $date =~ s/^\s+|\s+$//g;
                my $t = $self->translate( $date, $nextref, "Pandoc title block" );
                $self->pushline( "% " . $t . "\n" );

                # Now we're done with the bibliographic information
                return;
            }
        }

        # The line did not start with a percent sign, to stop
        # parsing bibliographic information and return the
        # line to the normal parsing.
        $self->unshiftline( $nextline, $nextref );
        return;
    }
}

# Support YAML Front Matter in Markdown documents
#
# If the text starts with a YAML ---\n separator, the full text until
# the next YAML ---\n separator is considered YAML metadata. The ...\n
# "end of document" separator can be used at the end of the YAML
# block.
#
sub parse_markdown_yaml_front_matter {
    my ( $self, $line, $blockref ) = @_;
    my $yfm;
    my @saved_ctn;
    my ( $nextline, $nextref ) = $self->shiftline();
    push @saved_ctn, ( $nextline, $nextref );
    while ( defined($nextline) ) {
        last if ( $nextline =~ /^(---|\.\.\.)$/ );
        $yfm .= $nextline;
        ( $nextline, $nextref ) = $self->shiftline();
        if ( $nextline =~ /: [\[\{]/ ) {
            die wrap_mod(
                "po4a::text",
                dgettext(
                    "po4a",
                    "Inline lists and dictionaries on a single line are not correctly handled the parser we use (YAML::Tiny): they are interpreted as regular strings. "
                      . "Please use multi-lines definitions instead. Offending line:\n %s"
                ),
                $nextline
            );

        }
        push @saved_ctn, ( $nextline, $nextref );
    }

    my $yamlarray;    # the parsed YFM content
    my $yamlres;      # containing the parse error, if any
    try {
        $yamlarray = YAML::Tiny->read_string($yfm);
    } catch {
        $yamlres = $@;
    }

    if ( defined($yamlres) ) {
        if ($yfm_lenient) {
            $yamlres =~ s/ at .*$//;    # Remove the error localisation in YAML::Tiny die message, if any (for our test)
            warn wrap_mod(
                "po4a::text",
                dgettext(
                    "po4a",
                    "Proceeding even if the YAML Front Matter could not be parsed. Remove the 'yfm_lenient' option for a stricter behavior.\nIgnored error: %s"
                ),
                $yamlres
            );
            my $len = ( scalar @saved_ctn ) - 1;
            while ( $len >= 0 ) {
                $self->unshiftline( $saved_ctn[ $len - 1 ], $saved_ctn[$len] );

                # print STDERR "Unshift ".$saved_ctn[ $len - 1] ." | ". $saved_ctn[$len] ."\n";
                $len -= 2;
            }
            return 0;    # Not a valid YAML
        } else {
            die wrap_mod(
                "po4a::text",
                dgettext(
                    "po4a",
                    "Could not get the YAML Front Matter from the file. If you did not intend to add a YAML front matter "
                      . "but an horizontal ruler, please use '----' instead, or pass the 'yfm_lenient' option.\nError: %s\nContent of the YFM: %s"
                ),
                $yamlres, $yfm
            );
        }
    }

    $self->handle_yaml( 1, $blockref, $yamlarray, \%yfm_keys, $yfm_skip_array, \%yfm_paths );
    $self->pushline("---\n");
    return 1;    # Valid YAML
}

sub parse_markdown {
    my ( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) = @_;
    if ($expect_header) {

        # It is only possible to find and parse the bibliographic
        # information or the YAML Front Matter from the first line.
        # Anyway, stop expecting header information for the next run.
        $expect_header = 0;
        if ( $line =~ /^%(.*)$/ ) {
            parse_markdown_bibliographic_information( $self, $line, $ref );
            return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
        } elsif ( $line =~ /^---$/ ) {
            if ( parse_markdown_yaml_front_matter( $self, $line, $ref ) ) {    # successfully parsed
                return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
            }

            # If it wasn't a YFM paragraph after all, stop expecting a header and keep going
        }
    }
    if (    ( $line =~ m/^(={4,}|-{4,})$/ )
        and ( defined($paragraph) )
        and ( $paragraph =~ m/^[^\n]*\n$/s )
        and ( length($paragraph) == ( length($line) + 1 ) ) )
    {
        # XXX: There can be any number of underlining according
        #      to the documentation. This detection, which avoid
        #      translating the formatting, is only supported if
        #      the underlining has the same size as the header text.
        # Found title
        $wrapped_mode = 0;
        my $level = $line;
        $level =~ s/^(.).*$/$1/;

        # Remove the trailing newline from the title
        chomp($paragraph);
        my $t = $self->translate(
            $paragraph, $self->{ref}, "Title $level",
            "wrap"  => 0,
            "flags" => "markdown-text"
        );

        # Add the newline again for the output
        $self->pushline( $t . "\n" );
        $paragraph    = "";
        $wrapped_mode = $defaultwrap;
        $self->pushline( ( $level x length($t) ) . "\n" );
    } elsif ( $line =~ m/^(#{1,6})( +)(.*?)( +\1)?$/ ) {
        my $titlelevel1 = $1;
        my $titlespaces = $2;
        my $title       = $3;
        my $titlelevel2 = $4 || "";

        # Found one line title
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $wrapped_mode = 0;
        $paragraph    = "";
        my $t = $self->translate(
            $title, $self->{ref}, "Title $titlelevel1",
            "wrap"  => 0,
            "flags" => "markdown-text"
        );
        $self->pushline( $titlelevel1 . $titlespaces . $t . $titlelevel2 . "\n" );
        $wrapped_mode = $defaultwrap;
    } elsif ( $line =~ /^[ ]{0,3}([*_-])\s*(?:\1\s*){2,}$/ ) {

        # Horizontal rule
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $self->pushline( $line . "\n" );
        $paragraph        = "";
        $end_of_paragraph = 1;
    } elsif ( $line =~ /^([ ]{0,3})(\[[^\]]+\]:[ \t]?.+)$/ ) {
        my $indentation   = $1;
        my $linkreference = $2;

        # Link reference
        # TODO: support multiline link reference definition
        # TODO: treat link title properly
        # https://spec.commonmark.org/0.30/#link-reference-definitions
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $wrapped_mode = 0;
        $paragraph    = "";
        my $t = $self->translate(
            $linkreference, $self->{ref}, "Link reference",
            "wrap"  => 0,
            "flags" => "link-reference"
        );
        $self->pushline( $indentation . $t . "\n" );
        $wrapped_mode = $defaultwrap;
    } elsif ( $line =~ /^([ ]{0,3})(([~`])\3{2,})(\s*)([^`]*)\s*$/ ) {
        my $fence_space_before  = $1;
        my $fence               = $2;
        my $fencechar           = $3;
        my $fence_space_between = $4;
        my $info_string         = $5;

        # fenced code block
        my $type = "Fenced code block" . ( $info_string ? " ($info_string)" : "" );
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $wrapped_mode = 0;
        $paragraph    = "";
        $self->pushline("$line\n");
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph = "";
        my ( $nextline, $nextref ) = $self->shiftline();

        while ( $nextline && $nextline !~ /^\s{0,3}$fence$fencechar*\s*$/ ) {
            $paragraph .= "$nextline";
            ( $nextline, $nextref ) = $self->shiftline();
        }
        my $t = $self->translate(
            $paragraph, $self->{ref}, $type,
            "wrap"  => 0
        );
        $self->pushline($t);
        $self->pushline($nextline);
        $paragraph        = "";
        $end_of_paragraph = 1;
    } elsif ( $line =~ /^([ ]{0,3})(([:])\3{2,})(\s*)([^`]*)\s*$/ ) {
        my $fence_space_before  = $1;
        my $fence               = $2;
        my $fencechar           = $3;
        my $fence_space_between = $4;
        my @info_string         = ($5);

        #        print STDERR "----------------\n";
        #        print STDERR "line: $line\n";
        #        print STDERR "fence: '$fence'; fencechar: '$fencechar'; info: '$info_string'\n";

        # fenced div block (fenced with ::: where code blocks are fenced with ` or ~)
        # https://pandoc.org/MANUAL.html#divs-and-spans
        my $info = join( "|", map { chomp $_; $_ } @info_string );
        my $type = "Fenced div block" . ( $info ? " ($info)" : "" );
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $wrapped_mode = 0;
        $paragraph    = "";
        $self->pushline("$line\n");
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph = "";

        my $lvl = 1;
        while ( $lvl > 0 ) {
            my ( $nextline, $nextref ) = $self->shiftline();
            die wrap_mod(
                "po4a::text",
                dgettext(
                    "po4a", "Malformed fenced div block: Block starting at %s not closed before the end of the file."
                ),
                $ref
            ) unless ( defined($nextline) );

            #            print STDERR "within $lvl: $nextline";
            if ( $nextline =~ /^\s*:::+\s*$/ ) {
                my $info = join( "|", map { chomp $_; $_ } @info_string );
                $type = "Fenced div block" . ( $info ? " ($info)" : "" );
                if ( $paragraph ne "" ) {
                    do_paragraph( $self, $paragraph, $wrapped_mode, $type );
                    $paragraph = "";
                }
                $self->pushline($nextline);
                $lvl--;
                while ( scalar @info_string > $lvl ) {
                    pop @info_string;
                }
            } elsif ( $nextline =~ /^([ ]{0,3})(([:])\3{2,})(\s*)([^`]*)\s*$/ ) {
                if ( $paragraph ne "" ) {
                    do_paragraph( $self, $paragraph, $wrapped_mode, $type );
                    $paragraph = "";
                }
                $self->pushline($nextline);
                push @info_string, $5;
                $lvl++;
            } else {
                $paragraph .= $nextline;
            }
        }
        $paragraph        = "";
        $end_of_paragraph = 1;

        #        print STDERR "Out now ------------\n";
    } elsif (
        $line =~ /^\s*\[\[\!\S+\s*$/       # macro begin
        or $line =~ /^\s*"""\s*\]\]\s*$/
      )
    {                                      # """ textblock inside macro end
                                           # Avoid translating Markdown lines containing only markup
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "";
        $wrapped_mode = $defaultwrap;
        $self->pushline("$line\n");
    } elsif ( $line =~ /^\s*\[\[\!\S[^\]]*\]\]\s*$/ ) {    # sole macro
                                                           # Preserve some Markdown markup as a single line
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph        = "$line\n";
        $wrapped_mode     = 0;
        $end_of_paragraph = 1;
    } elsif ( $line =~ /^"""/ ) {    # """ textblock inside macro end
                                     # Markdown markup needing separation _before_ this line
        do_paragraph( $self, $paragraph, $wrapped_mode );
        $paragraph    = "$line\n";
        $wrapped_mode = $defaultwrap;
    } else {
        return parse_fallback( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
    }
    return ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );
}

sub parse {
    my $self = shift;
    my ( $line, $ref );
    my $paragraph        = "";
    my $wrapped_mode     = $defaultwrap;
    my $expect_header    = 1;
    my $end_of_paragraph = 0;
    ( $line, $ref ) = $self->shiftline();
    my $file = $ref;
    $file =~ s/:[0-9]+$// if defined($line);

    while ( defined($line) ) {
        $ref =~ m/^(.*):[0-9]+$/;
        if ( $1 ne $file ) {
            $file = $1;
            do_paragraph( $self, $paragraph, $wrapped_mode );
            $paragraph     = "";
            $wrapped_mode  = $defaultwrap;
            $expect_header = 1;
        }

        chomp($line);
        $self->{ref} = "$ref";
        ( $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph ) =
          &$parse_func( $self, $line, $ref, $paragraph, $wrapped_mode, $expect_header, $end_of_paragraph );

        # paragraphs starting by a bullet, or numbered
        # or paragraphs with a line containing more than 3 consecutive spaces
        # are considered as verbatim paragraphs
        $wrapped_mode = 0 if ( $paragraph =~ m/^(\*|[0-9]+[.)] )/s
            or $paragraph =~ m/[ \t][ \t][ \t]/s );

        # Paragraphs starting with a table formating (GH extension) are also considered verbatim
        $wrapped_mode = 0 if ( $paragraph =~ m/^\|/ );

        $wrapped_mode = 0 if ( $tabs eq "verbatim"
            and $paragraph =~ m/\t/s );

        # Also consider keyvalue paragraphs verbatim, if requested
        $wrapped_mode = 0 if ( $keyvalue == 1
            and $paragraph =~ m/^[^ :]+:.*[^\s].*$/s );
        if ($markdown) {

            # Some Markdown markup can (or might) not survive wrapping
            $wrapped_mode = 0
              if (
                $paragraph    =~ /^>/ms                     # blockquote
                or $paragraph =~ /^( {8}|\t)/ms             # monospaced
                or $paragraph =~ /^\$(\S+[{}]\S*\s*)+/ms    # Xapian macro
                or $paragraph =~ /<(?![a-z]+[:@])/ms        # maybe html (tags but not wiki <URI>)
                or $paragraph =~ /^[^<]+>/ms                # maybe html (tag with vertical space)
                or $paragraph =~ /\S  $/ms                  # explicit newline
                or $paragraph =~ /\[\[\!\S[^\]]+$/ms        # macro begin
              );
        }
        if ($end_of_paragraph) {
            do_paragraph( $self, $paragraph, $wrapped_mode );
            $paragraph        = "";
            $wrapped_mode     = $defaultwrap;
            $end_of_paragraph = 0;
        }
        ( $line, $ref ) = $self->shiftline();
    }
    if ( length $paragraph ) {
        do_paragraph( $self, $paragraph, $wrapped_mode );
    }
}

sub do_paragraph {
    my ( $self, $paragraph, $wrap ) = ( shift, shift, shift );
    my $type  = shift || $self->{type} || "Plain text";
    my $flags = "";
    if ( $type eq "Plain text" and $markdown ) {
        $flags = "markdown-text";
    }

    return if ( $paragraph eq "" );

    $wrap = 0 unless $defaultwrap;

    # DEBUG
    # $type .= " verbatim: '".($self->{verbatim}//"NONE")."' bullet: '$bullets' wrap: '$wrap' indent: '".($self->{indent}//"NONE")."' type: '".($self->{type}//"NONE")."'";
    # print STDERR "$type\n";

    if ( $bullets and not defined $self->{verbatim} ) {

        # Detect bullets
        # |        * blah blah
        # |<spaces>  blah
        # |          ^-- aligned
        # <empty line>
        #
        # The leading spaces are optional, and other bullets are supported:
        # - blah         o blah         + blah
        # 1. blah       1) blah       (1) blah
      TEST_BULLET:
        if ( $paragraph =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s ) {
            my $para    = $5;
            my $bullet  = $2;
            my $indent1 = $1;
            my $indent2 = "$1" . ( ' ' x length $bullet );
            my $text    = $4;
            while ( $para !~ m/^$indent2(?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+/
                and $para =~ s/^$indent2(\S[^\n]*\n)//s )
            {
                $text .= $1;
            }

            # TODO: detect if a line starts with the same bullet
            if ( $text !~ m/\S[ \t][ \t][ \t]+\S/s ) {
                my $bullet_regex = quotemeta( $indent1 . $bullet );
                $bullet_regex =~ s/[0-9]+/\\d\+/;
                if (   $para eq ''
                    or $para =~ m/^(\s*)((?:[-*o+]|([0-9]+[.\)])|\([0-9]+\))\s+)([^\n]*\n)(.*)$/s
                    or $para =~ m/^$bullet_regex\S/s )
                {
                    my $trans = $self->translate(
                        $text,
                        $self->{ref},
                        "Bullet: '$indent1$bullet'",
                        "flags"   => "markdown-text",
                        "wrap"    => $defaultwrap,
                        "wrapcol" => -( length $indent2 )
                    );
                    $trans =~ s/^/$indent1$bullet/s;
                    $trans =~ s/\n(.)/\n$indent2$1/sg;
                    $self->pushline( $trans . "\n" );
                    if ( $para eq '' ) {
                        return;
                    } else {

                        # Another bullet
                        $paragraph = $para;
                        goto TEST_BULLET;
                    }
                }
            }
        }
    }

    my $end = "";
    if ($wrap) {
        $paragraph =~ s/^(.*?)(\n*)$/$1/s;
        $end = $2 || "";
    }
    my $t = $self->translate(
        $paragraph,
        $self->{ref},
        $type,
        "comment" => join( "\n", @comments ),
        "flags"   => $flags,
        "wrap"    => $wrap
    );
    @comments = ();
    if ( defined $self->{bullet} ) {
        my $bullet  = $self->{bullet};
        my $indent1 = $self->{indent};
        my $indent2 = $indent1 . ( ' ' x length($bullet) );
        $t =~ s/^/$indent1$bullet/s;
        $t =~ s/\n(.)/\n$indent2$1/sg;
    }
    $self->pushline( $t . $end );
}

1;

=head1 STATUS OF THIS MODULE

Tested successfully on simple text files and NEWS.Debian files.

=head1 AUTHORS

 Nicolas François <nicolas.francois@centraliens.net>

=head1 COPYRIGHT AND LICENSE

 Copyright © 2005-2008 Nicolas FRANÇOIS <nicolas.francois@centraliens.net>.

 Copyright © 2008-2009, 2018 Jonas Smedegaard <dr@jones.dk>.
 Copyright © 2020 Martin Quinson <mquinson#debian.org>.

This program is free software; you may redistribute it and/or modify it
under the terms of GPL v2.0 or later (see the COPYING file).

=cut

__END__

#  LocalWords: Charset charset po UTF gettext msgid nostrip GPL