File: bidi

package info (click to toggle)
libtext-bidi-perl 2.18-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 4,304 kB
  • sloc: ansic: 3,358; perl: 1,064; makefile: 32
file content (220 lines) | stat: -rw-r--r-- 7,178 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#! perl

=head1 NAME

bidi - Make urxvt present Bidi text correctly

=head1 DESCRIPTION

This extension filters the text displayed by Urxvt, so that Bi-directional 
text (e.g., Hebrew or Arabic mixed with English) is displayed correctly. It 
does so using the L<Text::Bidi> module (which should be installed).

The extension emulates a cursor via rendition. This means that when typing, 
there will be two cursors, the original one whose location corresponds to the 
current location within the logical string (so, mostly useless), and a fake 
one which corresponds to the current insertion point.

To enable the extension, add C<bidi> to the I<URxvt.perl-ext-common> 
resource. See urxvt(1) and urxvtperl(3) for other options and more details.

The extension recognises the following resources:

=over

=cut

#:META:RESOURCE:%.FieldSep:string:Separator between different fields

=item I<bidi.FieldSep>

This should be a string on which each line is split before applying the Bidi 
algorithm. This permits creating tables, where each cell is treated 
separately, e.g., in the index of an email client. Note that this is a 
string, not a regular expression. The default is C<\x{2502}>.

=cut

#:META:RESOURCE:%.LRE:string:Insert LRE mark where this matches

=item I<bidi.LRE>

A regular expression. The plugin inserts an explicit left-to-right embedding 
(LRE) mark where it matches.

=cut

#:META:RESOURCE:%.PDF:string:Insert PDF mark where this matches

=item I<bidi.PDF>

A regular expression. The plugin inserts a PDF mark (pop directional 
formatting, which ends an LRE) where it matches.

=cut

#:META:RESOURCE:%.par:boolean:Work in paragraph mode

=item I<bidi.par>

Boolean. If true, work in paragraph mode: the paragraph direction is not reset 
on each line, but only on lines that start or end a paragraph.

=cut

#:META:RESOURCE:%.ParReset:string:Start a new paragraph where this matches

=item I<bidi.ParReset>

A regular expression. A line matching this will start a new paragraph.

=cut

#:META:RESOURCE:%.ParResetAfter:string:Start a new paragraph after this matches

=item I<bidi.ParResetAfter>

Similar to I<bidi.ParReset>, but the paragraph starts after the matching 
line.

=back

For example, I use urxvt with this plugin to edit LaTeX documents in Hebrew.  
I have the following resources defined:

    URxvt.bidi.LRE: \\\\[([]
    URxvt.bidi.PDF: \\\\[)\\]]
    urxvt-vimh.bidi.par: true
    urxvt-vimh.cursorUnderline: true
    urxvt-vimh.bidi.ParReset: ^\\s*$|^\\\\begin|^\\\\\\[
    urxvt-vimh.bidi.ParResetAfter: ^\\\\end|\\\\\\]$

This causes inline equations (delimited by C<\(> and C<\)>) to be displayed 
correctly, and equation environments to be treated as separate paragraphs (so 
that they are displayed on the left).

=cut

use 5.10.0;
use Text::Bidi qw(log2vis is_bidi);
use Text::Bidi::Constants;
use Encode qw(decode_utf8);

# Startup hook: read the extension's X resources and cache the derived
# settings on the extension object, where on_refresh_begin picks them up.
#
# Keys stored on $self:
#   split - field separator string (default U+2502)
#   spre  - pattern matching the separator literally
#   ls    - length of the separator, in characters
#   lre   - pattern locating LRE insertion points (only set if bidi.LRE is)
#   pdf   - pattern locating PDF insertion points (only set if bidi.PDF is)
#   pres  - pattern for lines that start a new paragraph
#   presa - pattern for lines after which a new paragraph starts
#   par   - true when paragraph mode is enabled
sub on_start {
    my ($self) = @_;
    $self->{'split'} = decode_utf8($self->x_resource('%.FieldSep')) // 
                       "\x{2502}";
    my $lre = decode_utf8($self->x_resource('%.LRE')) // '';
    my $pdf = decode_utf8($self->x_resource('%.PDF')) // '';
    # by default a blank line starts a new paragraph
    my $preset = decode_utf8($self->x_resource('%.ParReset')) // '^\s*$';
    # (?!) never matches, so by default no line ends a paragraph
    my $preseta = decode_utf8($self->x_resource('%.ParResetAfter')) // '(?!)';
    # bidi.par is a boolean resource, but x_resource hands back the raw
    # string.  Interpret it explicitly: a plain truth test would treat
    # "false", "off" or "no" as true, since they are non-empty strings.
    my $par = decode_utf8($self->x_resource('%.par')) // '';
    $self->{'par'} = $par =~ /\A\s*(?:true|yes|on|1)\s*\z/i ? 1 : '';
    # the capture group lets the caller find where the LRE match itself
    # begins, which is where the mark is inserted
    $self->{'lre'} = qr/.*?($lre)/ if $lre;
    $self->{'pdf'} = qr/.*?$pdf/ if $pdf;
    $self->{'pres'} = qr/$preset/;
    $self->{'presa'} = qr/$preseta/;
    # the separator is a literal string, not a pattern, hence the quoting
    $self->{'spre'} = qr/\Q$self->{'split'}\E/;
    $self->{'ls'} = length($self->{'split'});
}

# Refresh hook, run before the screen is drawn: replace the text and
# rendition of every row that needs it by their visually reordered
# counterparts.  The values returned by ROW_t/ROW_r when setting the new
# contents are kept in $self->{'text'} and $self->{'rend'}, so that
# on_refresh_end can restore the logical data after display.
#
# Returns the empty list.
sub on_refresh_begin {
    my ($self) = @_;
    my ($crow, $ccol) = $self->screen_cur;
    # paragraph direction carried from row to row (paragraph mode only)
    my $pdir;
    # declared outside the loop since the continue block needs it as well
    my $l;
    for my $i ( 0..$self->nrow-1 ) {
        $l = $self->ROW_t($i);
        $pdir = undef if $l =~ $self->{'pres'};
        # for speed
        next unless is_bidi($l) or 
            (defined($pdir) and $pdir == $Text::Bidi::Par::RTL);
        chomp($l);
        my @r = @{$self->ROW_r($i)};
        # expand combining chars (Nikud) from one illegal char to two legal
        $l = $self->special_decode($l);
        # nikud: give every combining mark a rendition slot of its own, so
        # that @r lines up with the decoded string.  This is done before any
        # bidi mark is inserted: the offsets computed below refer to the
        # decoded string, and the scan must cover the whole line (the /gc
        # loops below leave pos($l) set after their last match).
        while ( $l =~ /[\x{5b0}-\x{5bb}]/g ) {
            my $j = pos($l) - 1;
            splice(@r, $j, 0, 0);
            $ccol++ if $j <= $ccol and $i == $crow;
        }
        # add bidi marks
        if ( $self->{'lre'} ) {
            while ( $l =~ /\G$self->{'lre'}/gc ) {
                my $p = pos($l);
                # the mark goes right before the text matched by bidi.LRE
                my $j = $p - length($1);
                substr($l, $j, 0) = "\x{202a}";
                splice(@r, $j, 0, 0);
                $ccol++ if $j <= $ccol and $i == $crow;
                # modifying $l resets pos; continue after the inserted mark
                pos($l) = $p + 1;
            }
            pos($l) = undef;
            # Only look for PDF positions if bidi.PDF was given.  Without
            # this check the pattern below would match the empty string at
            # every position, and since each round inserts a character the
            # loop would never reach the end of the line.
            if ( $self->{'pdf'} ) {
                while ( $l =~ /\G.*?$self->{'pdf'}/gc ) {
                    # the mark goes right after the text matched by bidi.PDF
                    my $j = pos($l);
                    substr($l, $j, 0) = "\x{202c}";
                    splice(@r, $j, 0, 0);
                    $ccol++ if $j <= $ccol and $i == $crow;
                    pos($l) = $j + 1;
                }
            }
        }
        # each field is reordered separately
        my @l = split $self->{'spre'}, $l;
        my (@res, @map);
        # current offset within the line
        my $off = 0;
        # we keep the map, so that we can apply it to the rendition
        for my $part ( @l ) {
            my ($p, $v) = log2vis($part, undef, $pdir);
            push @res, $v;
            my @mmap = @{$p->map};
            # the map of a field is relative to the field: shift it
            push @map, map { $_ + $off } @mmap;
            $off += length($v);
            # compensate for the field separator
            push @map, $off..$off+$self->{'ls'}-1;
            $off += $self->{'ls'};
            # in paragraph mode the first field seen fixes the direction
            $pdir = $p->dir if ( $self->{'par'} and not defined $pdir );
        }
        # remove the last field separator
        splice @map, -$self->{'ls'};
        my $res = join($self->{'split'}, @res);
        # remove bidi marks: flag their positions (and those of the nikud,
        # which special_encode folds back) so they get no rendition entry
        while ( $res =~ /[\x{200b}-\x{200f}\x{202a}-\x{202f}\x{5b0}-\x{5bb}]/g ) {
            $map[pos($res)-1] = -1;
        }
        $res =~ s/[\x{200b}-\x{200f}\x{202a}-\x{202f}]//g;
        $res = $self->special_encode($res); 
        # fake cursor
        if ( $crow == $i ) {
            $r[$ccol] |= urxvt::RS_RVid unless $map[$ccol]==$ccol;
        }
        my @newr = (@r)[grep { $_ >= 0 } @map];
        # keep the logical data for restoring after display
        $self->{'text'}[$i] = $self->ROW_t($i, $res);
        $self->{'rend'}[$i] = $self->ROW_r($i, \@newr);
    } continue {
        # also reached via "next": a matching line ends the paragraph
        $pdir = undef if $l =~ $self->{'presa'};
    }
    ()
}

# Refresh hook, run after the screen has been drawn: write back the row
# contents that on_refresh_begin saved in $self->{'text'} / $self->{'rend'},
# then drop the saved copies.  Rows without a saved text are left alone.
#
# Returns the empty list.
sub on_refresh_end {
    my ($self) = @_;

    # nothing was replaced during this refresh
    return unless defined $self->{'text'};

    my $last_row = $self->nrow - 1;
    for my $row ( 0 .. $last_row ) {
        my $saved_text = $self->{'text'}[$row];
        if ( defined $saved_text ) {
            $self->ROW_t($row, $saved_text);
            $self->ROW_r($row, $self->{'rend'}[$row]);
        }
    }

    # forget the saved state, so that the next refresh starts afresh
    delete $self->{$_} for 'text', 'rend';
    ()
}