1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
|
#! perl
=head1 NAME
bidi - Make urxvt present Bidi text correctly
=head1 DESCRIPTION
This extension filters the text displayed by Urxvt, so that Bi-directional
text (e.g., Hebrew or Arabic mixed with English) is displayed correctly. It
does so using the L<Text::Bidi> module (which should be installed).
The extension emulates a cursor via rendition. This means that when typing,
there will be two cursors, the original one whose location corresponds to the
current location within the logical string (so, mostly useless), and a fake
one which corresponds to the current insertion point.
To enable the extension, add C<bidi> to the I<Urxvt.perl-ext-common>
resource. See urxvt(1) and urxvtperl(1) for other options and more details.
The extension recognises the following resources:
=over
=cut
#:META:RESOURCE:%.FieldSep:string:Separator between different fields
=item I<bidi.FieldSep>
This should be a string on which each line is split before applying the Bidi
algorithm. This permits creating tables, where each cell is treated
separately, e.g., in the index of an email client. Note that this is a
string, not a regular expression. The default is C<\x{2502}>.
=cut
#:META:RESOURCE:%.LRE:string:Insert LRE mark where this matches
=item I<bidi.LRE>
A regular expression. The plugin inserts an explicit left-to-right embedding
(LRE, U+202A) mark immediately before each place where it matches.
=cut
#:META:RESOURCE:%.PDF:string:Insert PDF mark where this matches
=item I<bidi.PDF>
A regular expression. The plugin inserts a PDF mark (U+202C, the end of an
LRE) immediately after each place where it matches.
=cut
#:META:RESOURCE:%.par:boolean:Work in paragraph mode
=item I<bidi.par>
Boolean. If true, work in paragraph mode: the paragraph direction is not reset
on each line, but only on lines that start or end a paragraph.
=cut
#:META:RESOURCE:%.ParReset:string:Start a new paragraph where this matches
=item I<bidi.ParReset>
A regular expression. A line matching this will start a new paragraph.
=cut
#:META:RESOURCE:%.ParResetAfter:string:Start a new paragraph after this matches
=item I<bidi.ParResetAfter>
Similar to I<bidi.ParReset>, but the paragraph starts after the matching
line.
=back
For example, I use urxvt with this plugin to edit LaTeX documents in Hebrew.
I have the following resources defined:
URxvt.bidi.LRE: \\\\[([]
URxvt.bidi.PDF: \\\\[)\\]]
urxvt-vimh.bidi.par: true
urxvt-vimh.cursorUnderline: true
urxvt-vimh.bidi.ParReset: ^\\s*$|^\\\\begin|^\\\\\\[
urxvt-vimh.bidi.ParResetAfter: ^\\\\end|\\\\\\]$
This causes inline equations (delimited by C<\(> and C<\)>) to be displayed
correctly, and equation environments to be treated as separate paragraphs (so
that they are displayed on the left).
=cut
use 5.10.0;
use Text::Bidi qw(log2vis is_bidi);
use Text::Bidi::Constants;
use Encode qw(decode_utf8);
# Startup hook: read the extension's X resources and cache the derived
# settings on $self for use by the refresh hooks.
#
# Stored keys:
#   split - field separator string (default U+2502)
#   spre  - regex matching the separator literally
#   ls    - length of the separator, in characters
#   lre   - regex locating LRE insertion points (only set if configured)
#   pdf   - regex locating PDF insertion points (only set if configured)
#   pres  - regex for lines that start a new paragraph (default: blank line)
#   presa - regex for lines after which a new paragraph starts
#           (default: never matches)
#   par   - 1 if paragraph mode is enabled, '' otherwise
sub on_start {
    my ($self) = @_;
    $self->{'split'} = decode_utf8($self->x_resource('%.FieldSep')) //
        "\x{2502}";
    my $lre = decode_utf8($self->x_resource('%.LRE')) // '';
    my $pdf = decode_utf8($self->x_resource('%.PDF')) // '';
    my $preset = decode_utf8($self->x_resource('%.ParReset')) // '^\s*$';
    my $preseta = decode_utf8($self->x_resource('%.ParResetAfter')) // '(?!)';
    # %.par is a boolean resource, but x_resource returns its raw string, and
    # strings such as "false" or "off" are true in Perl. Accept only the
    # usual true spellings so that "bidi.par: false" really disables
    # paragraph mode.
    my $par = decode_utf8($self->x_resource('%.par')) // '';
    $self->{'par'} = $par =~ /^\s*(?:true|yes|on|1)\s*$/i ? 1 : '';
    # the capture group lets the refresh hook find where the match started
    $self->{'lre'} = qr/.*?($lre)/ if $lre;
    $self->{'pdf'} = qr/.*?$pdf/ if $pdf;
    $self->{'pres'} = qr/$preset/;
    $self->{'presa'} = qr/$preseta/;
    #warn "LRE: $self->{'lre'}\n";
    #warn "PDF: $self->{'pdf'}\n";
    # the separator is a plain string, not a regex, hence the quoting
    $self->{'spre'} = qr/\Q$self->{'split'}\E/;
    $self->{'ls'} = length($self->{'split'});
}
# Refresh hook, run before the screen is drawn. For every row that needs
# it, replace the logical text and rendition with their visual (reordered)
# counterparts. The values returned by ROW_t/ROW_r when writing the new row
# are saved in $self->{'text'} / $self->{'rend'} so that on_refresh_end can
# put them back.
#
# Also emulates a cursor: on the cursor row, the rendition cell at the
# logical cursor column is given reverse video, unless the visual map
# leaves that column in place.
#
# Returns the empty list.
sub on_refresh_begin {
    my ($self) = @_;
    my ($crow, $ccol) = $self->screen_cur;
    # paragraph direction carried from row to row (paragraph mode only)
    my $pdir;
    # declared outside the loop because the continue block reads it too
    my $l;
    for my $i ( 0..$self->nrow-1 ) {
        $l = $self->ROW_t($i);
        # a line matching ParReset starts a new paragraph
        $pdir = undef if $l =~ $self->{'pres'};
        # for speed
        next unless is_bidi($l) or
            (defined($pdir) and $pdir == $Text::Bidi::Par::RTL);
        chomp($l);
        my @r = @{$self->ROW_r($i)};
        # expand combining chars (Nikud) from one illegal char to two legal
        $l = $self->special_decode($l);
        # add bidi marks; every inserted character also gets a rendition
        # cell, and shifts the cursor column if it lands at or before it
        if ( $self->{'lre'} ) {
            while ( $l =~ /\G$self->{'lre'}/gc ) {
                my $p = pos($l);
                # start of the captured match: the LRE goes right before it
                my $j = $p - length($1);
                substr($l, $j, 0) = "\x{202a}";
                splice(@r, $j, 0, 0);
                $ccol++ if $j <= $ccol and $i == $crow;
                # assigning to $l reset pos; resume after the shifted match
                pos($l) = $p + 1;
            }
            pos($l) = undef;
            # Without a PDF pattern the regex below would degenerate to
            # /\G.*?/, which matches the empty string forever while the loop
            # keeps inserting marks. Only scan when a pattern is configured.
            if ( $self->{'pdf'} ) {
                while ( $l =~ /\G.*?$self->{'pdf'}/gc ) {
                    # the PDF goes right after the match
                    my $j = pos($l);
                    substr($l, $j, 0) = "\x{202c}";
                    splice(@r, $j, 0, 0);
                    $ccol++ if $j <= $ccol and $i == $crow;
                    pos($l) = $j + 1;
                }
            }
        }
        # The /c scans above keep pos($l) after their final failed match;
        # reset it so that the nikud scan covers the whole line.
        pos($l) = undef;
        # nikud
        while ( $l =~ /[\x{5b0}-\x{5bb}]/g ) {
            my $j = pos($l) - 1;
            splice(@r, $j, 0, 0);
            $ccol++ if $j <= $ccol and $i == $crow;
        }
        # each field between separators is reordered on its own
        my @l = split $self->{'spre'}, $l;
        my (@res, @map);
        # current offset within the line
        my $off = 0;
        # we keep the map, so that we can apply it to the rendition
        for my $part ( @l ) {
            my ($p, $v) = log2vis($part, undef, $pdir);
            my $types = $p->types;
            #warn "$i: <$part> ==>\n <$v>\n dir:" . $p->dir . "\ntype: " .
            #join(',', map { my $cc = substr($part, $_, 1); '[' . $cc . ':' .
            #sprintf("%x", ord($cc)) . ":" . $types->[$_] . ']' }
            #0..$#{$types}) . "\n\n";
            push @res, $v;
            my @mmap = @{$p->map};
            # the map is relative to the field; shift it to line offsets
            push @map, map { $_ + $off } @mmap;
            $off += length($v);
            # compensate for the field separator
            push @map, $off..$off+$self->{'ls'}-1;
            $off += $self->{'ls'};
            # in paragraph mode the first direction found is kept until reset
            $pdir = $p->dir if ( $self->{'par'} and not defined $pdir );
        }
        # remove the last field separator
        splice @map, -$self->{'ls'};
        my $res = join($self->{'split'}, @res);
        # remove bidi marks: flag their map entries so that the matching
        # rendition cells are dropped below
        while ( $res =~ /[\x{200b}-\x{200f}\x{202a}-\x{202f}\x{5b0}-\x{5bb}]/g ) {
            $map[pos($res)-1] = -1;
        }
        $res =~ s/[\x{200b}-\x{200f}\x{202a}-\x{202f}]//g;
        $res = $self->special_encode($res);
        # fake cursor
        if ( $crow == $i ) {
            $r[$ccol] |= urxvt::RS_RVid unless $map[$ccol]==$ccol;
        }
        # reorder the rendition with the same map, skipping flagged entries
        my @newr = (@r)[grep { $_ >= 0 } @map];
        # keep the logical data for restoring after display
        $self->{'text'}[$i] = $self->ROW_t($i, $res);
        $self->{'rend'}[$i] = $self->ROW_r($i, \@newr);
    } continue {
        # a line matching ParResetAfter ends the paragraph once handled
        $pdir = undef if $l =~ $self->{'presa'};
    }
    ()
}
# Refresh hook, run after the screen has been drawn: write back the text
# and rendition saved in $self->{'text'} / $self->{'rend'} for every row
# that has a saved text, then discard the saved copies.
# Returns the empty list.
sub on_refresh_end {
    my ($self) = @_;
    my $saved_text = $self->{'text'};
    # nothing was saved during this refresh
    return unless defined $saved_text;
    my $last_row = $self->nrow - 1;
    for my $row ( 0 .. $last_row ) {
        my $text = $saved_text->[$row];
        # rows without a saved text were left alone
        next unless defined $text;
        $self->ROW_t($row, $text);
        $self->ROW_r($row, $self->{'rend'}[$row]);
    }
    delete @{$self}{'text', 'rend'};
    ()
}
|