File: fmt_rtf.pl

package info (click to toggle)
linuxdoc-tools 0.9.86-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,312 kB
  • sloc: ansic: 16,247; perl: 3,769; sh: 1,134; makefile: 813; lex: 566; lisp: 309
file content (153 lines) | stat: -rw-r--r-- 4,665 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#
#  fmt_rtf.pl
#
# -----------------------------------------------------------
#  RTF-specific driver stuff
#
#  Copyright (C) 1994-1996, Matt Welsh
#  Copyright (C) 1996, Cees de Groot
#  Copyright (C) 1998, Sven Rudolph
#  Copyright (C) 1999-2001, Taketoshi Sano
#  Copyright (C) 2008-2020, Agustin Martin
# -----------------------------------------------------------

package LinuxDocTools::fmt_rtf;
use strict;

use File::Copy;
use Encode qw/encode/;
use LinuxDocTools::Vars;
use LinuxDocTools::CharEnts;
use LinuxDocTools::Utils qw(ldt_log);

my $rtf = {};
$rtf->{NAME} = "rtf";
$rtf->{HELP} = "";
$rtf->{OPTIONS} = [
  { option => "twosplit",
    type => "f",
    short => "2" }
  ];
$rtf->{twosplit}  = 0;

$Formats{$rtf->{NAME}} = $rtf;

# -----------------------------------------------------------------------
sub rtf2unicode {
  # ---------------------------------------------------------------------
  # Replace utf-8 chars by their rtf representation, braced and doubly
  # escaped (e.g., {\\u123?}, ? stands for write ? if char unavailable
  # in font). Will process it later to something like \uxxx?
  # ---------------------------------------------------------------------
  my $string = shift;

  if ( $global->{charset} eq "utf-8" ) {
    my @chars = split '', $string;
    foreach (@chars){
      if ( ord($_) > 127 ){
	$_ = "{\\\\u" . unpack("s", encode("utf16-le", $_)) . "?}";
      }
    }
    return join("", @chars);
  } else {
    return $string;
  }
}

# -----------------------------------------------------------------------
my $rtf_escape = sub {
  # ---------------------------------------------------------------------
  # Ascii escape sub to properly escape some characters, if required.
  # Passed to `parse_data' below in rtf_preASP .
  # ---------------------------------------------------------------------
  my ($data) = @_;

  return ($data);
};

# -------------------------------------------------------------
$rtf->{preASP} = sub {
  # -------------------------------------------------------------
  my ($INFILE, $OUTFILE) = @_;
  my $verbatim;
  my $inheading;

  # `sdata_dirs' passed as anonymous array to make a single argument
  my $rtf_char_maps = load_char_maps (
    '.2rtf',
    [ Text::EntityMap::sdata_dirs() ]);

  # Declare $INFILE as utf-8 if charset is utf-8
  if ( $global->{charset} eq "utf-8" ){
    binmode($INFILE, ":utf8");
  }

  while (<$INFILE>){
    chomp;
    # RTF does not treat newline as whitespace, so we need to turn
    # "\n" into " \n". Without the extra space, two words separated
    # only by a newline will get jammed together in the RTF output.
    # ------------------------------------------------------------
    s/([^\\])\\n/$1 \\n/g;

    if ( s/^-// ) {
      print $OUTFILE "-" . parse_data(rtf2unicode($_), $rtf_char_maps, $rtf_escape) . "\n";
    } elsif (/^A/) {
      /^A(\S+) (IMPLIED|CDATA|NOTATION|ENTITY|TOKEN)( (.*))?$/
	|| die "bad attribute data: $_\n";
      my ($name,$type,$value) = ($1,$2,$4);
      if ($type eq "CDATA") {
	# CDATA attributes get also translated
	$value = parse_data (rtf2unicode($value), $rtf_char_maps, $rtf_escape);
      }
      print $OUTFILE "A$name $type $value\n";
    } else {
      if (/^\(HEADING/){
        $inheading = 1;
      } elsif (/^\)HEADING/){
        $inheading = '';
      } elsif (/^\((VERB|CODE)/) {
	$verbatim = 1;
      } elsif (/^\)(VERB|CODE)/) {
	$verbatim = '';
      }
      print $OUTFILE rtf2unicode($_) . "\n";
    }
  }
};

# -------------------------------------------------------------
$rtf->{postASP} = sub {
# -------------------------------------------------------------
#  Take the sgmlsasp output, and make something useful from it.
# -------------------------------------------------------------
  my $INFILE  = shift;
  my $rtf2rtf = "$main::AuxBinDir/rtf2rtf";
  my $split   = ($rtf->{twosplit}) ? "-2" : "";
  my $pipe_in = "$global->{tmpbase}.fmt_rtf.01.rtf";
  my $prefile = "$global->{filename}";
  my $rtffile = "$global->{filename}.rtf";
  my $msghead = "fmt_rtf.pl::postASP";

  # Preprocess raw file before piping to rtf2rtf
  open ( my $RTF_PIPE_IN, "> $pipe_in");
  while (<$INFILE>){
    # Change {\\u323?} type strings to something like \u323? (or
    # \u-xx?) Needed for sgmlsasp not complaining about bad escapes.
    s/\{\\(\\u[\-\d]+\?)\}/$1/g;

    print $RTF_PIPE_IN $_;
  }
  close $RTF_PIPE_IN;

  open ( my $RTF_PIPE,"| $rtf2rtf $split $prefile > $rtffile" )
    or die "$msghead: Could not open pipe to $rtf2rtf. Aborting ...\n";
  copy ($pipe_in, $RTF_PIPE);
  close $RTF_PIPE;

  ldt_log "$msghead: cat $pipe_in | $rtf2rtf $split $prefile > $rtffile";

  return 0;
};

1;