File: purify.t

package info (click to toggle)
libtext-bibtex-perl 0.88-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,528 kB
  • sloc: ansic: 10,706; perl: 1,987; makefile: 7
file content (142 lines) | stat: -rw-r--r-- 4,939 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# -*- cperl -*-
use strict;
use warnings;

use IO::Handle;
use Test::More tests => 110;

use vars qw($DEBUG);
use Cwd;
BEGIN {
    # Load the module under test at compile time.  use_ok() counts as the
    # first planned test and imports purify_string for the checks below.
    use_ok('Text::BibTeX', qw(purify_string));

    # Pull in the shared test helpers.  The path is built from the current
    # working directory, so the script expects to be started from the
    # directory that contains t/.
    my $start_dir = getcwd();
    my $helpers   = "$start_dir/t/common.pl";
    require $helpers;
}

#
# purify.t
#
# Text::BibTeX test program -- compare my purify routine with known
# results from BibTeX 0.99.
#
# $Id$
#

# When true, the table-driven loop further down reports every input, the
# purified result and the expected values as it goes.
$DEBUG = 1;

# make sure that purify_string doesn't modify its input string
# (at least while it's *supposed* to act this way!)
my $in1 = 'f{\"o}o';            # pristine reference copy, never passed anywhere
my $in2 = $in1;                 # the copy actually handed to purify_string
my $out = 'clobber me';         # sentinel that the call's result must overwrite
$out = purify_string ($in2);
is($in2, $in1, 'purify_string leaves its argument unmodified');
is($out, 'foo', 'purify_string returns the purified text');

is(length($in1), 7, 'reference copy of the input is still 7 characters');
is(length($in2), 7, 'argument passed to purify_string is still 7 characters');
is(length($out), 3, 'purified result is 3 characters');

# These two *don't* come from BibTeX -- just borderline cases
# that should be checked
is(purify_string (''), '', 'empty string purifies to the empty string');
ok(! defined purify_string (undef), 'undef purifies to undef');


# The "expected" results here are all taken directly from BibTeX, using
# a special .bst file of my own devising.  One problem is that BibTeX
# strips trailing spaces from each line on output, which means that 
# "purified" strings ending with a space are not delivered exactly as
# I expect them.  However, BibTeX's text.length$ function does give the
# correct length (including those trailing spaces), so at least I can
# indirectly check that things are as I expect them to be.
#
# The upshot of all this is that the "expected purified strings" in the
# table below are shorn of trailing spaces, but have accurate lengths.
# My reasoning for doing things this way is that although it is (apparently)
# BibTeX's output routines that do the space-stripping, there is no
# way to get data out of BibTeX other than through its output routines.
# Thus, if I'm going to compare my results with BibTeX's, I'd better be
# prepared to deal with the stripped-spaces problem...so I am!

# Table of test cases, stored as a flat list of pairs:
#     input string => [expected length, expected purified string]
# The loop that consumes this table compares the expected length against the
# length of purify_string's raw result, and the expected string against that
# result after trailing spaces have been stripped.
# Note that this is a list, not a hash, so an input that appears more than
# once (e.g. q[G{\"o}del]) is run each time it is listed.
my @tests = 
   (q[Bl{\"o}w, Jo{\'{e}} Q. and J.~R. R. Tolk{\u e}in and {Fo{\'o} Bar ~ {\aa}nd {\SS}on{\v{s}}, Ltd.}] => 
       [58, 'Blow Joe Q and J R R Tolkein and Foo Bar   aand SSonvs Ltd'],
    q[] => [0, ''],
    q[G{\"o}del] => [5, 'Godel'],
    q[G{\" o}del] => [5, 'Godel'],
    q[G{\" o }del] => [5, 'Godel'],
    q[G{\"o }del] => [5, 'Godel'],
    q[G{\"{o}}del] => [5, 'Godel'],
    q[G{\" {o}}del] => [5, 'Godel'],
    q[G{\" { o}}del] => [5, 'Godel'],
    q[G{\" {o }}del] => [5, 'Godel'],
    q[G{\" { o }}del] => [5, 'Godel'],
    q[G{\" { o } }del] => [5, 'Godel'],
    q[G{\"{o} }del] => [5, 'Godel'],
    q[G{\" {o} }del] => [5, 'Godel'],
    q[G{\"o foo}del] => [8, 'Gofoodel'],
    q[G{\"foo}del] => [7, 'Gfoodel'],
    q[G{\"{foo}}del] => [7, 'Gfoodel'],
    q[{G\"odel}] => [5, 'Godel'],
    q[G{\"o}del] => [5, 'Godel'],
    q[G{\"{o}}del] => [5, 'Godel'],
    q[{\ss}uper-duper] => [12, 'ssuper duper'],
    q[{\ss }uper-duper] => [12, 'ssuper duper'],
    q[{ \ss}uper-duper] => [13, ' ssuper duper'],
    q[{\ss{}}uper-duper] => [12, 'ssuper duper'],
    q[{\ss foo}uper-duper] => [15, 'ssfoouper duper'],
    q[{\ss { }}uper-duper] => [12, 'ssuper duper'],
    q[{\ss {foo}}uper-duper] => [15, 'ssfoouper duper'],
    q[{\ss{foo}}uper-duper] => [15, 'ssfoouper duper'],
    q[Tom{\`a}{\v s}] => [5, 'Tomas'],
    q[Tom{\`a}{\v{s}}] => [5, 'Tomas'],
    q[Tom{\`a}{{\v s}}] => [7, 'Tomav s'],
    q[{Tom{\`a}{\v s}}] => [7, 'Tomav s'],
    q[{Tom{\`a}{\v{s}}}] => [6, 'Tomavs'],
    q[{Tom{\`a}{\v{ s}}}] => [7, 'Tomav s'],
    q[{Tom{\`a}{\v{ s }}}] => [8, 'Tomav s'],
    q[{\v s}] => [1, 's'],
    q[{\x s}] => [1, 's'],
    q[{\r s}] => [1, 's'],
    q[{\foo s}] => [1, 's'],
    q[{\oe}] => [2, 'oe'],
    q[{\ae}] => [2, 'ae'],

    # Handling of \aa is a bit problematic -- BibTeX 0.99 converts this
    # special char. to "a", but my understanding of the Nordic languages
    # leads me to believe it ought to be converted to "aa".  (E.g.
    # \AArhus is usually written "Aarhus" in English, not "Arhus".)
    # Neither way will result in proper sorting (at least for Danish,
    # where \aa comes at the end of the alphabet), but at least my way 
    # is consistent with the normal English rendering of \aa.
#   q[{\aa}] => [1, 'a'],               # BibTeX 0.99's behaviour
    q[{\aa}] => [2, 'aa'],              # btparse's behaviour
    q[{\AA}] => [2, 'Aa'],
    q[{\o}] => [1, 'o'],
    q[{\l}] => [1, 'l'],
    q[{\ss}] => [2, 'ss'],
    q[{\ae s}] => [3, 'aes'],
    q[\TeX] => [3, 'TeX'],
    q[{\TeX}] => [0, ''],
    q[{{\TeX}}] => [3, 'TeX'],
    q[{\foobar}] => [0, '']
    );

# Run every case from @tests: purify the input, then check both the purified
# text and its length against the expected values from the table.  Each case
# contributes exactly two tests to the plan.
while (@tests)
{
   my $str = shift @tests;
   my ($exp_length, $exp_purified) = @{shift @tests};

   my $purified = purify_string ($str);
   my $length = length $purified;       # length before stripping

   # Report through Test::More's note() rather than printing to STDOUT
   # directly, so the debugging output is emitted as TAP comment lines
   # instead of stray text in the test stream.
   note(sprintf("[%s] -> [%s] (length %d) (expected [%s], length %d)",
                $str, $purified, $length, $exp_purified, $exp_length))
      if $DEBUG;

   # Name each test after its input so that a failure can be traced back
   # to the table entry that produced it.
   my $label = sprintf('purify_string(q[%s])', $str);

   # The expected strings in the table carry no trailing spaces, so remove
   # them from our result before comparing; the length recorded above still
   # includes them.
   $purified =~ s/ +$//;                # strip trailing spaces
   is($purified, $exp_purified, "$label gives the expected text");
   is($length, $exp_length, "$label gives the expected length");
}