File: 04010_misc_errors_fixed.t

package info (click to toggle)
libtext-unidecode-perl 1.30-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 1,236 kB
  • sloc: perl: 3,878; makefile: 2
file content (143 lines) | stat: -rw-r--r-- 3,918 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# -*- coding:utf-8; mode:CPerl -*-
#
# Checks that a collection of individual characters transliterate to the
# expected ASCII strings via Text::Unidecode's unidecode().
use 5.8.0;
use strict;
use warnings;
use Test;
use utf8;    # the inputs further down are written as literal UTF-8 characters
print q[# //Time-stamp: "2014-07-22 05:40:49 MDT sburke@cpan.org"], "\n";

# 75 is the total number of ok() calls in this file; keep the two in step
# when adding or removing checks.
BEGIN { plan( tests => 75 ); }

use Text::Unidecode;
print "# Text::Unidecode version $Text::Unidecode::VERSION\n";

print "# Checking various errors to be fixed...\n";

# First check: simply records that we got as far as loading the module.
ok 1;

# Apply the :utf8 layer to each of the three standard handles.
foreach my $stream (*STDOUT, *STDIN, *STDERR) {
    binmode($stream, ":utf8");
}

# Thank you very much to ... (this acknowledgement is left unfinished in
# the original file).

# Each entry is [ input, expected transliteration ].
for my $pair (
    [ "Ý"  => "Y"  ],    # https://rt.cpan.org/Ticket/Display.html?id=96889

    # From Tomaž Šolc superstar!
    #
    # NOTE(review): the first two inputs below appear as plain ASCII letter
    # pairs in this copy of the file.  Presumably single ligature characters
    # were intended (possibly U+0133 and U+01F2) -- confirm against the
    # upstream file before relying on these two checks.
    [ "ij" => "ij" ],
    [ "Dz" => "Dz" ],
    [ "Ƞ"  => "N"  ],
    [ "ȡ"  => "d"  ],
) {
    my ($input, $expected) = @$pair;
    ok( unidecode($input), $expected );
}

print "# Block 02: 'Latin; IPA; spacing accents', U+02xx\n";

# Each entry is [ input character, expected transliteration ]; the trailing
# comment carries the code point noted for that character in the original
# file.  U+0241 and U+0242 have no check here.
for my $pair (
    [ "ȴ" => 'l'  ],    # U+0234
    [ "ȵ" => 'n'  ],    # U+0235
    [ "ȶ" => 't'  ],    # U+0236
    [ "ȷ" => 'j'  ],    # U+0237
    [ "ȸ" => 'db' ],    # U+0238
    [ "ȹ" => 'qp' ],    # U+0239
    [ "Ⱥ" => 'A'  ],    # U+023a
    [ "Ȼ" => 'C'  ],    # U+023b
    [ "ȼ" => 'c'  ],    # U+023c
    [ "Ƚ" => 'L'  ],    # U+023d
    [ "Ⱦ" => 'T'  ],    # U+023e
    [ "ȿ" => 's'  ],    # U+023f
    [ "ɀ" => 'z'  ],    # U+0240

    [ "Ƀ" => 'B'  ],    # U+0243
    [ "Ʉ" => 'U'  ],    # U+0244
    [ "Ʌ" => '^'  ],    # U+0245
    [ "Ɇ" => 'E'  ],    # U+0246
    [ "ɇ" => 'e'  ],    # U+0247
    [ "Ɉ" => 'J'  ],    # U+0248
    [ "ɉ" => 'j'  ],    # U+0249
    [ "Ɋ" => 'q'  ],    # U+024a
    [ "ɋ" => 'q'  ],    # U+024b
    [ "Ɍ" => 'R'  ],    # U+024c
    [ "ɍ" => 'r'  ],    # U+024d
    [ "Ɏ" => 'Y'  ],    # U+024e
    [ "ɏ" => 'y'  ],    # U+024f

    [ "ɐ" => 'a'  ],    # U+0250
) {
    ok( unidecode( $pair->[0] ), $pair->[1] );
}


# More single-character checks, U+0278 through U+02B0.
# Each entry is [ input character, expected transliteration ]; the trailing
# comment carries the code point noted for that character in the original
# file.
for my $pair (
    [ "ɸ" => 'F' ],    # U+0278

    [ "ɹ" => 'r' ],    # U+0279
    [ "ɺ" => 'r' ],    # U+027a
    [ "ɻ" => 'r' ],    # U+027b
    [ "ɼ" => 'r' ],    # U+027c
    [ "ɽ" => 'r' ],    # U+027d
    [ "ɾ" => 'r' ],    # U+027e
    [ "ɿ" => 'r' ],    # U+027f
    [ "ʀ" => 'R' ],    # U+0280
    [ "ʁ" => 'R' ],    # U+0281

    [ "ʉ" => 'u' ],    # U+0289

    # Template for further entries:
    # [ "" => '' ],    # U+02__

    [ "ʌ" => '^' ],    # U+028c
    [ "ʍ" => 'w' ],    # U+028d
    [ "ʎ" => 'y' ],    # U+028e
    [ "ʏ" => 'Y' ],    # U+028f

    [ "ʮ" => 'h' ],    # U+02ae
    [ "ʯ" => 'h' ],    # U+02af
    [ "ʰ" => 'h' ],    # U+02b0
) {
    ok( unidecode( $pair->[0] ), $pair->[1] );
}

# TS's suggested mapping of "ʰ" to "k" is rejected.  I see what he means,
# but it is too much of a stretch.

#======================================================================



#ok( unidecode( ""  ), '') ;    # \x{02__}

print "# Combining Latin letters, U+03xx\n";

# Each entry is [ combining character, the plain letter it should become ].
for my $pair (
    [ "ͣ" => 'a' ],
    [ "ͤ" => 'e' ],
    [ "ͥ" => 'i' ],
    [ "ͦ" => 'o' ],
    [ "ͧ" => 'u' ],
    [ "ͨ" => 'c' ],
    [ "ͩ" => 'd' ],
    [ "ͪ" => 'h' ],
    [ "ͫ" => 'm' ],
    [ "ͬ" => 'r' ],
    [ "ͭ" => 't' ],
    [ "ͮ" => 'v' ],
    [ "ͯ" => 'x' ],
) {
    my ($mark, $letter) = @$pair;
    ok( unidecode($mark), $letter );
}

print "# Russian things\n";

# Each entry is [ Cyrillic letter, expected Latin transliteration ].
for my $pair (
    [ "Е" => 'E' ],
    [ "г" => 'g' ],
    [ "е" => 'e' ],
) {
    my ($cyrillic, $latin) = @$pair;
    ok( unidecode($cyrillic), $latin );
}


print "# Stuff...\n";

# Each entry is [ input, expected transliteration ].
for my $pair (
    [ "։"         => '.'  ],    # U+0589 | Armenian full stop
    [ "\x{05c0}"  => '|'  ],    # U+05C0 | Hebrew punctuation paseq

    [ "ẛ"         => 's'  ],
    [ "ẜ"         => 's'  ],
    [ "ẝ"         => 's'  ],
    [ "ẞ"         => 'Ss' ],
    [ "ẟ"         => 'd'  ],
) {
    my ($input, $expected) = @$pair;
    ok( unidecode($input), $expected );
}

# That's all of Tomaž's stuff before 0x20
# (NOTE(review): "0x20" presumably refers to the U+20xx range -- confirm).

#======================================================================

# Final check: records that the script ran all the way to the end.
ok 1;

# End