1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
|
#!/usr/bin/perl
# Copyright (C) 2014-2016 Jakub Wilk <jwilk@jwilk.net>
# Copyright (C) 2017-2023 Axel Beckert <abe@debian.org>
#
# This program is free software. It is distributed under the terms of
# the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, you can find it on the World Wide
# Web at https://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.
use strict;
use warnings;
use Const::Fast;
use IPC::Run3;
use List::SomeUtils qw(uniq);
use Array::Utils qw(intersect);
use Test::More tests => 8;
const my $NEWLINE => qq{\n};
const my $DOT => q{.};
const my $WAIT_STATUS_SHIFT => 8;
$ENV{'LINTIAN_BASE'} //= $DOT;
my $cmd_path = "$ENV{LINTIAN_BASE}/bin/spellintian";
my $spelling_data = "$ENV{LINTIAN_BASE}/data/spelling/corrections";
my @word_lists
= qw(/usr/share/dict/american-english /usr/share/dict/british-english);
# See #1019541 why some valid words are ignored and still ok to be
# listed as a misspelled word.
my @valid_but_very_seldom_words = qw(bellow singed want's);
# See #865055 why "iff" is wrong. "publically" is a seldom, but valid
# English word, is used in the OpenSSL license and hence causes quite
# some false positives, when being added (again).
my @valid_words = qw(iff publically);
sub t {
my ($input, $expected, @options) = @_;
my @command = ($cmd_path, @options);
my $output;
run3(\@command, \$input, \$output);
my $status = ($? >> $WAIT_STATUS_SHIFT);
is($status, 0, 'exit status 0');
is($output, $expected, 'expected output');
return;
}
my $s = "A familar brown gnu allows\nto jump over the lazy dog.\n";
t($s,
'familar -> familiar'
. $NEWLINE
. '"allows to" -> "allows one to"'
. $NEWLINE);
t(
$s,
'familar -> familiar'
. $NEWLINE
. '"allows to" -> "allows one to"'
. $NEWLINE
. 'gnu -> GNU'
. $NEWLINE,
'--picky'
);
foreach my $word_list (@word_lists) {
open(my $wl_fh, '<', $word_list)
or die "Can't open $word_list for reading: $!";
local $/ = undef; # enable localized slurp mode
push(@valid_words, split(/\n/, <$wl_fh>));
close $wl_fh;
}
# Don't list identical words from American and British English twice.
@valid_words = uniq(@valid_words);
# Ignore words which are valid but very seldom and unlikely to show up
# in Debian packages.
foreach my $valid_but_very_seldom_word (@valid_but_very_seldom_words) {
@valid_words = grep { !/^$valid_but_very_seldom_word$/ } @valid_words;
}
my $iff = 0;
my $publically = 0;
my @case_sen;
my @equal;
my @valid_but_listed_words = qw();
my @bad_spellings = qw();
my @good_spellings = qw();
open(my $sp_fh, '<', $spelling_data)
or die "Can't open $spelling_data for reading: $!";
while (my $corr = <$sp_fh>) {
next if $corr =~ m{ ^\# | ^$ }x;
chomp($corr);
my ($wrong, $good) = split(/\|\|/, $corr);
# Check for corrections equal to original
if ($wrong eq $good) {
push @equal, $wrong;
# Check if case sensitive corrections have been added to the wrong
# file (data/spelling/corrections, not data/spelling/corrections-case).
# Bad example from #883041: german||German
} elsif ($wrong eq lc($good)) {
push @case_sen, $wrong;
}
# Needed later, e.g. for checking against lists of valid words.
push(@bad_spellings, $wrong);
push(@good_spellings, $good);
}
close($sp_fh);
ok(
scalar(@equal) == 0,
"No no-op correction present in ${spelling_data} ("
. join(', ', @equal) . ')'
);
ok(
scalar(@case_sen) == 0,
"No case sensitive correction present in ${spelling_data} ("
. join(', ', @case_sen) . ')'
);
# Check if valid words have beeing has been added as correction.
my %word_count = ();
foreach my $word (@valid_words, @bad_spellings) {
$word_count{$word}++;
}
foreach my $word (keys %word_count) {
push(@valid_but_listed_words, $word) if $word_count{$word} > 1;
}
ok(
scalar(@valid_but_listed_words) == 0,
"No valid word is present in ${spelling_data} ("
. join(', ', sort @valid_but_listed_words) . ')'
);
my @good_bad_ugly = intersect(@bad_spellings, @good_spellings);
ok(
scalar(@good_bad_ugly) == 0,
'No bad spelling is listed as good spelling for another bad spelling ('
. join(', ', @good_bad_ugly) . ')'
);
# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
|