File: badwords

package info (click to toggle)
curl 8.19.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 31,884 kB
  • sloc: ansic: 200,254; perl: 21,116; python: 10,390; sh: 6,691; makefile: 1,507; pascal: 240; cpp: 196
file content (166 lines) | stat: -rwxr-xr-x 3,738 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env perl
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# SPDX-License-Identifier: curl
#
# bad[:=]correct
#
# If separator is '=', the string will be compared case sensitively.
# If separator is ':', the check is done case insensitively.
#
# To add white listed uses of bad words that are removed before checking for
# the bad ones:
#
# ---(accepted word)
#

use strict;
use warnings;

my @whitelist = (
    # ignore what looks like URLs
    '(^|\W)((https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&\'\(\)*+,;=]+)',
    # ignore bolded sections
    '\*\*(.*?)\*\*');
my %alt;
my %exactcase;
my $skip_indented = 1;

if($ARGV[0] eq "-a") {
    shift @ARGV;
    $skip_indented = 0;
}
my %wl;
if($ARGV[0] eq "-w") {
    shift @ARGV;
    my $file = shift @ARGV;
    open(W, "<$file") or die "Cannot open '$file': $!";
    while(<W>) {
        if(/^#/) {
            # allow #-comments
            next;
        }
        if(/^([^:]*):(\d*):(.*)/) {
            $wl{"$1:$2:$3"}=1;
            #print STDERR "whitelisted $1:$2:$3\n";
        }
    }
    close(W);
}

my @w;
my @exact;
while(<STDIN>) {
    chomp;
    if($_ =~ /^#/) {
        next;
    }
    if($_ =~ /^---(.*)/) {
        push @whitelist, $1;
    }
    elsif($_ =~ /^(.*)([:=])(.*)/) {
        my ($bad, $sep, $better)=($1, $2, $3);
        if($sep eq "=") {
            $alt{$bad} = $better;
            push @exact, $bad;
        }
        else {
            $alt{lc($bad)} = $better;
            push @w, $bad;
        }
    }
}

# Build a single combined regex for case-insensitive words
my $re_ci;
if(@w) {
    my $pat = join('|', map { quotemeta($_) } @w);
    $re_ci = qr/\b($pat)\b/i;
}

# Build a single combined regex for case-sensitive (exact) words
my $re_cs;
if(@exact) {
    my $pat = join('|', map { quotemeta($_) } @exact);
    $re_cs = qr/\b($pat)\b/;
}

my $errors = 0;

sub highlight {
    my ($p, $w, $in, $f, $l, $lookup) = @_;

    my $c = length($p)+1;
    my $ch = "$f:$l:$w";
    if($wl{$ch}) {
        # whitelisted filename + line + word
        return;
    }
    $ch = $f . "::" . $w;
    if($wl{$ch}) {
        # whitelisted filename + word
        return;
    }

    print STDERR  "$f:$l:$c: error: found bad word \"$w\"\n";
    printf STDERR " %4d | %s\n", $l, $in;
    printf STDERR "      | %*s^%s\n", length($p), " ",
        "~" x (length($w)-1);
    printf STDERR " maybe use \"%s\" instead?\n", $alt{$lookup};
    $errors++;
}

sub file {
    my ($f) = @_;
    my $l = 0;
    open(F, "<$f");
    while(<F>) {
        my $in = $_;
        $l++;
        chomp $in;
        if($skip_indented && $in =~ /^    /) {
            next;
        }
        # remove the link part
        $in =~ s/(\[.*\])\(.*\)/$1/g;
        # remove backticked texts
        $in =~ s/\`.*\`//g;
        # remove whitelisted patterns (pre-compiled)
        for my $p (@whitelist) {
            $in =~ s/$p//g;
        }
        # case-insensitive bad words
        if($re_ci) {
            while($in =~ /^(.*)$re_ci/i) {
                highlight($1, $2, $in, $f, $l, lc($2));
                last;
            }
        }
        # case-sensitive (exact) bad words
        if($re_cs) {
            while($in =~ /^(.*)$re_cs/) {
                highlight($1, $2, $in, $f, $l, $2);
                last;
            }
        }
    }
    close(F);
}

my @filemasks = @ARGV;
open(my $git_ls_files, '-|', 'git', 'ls-files', '--', @filemasks) or die "Failed running git ls-files: $!";
my @files;
while(my $each = <$git_ls_files>) {
    chomp $each;
    push @files, $each;
}
close $git_ls_files;

my $onum = scalar(@files);
my $num;
for my $e (@files) {
    #printf STDERR "Complete: %d%%\r", $num++ * 100 / $onum;
    file($e);
}

exit $errors;