File: 10grep

package info (click to toggle)
htag 0.0.24-2
  • links: PTS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 356 kB
  • sloc: perl: 1,302; makefile: 20; sh: 2
file content (248 lines) | stat: -rw-r--r-- 2,950 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/usr/bin/perl -w

# Copyright (C) 2001 Simon Huggins

# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., 59
# Temple Place, Suite 330, Boston, MA 02111-1307  USA

use strict;

# Script-wide state:
#   @tags      - every line read from the tag file
#   %words     - occurrence count of each candidate word seen in the message
#   %blacklist - words that are never counted (filled from the DATA section)
my (@tags,%words,%blacklist);

# When grep_debug is set, diagnostics go to the DEBUG handle, opened here on
# grep_debugfile. An undefined setting is simply false, so no separate
# defined() test is needed.
if ($cfg{'grep_debug'}) {
	my $debugfile = $cfg{'grep_debugfile'};
	if (defined $debugfile and length $debugfile) {
		# Three-argument open: the configured path is always taken as a
		# plain file name and never parsed for mode or pipe characters.
		# Failure is reported but is not fatal, since the debug log is
		# only a diagnostic aid.
		open(DEBUG, '>', $debugfile)
			or warn "$0: Could not open debug file $debugfile: $!\n";
	} else {
		warn "$0: grep_debug is set but grep_debugfile is empty\n";
	}
}

# Read the whole tag file into @tags, one element per line (line endings are
# kept). A tag file that cannot be opened is fatal.
{
	# The name is copied into a lexical so that open always receives an
	# ordinary scalar; the three-argument form then treats it purely as a
	# file name, whatever characters it contains.
	my $tagfile = $cfg{'tagfile'};
	open(my $tag_fh, '<', $tagfile)
		or htagdie "Could not open $cfg{'tagfile'}: $!\n";
	@tags = <$tag_fh>;
	close($tag_fh);
}

# Build the stop-word set from the list that follows __DATA__. All whitespace
# is stripped from each line (some entries carry trailing blanks) and what
# remains becomes a key of %blacklist; the value counts how often the word
# was listed.
#
# Each line is read into a lexical rather than the implicit $_, because
# while(<FH>) assigns to the global $_ without localising it and would
# overwrite whatever value surrounding code had stored there.
while (defined(my $stopword = <DATA>)) {
	chomp $stopword;
	$stopword =~ s/\s+//g;
	$blacklist{$stopword}++;
}

# Count how often each candidate word occurs in the message file.
#
# Every line is normalised before it is split into words:
#   1. runs of whitespace collapse to a single space,
#   2. everything except ASCII letters, digits and spaces is deleted
#      (so "don't" becomes "dont"),
#   3. words made up solely of digits are removed,
#   4. the remainder is lower-cased.
# Words longer than nine characters and words present in %blacklist are
# not counted.
{
	my $msgfile = $cfg{'msgfile'};
	if (open(my $msg_fh, '<', $msgfile)) {
		# Lines are read into a lexical so the global $_ is left alone.
		while (defined(my $line = <$msg_fh>)) {
			$line =~ s/\s+/ /g;
			$line =~ tr/A-Za-z0-9 //cd; # delete non-alphanumeric
			$line =~ s/\b\d+\b//g;
			foreach my $word (split ' ', lc $line) {
				next if length($word) > 9;
				$words{$word}++ unless exists $blacklist{$word};
			}
		}
		close($msg_fh);
	} else {
		# Report the failure instead of silently reading from an unopened
		# handle. %words stays empty, so the rest of the script finds no
		# matching tags, exactly as it did before for an unreadable file.
		warn "$0: Could not open $cfg{'msgfile'}: $!\n";
	}
}

# Collect candidate tags. Words are visited from most to least frequent; for
# each one, every tag line containing it as a whole word (case-insensitive)
# is appended to @goodtags. A tag that matches several words is appended once
# per matching word. The loop stops once $count has gone past 20.
my @goodtags;
my $count=0;
my $debugging = $cfg{'grep_debug'};
my @by_frequency = sort { $words{$b} <=> $words{$a} } keys %words;
WORD:
for my $word (@by_frequency) {
	if ($debugging) {
		print DEBUG "$word occurred $words{$word} times\n";
	}

	my $whole_word = qr/\b$word\b/i;
	my @hits = grep { $_ =~ $whole_word } @tags;
	push @goodtags, @hits;

	if ($debugging) {
		print DEBUG join("\n", @hits);
	}

	last WORD if ++$count > 20;
}

# Write one randomly chosen entry of @goodtags to the temporary tag file, or
# exit with status 5 when no tag matched. The file is created and passed to
# reg_deletion() in either case.
{
	# Three-argument open: the configured path is used purely as a file
	# name and is never parsed for mode or pipe characters. It is copied
	# into a lexical so that open always receives an ordinary scalar.
	my $tmptagfile = $cfg{'tmptagfile'};
	open(OUT, '>', $tmptagfile)
		or htagdie "$0: Could not open $cfg{'tmptagfile'}: $!\n";
}
reg_deletion("$cfg{'tmptagfile'}");
if (@goodtags) {
	# rand() yields a fractional value below the element count; the array
	# subscript truncates it to a valid index.
	print OUT $goodtags[rand(@goodtags)];
} else {
	exit(5);
}

# Runs as the interpreter shuts down: close the output handle, and the debug
# handle too when grep_debug is enabled.
END {
	close OUT;
	close DEBUG if $cfg{'grep_debug'};
}

__DATA__
a 
about 
again 
all 
am 
an 
and 
another 
any 
apr
are 
arent 
as 
at 
aug
be 
because 
been 
before 
being 
but 
by 
can 
cant 
cat 
could 
dec
did 
do 
doesnt 
dont 
down 
ehlo
esmtp
even 
every 
feb
for 
fri
from 
gmt
go
great 
had
hadnt 
has 
have 
he 
her
here 
hers
herself 
him 
himself
his 
how 
however 
i 
id
if 
im 
in 
instead 
into 
is 
it 
its 
itself 
ive 
jan
jul
jun
know 
like 
lots 
mar
may
maybe
me
might
might 
mine
mon
more 
must 
my 
near 
need 
new 
no 
not 
nov
now 
oct
of 
off 
oh 
on 
or 
ought
ours
out 
over 
please 
quite 
received
said 
same 
sat
seem
seemed 
seems
sep
she
should
should 
smtp
so 
some 
such 
sun
than 
that 
thats 
the 
their
theirs
them
then 
there 
theres 
these 
they
this 
thu
to 
tom
too 
tue
up 
us
very 
want 
was 
we 
wed
well 
went 
were
what 
when 
which 
while 
who 
why 
will 
with 
wont 
would
would 
yes
yet 
you 
your 
youre 
yours
youve