File: fixhist

package info (click to toggle)
inn2 2.5.4-3
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 11,720 kB
  • ctags: 8,983
  • sloc: ansic: 92,499; sh: 13,509; perl: 12,921; makefile: 2,985; yacc: 842; python: 342; lex: 255
file content (89 lines) | stat: -rwxr-xr-x 2,143 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/local/bin/perl
#
# history database sanity checker
# David Barr <barr@math.psu.edu>
# version 1.4
# w/mods from: hucka@eecs.umich.edu
# Katsuhiro Kondou <kondou@nec.co.jp>
# version 1.1
# Throw away history entries with:
#   malformed lines (too long, contain nulls or special characters)
#
# INN Usage:
#   ctlinnd throttle 'fixing history'
#   ./fixhist <history >history.n
#   makedbz -s `wc -l <history.n` -f history.n
#      or use instructions from fixhist to avoid the `wc -l <history.n`
#   mv history.n history
#   mv history.n.dir history.dir
### if TAGGED_HASH is DO or before inn2.0
#   mv history.n.pag history.pag
### if TAGGED_HASH is DONT
#   mv history.n.hash history.hash
#   mv history.n.index history.index
### endif
#   ctlinnd reload history x
#   ctlinnd go 'fixing history'
# any malformed entries will be output to stderr.


use strict;
use warnings;

# Message-IDs whose length reaches this limit are rejected.
my $MAXKEYLEN = 254;
# Number of lines written to stdout; reported at the end as the size
# argument for makedbz -s.
my $count = 0;

# Main filter loop: read history lines from stdin (or from files named on
# the command line), copy the well-formed ones to stdout and hand the
# malformed ones to tossit().  tossit() reports the current line through
# $_, so $_ must keep holding the original input line (minus its newline)
# for the whole iteration.
while (<>) {
	# chomp rather than chop: a final line that lacks a trailing newline
	# must not lose its last real character.
	chomp;
	my ($msgid, $dates, $arts, $xtra) = split(/\t/);

	# Missing fields become empty strings so that none of the checks
	# below ever operates on undef.
	$msgid = '' unless defined $msgid;
	$dates = '' unless defined $dates;
	$arts  = '' unless defined $arts;

	# Line that will be written if every check passes; normally the input
	# line itself, but replaced below when a storage token is shortened.
	my $out = $_;

	# Test for a non-empty fourth field explicitly so that a field
	# containing just "0" is also recognised as an extra field.
	if (defined $xtra && $xtra ne '') {
		tossit();		# too many fields
		next;
	}
	if (!$dates && $arts) {
		tossit();		# if not date field, then the rest
		next;			# should be empty
	}
	if (length($msgid) >= $MAXKEYLEN) {
		tossit();		# message-id too long
		next;
	}
	if ($msgid =~ /^<[^<> ]*>$/) {
		# <message-id> key.  The pattern is unanchored, so in effect it
		# only requires the articles field to contain a '/'.
		if ($arts ne "" && ($arts !~ /[^\/]*\/[0-9]*/)) {
			tossit();		# malformed articles list
			next;
		}
	} elsif ($msgid =~ /^\[[0-9A-F]{32}\]$/) {
		# [32 hex digits] key: the third field must be empty or an
		# @...@ token of 36 hex digits.  A 56-digit token is cut down
		# to its first 36 digits and the line is rebuilt; it then goes
		# through the remaining checks and is counted like any other
		# line.
		if ($arts =~ /^\@[0-9A-F]{56}\@$/) {
			$arts =~ s/^\@([0-9A-F]{36})([0-9A-F]{20})\@$/\@${1}\@/;
			$out = "$msgid\t$dates\t$arts";
		} elsif ($arts ne "" && $arts !~ /^\@[0-9A-F]{36}\@$/) {
			tossit();
			next;
		}
	} else {
		tossit();		# malformed msg-ids
		next;
	}
	if (/[\000-\010\012-\037\177-\237]/) { # control chars except tab
		tossit();		# illegal chars
		next;
	}
	if ($dates && $dates =~ /[^\d~\-]/) {	# rudimentary check
		tossit();		# full check would be too slow
		next;
	}
	print "$out\n";
	$count++;
	# Every 50000 input lines, expose progress through the process name.
	$0 = "history line $./$count" if $. % 50000 == 0;
}

# Buffered write errors (for example a full disk while writing history.n)
# only show up at close time; refuse to report success on a truncated file.
close(STDOUT) or die "fixhist: error writing to stdout: $!\n";
print STDERR "Done.  Now run:\nmakedbz -s $count -f history.n\n";

# Report a rejected history entry: write the line currently held in $_,
# followed by a newline, to STDERR.  Takes no arguments; the caller is
# expected to have the offending line in $_.
sub tossit {
	my $rejected = $_;
	return print STDERR $rejected . "\n";
}