File: url-normalizer.pl

package info (click to toggle)
squid 2.6.5-6etch5
  • links: PTS
  • area: main
  • in suites: etch
  • size: 12,540 kB
  • ctags: 13,801
  • sloc: ansic: 105,278; sh: 6,083; makefile: 1,297; perl: 1,245; awk: 40
file content (51 lines) | stat: -rwxr-xr-x 1,353 bytes parent folder | download | duplicates (21)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/local/bin/perl -Tw

# From:    Markus Gyger <mgyger@itr.ch>
#
# I'd like to see a redirector which "normalizes" URLs to have
# a higher chance to get a hit. I didn't see such a redirector,
# so I thought I would send my little attempt. However, I have
# no real idea how much CPU time it needs using the LWP modules,
# but it seems to work.

require 5.003;
use strict;
use URI::URL;

$| = 1;  # force a flush after every print on STDOUT

my ($url, $addr, $fqdn, $ident, $method);

while (<>) {
    ($url, $addr, $fqdn, $ident, $method) = m:(\S*) (\S*)/(\S*) (\S*) (\S*):;

    # "normalize" URL
    $url = url $url;                    # also removes default port number
    $url->host(lc $url->host);          # map host name to lower case
    my $epath = $url->epath;
    $epath =~ s/%7e/~/ig;               # unescape ~
    $epath =~ s/(%[\da-f]{2})/\U$1/ig;  # capitalize escape digits
    if ($url->scheme =~ /^(http|ftp)$/) {
	$epath =~ s:/\./:/:g;           # safe?
	$epath =~ s://:/:g;             # safe?
    }
    $url->epath($epath);


    # ...


} continue {
    print "$url $addr/$fqdn $ident $method\n"
}


BEGIN {
    unless (URI::URL::implementor('cache_object')) {
	package cache_object;
	@cache_object::ISA = (URI::URL::implementor());
	URI::URL::implementor('cache_object', 'cache_object');

	sub default_port { 3128 }
    }
}