1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
#!/usr/local/bin/perl -Tw
#
# * Copyright (C) 1996-2025 The Squid Software Foundation and contributors
# *
# * Squid software is distributed under GPLv2+ license and includes
# * contributions from numerous individuals and organizations.
# * Please see the COPYING and CONTRIBUTORS files for details.
#
# From: Markus Gyger <mgyger@itr.ch>
#
# I'd like to see a redirector which "normalizes" URLs to have
# a higher chance to get a hit. I didn't see such a redirector,
# so I thought I would send my little attempt. However, I have
# no real idea how much CPU time it needs using the LWP modules,
# but it seems to work.
require 5.003;
use strict;
use URI::URL;
$| = 1; # force a flush after every print on STDOUT
my ($url, $addr, $fqdn, $ident, $method);
while (<>) {
($url, $addr, $fqdn, $ident, $method) = m:(\S*) (\S*)/(\S*) (\S*) (\S*):;
# "normalize" URL
$url = url $url; # also removes default port number
$url->host(lc $url->host); # map host name to lower case
my $epath = $url->epath;
$epath =~ s/%7e/~/ig; # unescape ~
$epath =~ s/(%[\da-f]{2})/\U$1/ig; # capitalize escape digits
if ($url->scheme =~ /^(http|ftp)$/) {
$epath =~ s:/\./:/:g; # safe?
$epath =~ s://:/:g; # safe?
}
$url->epath($epath);
# ...
} continue {
print "$url $addr/$fqdn $ident $method\n"
}
BEGIN {
unless (URI::URL::implementor('cache_object')) {
package cache_object;
@cache_object::ISA = (URI::URL::implementor());
URI::URL::implementor('cache_object', 'cache_object');
sub default_port { 3128 }
}
}
|