1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
|
##---------------------------------------------------------------------------##
## File:
## @(#) mhtxthtml.pl 1.1 96/09/17 @(#)
## Author:
## Earl Hood ehood@medusa.acs.uci.edu
## Description:
## Library defines routine to filter text/html body parts
## for MHonArc.
## Filter routine can be registered with the following:
## <MIMEFILTERS>
## text/html:m2h_text_html'filter:mhtxthtml.pl
## </MIMEFILTERS>
##---------------------------------------------------------------------------##
## MHonArc -- Internet mail-to-HTML converter
## Copyright (C) 1995 Earl Hood, ehood@medusa.acs.uci.edu
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##
package m2h_text_html;
$Url = '(\w+://|\w+:)'; # Beginning of URL match expression
##---------------------------------------------------------------------------
## The filter must modify HTML content parts for merging into the
## final filtered HTML messages. Modification is needed so the
## resulting filtered message is valid HTML.
##
sub filter {
local($header, *fields, *data, $isdecode, $args) = @_;
local($base, $title, $tmp);
## Get/remove title
if ($data =~ s%<title\s*>([^<]*)</title\s*>%%i) {
$title = "<ADDRESS>Title: <STRONG>$1</STRONG></ADDRESS>\n";
}
## Get/remove BASE url
if ($data =~ s%(<base\s[^>]*>)%%i) {
$tmp = $1;
($base) = $tmp =~ m%href\s*=\s*['"]([^'"]+)['"]%i;
$base =~ s%(.*/).*%$1%;
}
## Strip out certain elements/tags
$data =~ s%<!doctype\s[^>]*>%%i;
$data =~ s%</?html[^>]*>%%ig;
$data =~ s%</?body[^>]*>%%ig;
$data =~ s%<head\s*>[\s\S]*</head\s*>%%i;
## Modify relative urls to absolute using BASE
if ($base !~ /^\s*$/) {
$data =~ s%(href\s*=\s*['"])([^'"]+)(['"])%
&addbase($base,$1,$2,$3)%gei;
$data =~ s%(src\s*=\s*['"])([^'"]+)(['"])%
&addbase($base,$1,$2,$3)%gei;
}
($title . $data);
}
##---------------------------------------------------------------------------
sub addbase {
local($b, $pre, $u, $suf) = @_;
local($ret);
$u =~ s/^\s+//;
if ($u =~ m%^$Url%o) { # Non-relative URL, do nothing
$ret = $pre . $u . $suf;
} else { # Relative URL
if ($u =~ m%^/%) { # Check for "/..."
$b =~ s%^(${Url}[^/]*)/.*%$1%o; # Get hostname:port number
}
$ret = $pre . $b . $u . $suf;
}
$ret;
}
##---------------------------------------------------------------------------
1;
|