File: mhtxthtml.pl

package info (click to toggle)
mhonarc 2.1-1
  • links: PTS
  • area: main
  • in suites: hamm, slink
  • size: 1,932 kB
  • ctags: 2,849
  • sloc: perl: 7,329; makefile: 51
file content (89 lines) | stat: -rw-r--r-- 3,229 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
##---------------------------------------------------------------------------##
##  File:
##	@(#)  mhtxthtml.pl 1.1 96/09/17 @(#)
##  Author:
##      Earl Hood       ehood@medusa.acs.uci.edu
##  Description:
##	Library defines routine to filter text/html body parts
##	for MHonArc.
##	Filter routine can be registered with the following:
##		<MIMEFILTERS>
##		text/html:m2h_text_html'filter:mhtxthtml.pl
##		</MIMEFILTERS>
##---------------------------------------------------------------------------##
##    MHonArc -- Internet mail-to-HTML converter
##    Copyright (C) 1995	Earl Hood, ehood@medusa.acs.uci.edu
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##


package m2h_text_html;

$Url	= '(\w+://|\w+:)';	# Beginning of URL match expression

##---------------------------------------------------------------------------
##	The filter must modify HTML content parts for merging into the
##	final filtered HTML messages.  Modification is needed so the
##	resulting filtered message is valid HTML.
##
sub filter {
    local($header, *fields, *data, $isdecode, $args) = @_;
    local($base, $title, $tmp);

    ## Get/remove title
    if ($data =~ s%<title\s*>([^<]*)</title\s*>%%i) {
        $title = "<ADDRESS>Title: <STRONG>$1</STRONG></ADDRESS>\n";
    }
    ## Get/remove BASE url
    if ($data =~ s%(<base\s[^>]*>)%%i) {
        $tmp = $1;
        ($base) = $tmp =~ m%href\s*=\s*['"]([^'"]+)['"]%i;
        $base =~ s%(.*/).*%$1%;
    }
    ## Strip out certain elements/tags
    $data =~ s%<!doctype\s[^>]*>%%i;
    $data =~ s%</?html[^>]*>%%ig;
    $data =~ s%</?body[^>]*>%%ig;
    $data =~ s%<head\s*>[\s\S]*</head\s*>%%i;

    ## Modify relative urls to absolute using BASE
    if ($base !~ /^\s*$/) {
        $data =~ s%(href\s*=\s*['"])([^'"]+)(['"])%
		   &addbase($base,$1,$2,$3)%gei;
        $data =~ s%(src\s*=\s*['"])([^'"]+)(['"])%
                   &addbase($base,$1,$2,$3)%gei;
    }

    ($title . $data);
}
##---------------------------------------------------------------------------
sub addbase {
    local($b, $pre, $u, $suf) = @_;
    local($ret);
    $u =~ s/^\s+//;
    if ($u =~ m%^$Url%o) {	# Non-relative URL, do nothing
        $ret = $pre . $u . $suf;
    } else {			# Relative URL
	if ($u =~ m%^/%) {		# Check for "/..."
	    $b =~ s%^(${Url}[^/]*)/.*%$1%o;	# Get hostname:port number
	}
        $ret = $pre . $b . $u . $suf;
    }
    $ret;
}
##---------------------------------------------------------------------------

1;