1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
|
<?php
/**
* SafeHtmlChecker
*
* checks HTML against a subset of elements to ensure safety and XHTML validation.
*
* b2evolution - {@link http://b2evolution.net/}
* Released under GNU GPL License - {@link http://b2evolution.net/about/license.html}
* @copyright (c)2003-2005 by Francois PLANQUE - {@link http://fplanque.net/}
*
* @package evocore
* @author Simon Willison, 23rd Feb 2003, modified by fplanque, sakichan
*/
// Refuse to run when the DB_USER constant has not been defined by an earlier include
// (presumably the application config -- confirm against the bootstrap sequence).
if( ! defined( 'DB_USER' ) )
{
	exit( 'Please, do not access this page directly.' );
}
/**
* SafeHtmlChecker
*
* checks HTML against a subset of elements to ensure safety and XHTML validation.
*
* @package evocore
*/
class SafeHtmlChecker
{
	/**
	 * Allowed tags: tag name => space separated list of what the tag may contain
	 * (child tag names and/or '#PCDATA' for raw character data).
	 */
	var $tags;

	/**
	 * Allowed attributes: tag name => space separated list of attribute names.
	 */
	var $tagattrs;

	/**
	 * Names of the attributes whose value gets checked with validate_url().
	 */
	var $uri_attrs;

	/**
	 * Allowed URI schemes, handed over to validate_url() as is.
	 */
	var $allowed_uri_scheme;

	/**
	 * Encoding of the content (upper case); switched to 'UTF-8' by check() when it converts the input.
	 */
	var $encoding;

	/**
	 * Encoding given to xml_parser_create(); '' means auto-detect.
	 */
	var $xml_parser_encoding;

	// Internal variables

	/** XML parser created in the constructor */
	var $parser;

	/** Names of the currently open tags, innermost last */
	var $stack = array();

	/** Parser byte index at the time of the last handler call (used to quote context in parser errors) */
	var $last_checked_pos;

	/** true as soon as one error has been reported */
	var $error;


	/**
	 * Constructor
	 *
	 * {@internal This gets tested in _libs.misc.simpletest.php}}
	 *
	 * @param array allowed tags (tag => space separated allowed content)
	 * @param array allowed attributes (tag => space separated attribute names)
	 * @param array names of attributes that hold URIs
	 * @param array allowed URI schemes (passed to validate_url())
	 * @param string Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect)
	 */
	function __construct( & $allowed_tags, & $allowed_attributes, & $uri_attrs, & $allowed_uri_scheme, $encoding = '' )
	{
		$this->tags = & $allowed_tags;
		$this->tagattrs = & $allowed_attributes;
		$this->uri_attrs = & $uri_attrs;
		$this->allowed_uri_scheme = & $allowed_uri_scheme;

		$encoding = strtoupper($encoding); // we might get 'iso-8859-1' for example
		$this->encoding = $encoding;

		if( ! in_array( $encoding, array( 'ISO-8859-1', 'UTF-8', 'US-ASCII' ) ) )
		{ // passed encoding not supported by xml_parser_create()
			$this->xml_parser_encoding = ''; // auto-detect (in PHP4, in PHP5 anyway)
		}
		else
		{
			$this->xml_parser_encoding = $this->encoding;
		}

		// Creates the parser
		$this->parser = xml_parser_create( $this->xml_parser_encoding );
		$this->last_checked_pos = 0;
		$this->error = false;

		// Handlers below are given as method names, resolved against this object:
		xml_set_object( $this->parser, $this );

		// set functions to call when a start or end tag is encountered
		xml_set_element_handler( $this->parser, 'tag_open', 'tag_close' );

		// set function to call for the actual data
		xml_set_character_data_handler( $this->parser, 'cdata' );

		xml_set_default_handler( $this->parser, 'default_handler' );
		xml_set_external_entity_ref_handler( $this->parser, 'external_entity' );
		xml_set_unparsed_entity_decl_handler( $this->parser, 'unparsed_entity' );

		// Keep tag/attribute names as written (the allowed lists are matched case sensitively):
		xml_parser_set_option( $this->parser, XML_OPTION_CASE_FOLDING, false );
	}


	/**
	 * Legacy (PHP4 style) constructor.
	 *
	 * PHP 8 no longer treats a method named like the class as the constructor, so the real
	 * work lives in __construct(). This method is kept with its original signature for any
	 * code that calls it explicitly.
	 *
	 * @param array
	 * @param array
	 * @param array
	 * @param array
	 * @param string
	 */
	function SafeHtmlChecker( & $allowed_tags, & $allowed_attributes, & $uri_attrs, & $allowed_uri_scheme, $encoding = '' )
	{
		$this->__construct( $allowed_tags, $allowed_attributes, $uri_attrs, $allowed_uri_scheme, $encoding );
	}


	/**
	 * Default handler: deliberately ignores whatever no other handler takes.
	 */
	function default_handler( $parser, $data)
	{
		// echo 'default handler: '.$data.'<br />';
	}


	/**
	 * External entity reference handler: deliberately does nothing.
	 */
	function external_entity( $parser, $open_entity_names, $base, $system_id, $public_id)
	{
		// echo 'external_entity<br />';
	}


	/**
	 * Unparsed entity declaration handler: deliberately does nothing.
	 */
	function unparsed_entity( $parser, $entity_name, $base, $system_id, $public_id, $notation_name)
	{
		// echo 'unparsed_entity<br />';
	}


	/**
	 * Get the innermost tag that is currently open.
	 *
	 * @return string|NULL tag name, NULL when no tag is open
	 */
	function current_tag()
	{
		$depth = count( $this->stack );

		return $depth > 0 ? $this->stack[$depth - 1] : NULL;
	}


	/**
	 * check(-)
	 *
	 * Wraps the fragment into an XML document (<body> root) and runs it through the parser.
	 * Problems are reported through html_error(); use isOK() to get the verdict.
	 *
	 * @param string XHTML fragment to check
	 */
	function check($xhtml)
	{
		// Convert encoding:
		if( empty($this->xml_parser_encoding) || $this->encoding != $this->xml_parser_encoding )
		{ // we need to convert encoding:
			if( function_exists( 'mb_convert_encoding' ) )
			{ // we can convert encoding to UTF-8
				$this->encoding = 'UTF-8';

				// Convert XHTML:
				// NOTE(review): no source encoding is passed, so mbstring assumes its internal encoding -- confirm this is intended.
				$xhtml = mb_convert_encoding( $xhtml, 'UTF-8' );
			}
		}

		// Open comments or '<![CDATA[' are dangerous
		$xhtml = str_replace('<!', '', $xhtml);

		// Convert isolated & chars (a bare '&' is not well-formed XML and would abort the parser)
		$xhtml = preg_replace( '#(\s)&(\s)#', '\\1&amp;\\2', $xhtml );

		$xhtml_head = '<?xml version="1.0"';
		if( ! empty($this->encoding) )
		{
			$xhtml_head .= ' encoding="'.$this->encoding.'"';
		}
		$xhtml_head .= '?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">';

		$xhtml = $xhtml_head.'<body>'.$xhtml.'</body>';

		if( !xml_parse($this->parser, $xhtml) )
		{
			$xml_error_code = xml_get_error_code( $this->parser );
			$xml_error_string = xml_error_string( $xml_error_code );

			if( $xml_error_code == XML_ERROR_TAG_MISMATCH )
			{ // Name the tag that is still open:
				$open_tag = $this->current_tag();
				if( $open_tag !== NULL )
				{
					$xml_error_string .= ': <code>'.$open_tag.'</code>';
				}
			}

			// Quote the input from the last handler call up to a little past the error position:
			$pos = xml_get_current_byte_index($this->parser);
			$xml_error_string .= ' near <code>'.htmlspecialchars( substr( $xhtml, $this->last_checked_pos, $pos-$this->last_checked_pos+20 ) ).'</code>';

			$this->html_error( T_('Parser error: ').$xml_error_string );
		}
	}


	/**
	 * tag_open(-)
	 *
	 * Called when the parser finds an opening tag
	 *
	 * @param resource|object the XML parser
	 * @param string tag name
	 * @param array attribute name => value
	 */
	function tag_open($parser, $tag, $attrs)
	{
		// echo "processing tag: $tag <br />\n";
		$this->last_checked_pos = xml_get_current_byte_index($this->parser);

		if ($tag == 'body')
		{ // 'body' is the wrapper added by check(); it must be the outermost tag
			if( count($this->stack) > 0 )
			{
				$this->html_error( T_('Tag <code>body</code> can only be used once!') );
			}
			$this->stack[] = $tag;
			return;
		}

		$previous = $this->current_tag();

		// If previous tag is illegal (or missing), no point in running tests
		if( $previous === NULL || ! array_key_exists( $previous, $this->tags ) )
		{
			$this->stack[] = $tag;
			return;
		}

		// Is tag a legal tag?
		if( ! array_key_exists( $tag, $this->tags ) )
		{
			$this->html_error( T_('Illegal tag'). ": <code>$tag</code>" );
			$this->stack[] = $tag;
			return;
		}

		// Is tag allowed in the current context?
		if( ! in_array( $tag, explode(' ', $this->tags[$previous]), true ) )
		{
			if ($previous == 'body')
			{
				$this->html_error( sprintf( T_('Tag %s must occur inside another tag'), '<code>'.$tag.'</code>' ) );
			}
			else
			{
				$this->html_error( sprintf( T_('Tag %s is not allowed within tag %s'), '<code>'.$tag.'</code>', '<code>'.$previous.'</code>') );
			}
		}

		// Are tag attributes valid?
		$allowed_attrs = isset($this->tagattrs[$tag]) ? explode(' ', $this->tagattrs[$tag]) : array();
		foreach( $attrs as $attr => $value )
		{
			if( ! in_array( $attr, $allowed_attrs, true ) )
			{
				$this->html_error( sprintf( T_('Tag %s may not have attribute %s'), '<code>'.$tag.'</code>', '<code>'.$attr.'</code>' ) );
			}

			if (in_array($attr, $this->uri_attrs))
			{ // Must this attribute be checked for URIs
				$value = trim($value);
				if( $error = validate_url( $value, $this->allowed_uri_scheme ) )
				{
					$this->html_error( T_('Found invalid URL: ').$error );
				}
			}
		}

		// Set previous, used for checking nesting context rules
		$this->stack[] = $tag;
	}


	/**
	 * cdata(-)
	 *
	 * Called for character data: non blank text is only accepted when the
	 * enclosing tag lists '#PCDATA' in its allowed content.
	 *
	 * @param resource|object the XML parser
	 * @param string the character data
	 */
	function cdata($parser, $cdata)
	{
		$this->last_checked_pos = xml_get_current_byte_index($this->parser);

		// Simply check that the 'previous' tag allows CDATA
		$previous = $this->current_tag();

		// If previous tag is illegal (or missing), no point in running test
		if( $previous === NULL || ! array_key_exists( $previous, $this->tags ) )
		{
			return;
		}

		if( trim($cdata) != '' && ! in_array( '#PCDATA', explode(' ', $this->tags[$previous]), true ) )
		{
			$this->html_error( sprintf( T_('Tag %s may not contain raw character data'), '<code>'.$previous.'</code>' ) );
		}
	}


	/**
	 * tag_close(-)
	 *
	 * Called when the parser finds a closing tag
	 */
	function tag_close($parser, $tag)
	{
		$this->last_checked_pos = xml_get_current_byte_index($this->parser);

		// Move back one up the stack
		array_pop($this->stack);
	}


	/**
	 * Record an error: flags the checker as failed and forwards the message to errors_add().
	 *
	 * @param string error message
	 */
	function html_error( $string )
	{
		$this->error = true;
		errors_add( $string );
	}


	/**
	 * isOK(-)
	 *
	 * @return boolean true if no error has been reported so far
	 */
	function isOK()
	{
		return ! $this->error;
	}
}
?>
|