1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
|
<?php
/**
* HTML Purifier's internal representation of a URI.
* @note
* Internal data-structures are completely escaped. If the data needs
* to be used in a non-URI context (which is very unlikely), be sure
* to decode it first. The URI may not necessarily be well-formed until
* validate() is called.
*/
class HTMLPurifier_URI
{
public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
/**
* @note Automatically normalizes scheme and port
*/
public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment) {
$this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
$this->userinfo = $userinfo;
$this->host = $host;
$this->port = is_null($port) ? $port : (int) $port;
$this->path = $path;
$this->query = $query;
$this->fragment = $fragment;
}
/**
* Retrieves a scheme object corresponding to the URI's scheme/default
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return Scheme object appropriate for validating this URI
*/
public function getSchemeObj($config, $context) {
$registry = HTMLPurifier_URISchemeRegistry::instance();
if ($this->scheme !== null) {
$scheme_obj = $registry->getScheme($this->scheme, $config, $context);
if (!$scheme_obj) return false; // invalid scheme, clean it out
} else {
// no scheme: retrieve the default one
$def = $config->getDefinition('URI');
$scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
if (!$scheme_obj) {
// something funky happened to the default scheme object
trigger_error(
'Default scheme object "' . $def->defaultScheme . '" was not readable',
E_USER_WARNING
);
return false;
}
}
return $scheme_obj;
}
/**
* Generic validation method applicable for all schemes. May modify
* this URI in order to get it into a compliant form.
* @param $config Instance of HTMLPurifier_Config
* @param $context Instance of HTMLPurifier_Context
* @return True if validation/filtering succeeds, false if failure
*/
public function validate($config, $context) {
// ABNF definitions from RFC 3986
$chars_sub_delims = '!$&\'()*+,;=';
$chars_gen_delims = ':/?#[]@';
$chars_pchar = $chars_sub_delims . ':@';
// validate scheme (MUST BE FIRST!)
if (!is_null($this->scheme) && is_null($this->host)) {
$def = $config->getDefinition('URI');
if ($def->defaultScheme === $this->scheme) {
$this->scheme = null;
}
}
// validate host
if (!is_null($this->host)) {
$host_def = new HTMLPurifier_AttrDef_URI_Host();
$this->host = $host_def->validate($this->host, $config, $context);
if ($this->host === false) $this->host = null;
}
// validate username
if (!is_null($this->userinfo)) {
$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
$this->userinfo = $encoder->encode($this->userinfo);
}
// validate port
if (!is_null($this->port)) {
if ($this->port < 1 || $this->port > 65535) $this->port = null;
}
// validate path
$path_parts = array();
$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
if (!is_null($this->host)) {
// path-abempty (hier and relative)
$this->path = $segments_encoder->encode($this->path);
} elseif ($this->path !== '' && $this->path[0] === '/') {
// path-absolute (hier and relative)
if (strlen($this->path) >= 2 && $this->path[1] === '/') {
// This shouldn't ever happen!
$this->path = '';
} else {
$this->path = $segments_encoder->encode($this->path);
}
} elseif (!is_null($this->scheme) && $this->path !== '') {
// path-rootless (hier)
// Short circuit evaluation means we don't need to check nz
$this->path = $segments_encoder->encode($this->path);
} elseif (is_null($this->scheme) && $this->path !== '') {
// path-noscheme (relative)
// (once again, not checking nz)
$segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
$c = strpos($this->path, '/');
if ($c !== false) {
$this->path =
$segment_nc_encoder->encode(substr($this->path, 0, $c)) .
$segments_encoder->encode(substr($this->path, $c));
} else {
$this->path = $segment_nc_encoder->encode($this->path);
}
} else {
// path-empty (hier and relative)
$this->path = ''; // just to be safe
}
// qf = query and fragment
$qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
if (!is_null($this->query)) {
$this->query = $qf_encoder->encode($this->query);
}
if (!is_null($this->fragment)) {
$this->fragment = $qf_encoder->encode($this->fragment);
}
return true;
}
/**
* Convert URI back to string
* @return String URI appropriate for output
*/
public function toString() {
// reconstruct authority
$authority = null;
if (!is_null($this->host)) {
$authority = '';
if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@';
$authority .= $this->host;
if(!is_null($this->port)) $authority .= ':' . $this->port;
}
// reconstruct the result
$result = '';
if (!is_null($this->scheme)) $result .= $this->scheme . ':';
if (!is_null($authority)) $result .= '//' . $authority;
$result .= $this->path;
if (!is_null($this->query)) $result .= '?' . $this->query;
if (!is_null($this->fragment)) $result .= '#' . $this->fragment;
return $result;
}
}
// vim: et sw=4 sts=4
|