1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
|
//! Determine which URLs are allowed for loading.
use std::fmt;
use std::ops::Deref;
use url::Url;
use crate::error::AllowedUrlError;
/// Decides which URLs are allowed to be loaded.
///
/// Currently only contains the base URL.
///
/// The plan is to add:
///
/// * `base_only`: Only allow loading content from the same base URL. By default
///   this restriction is enabled and requires `base_url` to be provided.
/// * `include_xml`: Allows using `xi:include` with XML. Enabled by default.
/// * `include_text`: Allows using `xi:include` with text. Enabled by default.
/// * `local_only`: Only allow loading content from the local filesystem.
///   Enabled by default.
#[derive(Clone)]
pub struct UrlResolver {
/// Base URL; all relative references will be resolved with respect to this.
///
/// When this is `None`, `resolve_href` only accepts `data:` URLs; every
/// other href is rejected with an error.
pub base_url: Option<Url>,
}
impl UrlResolver {
/// Creates a `UrlResolver` with defaults, and sets the `base_url`.
pub fn new(base_url: Option<Url>) -> Self {
UrlResolver { base_url }
}
/// Decides which URLs are allowed to be loaded based on the presence of a base URL.
///
/// This function implements the policy described in "Security and locations of
/// referenced files" in the [crate
/// documentation](index.html#security-and-locations-of-referenced-files).
pub fn resolve_href(&self, href: &str) -> Result<AllowedUrl, AllowedUrlError> {
let url = Url::options()
.base_url(self.base_url.as_ref())
.parse(href)
.map_err(AllowedUrlError::UrlParseError)?;
// Allow loads of data: from any location
if url.scheme() == "data" {
return Ok(AllowedUrl(url));
}
// Queries are not allowed.
if url.query().is_some() {
return Err(AllowedUrlError::NoQueriesAllowed);
}
// Fragment identifiers are not allowed. They should have been stripped
// upstream, by NodeId.
if url.fragment().is_some() {
return Err(AllowedUrlError::NoFragmentIdentifierAllowed);
}
// All other sources require a base url
if self.base_url.is_none() {
return Err(AllowedUrlError::BaseRequired);
}
let base_url = self.base_url.as_ref().unwrap();
// Deny loads from differing URI schemes
if url.scheme() != base_url.scheme() {
return Err(AllowedUrlError::DifferentUriSchemes);
}
// resource: is allowed to load anything from other resources
if url.scheme() == "resource" {
return Ok(AllowedUrl(url));
}
// Non-file: isn't allowed to load anything
if url.scheme() != "file" {
return Err(AllowedUrlError::DisallowedScheme);
}
// The rest of this function assumes file: URLs; guard against
// incorrect refactoring.
assert!(url.scheme() == "file");
// If we have a base_uri of "file:///foo/bar.svg", and resolve an href of ".",
// Url.parse() will give us "file:///foo/". We don't want that, so check
// if the last path segment is empty - it will not be empty for a normal file.
if let Some(segments) = url.path_segments() {
if segments
.last()
.expect("URL path segments always contain at last 1 element")
.is_empty()
{
return Err(AllowedUrlError::NotSiblingOrChildOfBaseFile);
}
} else {
unreachable!("the file: URL cannot have an empty path");
}
// We have two file: URIs. Now canonicalize them (remove .. and symlinks, etc.)
// and see if the directories match
let url_path = url
.to_file_path()
.map_err(|_| AllowedUrlError::InvalidPath)?;
let base_path = base_url
.to_file_path()
.map_err(|_| AllowedUrlError::InvalidPath)?;
let base_parent = base_path.parent();
if base_parent.is_none() {
return Err(AllowedUrlError::BaseIsRoot);
}
let base_parent = base_parent.unwrap();
let path_canon = url_path
.canonicalize()
.map_err(|_| AllowedUrlError::CanonicalizationError)?;
let parent_canon = base_parent
.canonicalize()
.map_err(|_| AllowedUrlError::CanonicalizationError)?;
if path_canon.starts_with(parent_canon) {
// Finally, convert the canonicalized path back to a URL.
let path_to_url = Url::from_file_path(path_canon).unwrap();
Ok(AllowedUrl(path_to_url))
} else {
Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
}
}
}
/// Wrapper for URLs which are allowed to be loaded
///
/// SVG files can reference other files (PNG/JPEG images, other SVGs,
/// CSS files, etc.).  This object is constructed by checking whether
/// a specified `href` (a possibly-relative filename, for example)
/// should be allowed to be loaded, given the base URL of the SVG
/// being loaded.
///
/// The wrapped `Url` is a private field, so code outside this module cannot
/// wrap an arbitrary URL directly; in this file the values are produced by
/// `UrlResolver::resolve_href`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AllowedUrl(Url);
impl Deref for AllowedUrl {
type Target = Url;
fn deref(&self) -> &Url {
&self.0
}
}
impl fmt::Display for AllowedUrl {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn disallows_relative_file_with_no_base_file() {
    // Without a base URL a relative href cannot even be parsed.
    let resolver = UrlResolver::new(None);
    let outcome = resolver.resolve_href("foo.svg");

    assert!(matches!(
        outcome,
        Err(AllowedUrlError::UrlParseError(
            url::ParseError::RelativeUrlWithoutBase
        ))
    ));
}
#[test]
fn disallows_different_schemes() {
    // An http: base must not be able to pull in a file: URL.
    let base = Url::parse("http://example.com/malicious.svg").unwrap();
    let resolver = UrlResolver::new(Some(base));
    let outcome = resolver.resolve_href("file:///etc/passwd");

    assert!(matches!(
        outcome,
        Err(AllowedUrlError::DifferentUriSchemes)
    ));
}
/// Builds a `file:` URI string for the absolute path `p`.
///
/// On Windows the path is placed under drive `c:`; elsewhere it is appended
/// directly after `file://`.
fn make_file_uri(p: &str) -> String {
    let prefix = if cfg!(windows) {
        "file:///c:"
    } else {
        "file://"
    };
    format!("{}{}", prefix, p)
}
#[test]
fn disallows_base_is_root() {
    // A base of "/" has no parent directory to confine loads to.
    let root = Url::parse(&make_file_uri("/")).unwrap();
    let resolver = UrlResolver::new(Some(root));
    let outcome = resolver.resolve_href("foo.svg");

    assert!(matches!(outcome, Err(AllowedUrlError::BaseIsRoot)));
}
#[test]
fn disallows_non_file_scheme() {
    // Same scheme as the base, but http: is not a scheme we load from.
    let base = Url::parse("http://foo.bar/baz.svg").unwrap();
    let resolver = UrlResolver::new(Some(base));
    let outcome = resolver.resolve_href("foo.svg");

    assert!(matches!(outcome, Err(AllowedUrlError::DisallowedScheme)));
}
#[test]
fn allows_data_url_with_no_base_file() {
    // `data:` URLs carry their own content, so they must resolve even when
    // there is no base URL.  (An empty href is a relative reference and would
    // fail to parse without a base, so it cannot be used to exercise this.)
    let url_resolver = UrlResolver::new(None);
    assert_eq!(
        url_resolver
            .resolve_href("data:image/jpeg;base64,xxyyzz")
            .unwrap()
            .as_ref(),
        "data:image/jpeg;base64,xxyyzz",
    );
}
/// Turns a fixture path (relative to the directory the tests run from) into
/// an absolute `file:` URL.
///
/// Panics if the file cannot be canonicalized, e.g. because it does not exist.
fn url_from_test_fixtures(filename_relative_to_librsvg_srcdir: &str) -> Url {
    let canonical = PathBuf::from(filename_relative_to_librsvg_srcdir)
        .canonicalize()
        .expect("files from test fixtures are supposed to canonicalize");

    Url::from_file_path(canonical).unwrap()
}
/*#[test]
fn allows_relative() {
let base_url = url_from_test_fixtures("tests/fixtures/loading/bar.svg");
let url_resolver = UrlResolver::new(Some(base_url));
let resolved = url_resolver.resolve_href("foo.svg").unwrap();
let resolved_str = resolved.as_str();
assert!(resolved_str.ends_with("/loading/foo.svg"));
}
#[test]
fn allows_sibling() {
let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
"tests/fixtures/loading/bar.svg",
)));
let resolved = url_resolver
.resolve_href(url_from_test_fixtures("tests/fixtures/loading/foo.svg").as_str())
.unwrap();
let resolved_str = resolved.as_str();
assert!(resolved_str.ends_with("/loading/foo.svg"));
}
#[test]
fn allows_child_of_sibling() {
let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
"tests/fixtures/loading/bar.svg",
)));
let resolved = url_resolver
.resolve_href(url_from_test_fixtures("tests/fixtures/loading/subdir/baz.svg").as_str())
.unwrap();
let resolved_str = resolved.as_str();
assert!(resolved_str.ends_with("/loading/subdir/baz.svg"));
}
// Ignore on Windows since we test for /etc/passwd
#[cfg(unix)]
#[test]
fn disallows_non_sibling() {
let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
"tests/fixtures/loading/bar.svg",
)));
assert!(matches!(
url_resolver.resolve_href(&make_file_uri("/etc/passwd")),
Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
));
}*/
#[test]
fn disallows_queries() {
    // Everything after '?' is a query string, which is rejected outright.
    let base = Url::parse(&make_file_uri("/example/bar.svg")).unwrap();
    let resolver = UrlResolver::new(Some(base));
    let outcome = resolver.resolve_href(".?../../../../../../../../../../etc/passwd");

    assert!(matches!(outcome, Err(AllowedUrlError::NoQueriesAllowed)));
}
#[test]
fn disallows_weird_relative_uris() {
    let base = Url::parse(&make_file_uri("/example/bar.svg")).unwrap();
    let resolver = UrlResolver::new(Some(base));

    // Each of these tries to sneak out of the base directory using an odd
    // leading component; none of them may resolve successfully.
    let hostile_hrefs = [
        ".@../../../../../../../../../../etc/passwd",
        ".$../../../../../../../../../../etc/passwd",
        ".%../../../../../../../../../../etc/passwd",
        ".*../../../../../../../../../../etc/passwd",
        "~/../../../../../../../../../../etc/passwd",
    ];

    for href in hostile_hrefs.iter() {
        assert!(resolver.resolve_href(href).is_err());
    }
}
#[test]
fn disallows_dot_sibling() {
    let base = Url::parse(&make_file_uri("/example/bar.svg")).unwrap();
    let resolver = UrlResolver::new(Some(base));

    // "." resolves to the directory of the base file, not to a file.
    let dot = resolver.resolve_href(".");
    assert!(matches!(
        dot,
        Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
    ));

    // Everything after '#' is a fragment, which is rejected before any path checks.
    let dot_with_fragment = resolver.resolve_href(".#../../../../../../../../../../etc/passwd");
    assert!(matches!(
        dot_with_fragment,
        Err(AllowedUrlError::NoFragmentIdentifierAllowed)
    ));
}
#[test]
fn disallows_fragment() {
    // Fragment identifiers are rejected by UrlResolver::resolve_href() on purpose:
    // NodeId or the Iri machinery is expected to strip them before the href
    // ever reaches that function.
    let base = Url::parse("https://example.com/foo.svg").unwrap();
    let resolver = UrlResolver::new(Some(base));
    let outcome = resolver.resolve_href("bar.svg#fragment");

    assert!(matches!(
        outcome,
        Err(AllowedUrlError::NoFragmentIdentifierAllowed)
    ));
}
#[cfg(windows)]
#[test]
fn invalid_url_from_test_suite() {
    // Smoke test: resolving must return normally, whatever the result is.
    //
    // A base URL is needed for Url to panic; with `UrlResolver::new(None)`
    // it does not panic.
    let base = Url::parse("file:///c:/foo.svg").expect("initial url");
    let resolver = UrlResolver::new(Some(base));

    // With a base URL, `resolver.resolve_href("file://invalid.css")` panics,
    // so use a less problematic case, hopefully.
    let outcome = resolver.resolve_href("file://");

    if let Err(e) = outcome {
        println!("err: {}", e);
    } else {
        println!("yay!");
    }
}
}
|