1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
|
// Copyright 2013 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string_view>
#include "base/check.h"
#include "url/third_party/mozilla/url_parse.h"
#include "url/url_file.h"
#include "url/url_parse_internal.h"
// Interesting IE file:isms...
//
//   INPUT                      OUTPUT
//   =========================  ==============================
//   file:/foo/bar              file:///foo/bar
//      The result here seems totally invalid!?!? This isn't UNC.
//
//   file:/
//   file:// or any other number of slashes
//      IE6 doesn't do anything at all if you click on this link. No error:
//      nothing. IE6's history system seems to always color this link, so I'm
//      guessing that it maps internally to the empty URL.
//
//   C:\                        file:///C:/
//      When on a file: URL source page, this link will work. When over HTTP,
//      the file: URL will appear in the status bar but the link will not work
//      (security restriction for all file URLs).
//
//   file:foo/                  file:foo/     (invalid?!?!?)
//   file:/foo/                 file:///foo/  (invalid?!?!?)
//   file://foo/                file://foo/   (UNC to server "foo")
//   file:///foo/               file:///foo/  (invalid, seems to be a file)
//   file:////foo/              file://foo/   (UNC to server "foo")
//      Any more than four slashes is also treated as UNC.
//
//   file:C:/                   file://C:/
//   file:/C:/                  file://C:/
//      The number of slashes after "file:" doesn't matter if the thing
//      following it looks like an absolute drive path. Also, slashes and
//      backslashes are equally valid here.
namespace url {
namespace {
// Scans `spec` forward, starting at `begin_index`, and returns the position of
// the first character for which IsSlashOrBackslash() is true. When no such
// character exists in [begin_index, spec.size()), the scan runs off the end
// and `spec.size()` is returned. A `begin_index` that is already at or past
// the end is handed back unchanged.
template <typename CharT>
size_t FindNextSlash(std::basic_string_view<CharT> spec, size_t begin_index) {
  size_t pos = begin_index;
  for (; pos < spec.size(); ++pos) {
    if (IsSlashOrBackslash(spec[pos])) {
      break;
    }
  }
  return pos;
}
// Helper for DoParseFileURL that handles input shaped like a UNC path name.
//
// `after_slashes` is the index of the first character following the run of
// slashes that comes after the scheme. The text from there up to the next
// slash (or the end of `url`) becomes the host; whatever remains, starting at
// that slash, is passed to ParsePathInternal to fill in path, query and ref.
// If nothing remains, only the path is reset and query/ref are not written
// here. No other component of `parsed` is touched; DoParseFileURL owns those.
template <typename CharT>
void DoParseUNC(std::basic_string_view<CharT> url,
                size_t after_slashes,
                Parsed* parsed) {
  const int spec_end = base::checked_cast<int>(url.size());

  // Narrowing is fine: FindNextSlash never yields a value past the end of
  // `url`, and the length was just verified to fit in an int.
  const int host_end = static_cast<int>(FindNextSlash(url, after_slashes));

  // The span between the slashes and the next slash (or end of input) is the
  // UNC server name. For "file://foo/bar.txt" that is the host "foo", with
  // "/bar.txt" left over as the path; on Windows this later corresponds to
  // the filename "\\foo\bar.txt" in proper UNC notation.
  const bool host_is_empty = static_cast<size_t>(host_end) <= after_slashes;
  if (host_is_empty) {
    parsed->host.reset();
  } else {
    parsed->host = MakeRange(after_slashes, host_end);
  }

  // Nothing follows the host: there is no path to parse.
  if (host_end >= spec_end) {
    parsed->path.reset();
    return;
  }

  ParsePathInternal(url.data(), MakeRange(host_end, spec_end), &parsed->path,
                    &parsed->query, &parsed->ref);
}
// Helper for DoParseFileURL that handles input naming a local file.
//
// `path_begin` is the index in `url` at which the path starts. The host is
// reset, and everything from `path_begin` to the end of `url` is passed to
// ParsePathInternal to fill in path, query and ref. No other component of
// `parsed` is touched; DoParseFileURL owns those.
template <typename CharT>
void DoParseLocalFile(std::basic_string_view<CharT> url,
                      int path_begin,
                      Parsed* parsed) {
  // A local file has no server component.
  parsed->host.reset();

  const int spec_end = base::checked_cast<int>(url.size());
  ParsePathInternal(url.data(), MakeRange(path_begin, spec_end), &parsed->path,
                    &parsed->query, &parsed->ref);
}
// Backend for the external ParseFileURL() overloads; operates on either
// character type.
//
// Handles input that carries a scheme as well as input that starts directly
// with what would follow "file:" at the beginning of a spec. That is usually
// a slash, but needn't be; paths like "file:c:\foo" are allowed.
//
// Returns a Parsed whose scheme is filled in here and whose host, path, query
// and ref are filled in by DoParseUNC or DoParseLocalFile. For a spec that is
// empty after trimming, or that consists of only a scheme, the Parsed is
// returned right after the scheme step.
template <typename CharT>
Parsed DoParseFileURL(std::basic_string_view<CharT> url) {
  // Strip leading & trailing spaces and control characters.
  int begin = 0;
  int url_len = base::checked_cast<int>(url.size());
  TrimURL(url.data(), &begin, &url_len);

  // Count the slashes at the very start of the trimmed input. A spec that
  // begins with a slash is never searched for a scheme (see below).
  int num_slashes = CountConsecutiveSlashes(url.data(), begin, url_len);
  int after_scheme;
  size_t after_slashes;
  Parsed parsed;
#ifdef WIN32
  // See how many slashes there are. We want to handle cases like UNC but also
  // "/c:/foo". This is when there is no scheme, so we can allow pages to do
  // links like "c:/foo/bar" or "//foo/bar". This is also called by the
  // relative URL resolver when it determines there is an absolute URL, which
  // may give us input like "/c:/foo".
  after_slashes = begin + num_slashes;
  if (DoesBeginWindowsDriveSpec(url.data(), after_slashes, url_len)) {
    // Windows path, don't try to extract the scheme (for example, "c:\foo").
    after_scheme = after_slashes;
  } else if (DoesBeginUNCPath(url.data(), begin, url_len, false)) {
    // Windows UNC path: don't try to extract the scheme, but keep the slashes.
    after_scheme = begin;
  } else
#endif
  {
    // ExtractScheme doesn't understand the possibility of filenames with
    // colons in them, in which case it returns the entire spec up to the
    // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
    // the foo.c: scheme.
    //
    // The pointer handed to ExtractScheme is obtained through substr() rather
    // than `&url[begin]`: for an empty spec (and whenever trimming consumes
    // everything) `begin` equals `url.size()`, where string_view::operator[]
    // has undefined behavior, while substr() accepts a position equal to
    // size() and yields an empty view whose data() is `url.data() + begin`.
    if (!num_slashes &&
        ExtractScheme(url.substr(static_cast<size_t>(begin)).data(),
                      url_len - begin, &parsed.scheme)) {
      // Offset the results since we gave ExtractScheme a substring.
      parsed.scheme.begin += begin;
      after_scheme = parsed.scheme.end() + 1;
    } else {
      // No scheme found, remember that.
      parsed.scheme.reset();
      after_scheme = begin;
    }
  }

  // Handle empty specs, ones that contain only whitespace or control chars,
  // or that are just the scheme (for example "file:").
  if (after_scheme == url_len) {
    return parsed;
  }

  num_slashes = CountConsecutiveSlashes(url.data(), after_scheme, url_len);
  after_slashes = after_scheme + num_slashes;
#ifdef WIN32
  // Check whether the input is a drive again. We checked above for windows
  // drive specs, but that's only at the very beginning to see if we have a
  // scheme at all. This test will be duplicated in that case, but will
  // additionally handle all cases with a real scheme such as "file:///C:/".
  if (!DoesBeginWindowsDriveSpec(url.data(), after_slashes, url_len) &&
      num_slashes != 3) {
    // Anything not beginning with a drive spec ("c:\") on Windows is treated
    // as UNC, with the exception of three slashes which always means a file.
    // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
    DoParseUNC(url.substr(0, url_len), after_slashes, &parsed);
    return parsed;
  }
#else
  // file: URL with exactly 2 slashes is considered to have a host component.
  if (num_slashes == 2) {
    DoParseUNC(url.substr(0, url_len), after_slashes, &parsed);
    return parsed;
  }
#endif  // WIN32

  // Easy and common case, the full path immediately follows the scheme
  // (modulo slashes), as in "file://c:/foo". Just treat everything from
  // there to the end as the path. Empty hosts have 0 length instead of -1.
  // We include the last slash as part of the path if there is one.
  DoParseLocalFile(
      url.substr(0, url_len),
      num_slashes > 0 ? after_scheme + num_slashes - 1 : after_scheme, &parsed);
  return parsed;
}
} // namespace
// Parses `url`, a file URL spec made of `char` code units, and returns the
// resulting components. All work is done by the shared DoParseFileURL
// template.
Parsed ParseFileURL(std::string_view url) {
  return DoParseFileURL<char>(url);
}
// Parses `url`, a file URL spec made of `char16_t` code units, and returns the
// resulting components. All work is done by the shared DoParseFileURL
// template.
Parsed ParseFileURL(std::u16string_view url) {
  return DoParseFileURL<char16_t>(url);
}
} // namespace url
|