File: PathMatcher.php

package info (click to toggle)
mediawiki 1%3A1.43.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (236 lines) | stat: -rw-r--r-- 7,167 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
<?php

namespace MediaWiki\Rest\PathTemplateMatcher;

/**
 * A tree-based path routing algorithm.
 *
 * This container builds defined routing templates into a tree, allowing
 * paths to be efficiently matched against all templates. The match time is
 * independent of the number of registered path templates.
 *
 * Efficient matching comes at the cost of a potentially significant setup time.
 * We measured ~10ms for 1000 templates. Using getCacheData() and
 * newFromCache(), this setup time may be amortized over multiple requests.
 */
class PathMatcher {
	/**
	 * An array of trees indexed by the number of path components in the input.
	 *
	 * A tree node consists of an associative array in which the key is a match
	 * specifier string, and the value is another node. A leaf node, which is
	 * identifiable by its fixed depth in the tree, consists of an associative
	 * array with the following keys:
	 *   - template: The path template string
	 *   - paramNames: A list of parameter names extracted from the template
	 *   - userData: The user data supplied to add()
	 *
	 * A match specifier string may be either "*", which matches any path
	 * component, or a literal string prefixed with "=", which matches the
	 * specified deprefixed string literal.
	 *
	 * @var array
	 */
	private $treesByLength = [];

	/**
	 * Create a PathMatcher from cache data
	 *
	 * @param array $data The data array previously returned by getCacheData()
	 * @return PathMatcher
	 */
	public static function newFromCache( $data ) {
		$matcher = new self;
		$matcher->treesByLength = $data;
		return $matcher;
	}

	/**
	 * Get a data array for later use by newFromCache().
	 *
	 * The internal format is private to PathMatcher, but note that it includes
	 * any data passed as $userData to add(). The array returned will be
	 * serializable as long as all $userData values are serializable.
	 *
	 * @return array
	 */
	public function getCacheData() {
		return $this->treesByLength;
	}

	/**
	 * Determine whether a path template component is a parameter
	 *
	 * @param string $part
	 * @return bool
	 */
	private function isParam( $part ) {
		$partLength = strlen( $part );
		return $partLength > 2 && $part[0] === '{' && $part[$partLength - 1] === '}';
	}

	/**
	 * If a path template component is a parameter, return the parameter name.
	 * Otherwise, return false.
	 *
	 * @param string $part
	 * @return string|false
	 */
	private function getParamName( $part ) {
		if ( $this->isParam( $part ) ) {
			return substr( $part, 1, -1 );
		} else {
			return false;
		}
	}

	/**
	 * Recursively search the match tree, checking whether the proposed path
	 * template, passed as an array of component parts, can be added to the
	 * matcher without ambiguity.
	 *
	 * Ambiguity means that a path exists which matches multiple templates.
	 *
	 * The function calls itself recursively, incrementing $index so as to
	 * ignore a prefix of the input, in order to check deeper parts of the
	 * match tree.
	 *
	 * If a conflict is discovered, the conflicting leaf node is returned.
	 * Otherwise, false is returned.
	 *
	 * @param array $node The tree node to check against
	 * @param string[] $parts The array of path template parts
	 * @param int $index The current index into $parts
	 * @return array|false
	 */
	private function findConflict( $node, $parts, $index = 0 ) {
		if ( $index >= count( $parts ) ) {
			// If we reached the leaf node then a conflict is detected
			return $node;
		}
		$part = $parts[$index];
		$result = false;

		if ( $this->isParam( $part ) ) {
			foreach ( $node as $childNode ) {
				// Params and tree entries for trailing empty slashes ('=') do not conflict
				if ( !isset( $node['='] ) ) {
					$result = $this->findConflict( $childNode, $parts, $index + 1 );
					if ( $result !== false ) {
						break;
					}
				}
			}
		} else {
			if ( isset( $node["=$part"] ) ) {
				$result = $this->findConflict( $node["=$part"], $parts, $index + 1 );
			}

			// Trailing empty slashes (aka an empty $part) do not conflict with params
			if ( $result === false && isset( $node['*'] ) && $part !== '' ) {
				$result = $this->findConflict( $node['*'], $parts, $index + 1 );
			}
		}
		return $result;
	}

	/**
	 * Add a template to the matcher.
	 *
	 * The path template consists of components separated by "/". Each component
	 * may be either a parameter of the form {paramName}, or a literal string.
	 * A parameter matches any input path component, whereas a literal string
	 * matches itself.
	 *
	 * Path templates must not conflict with each other, that is, any input
	 * path must match at most one path template. If a path template conflicts
	 * with another already registered, this function throws a PathConflict
	 * exception.
	 *
	 * @param string $template The path template
	 * @param mixed $userData User data used to identify the matched route to
	 *   the caller of match()
	 * @throws PathConflict|PathSegmentException
	 */
	public function add( $template, $userData ) {
		// This will produce an empty part before any leading slash and after any trailing slash
		$parts = explode( '/', $template );
		$length = count( $parts );
		if ( !isset( $this->treesByLength[$length] ) ) {
			$this->treesByLength[$length] = [];
		}
		$tree =& $this->treesByLength[$length];

		// Disallow empty path parts within the path (but not leading or trailing).
		for ( $i = 1; $i < $length - 1; $i++ ) {
			if ( $parts[$i] == '' ) {
				throw new PathSegmentException( $template, $userData );
			}
		}

		$conflict = $this->findConflict( $tree, $parts );
		if ( $conflict !== false ) {
			throw new PathConflict( $template, $userData, $conflict );
		}

		$params = [];
		foreach ( $parts as $index => $part ) {
			$paramName = $this->getParamName( $part );
			if ( $paramName !== false ) {
				$params[] = $paramName;
				$key = '*';
			} else {
				$key = "=$part";
			}
			if ( $index === $length - 1 ) {
				$tree[$key] = [
					'template' => $template,
					'paramNames' => $params,
					'userData' => $userData
				];
			} elseif ( !isset( $tree[$key] ) ) {
				$tree[$key] = [];
			}
			$tree =& $tree[$key];
		}
	}

	/**
	 * Match a path against the current match trees.
	 *
	 * If the path matches a previously added path template, an array will be
	 * returned with the following keys:
	 *   - params: An array mapping parameter names to their detected values
	 *   - userData: The user data passed to add(), which identifies the route
	 *
	 * If the path does not match any template, false is returned.
	 *
	 * @param string $path
	 * @return array|false
	 */
	public function match( $path ) {
		$parts = explode( '/', $path );
		$length = count( $parts );
		if ( !isset( $this->treesByLength[$length] ) ) {
			return false;
		}
		$node = $this->treesByLength[$length];

		$paramValues = [];
		foreach ( $parts as $part ) {
			if ( isset( $node["=$part"] ) ) {
				$node = $node["=$part"];
			} elseif ( isset( $node['*'] ) ) {
				$node = $node['*'];
				$paramValues[] = $part;
			} else {
				return false;
			}
		}

		return [
			'params' => array_combine( $node['paramNames'], $paramValues ),
			'userData' => $node['userData']
		];
	}
}