File: FilterProfiler.php

package info (click to toggle)
mediawiki 1%3A1.43.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (299 lines) | stat: -rw-r--r-- 8,195 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
<?php

namespace MediaWiki\Extension\AbuseFilter;

use MediaWiki\Config\ServiceOptions;
use MediaWiki\Title\Title;
use Psr\Log\LoggerInterface;
use Wikimedia\ObjectCache\BagOStuff;
use Wikimedia\Stats\IBufferingStatsdDataFactory;
use Wikimedia\WRStats\LocalEntityKey;
use Wikimedia\WRStats\WRStatsFactory;
use Wikimedia\WRStats\WRStatsReader;
use Wikimedia\WRStats\WRStatsWriter;

/**
 * This class is used to create, store, and retrieve profiling information for single filters and
 * groups of filters.
 *
 * Per-filter and per-group counters are written and read through WRStats, under
 * self::KEY_PREFIX. Aggregate runtime figures are additionally sent to statsd, and filters
 * whose runtime exceeds the configured limit are reported to the logger.
 *
 * @internal
 */
class FilterProfiler {
	public const SERVICE_NAME = 'AbuseFilterFilterProfiler';

	public const CONSTRUCTOR_OPTIONS = [
		'AbuseFilterConditionLimit',
		'AbuseFilterSlowFilterRuntimeLimit',
	];

	/**
	 * How long to keep profiling data in cache (in seconds)
	 */
	private const STATS_STORAGE_PERIOD = BagOStuff::TTL_DAY;

	/** The stats time bucket size: one twelfth of the storage period */
	private const STATS_TIME_STEP = self::STATS_STORAGE_PERIOD / 12;

	/** The WRStats spec common to all metrics */
	private const STATS_TEMPLATE = [
		'sequences' => [ [
			'timeStep' => self::STATS_TIME_STEP,
			'expiry' => self::STATS_STORAGE_PERIOD,
		] ],
	];

	/** Prefix passed to WRStats readers and writers created by this class */
	private const KEY_PREFIX = 'abusefilter-profile';

	/** @var WRStatsFactory */
	private $statsFactory;

	/** @var ServiceOptions */
	private $options;

	/** @var string */
	private $localWikiID;

	/** @var IBufferingStatsdDataFactory */
	private $statsd;

	/** @var LoggerInterface */
	private $logger;

	/** @var array WRStats metric specifications, keyed by metric name */
	private $statsSpecs;

	/**
	 * @param WRStatsFactory $statsFactory
	 * @param ServiceOptions $options Must provide every option in self::CONSTRUCTOR_OPTIONS
	 * @param string $localWikiID
	 * @param IBufferingStatsdDataFactory $statsd
	 * @param LoggerInterface $logger
	 */
	public function __construct(
		WRStatsFactory $statsFactory,
		ServiceOptions $options,
		string $localWikiID,
		IBufferingStatsdDataFactory $statsd,
		LoggerInterface $logger
	) {
		$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
		$this->statsFactory = $statsFactory;
		$this->options = $options;
		$this->localWikiID = $localWikiID;
		$this->statsd = $statsd;
		$this->logger = $logger;
		$this->statsSpecs = [
			'count' => self::STATS_TEMPLATE,
			'total' => self::STATS_TEMPLATE,
			'overflow' => self::STATS_TEMPLATE,
			'matches' => self::STATS_TEMPLATE,
			// Times are fractional, so this metric gets a finer resolution than the plain counters
			'total-time' => [ 'resolution' => 1e-3 ] + self::STATS_TEMPLATE,
			'total-cond' => self::STATS_TEMPLATE
		];
	}

	/**
	 * Reset all the profiling metrics stored for the given filter.
	 *
	 * @param int $filter
	 */
	public function resetFilterProfile( int $filter ): void {
		$this->newStatsWriter()->resetAll( [ $this->filterProfileKey( $filter ) ] );
	}

	/**
	 * Retrieve per-filter statistics.
	 *
	 * @param int $filter
	 * @return array Totals of the 'count', 'matches', 'total-time' and 'total-cond' metrics for
	 *   the filter, over the time range covering the latest self::STATS_STORAGE_PERIOD seconds
	 * @phan-return array{count:int,matches:int,total-time:float,total-cond:int}
	 */
	public function getFilterProfile( int $filter ): array {
		$reader = $this->newStatsReader();
		return $reader->total( $reader->getRates(
			[ 'count', 'matches', 'total-time', 'total-cond' ],
			$this->filterProfileKey( $filter ),
			$reader->latest( self::STATS_STORAGE_PERIOD )
		) );
	}

	/**
	 * Retrieve per-group statistics.
	 *
	 * @param string $group
	 * @return array Totals of the 'total', 'overflow', 'total-time', 'total-cond' and 'matches'
	 *   metrics for the group, over the time range covering the latest
	 *   self::STATS_STORAGE_PERIOD seconds
	 * @phan-return array{total:int,overflow:int,total-time:float,total-cond:int,matches:int}
	 */
	public function getGroupProfile( string $group ): array {
		$reader = $this->newStatsReader();
		return $reader->total( $reader->getRates(
			[ 'total', 'overflow', 'total-time', 'total-cond', 'matches' ],
			$this->filterProfileGroupKey( $group ),
			$reader->latest( self::STATS_STORAGE_PERIOD )
		) );
	}

	/**
	 * Queue per-filter profiling data on the given writer.
	 *
	 * This only queues the increments; the caller is responsible for calling flush() on the
	 * writer once it is done, so that several filters can share a single flush.
	 *
	 * @param WRStatsWriter $writer Writer created with the specs of this class
	 * @param int $filter
	 * @param float $time Time taken, in milliseconds
	 * @param int $conds
	 * @param bool $matched
	 */
	private function recordProfilingResult(
		WRStatsWriter $writer,
		int $filter,
		float $time,
		int $conds,
		bool $matched
	): void {
		$key = $this->filterProfileKey( $filter );
		$writer->incr( 'count', $key );
		if ( $matched ) {
			$writer->incr( 'matches', $key );
		}
		$writer->incr( 'total-time', $key, $time );
		$writer->incr( 'total-cond', $key, $conds );
	}

	/**
	 * Update global statistics
	 *
	 * @param string $group
	 * @param int $condsUsed The amount of used conditions
	 * @param float $totalTime Time taken, in milliseconds
	 * @param bool $anyMatch Whether at least one filter matched the action
	 */
	public function recordStats( string $group, int $condsUsed, float $totalTime, bool $anyMatch ): void {
		$writer = $this->newStatsWriter();
		$key = $this->filterProfileGroupKey( $group );

		$writer->incr( 'total', $key );
		$writer->incr( 'total-time', $key, $totalTime );
		$writer->incr( 'total-cond', $key, $condsUsed );

		// Increment overflow counter, if our condition limit overflowed
		if ( $condsUsed > $this->options->get( 'AbuseFilterConditionLimit' ) ) {
			$writer->incr( 'overflow', $key );
		}

		// Increment counter by 1 if there was at least one match
		if ( $anyMatch ) {
			$writer->incr( 'matches', $key );
		}
		$writer->flush();
	}

	/**
	 * Record runtime profiling data for all filters together
	 *
	 * All three values are reported through statsd's timing() method, under the
	 * 'abusefilter.runtime-profile.<wiki ID>.' key prefix.
	 *
	 * @param int $totalFilters
	 * @param int $totalConditions
	 * @param float $runtime NOTE(review): presumably in milliseconds, like the other timings
	 *   handled by this class -- confirm against the callers
	 * @codeCoverageIgnore
	 */
	public function recordRuntimeProfilingResult( int $totalFilters, int $totalConditions, float $runtime ): void {
		$keyPrefix = 'abusefilter.runtime-profile.' . $this->localWikiID . '.';

		$this->statsd->timing( $keyPrefix . 'runtime', $runtime );
		// The counts are deliberately left as timing() calls: changing the metric type would
		// change what is emitted to statsd.
		$this->statsd->timing( $keyPrefix . 'total_filters', $totalFilters );
		$this->statsd->timing( $keyPrefix . 'total_conditions', $totalConditions );
	}

	/**
	 * Record per-filter profiling, for all filters
	 *
	 * Statistics are stored for local filters only, while slow filters are logged regardless
	 * of whether they are local or global.
	 *
	 * @param Title $title
	 * @param array $data Profiling data
	 * @phan-param array<string,array{time:float,conds:int,result:bool}> $data
	 */
	public function recordPerFilterProfiling( Title $title, array $data ): void {
		$slowFilterThreshold = $this->options->get( 'AbuseFilterSlowFilterRuntimeLimit' );
		// A single writer is shared by all the filters and flushed once at the end. It is
		// created lazily, so that nothing is set up when there are no local filters to record.
		$writer = null;

		foreach ( $data as $filterName => $params ) {
			// PHP converts numeric-string array keys (e.g. '12') to integers: cast the key back,
			// so that we don't rely on implicit coercion for the string parameters below.
			$filterName = (string)$filterName;
			[ $filterID, $global ] = GlobalNameUtils::splitGlobalName( $filterName );
			// @todo Maybe add a parameter to recordProfilingResult to record global filters
			// data separately (in the foreign wiki)
			if ( !$global ) {
				if ( $writer === null ) {
					$writer = $this->newStatsWriter();
				}
				$this->recordProfilingResult(
					$writer,
					$filterID,
					$params['time'],
					$params['conds'],
					$params['result']
				);
			}

			if ( $params['time'] > $slowFilterThreshold ) {
				$this->recordSlowFilter(
					$title,
					$filterName,
					$params['time'],
					$params['conds'],
					$params['result'],
					$global
				);
			}
		}

		if ( $writer !== null ) {
			$writer->flush();
		}
	}

	/**
	 * Logs slow filter's runtime data for later analysis
	 *
	 * @param Title $title
	 * @param string $filterId Name of the filter, as used in the keys of the profiling data
	 * @param float $runtime
	 * @param int $totalConditions
	 * @param bool $matched
	 * @param bool $global
	 */
	private function recordSlowFilter(
		Title $title,
		string $filterId,
		float $runtime,
		int $totalConditions,
		bool $matched,
		bool $global
	): void {
		$this->logger->info(
			'Edit filter {filter_id} on {wiki} is taking longer than expected',
			[
				'wiki' => $this->localWikiID,
				'filter_id' => $filterId,
				'title' => $title->getPrefixedText(),
				'runtime' => $runtime,
				'matched' => $matched,
				'total_conditions' => $totalConditions,
				'global' => $global
			]
		);
	}

	/**
	 * Create a WRStats writer for the metrics handled by this class.
	 *
	 * @return WRStatsWriter
	 */
	private function newStatsWriter(): WRStatsWriter {
		return $this->statsFactory->createWriter(
			$this->statsSpecs,
			self::KEY_PREFIX
		);
	}

	/**
	 * Create a WRStats reader for the metrics handled by this class.
	 *
	 * @return WRStatsReader
	 */
	private function newStatsReader(): WRStatsReader {
		return $this->statsFactory->createReader(
			$this->statsSpecs,
			self::KEY_PREFIX
		);
	}

	/**
	 * Get the WRStats entity key used to store per-filter profiling data.
	 *
	 * @param int $filter
	 * @return LocalEntityKey
	 */
	private function filterProfileKey( int $filter ): LocalEntityKey {
		return new LocalEntityKey( [ 'filter', (string)$filter ] );
	}

	/**
	 * WRStats entity key used to store overall profiling data for rule groups
	 *
	 * @param string $group
	 * @return LocalEntityKey
	 */
	private function filterProfileGroupKey( string $group ): LocalEntityKey {
		return new LocalEntityKey( [ 'group', $group ] );
	}
}