File: RemoteIcuCollation.php

package info (click to toggle)
mediawiki 1%3A1.43.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (107 lines) | stat: -rw-r--r-- 2,723 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
<?php

use MediaWiki\Shell\ShellboxClientFactory;
use Shellbox\RPC\RpcClient;

/**
 * An ICU collation that uses a remote server to compute sort keys. This can be
 * used in conjunction with $wgTempCategoryCollations to migrate to a different
 * version of ICU.
 *
 * Lists of strings are exchanged with the remote side as a single binary blob:
 * each string is preceded by its byte length written as exactly eight
 * hexadecimal digits (see encode() and decode()).
 */
class RemoteIcuCollation extends Collation {
	private RpcClient $rpcClient;
	private string $locale;

	/**
	 * @param ShellboxClientFactory $shellboxClientFactory Used to obtain the RPC
	 *   client for the 'icu-collation' service
	 * @param string $locale The ICU locale passed to the remote collator
	 */
	public function __construct( ShellboxClientFactory $shellboxClientFactory, string $locale ) {
		$this->rpcClient = $shellboxClientFactory->getRpcClient(
			[ 'service' => 'icu-collation' ] );
		$this->locale = $locale;
	}

	/**
	 * Get the sort key for a single string. This is a batch of one sent
	 * through getSortKeys(), so it performs one remote call per invocation.
	 *
	 * @param string $string
	 * @return string
	 */
	public function getSortKey( $string ) {
		return $this->getSortKeys( [ $string ] )[0];
	}

	/**
	 * Encode an array of binary strings as a string
	 *
	 * Each element is written as an 8-digit, zero-padded, lowercase hexadecimal
	 * byte length followed by the raw bytes. Array keys are not preserved.
	 * A length that does not fit in eight hex digits cannot be decoded again.
	 *
	 * @param string[] $strings
	 * @return string
	 */
	private static function encode( $strings ) {
		$ret = '';
		foreach ( $strings as $s ) {
			$ret .= sprintf( "%08x", strlen( $s ) ) . $s;
		}
		return $ret;
	}

	/**
	 * Decode the value returned by encode()
	 *
	 * The blob is validated while it is read, so that a corrupted or truncated
	 * message is reported instead of being turned into wrong strings.
	 *
	 * @param string $blob
	 * @return string[] The decoded strings, as a list indexed from 0
	 * @throws RuntimeException If a length header is not eight hex digits, or
	 *   if a declared length runs past the end of the blob
	 */
	private static function decode( $blob ) {
		$total = strlen( $blob );
		$p = 0;
		$ret = [];
		while ( $p < $total ) {
			$header = substr( $blob, $p, 8 );
			// intval() would quietly read garbage as 0, so check the header first.
			if ( !preg_match( '/^[0-9a-fA-F]{8}$/D', $header ) ) {
				throw new RuntimeException(
					__METHOD__ . ": malformed length header at offset $p" );
			}
			$len = intval( $header, 16 );
			$p += 8;
			// substr() would quietly clip a string that runs past the end.
			if ( $len > $total - $p ) {
				throw new RuntimeException(
					__METHOD__ . ": truncated string at offset $p" );
			}
			$ret[] = substr( $blob, $p, $len );
			$p += $len;
		}
		return $ret;
	}

	/**
	 * Get sort keys for several strings with a single remote call.
	 *
	 * @param string[] $strings
	 * @return string[] The sort keys, with the same keys and order as $strings
	 * @throws RuntimeException If the response is malformed or does not contain
	 *   exactly one sort key per input string
	 */
	public function getSortKeys( $strings ) {
		if ( !count( $strings ) ) {
			// Nothing to sort, so skip the remote round trip.
			return [];
		}
		$blob = $this->rpcClient->call(
			'icu-collation',
			self::class . '::' . 'doGetSortKeys',
			[
				$this->locale,
				self::encode( array_values( $strings ) )
			],
			[
				'classes' => [ parent::class, self::class ],
				'binary' => true
			]
		);
		$sortKeys = self::decode( $blob );
		// Fail with a descriptive message rather than the generic ValueError
		// that array_combine() raises for arrays of different sizes.
		if ( count( $sortKeys ) !== count( $strings ) ) {
			throw new RuntimeException(
				__METHOD__ . ': expected ' . count( $strings ) .
				' sort keys from the remote service, got ' . count( $sortKeys ) );
		}
		// The strings were sent as a plain list; restore the caller's keys.
		return array_combine( array_keys( $strings ), $sortKeys );
	}

	/**
	 * Not supported by this collation.
	 *
	 * @param string $string
	 * @return never
	 * @throws RuntimeException Always
	 */
	public function getFirstLetter( $string ) {
		// @phan-suppress-previous-line PhanPluginNeverReturnMethod
		throw new RuntimeException( __METHOD__ . ': not implemented' );
	}

	/**
	 * The remote entry point. Get sort keys for an encoded list of inputs.
	 *
	 * @param string $locale The ICU locale
	 * @param string $blob The input array encoded with encode()
	 * @return string The encoded result
	 * @throws RuntimeException If no collator can be created for $locale, or
	 *   if $blob is malformed
	 */
	public static function doGetSortKeys( $locale, $blob ) {
		$mainCollator = Collator::create( $locale );
		if ( !$mainCollator ) {
			throw new RuntimeException( "Invalid ICU locale specified for collation: $locale" );
		}

		// If the special suffix for numeric collation is present, turn on numeric collation.
		if ( str_ends_with( $locale, '-u-kn' ) ) {
			$mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
		}
		$ret = [];
		foreach ( self::decode( $blob ) as $string ) {
			$key = $mainCollator->getSortKey( $string );
			// Collator::getSortKey() returns false on failure. Encode that as an
			// empty key explicitly, so that the result keeps one entry per input
			// without relying on implicit false-to-string coercion.
			$ret[] = $key === false ? '' : $key;
		}
		return self::encode( $ret );
	}
}