1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
|
<?php
use MediaWiki\Shell\ShellboxClientFactory;
use Shellbox\RPC\RpcClient;
/**
* An ICU collation that uses a remote server to compute sort keys. This can be
* used in conjunction with $wgTempCategoryCollations to migrate to a different
* version of ICU.
*/
class RemoteIcuCollation extends Collation {
	/**
	 * Number of hexadecimal digits in the length prefix that encode() writes
	 * in front of every string and that decode() reads back.
	 */
	private const LENGTH_PREFIX_SIZE = 8;

	private RpcClient $rpcClient;
	private string $locale;

	/**
	 * @param ShellboxClientFactory $shellboxClientFactory Used to obtain the
	 *   RPC client for the 'icu-collation' service
	 * @param string $locale The ICU locale passed to the remote entry point
	 */
	public function __construct( ShellboxClientFactory $shellboxClientFactory, string $locale ) {
		$this->rpcClient = $shellboxClientFactory->getRpcClient(
			[ 'service' => 'icu-collation' ] );
		$this->locale = $locale;
	}

	/**
	 * Get the sort key for a single string.
	 *
	 * This is a thin wrapper around getSortKeys() with a one-element batch.
	 *
	 * @param string $string
	 * @return string
	 */
	public function getSortKey( $string ) {
		return $this->getSortKeys( [ $string ] )[0];
	}

	/**
	 * Encode an array of binary strings as a string
	 *
	 * Each string is written as its byte length, formatted as a zero-padded
	 * hexadecimal number of LENGTH_PREFIX_SIZE digits, followed by the raw
	 * bytes of the string. Array keys are not preserved.
	 *
	 * @param string[] $strings
	 * @return string
	 */
	private static function encode( $strings ) {
		$ret = '';
		foreach ( $strings as $s ) {
			$ret .= sprintf( '%0' . self::LENGTH_PREFIX_SIZE . 'x', strlen( $s ) ) . $s;
		}
		return $ret;
	}

	/**
	 * Decode the value returned by encode()
	 *
	 * @param string $blob
	 * @return string[] The decoded strings, as a list
	 * @throws RuntimeException If the blob is not a well-formed sequence of
	 *   length-prefixed strings (bad or truncated prefix, truncated payload)
	 */
	private static function decode( $blob ) {
		$total = strlen( $blob );
		$p = 0;
		$ret = [];
		while ( $p < $total ) {
			$prefix = substr( $blob, $p, self::LENGTH_PREFIX_SIZE );
			// Without this check intval() would quietly turn a short or
			// non-hexadecimal prefix into a partial value or 0, and the
			// remainder of the blob would be split at the wrong offsets.
			if ( strlen( $prefix ) !== self::LENGTH_PREFIX_SIZE || !ctype_xdigit( $prefix ) ) {
				throw new RuntimeException(
					__METHOD__ . ": malformed length prefix at offset $p" );
			}
			$len = intval( $prefix, 16 );
			$p += self::LENGTH_PREFIX_SIZE;
			// substr() would silently return fewer bytes than requested.
			if ( $len > $total - $p ) {
				throw new RuntimeException(
					__METHOD__ . ": truncated string at offset $p, expected $len bytes" );
			}
			$ret[] = substr( $blob, $p, $len );
			$p += $len;
		}
		return $ret;
	}

	/**
	 * Get the sort keys for a batch of strings with a single remote call.
	 *
	 * @param string[] $strings The input strings; keys may be arbitrary
	 * @return string[] The sort keys, indexed by the keys of $strings
	 * @throws RuntimeException If the remote result is not a string, cannot
	 *   be decoded, or does not contain exactly one sort key per input
	 */
	public function getSortKeys( $strings ) {
		if ( !count( $strings ) ) {
			// Nothing to do, avoid the remote call
			return [];
		}
		$blob = $this->rpcClient->call(
			'icu-collation',
			self::class . '::' . 'doGetSortKeys',
			[
				$this->locale,
				// The wire format carries values only; keys are restored below
				self::encode( array_values( $strings ) )
			],
			[
				// NOTE(review): presumably the classes whose source the remote
				// side needs in order to run doGetSortKeys(), and a flag that
				// parameters/result are binary strings -- confirm against the
				// Shellbox RpcClient documentation.
				'classes' => [ parent::class, self::class ],
				'binary' => true
			]
		);
		if ( !is_string( $blob ) ) {
			throw new RuntimeException(
				__METHOD__ . ': unexpected result of type ' . gettype( $blob ) .
				' from the remote collation service' );
		}
		$sortKeys = self::decode( $blob );
		// array_combine() requires both arrays to have the same size; check
		// here so that a bad response is reported with a meaningful message.
		if ( count( $sortKeys ) !== count( $strings ) ) {
			throw new RuntimeException(
				__METHOD__ . ': the remote collation service returned ' .
				count( $sortKeys ) . ' sort keys for ' . count( $strings ) . ' strings' );
		}
		return array_combine(
			array_keys( $strings ),
			$sortKeys
		);
	}

	/**
	 * Not implemented for this collation.
	 *
	 * @param string $string
	 * @return never
	 * @throws RuntimeException Always
	 */
	public function getFirstLetter( $string ) {
		// @phan-suppress-previous-line PhanPluginNeverReturnMethod
		throw new RuntimeException( __METHOD__ . ': not implemented' );
	}

	/**
	 * The remote entry point. Get sort keys for an encoded list of inputs.
	 *
	 * @param string $locale The ICU locale
	 * @param string $blob The input array encoded with encode()
	 * @return string The encoded result
	 * @throws RuntimeException If a collator cannot be created for $locale,
	 *   or if $blob is malformed
	 */
	public static function doGetSortKeys( $locale, $blob ) {
		$mainCollator = Collator::create( $locale );
		if ( !$mainCollator ) {
			throw new RuntimeException( "Invalid ICU locale specified for collation: $locale" );
		}
		// If the special suffix for numeric collation is present, turn on numeric collation.
		if ( str_ends_with( $locale, '-u-kn' ) ) {
			$mainCollator->setAttribute( Collator::NUMERIC_COLLATION, Collator::ON );
		}
		$ret = [];
		foreach ( self::decode( $blob ) as $string ) {
			$ret[] = $mainCollator->getSortKey( $string );
		}
		return self::encode( $ret );
	}
}
|