1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339
|
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace MediaWiki\Languages;
use InvalidArgumentException;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\HookContainer\HookContainer;
use MediaWiki\HookContainer\HookRunner;
use MediaWiki\Language\LanguageCode;
use MediaWiki\MainConfigNames;
use MediaWiki\Title\MediaWikiTitleCodec;
use Wikimedia\ObjectCache\BagOStuff;
use Wikimedia\ObjectCache\HashBagOStuff;
/**
* A service that provides utilities to do with language names and codes.
*
* See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information.
*
* @since 1.34
* @ingroup Language
*/
class LanguageNameUtils {
/**
 * Value for the $inLanguage parameter of getLanguageName(s) that requests
 * autonyms (native names) instead of names translated into a given language.
 */
public const AUTONYMS = null;
/**
 * Value for the $include parameter of getLanguageName(s): return all known languages.
 */
public const ALL = 'all';
/**
 * Value for the $include parameter of getLanguageName(s): return only the languages
 * that are defined by MediaWiki or by the ExtraLanguageNames option.
 */
public const DEFINED = 'mw';
/**
 * Value for the $include parameter of getLanguageName(s): return only the languages
 * for which we have at least some localisation (a readable messages file).
 */
public const SUPPORTED = 'mwfile';
/**
 * Options listed in self::CONSTRUCTOR_OPTIONS, as passed to the constructor.
 * @var ServiceOptions
 */
private $options;
/**
 * Cache for language names, created lazily by getLanguageNames() and keyed by
 * "<language code or 'null'>:<include mode>".
 * @var HashBagOStuff|null
 */
private $languageNameCache;
/**
 * Cache for validity of language codes, as computed by isValidCode()
 * (language code => bool).
 * @var array
 */
private $validCodeCache = [];
/**
 * Names of the options that must be present in the ServiceOptions passed to the constructor.
 *
 * @internal For use by ServiceWiring
 */
public const CONSTRUCTOR_OPTIONS = [
MainConfigNames::ExtraLanguageNames,
MainConfigNames::UsePigLatinVariant,
MainConfigNames::UseXssLanguage,
];
/**
 * Runner used to invoke the hooks called from this class.
 * @var HookRunner
 */
private $hookRunner;
/**
 * Set up the service.
 *
 * @param ServiceOptions $options Must provide every option named in self::CONSTRUCTOR_OPTIONS
 * @param HookContainer $hookContainer Container wrapped in a HookRunner for running hooks
 */
public function __construct( ServiceOptions $options, HookContainer $hookContainer ) {
	// Fail early if the caller did not supply the full set of options.
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	$this->hookRunner = new HookRunner( $hookContainer );
	$this->options = $options;
}
/**
 * Tells whether MediaWiki ships any localisation for the given language tag,
 * i.e. whether a readable MessagesXx.php or xx.json file exists for it.
 *
 * @param string $code Language tag (in lower case)
 * @return bool Whether language is supported
 */
public function isSupportedLanguage( string $code ): bool {
	// 'qqq' is a special code for internal use; it is not supported even though
	// there is a qqq.json. Anything that is not a valid built-in code is rejected too.
	if ( $code === 'qqq' || !$this->isValidBuiltInCode( $code ) ) {
		return false;
	}

	// Suppress Pig Latin unless explicitly enabled.
	$isPigLatin = $code === 'en-x-piglatin';
	if ( $isPigLatin && !$this->options->get( MainConfigNames::UsePigLatinVariant ) ) {
		return false;
	}

	// Either kind of messages file is enough; the JSON file is only checked
	// when the PHP messages file is not readable.
	if ( is_readable( $this->getMessagesFileName( $code ) ) ) {
		return true;
	}

	return is_readable( $this->getJsonMessagesFileName( $code ) );
}
/**
 * Returns true if a language code string is of a valid form, whether or not it exists.
 * This includes codes which are used solely for customisation via the MediaWiki namespace.
 *
 * Results are memoised per code in $this->validCodeCache.
 *
 * @param string $code
 *
 * @return bool False if the language code contains dangerous characters, e.g. HTML special
 *  characters or characters that are illegal in MediaWiki titles.
 */
public function isValidCode( string $code ): bool {
	if ( isset( $this->validCodeCache[$code] ) ) {
		return $this->validCodeCache[$code];
	}

	$length = strlen( $code );

	// People think language codes are HTML-safe, so enforce it. Ideally, we should only
	// allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs
	// T39564, T39587, T38938.
	$isValid =
		// Protect against path traversal
		strcspn( $code, ":/\\\000&<>'\"" ) === $length &&
		!preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code ) &&
		// libicu sets ULOC_FULLNAME_CAPACITY to 157; stay comfortably lower
		$length <= 128;

	$this->validCodeCache[$code] = $isValid;

	return $isValid;
}
/**
 * Returns true if a language code is of a valid form for the purposes of internal customisation
 * of MediaWiki, via Messages*.php or *.json.
 *
 * A valid built-in code consists of 2 to 128 characters, each of which is a lower-case
 * ASCII letter, a digit or a hyphen.
 *
 * @param string $code
 * @return bool
 */
public function isValidBuiltInCode( string $code ): bool {
	// The D modifier makes "$" match only at the very end of the string. Without it,
	// "$" also matches just before a trailing newline, so "en\n" would be accepted
	// and end up in file names built from the code.
	return (bool)preg_match( '/^[a-z0-9-]{2,128}$/D', $code );
}
/**
 * Returns true if a language code is an IETF tag known to MediaWiki.
 *
 * A tag is known when it is listed in Data\Names::NAMES or when getLanguageName()
 * yields a non-empty name for it in its own language.
 *
 * @param string $tag
 *
 * @return bool
 */
public function isKnownLanguageTag( string $tag ): bool {
	// The validity check comes first as a quick escape for invalid input, to avoid
	// exceptions down the line when code tries to process tags which are not valid at all.
	return $this->isValidBuiltInCode( $tag )
		&& (
			isset( Data\Names::NAMES[$tag] )
			|| $this->getLanguageName( $tag, $tag ) !== ''
		);
}
/**
 * Get an array of language names, indexed by code.
 *
 * Results are kept in an in-process cache keyed by the normalised language
 * and the include mode.
 *
 * @param null|string $inLanguage Code of language in which to return the names
 *  Use self::AUTONYMS for autonyms (native names)
 * @param string $include One of:
 *  self::ALL All available languages
 *  self::DEFINED Only if the language is defined in MediaWiki or wgExtraLanguageNames
 *   (default)
 *  self::SUPPORTED Only if the language is in self::DEFINED *and* has a message file
 * @return array Language code => language name (sorted by key)
 */
public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) {
	if ( $inLanguage !== self::AUTONYMS ) {
		// Normalise to the internal, non-deprecated form of the code.
		$internalCode = LanguageCode::bcp47ToInternal( $inLanguage );
		$inLanguage = LanguageCode::replaceDeprecatedCodes( $internalCode );
	}

	// Autonyms are cached under the literal key part 'null'.
	$cacheKey = ( $inLanguage === self::AUTONYMS ? 'null' : $inLanguage ) . ":$include";

	// The cache is created on first use.
	if ( $this->languageNameCache === null ) {
		$this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] );
	}

	$computeNames = function () use ( $inLanguage, $include ) {
		return $this->getLanguageNamesUncached( $inLanguage, $include );
	};

	return $this->languageNameCache->getWithSetCallback(
		$cacheKey,
		BagOStuff::TTL_INDEFINITE,
		$computeNames
	);
}
/**
 * Uncached helper for getLanguageNames.
 *
 * Combines the names provided through onLanguageGetTranslatedLanguageNames() with the
 * names from the ExtraLanguageNames option and Data\Names::NAMES, then filters the
 * result according to $include.
 *
 * @param null|string $inLanguage As getLanguageNames
 * @param string $include As getLanguageNames
 * @return array Language code => language name (sorted by key)
 */
private function getLanguageNamesUncached( $inLanguage, $include ) {
	// If passed an invalid language code to use, fallback to en
	if ( $inLanguage !== self::AUTONYMS && !$this->isValidCode( $inLanguage ) ) {
		$inLanguage = 'en';
	}

	$names = [];

	if ( $inLanguage !== self::AUTONYMS ) {
		# TODO: also include for self::AUTONYMS, when this code is more efficient
		// @phan-suppress-next-line PhanTypeMismatchArgumentNullable False positive
		$this->hookRunner->onLanguageGetTranslatedLanguageNames( $names, $inLanguage );
	}

	// Entries from ExtraLanguageNames take precedence over the built-in names.
	$mwNames = $this->options->get( MainConfigNames::ExtraLanguageNames ) + Data\Names::NAMES;
	if ( !$this->options->get( MainConfigNames::UsePigLatinVariant ) ) {
		// Suppress Pig Latin unless explicitly enabled.
		unset( $mwNames['en-x-piglatin'] );
	}
	if ( $this->options->get( MainConfigNames::UseXssLanguage ) ) {
		$mwNames['x-xss'] = 'fake xss language (see $wgUseXssLanguage)';
	}

	foreach ( $mwNames as $mwCode => $mwName ) {
		# - Prefer own MediaWiki native name when not using the hook
		# - For other names just add if not added through the hook
		if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
			$names[$mwCode] = $mwName;
		}
	}

	if ( $include === self::ALL ) {
		ksort( $names );
		return $names;
	}

	// Restrict the result to the codes MediaWiki itself defines (including
	// ExtraLanguageNames), dropping codes that only came from the hook.
	$returnMw = [];
	$coreCodes = array_keys( $mwNames );
	foreach ( $coreCodes as $coreCode ) {
		$returnMw[$coreCode] = $names[$coreCode];
	}

	if ( $include === self::SUPPORTED ) {
		$namesMwFile = [];
		# We do this using a foreach over the codes instead of a directory loop so that messages
		# files in extensions will work correctly.
		foreach ( $returnMw as $code => $name ) {
			// A code that is not a valid built-in code cannot have a messages file, and
			// the file name getters would throw InvalidArgumentException for it. Treat it
			// as unsupported, which matches isSupportedLanguage(). The cast covers numeric
			// codes, which PHP stores as integer array keys.
			if ( !$this->isValidBuiltInCode( (string)$code ) ) {
				continue;
			}
			if ( is_readable( $this->getMessagesFileName( $code ) ) ||
				is_readable( $this->getJsonMessagesFileName( $code ) )
			) {
				$namesMwFile[$code] = $name;
			}
		}

		ksort( $namesMwFile );
		return $namesMwFile;
	}

	ksort( $returnMw );
	# self::DEFINED option; default if it's not one of the other two options
	# (self::ALL/self::SUPPORTED)
	return $returnMw;
}
/**
 * Get the name of a single language.
 *
 * The code is normalised to its internal, non-deprecated form before it is looked
 * up in the result of getLanguageNames().
 *
 * @param string $code The code of the language for which to get the name
 * @param null|string $inLanguage Code of language in which to return the name (self::AUTONYMS
 *  for autonyms)
 * @param string $include See getLanguageNames(), except this function defaults to self::ALL instead of
 *  self::DEFINED
 * @return string Language name or empty
 */
public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) {
	$internalCode = LanguageCode::bcp47ToInternal( $code );
	$lookupCode = LanguageCode::replaceDeprecatedCodes( $internalCode );

	$names = $this->getLanguageNames( $inLanguage, $include );

	// Unknown codes yield an empty string rather than an error.
	return $names[$lookupCode] ?? '';
}
/**
 * Build a file name for a certain language code.
 *
 * The code is mangled by upper-casing its first character and replacing every
 * hyphen with an underscore, e.g. "en-gb" becomes "En_gb".
 *
 * @param string $prefix Prepend this to the filename
 * @param string $code Language code
 * @param string $suffix Append this to the filename
 * @return string $prefix . $mangledCode . $suffix
 * @throws InvalidArgumentException If $code is not a valid built-in language code
 */
public function getFileName( $prefix, $code, $suffix = '.php' ) {
	if ( $this->isValidBuiltInCode( $code ) ) {
		$mangledCode = str_replace( '-', '_', ucfirst( $code ) );

		return $prefix . $mangledCode . $suffix;
	}

	throw new InvalidArgumentException( "Invalid language code \"$code\"" );
}
/**
 * Get the path of the PHP messages file (MessagesXx.php) for a language code.
 *
 * The default path lies under $IP/languages/messages. It is then passed to
 * onLanguage__getMessagesFileName() and whatever $file holds afterwards is returned
 * (presumably the hook receives it by reference so that handlers can override it;
 * verify against the hook interface).
 *
 * @param string $code
 * @return string
 */
public function getMessagesFileName( $code ) {
	global $IP;

	$prefix = "$IP/languages/messages/Messages";
	$fileName = $this->getFileName( $prefix, $code, '.php' );

	$this->hookRunner->onLanguage__getMessagesFileName( $code, $fileName );

	return $fileName;
}
/**
 * Get the path of the JSON messages file (xx.json) for a language code,
 * located under $IP/languages/i18n.
 *
 * @param string $code
 * @return string
 * @throws InvalidArgumentException If $code is not a valid built-in language code
 */
public function getJsonMessagesFileName( $code ) {
	global $IP;

	if ( $this->isValidBuiltInCode( $code ) ) {
		return "$IP/languages/i18n/$code.json";
	}

	throw new InvalidArgumentException( "Invalid language code \"$code\"" );
}
}
|