File: PageBundleParserOutputConverter.php

package info (click to toggle)
mediawiki 1%3A1.43.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (122 lines) | stat: -rw-r--r-- 4,142 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
<?php

namespace MediaWiki\Parser\Parsoid;

use MediaWiki\Language\LanguageCode;
use MediaWiki\Parser\ParserOutput;
use Wikimedia\Parsoid\Core\PageBundle;

/**
 * Provides methods for conversion between PageBundle and ParserOutput
 * TODO: Convert to a trait once we drop support for PHP < 8.2 since
 * support for constants in traits was added in PHP 8.2
 * @since 1.40
 * @internal
 */
final class PageBundleParserOutputConverter {
	/**
	 * @var string Key used to store parsoid page bundle data in ParserOutput
	 */
	public const PARSOID_PAGE_BUNDLE_KEY = 'parsoid-page-bundle';

	/**
	 * We do not want instances of this class to be created;
	 * all functionality is exposed through static methods.
	 * @return void
	 */
	private function __construct() {
	}

	/**
	 * Creates a ParserOutput object containing the relevant data from
	 * the given PageBundle object.
	 *
	 * We need to inject data-parsoid and other properties into the
	 * parser output object for caching, so we can use it for VE edits
	 * and transformations.
	 *
	 * The new ParserOutput is created from the bundle's HTML; the remaining
	 * bundle data is attached via applyPageBundleDataToParserOutput().
	 *
	 * @param PageBundle $pageBundle
	 * @param ?ParserOutput $originalParserOutput Any non-parsoid metadata
	 *  from $originalParserOutput will be copied into the new ParserOutput object.
	 *
	 * @return ParserOutput
	 */
	public static function parserOutputFromPageBundle(
		PageBundle $pageBundle, ?ParserOutput $originalParserOutput = null
	): ParserOutput {
		$parserOutput = new ParserOutput( $pageBundle->html );
		if ( $originalParserOutput ) {
			// Merge the metadata first, so that the page bundle data applied
			// below is what ends up stored under PARSOID_PAGE_BUNDLE_KEY.
			$parserOutput->mergeHtmlMetaDataFrom( $originalParserOutput );
			$parserOutput->mergeTrackingMetaDataFrom( $originalParserOutput );
			$parserOutput->mergeInternalMetaDataFrom( $originalParserOutput );
		}
		self::applyPageBundleDataToParserOutput( $pageBundle, $parserOutput );
		return $parserOutput;
	}

	/**
	 * Given an existing ParserOutput and a PageBundle, applies the PageBundle data to the ParserOutput.
	 * NOTE: it does NOT apply the text of said pageBundle - this should be done by the calling method, if desired.
	 * This way, we can modify a ParserOutput's associated bundle without creating a new ParserOutput,
	 * which makes it easier to deal with in the OutputTransformPipeline.
	 *
	 * The 'parsoid', 'mw', 'version', 'headers' and 'contentmodel' properties of the bundle
	 * are stored as extension data under PARSOID_PAGE_BUNDLE_KEY; any property that is
	 * missing is stored as null. If the bundle's headers contain a 'content-language'
	 * entry, it is normalized and set as the language of the ParserOutput.
	 *
	 * @param PageBundle|\stdClass $pageBundle
	 * @param ParserOutput $parserOutput
	 * @internal
	 */
	public static function applyPageBundleDataToParserOutput(
		$pageBundle, ParserOutput $parserOutput
	): void {
		// Overwriting ExtensionData was deprecated in 1.38 but it's safe inside an OutputTransform pipeline,
		// which is the only place where this should happen right now.
		$parserOutput->setExtensionData(
			self::PARSOID_PAGE_BUNDLE_KEY,
			[
				'parsoid' => $pageBundle->parsoid ?? null,
				'mw' => $pageBundle->mw ?? null,
				'version' => $pageBundle->version ?? null,
				'headers' => $pageBundle->headers ?? null,
				'contentmodel' => $pageBundle->contentmodel ?? null,
			]
		);

		if ( isset( $pageBundle->headers['content-language'] ) ) {
			$lang = LanguageCode::normalizeNonstandardCodeAndWarn(
			// @phan-suppress-next-line PhanTypeArraySuspiciousNullable
				$pageBundle->headers['content-language']
			);
			$parserOutput->setLanguage( $lang );
		}
	}

	/**
	 * Returns a Parsoid PageBundle equivalent to the given ParserOutput.
	 *
	 * Bundle data is read from the extension data stored under
	 * PARSOID_PAGE_BUNDLE_KEY; missing entries fall back to defaults.
	 * If the ParserOutput has a language set, that language (as a BCP 47 code)
	 * is used for the 'content-language' header of the returned bundle,
	 * replacing any value found in the stored headers. All other stored
	 * headers are kept as they are.
	 *
	 * @param ParserOutput $parserOutput
	 *
	 * @return PageBundle
	 */
	public static function pageBundleFromParserOutput( ParserOutput $parserOutput ): PageBundle {
		$pageBundleData = $parserOutput->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY );
		$lang = $parserOutput->getLanguage();

		$headers = $pageBundleData['headers'] ?? [];

		if ( $lang ) {
			$headers['content-language'] = $lang->toBcp47Code();
		}

		return new PageBundle(
			$parserOutput->getRawText(),
			$pageBundleData['parsoid'] ?? [],
			$pageBundleData['mw'] ?? [],
			// It would be nice to have this be "null", but PageBundle::responseData()
			// chokes on that: T325137.
			$pageBundleData['version'] ?? '0.0.0',
			// Pass the headers computed above rather than the stored ones, so
			// that the content-language derived from the ParserOutput's
			// language is not discarded when stored headers exist.
			$headers,
			$pageBundleData['contentmodel'] ?? null
		);
	}

	/**
	 * Checks whether page bundle data has been stored in the given ParserOutput,
	 * i.e. whether extension data exists under PARSOID_PAGE_BUNDLE_KEY.
	 *
	 * @param ParserOutput $parserOutput
	 *
	 * @return bool
	 */
	public static function hasPageBundle( ParserOutput $parserOutput ): bool {
		return $parserOutput->getExtensionData( self::PARSOID_PAGE_BUNDLE_KEY ) !== null;
	}
}