File: ExtractBody.php

package info (click to toggle)
mediawiki 1%3A1.43.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 417,464 kB
  • sloc: php: 1,062,949; javascript: 664,290; sql: 9,714; python: 5,458; xml: 3,489; sh: 1,131; makefile: 64
file content (111 lines) | stat: -rw-r--r-- 3,962 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
<?php

namespace MediaWiki\OutputTransform\Stages;

use MediaWiki\Config\ServiceOptions;
use MediaWiki\Html\HtmlHelper;
use MediaWiki\OutputTransform\ContentTextTransformStage;
use MediaWiki\Parser\Parser;
use MediaWiki\Parser\ParserOptions;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Parser\Parsoid\ParsoidParser;
use MediaWiki\Utils\UrlUtils;
use Psr\Log\LoggerInterface;
use Wikimedia\RemexHtml\Serializer\SerializerNode;

/**
 * Output-transform stage for Parsoid content: expands relative URLs
 * against the <base href> found in the document, then strips everything
 * but the <body>.
 *
 * @internal
 */
class ExtractBody extends ContentTextTransformStage {

	private UrlUtils $urlUtils;

	// @phan-suppress-next-line PhanUndeclaredTypeProperty
	private ?\MobileContext $mobileContext;

	/**
	 * @param ServiceOptions $options Forwarded to the parent stage
	 * @param LoggerInterface $logger Forwarded to the parent stage
	 * @param UrlUtils $urlUtils Used to expand the rewritten URLs
	 * @param ?\MobileContext $mobileContext When non-null and the mobile
	 *   domain is in use, the base href is mapped to its mobile URL
	 */
	public function __construct(
		ServiceOptions $options, LoggerInterface $logger, UrlUtils $urlUtils,
		// @phan-suppress-next-line PhanUndeclaredTypeParameter
		?\MobileContext $mobileContext
	) {
		parent::__construct( $options, $logger );
		$this->urlUtils = $urlUtils;
		$this->mobileContext = $mobileContext;
	}

	/**
	 * This stage only runs when the caller flags the content as Parsoid
	 * output through $options['isParsoidContent'].
	 *
	 * @param ParserOutput $po Not inspected here
	 * @param ?ParserOptions $popts Not inspected here
	 * @param array $options Transform options; only 'isParsoidContent' is read
	 * @return bool
	 */
	public function shouldRun( ParserOutput $po, ?ParserOptions $popts, array $options = [] ): bool {
		return ( $options['isParsoidContent'] ?? false );
	}

	/**
	 * Elements whose relative URLs are expanded, mapped to the attribute
	 * that carries the URL on that element.
	 */
	private const EXPAND_ELEMENTS = [
		'a' => 'href',
		'img' => 'resource',
		'video' => 'resource',
		'audio' => 'resource',
	];

	/**
	 * Rewrite URL attributes that start with "./" on the elements listed in
	 * EXPAND_ELEMENTS.
	 *
	 * A value starting with $pageFragmentPrefix is reduced to the part from
	 * the prefix's last character onwards (the '#' for prefixes built by
	 * transformText()). Any other value is prefixed with $baseHref and
	 * passed through UrlUtils::expand() with PROTO_RELATIVE.
	 *
	 * @param string $text HTML to process
	 * @param string $baseHref Prepended to relative URLs before expansion
	 * @param string $pageFragmentPrefix Prefix identifying same-page fragment
	 *   links; an empty string disables the fragment rewrite
	 * @param UrlUtils $urlUtils
	 * @return string The modified HTML
	 */
	private static function expandRelativeAttrs(
		string $text,
		string $baseHref,
		string $pageFragmentPrefix,
		UrlUtils $urlUtils
	): string {
		// T350952: Expand relative links
		// What we should be doing here is parsing as a title and then
		// using Title::getLocalURL()
		return HtmlHelper::modifyElements(
			$text,
			static function ( SerializerNode $node ): bool {
				$attr = self::EXPAND_ELEMENTS[$node->name] ?? null;
				if ( $attr === null ) {
					return false;
				}
				return str_starts_with( $node->attrs[$attr] ?? '', './' );
			},
			static function ( SerializerNode $node ) use ( $baseHref, $pageFragmentPrefix, $urlUtils ): SerializerNode {
				$attr = self::EXPAND_ELEMENTS[$node->name];
				$href = $node->attrs[$attr];
				// Convert page fragment urls to true fragment urls
				// This ensures that those fragments include any URL query params
				// and resolve internally. (Ex: on pages with ?useparsoid=1,
				// cite link fragments should not take you to a different page).
				if ( $pageFragmentPrefix !== '' && str_starts_with( $href, $pageFragmentPrefix ) ) {
					// The "- 1" keeps the final character of the prefix in the result
					$node->attrs[$attr] = substr( $href, strlen( $pageFragmentPrefix ) - 1 );
				} else {
					// If expand() returns null the attribute is set to false
					$node->attrs[$attr] = $urlUtils->expand( $baseHref . $href, PROTO_RELATIVE ) ?? false;
				}
				return $node;
			}
		);
	}

	/**
	 * Expand relative URLs in the page indicators and in the text, and
	 * reduce the text to what Parser::extractBody() returns.
	 *
	 * The base href is read from a <base href="..."> tag in $text and, when
	 * a mobile context reports the mobile domain is in use, replaced by its
	 * mobile URL. Indicators on $po are rewritten in place.
	 *
	 * @param string $text Full HTML document
	 * @param ParserOutput $po Source of the title and the indicators
	 * @param ?ParserOptions $popts Not used by this stage
	 * @param array &$options Not used by this stage
	 * @return string Extracted body with relative URLs expanded
	 */
	protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string {
		// T350952: temporary fix for subpage paths: use Parsoid's
		// <base href> to expand relative links
		$baseHref = '';
		// "[^>]*" (not "+") so that a tag with nothing between the closing
		// quote and ">" is matched as well as the self-closed "/>" form.
		if ( preg_match( '{<base href=["\']([^"\']+)["\'][^>]*>}', $text, $matches ) === 1 ) {
			$baseHref = $matches[1];
			// @phan-suppress-next-line PhanUndeclaredClassMethod
			if ( $this->mobileContext !== null && $this->mobileContext->usingMobileDomain() ) {
				// @phan-suppress-next-line PhanUndeclaredClassMethod
				$mobileUrl = $this->mobileContext->getMobileUrl( $baseHref );
				if ( $mobileUrl !== false ) {
					$baseHref = $mobileUrl;
				}
			}
		}
		$title = $po->getExtensionData( ParsoidParser::PARSOID_TITLE_KEY );
		// The string '0' is falsy in PHP but is not an absent title, so it
		// must not be reported as missing.
		if ( !$title && $title !== '0' ) {
			// We don't think this should ever trigger, but being conservative
			$this->logger->error( __METHOD__ . ": Missing title information in ParserOutput" );
		}
		$pageFragmentPrefix = "./" . $title . "#";
		foreach ( $po->getIndicators() as $name => $html ) {
			$po->setIndicator(
				$name,
				self::expandRelativeAttrs( $html, $baseHref, $pageFragmentPrefix, $this->urlUtils )
			);
		}
		$text = Parser::extractBody( $text );
		return self::expandRelativeAttrs( $text, $baseHref, $pageFragmentPrefix, $this->urlUtils );
	}
}