From 201ae2e35438b7d8f7c831ba8ac33bfc035bbb0a Mon Sep 17 00:00:00 2001 From: Alex Pott <alex.a.pott@googlemail.com> Date: Wed, 4 Oct 2023 09:39:17 +0100 Subject: [PATCH] Issue #2441811 by longwave, daffie, andypost, edurenye, lauriii, joseph.olstad, Wim Leers, smustgrave, effulgentsia, alexpott, larowlan, chx, jibran: Upgrade filter system to HTML5 --- core/lib/Drupal/Component/Utility/Html.php | 64 ++-- .../Component/Utility/HtmlSerializerRules.php | 39 +++ .../Drupal/Component/Utility/composer.json | 3 +- .../Core/Render/PlaceholderGenerator.php | 6 +- .../modules/big_pipe/big_pipe.post_update.php | 13 + .../src/BigPipePlaceholderTestCases.php | 2 +- .../FunctionalJavascript/CKEditor5Test.php | 3 +- .../src/Kernel/WildcardHtmlSupportTest.php | 2 +- .../Kernel/EditorFileReferenceFilterTest.php | 20 +- .../src/Unit/EditorXssFilter/StandardTest.php | 6 +- .../filter/src/Plugin/Filter/FilterHtml.php | 13 +- .../src/Kernel/FilterCaptionTwigDebugTest.php | 2 +- .../tests/src/Kernel/FilterKernelTest.php | 291 ++++++++++-------- .../filter/tests/src/Unit/FilterHtmlTest.php | 4 +- .../src/Unit/FilterImageLazyLoadTest.php | 14 +- .../system/tests/src/Kernel/Mail/MailTest.php | 2 +- .../tests/src/Functional/StandardTest.php | 2 +- .../Tests/Component/Utility/HtmlTest.php | 2 +- .../Tests/Component/Utility/XssTest.php | 10 + 19 files changed, 296 insertions(+), 202 deletions(-) create mode 100644 core/lib/Drupal/Component/Utility/HtmlSerializerRules.php create mode 100644 core/modules/big_pipe/big_pipe.post_update.php diff --git a/core/lib/Drupal/Component/Utility/Html.php b/core/lib/Drupal/Component/Utility/Html.php index 4d460e068b19..d946e23ba9aa 100644 --- a/core/lib/Drupal/Component/Utility/Html.php +++ b/core/lib/Drupal/Component/Utility/Html.php @@ -2,6 +2,9 @@ namespace Drupal\Component\Utility; +use Masterminds\HTML5; +use Masterminds\HTML5\Serializer\Traverser; + /** * Provides DOMDocument helpers for parsing and serializing HTML strings. 
* @@ -146,7 +149,7 @@ public static function setIsAjax($is_ajax) { * This function ensures that each passed HTML ID value only exists once on * the page. By tracking the already returned ids, this function enables * forms, blocks, and other content to be output multiple times on the same - * page, without breaking (X)HTML validation. + * page, without breaking HTML validation. * * For already existing IDs, a counter is appended to the ID string. * Therefore, JavaScript and CSS code should not rely on any value that was @@ -258,49 +261,39 @@ public static function normalize($html) { /** * Parses an HTML snippet and returns it as a DOM object. * - * This function loads the body part of a partial (X)HTML document and returns - * a full \DOMDocument object that represents this document. + * This function loads the body part of a partial HTML document and returns a + * full \DOMDocument object that represents this document. * * Use \Drupal\Component\Utility\Html::serialize() to serialize this * \DOMDocument back to a string. * * @param string $html - * The partial (X)HTML snippet to load. Invalid markup will be corrected on + * The partial HTML snippet to load. Invalid markup will be corrected on * import. * * @return \DOMDocument - * A \DOMDocument that represents the loaded (X)HTML snippet. + * A \DOMDocument that represents the loaded HTML snippet. */ public static function load($html) { $document = <<<EOD -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml"> -<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head> -<body>!html</body> +<!DOCTYPE html> +<html> +<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head> +<body>$html</body> </html> EOD; - // PHP's \DOMDocument::saveXML() encodes carriage returns as &#13; so - // normalize all newlines to line feeds.
- $html = str_replace(["\r\n", "\r"], "\n", $html); - - // PHP's \DOMDocument serialization adds extra whitespace when the markup - // of the wrapping document contains newlines, so ensure we remove all - // newlines before injecting the actual HTML body to be processed. - $document = strtr($document, ["\n" => '', '!html' => $html]); - - $dom = new \DOMDocument(); - // Ignore warnings during HTML soup loading. - @$dom->loadHTML($document, LIBXML_NOBLANKS); - - return $dom; + // Instantiate the HTML5 parser, but without the HTML5 namespace being + // added to the DOM document. + $html5 = new HTML5(['disable_html_ns' => TRUE]); + return $html5->loadHTML($document); } /** * Converts the body of a \DOMDocument back to an HTML snippet. * - * The function serializes the body part of a \DOMDocument back to an (X)HTML - * snippet. The resulting (X)HTML snippet will be properly formatted to be + * The function serializes the body part of a \DOMDocument back to an HTML + * snippet. The resulting HTML snippet will be properly formatted to be * compatible with HTML user agents. * * @param \DOMDocument $document @@ -308,7 +301,7 @@ public static function load($html) { * node will be converted. * * @return string - * A valid (X)HTML snippet, as a string. + * A valid HTML snippet, as a string. */ public static function serialize(\DOMDocument $document) { $body_node = $document->getElementsByTagName('body')->item(0); @@ -321,10 +314,23 @@ public static function serialize(\DOMDocument $document) { foreach ($body_node->getElementsByTagName('style') as $node) { static::escapeCdataElement($node, '/*', '*/'); } + + // Serialize the body using our custom set of rules. 
+ // @see \Masterminds\HTML5::saveHTML() + $stream = fopen('php://temp', 'wb'); + $rules = new HtmlSerializerRules($stream); foreach ($body_node->childNodes as $node) { - $html .= $document->saveXML($node); + $traverser = new Traverser($node, $stream, $rules); + $traverser->walk(); } + $rules->unsetTraverser(); + $html = stream_get_contents($stream, -1, 0); + fclose($stream); } + + // Normalize all newlines. + $html = str_replace(["\r\n", "\r"], "\n", $html); + return $html; } @@ -455,13 +461,13 @@ public static function escape($text): string { * and email. * * @param string $html - * The partial (X)HTML snippet to load. Invalid markup will be corrected on + * The partial HTML snippet to load. Invalid markup will be corrected on * import. * @param string $scheme_and_host * The root URL, which has a URI scheme, host and optional port. * * @return string - * The updated (X)HTML snippet. + * The updated HTML snippet. */ public static function transformRootRelativeUrlsToAbsolute($html, $scheme_and_host) { assert(empty(array_diff(array_keys(parse_url($scheme_and_host)), ["scheme", "host", "port"])), '$scheme_and_host contains scheme, host and port at most.'); diff --git a/core/lib/Drupal/Component/Utility/HtmlSerializerRules.php b/core/lib/Drupal/Component/Utility/HtmlSerializerRules.php new file mode 100644 index 000000000000..3559850053d6 --- /dev/null +++ b/core/lib/Drupal/Component/Utility/HtmlSerializerRules.php @@ -0,0 +1,39 @@ +<?php + +declare(strict_types = 1); + +namespace Drupal\Component\Utility; + +use Masterminds\HTML5\Serializer\OutputRules; + +/** + * Drupal-specific HTML5 serializer rules. + * + * Drupal's XSS filtering cannot handle entities inside element attribute + * values. The XSS filtering was written based on W3C XML recommendations + * which constituted that the ampersand character (&) and the angle + * brackets (< and >) must not appear in their literal form in attribute + * values. 
This differs from the HTML living standard which permits angle + * brackets. + * + * @see core/modules/ckeditor5/js/ckeditor5_plugins/drupalHtmlEngine/src/drupalhtmlbuilder.js + */ +class HtmlSerializerRules extends OutputRules { + + /** + * {@inheritdoc} + */ + protected function escape($text, $attribute = FALSE) { + $text = parent::escape($text, $attribute); + + if ($attribute) { + $text = strtr($text, [ + '<' => '&lt;', + '>' => '&gt;', + ]); + } + + return $text; + } + +} diff --git a/core/lib/Drupal/Component/Utility/composer.json b/core/lib/Drupal/Component/Utility/composer.json index da6c4a096be8..f20959e228f8 100644 --- a/core/lib/Drupal/Component/Utility/composer.json +++ b/core/lib/Drupal/Component/Utility/composer.json @@ -7,7 +7,8 @@ "homepage": "https://www.drupal.org/project/drupal", "license": "GPL-2.0-or-later", "require": { - "php": ">=8.1.0" + "php": ">=8.1.0", + "masterminds/html5": "^2.7" }, "autoload": { "psr-4": { diff --git a/core/lib/Drupal/Core/Render/PlaceholderGenerator.php b/core/lib/Drupal/Core/Render/PlaceholderGenerator.php index f8906c449318..1cb379ea82e1 100644 --- a/core/lib/Drupal/Core/Render/PlaceholderGenerator.php +++ b/core/lib/Drupal/Core/Render/PlaceholderGenerator.php @@ -116,7 +116,11 @@ public function createPlaceholder(array $element) { $callback = $placeholder_render_array['#lazy_builder'][0]; $arguments = UrlHelper::buildQuery($placeholder_render_array['#lazy_builder'][1]); $token = Crypt::hashBase64(serialize($placeholder_render_array)); - $placeholder_markup = '<drupal-render-placeholder callback="' . Html::escape($callback) . '" arguments="' . Html::escape($arguments) . '" token="' . Html::escape($token) . '"></drupal-render-placeholder>'; + $placeholder_markup = '<drupal-render-placeholder callback="' . Html::escape($callback) . '"'; + if ($arguments !== '') { + $placeholder_markup .= ' arguments="' . Html::escape($arguments) . '"'; + } + $placeholder_markup .= ' token="' . Html::escape($token) .
'"></drupal-render-placeholder>'; // Build the placeholder element to return. $placeholder_element = []; diff --git a/core/modules/big_pipe/big_pipe.post_update.php b/core/modules/big_pipe/big_pipe.post_update.php new file mode 100644 index 000000000000..c28880042f08 --- /dev/null +++ b/core/modules/big_pipe/big_pipe.post_update.php @@ -0,0 +1,13 @@ +<?php + +/** + * @file + * Post update functions for Big Pipe. + */ + +/** + * Clear the render cache. + */ +function big_pipe_post_update_html5_placeholders() { + // Empty post_update hook. +} diff --git a/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php b/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php index e10da9aa0c30..602e1118ed81 100644 --- a/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php +++ b/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php @@ -303,7 +303,7 @@ public static function cases(ContainerInterface $container = NULL, AccountInterf '#lazy_builder' => ['\Drupal\big_pipe_test\BigPipeTestController::responseException', []], '#create_placeholder' => TRUE, ], - '<drupal-render-placeholder callback="\Drupal\big_pipe_test\BigPipeTestController::responseException" arguments="" token="' . $token . ' "></drupal-render-placeholder>', + '<drupal-render-placeholder callback="\Drupal\big_pipe_test\BigPipeTestController::responseException" arguments token="' . $token . 
' "></drupal-render-placeholder>', [ '#lazy_builder' => ['\Drupal\big_pipe_test\BigPipeTestController::responseException', []], ] diff --git a/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php b/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php index aa85bc1766c1..dc56f9bd1b9c 100644 --- a/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php +++ b/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php @@ -739,9 +739,8 @@ public function testFilterHtmlAllowedGlobalAttributes(): void { $this->waitForEditor(); $page->pressButton('Save'); - // @todo Remove the expected `xml:lang` attributes in https://www.drupal.org/project/drupal/issues/1333730 // cSpell:disable-next-line - $assert_session->responseContains('<p dir="ltr" lang="en" xml:lang="en">Hello World</p><p dir="rtl" lang="ar" xml:lang="ar">مرحبا بالعالم</p>'); + $assert_session->responseContains('<p dir="ltr" lang="en">Hello World</p><p dir="rtl" lang="ar">مرحبا بالعالم</p>'); } } diff --git a/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php b/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php index a5e0787f80c6..9a3023738a7c 100644 --- a/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php +++ b/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php @@ -159,7 +159,7 @@ public function providerGhsConfiguration(): array { ['alignment'], ], '<$text-container> with attribute from multiple plugins' => [ - '<p data-llama class"> <br>', + '<p data-llama class> <br>', ['<$text-container data-llama>', '<p class>'], [ [ diff --git a/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php b/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php index b764eedaef81..4c5d2555bc09 100644 --- a/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php +++ b/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php @@ -86,15
+86,15 @@ public function testEditorFileReferenceFilter() { $this->assertSame($input, $output->getProcessedText()); // One data-entity-uuid attribute. - $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; - $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; + $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">'; + $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">'; $output = $test($input); $this->assertSame($expected_output, $output->getProcessedText()); $this->assertEquals($cache_tag, $output->getCacheTags()); // One data-entity-uuid attribute with odd capitalization. $input = '<img src="llama.jpg" data-entity-type="file" DATA-entity-UUID = "' . $uuid . '" />'; - $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; + $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">'; $output = $test($input); $this->assertSame($expected_output, $output->getProcessedText()); $this->assertEquals($cache_tag, $output->getCacheTags()); @@ -107,7 +107,7 @@ public function testEditorFileReferenceFilter() { $this->assertEquals($cache_tag, $output->getCacheTags()); // One data-entity-uuid attribute with an invalid value. - $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="invalid-' . $uuid . '" />'; + $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="invalid-' . $uuid . '">'; $output = $test($input); $this->assertSame($input, $output->getProcessedText()); $this->assertEquals([], $output->getCacheTags()); @@ -115,8 +115,8 @@ public function testEditorFileReferenceFilter() { // Two different data-entity-uuid attributes. 
$input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; $input .= '<img src="alpaca.jpg" data-entity-type="file" data-entity-uuid="' . $uuid_2 . '" />'; - $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; - $expected_output .= '<img src="/' . $this->siteDirectory . '/files/alpaca.jpg" data-entity-type="file" data-entity-uuid="' . $uuid_2 . '" />'; + $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">'; + $expected_output .= '<img src="/' . $this->siteDirectory . '/files/alpaca.jpg" data-entity-type="file" data-entity-uuid="' . $uuid_2 . '">'; $output = $test($input); $this->assertSame($expected_output, $output->getProcessedText()); $this->assertEquals(Cache::mergeTags($cache_tag, $cache_tag_2), $output->getCacheTags()); @@ -124,8 +124,8 @@ public function testEditorFileReferenceFilter() { // Two identical data-entity-uuid attributes. $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; $input .= '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; - $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; - $expected_output .= '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; + $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">'; + $expected_output .= '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . 
'">'; $output = $test($input); $this->assertSame($expected_output, $output->getProcessedText()); $this->assertEquals($cache_tag, $output->getCacheTags()); @@ -140,14 +140,14 @@ public function testEditorFileReferenceFilter() { // Image dimensions are present. $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />'; - $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" ' . $dimensions . ' />'; + $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" ' . $dimensions . '>'; $output = $test($input); $this->assertSame($expected_output, $output->getProcessedText()); $this->assertEquals($cache_tag, $output->getCacheTags()); // Image dimensions are set manually. $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '"width="41" height="21" />'; - $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" width="41" height="21" />'; + $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" width="41" height="21">'; $output = $test($input); $this->assertSame($expected_output, $output->getProcessedText()); $this->assertEquals($cache_tag, $output->getCacheTags()); diff --git a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php index 81c4e858d83c..af2990ffbdad 100644 --- a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php +++ b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php @@ -517,12 +517,12 @@ public function providerTestFilterXss() { // @see \Drupal\editor\EditorXssFilter::filterXssDataAttributes() // The following two test cases verify that XSS attack vectors are filtered. 
- $data[] = ['<img src="butterfly.jpg" data-caption="<script>alert();</script>" />', '<img src="butterfly.jpg" data-caption="alert();" />']; - $data[] = ['<img src="butterfly.jpg" data-caption="<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>" />', '<img src="butterfly.jpg" data-caption="" />']; + $data[] = ['<img src="butterfly.jpg" data-caption="<script>alert();</script>" />', '<img src="butterfly.jpg" data-caption="alert();">']; + $data[] = ['<img src="butterfly.jpg" data-caption="<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>" />', '<img src="butterfly.jpg" data-caption>']; // When including HTML-tags as visible content, they are double-escaped. // This test case ensures that we leave that content unchanged. - $data[] = ['<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />', '<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />']; + $data[] = ['<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />', '<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;">']; return $data; } diff --git a/core/modules/filter/src/Plugin/Filter/FilterHtml.php b/core/modules/filter/src/Plugin/Filter/FilterHtml.php index 88cf3484a0aa..345ae193dea9 100644 --- a/core/modules/filter/src/Plugin/Filter/FilterHtml.php +++ b/core/modules/filter/src/Plugin/Filter/FilterHtml.php @@ -250,22 +250,17 @@ public function getHTMLRestrictions() { // Parse the allowed HTML setting, and gradually make the list of allowed // tags more specific. $restrictions = ['allowed' => []]; + $html = $this->settings['allowed_html']; - // Make all the tags self-closing, so they will be parsed into direct - // children of the body tag in the DomDocument. - $html = str_replace('>', ' />', $this->settings['allowed_html']); // Protect any trailing * characters in attribute names, since DomDocument // strips them as invalid. 
// cSpell:disable-next-line $star_protector = '__zqh6vxfbk3cg__'; $html = str_replace('*', $star_protector, $html); - $body_child_nodes = Html::load($html)->getElementsByTagName('body')->item(0)->childNodes; - foreach ($body_child_nodes as $node) { - if ($node->nodeType !== XML_ELEMENT_NODE) { - // Skip the empty text nodes inside tags. - continue; - } + $dom = Html::load($html); + $xpath = new \DOMXPath($dom); + foreach ($xpath->query('//body//*') as $node) { $tag = $node->tagName; // All attributes are already allowed on this tag, this is the most diff --git a/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php b/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php index 6ebffd1f810a..eaeca4304d25 100644 --- a/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php +++ b/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php @@ -53,7 +53,7 @@ public function testCaptionFilter() { // Data-caption attribute. $input = '<img src="llama.jpg" data-caption="Loquacious llama!" />'; - $expected = '<img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption>'; + $expected = '<img src="llama.jpg">' . "\n" . '<figcaption>Loquacious llama!</figcaption>'; $output = $test($input)->getProcessedText(); $this->assertStringContainsString($expected, $output); $this->assertStringContainsString("<!-- THEME HOOK: 'filter_caption' -->", $output); diff --git a/core/modules/filter/tests/src/Kernel/FilterKernelTest.php b/core/modules/filter/tests/src/Kernel/FilterKernelTest.php index 0fa0d2885460..ff20df0c95fc 100644 --- a/core/modules/filter/tests/src/Kernel/FilterKernelTest.php +++ b/core/modules/filter/tests/src/Kernel/FilterKernelTest.php @@ -60,40 +60,40 @@ public function testAlignFilter() { // Data-align attribute: all 3 allowed values. 
$input = '<img src="llama.jpg" data-align="left" />'; - $expected = '<img src="llama.jpg" class="align-left" />'; + $expected = '<img src="llama.jpg" class="align-left">'; $this->assertSame($expected, $test($input)->getProcessedText()); $input = '<img src="llama.jpg" data-align="center" />'; - $expected = '<img src="llama.jpg" class="align-center" />'; + $expected = '<img src="llama.jpg" class="align-center">'; $this->assertSame($expected, $test($input)->getProcessedText()); $input = '<img src="llama.jpg" data-align="right" />'; - $expected = '<img src="llama.jpg" class="align-right" />'; + $expected = '<img src="llama.jpg" class="align-right">'; $this->assertSame($expected, $test($input)->getProcessedText()); // Data-align attribute: a disallowed value. $input = '<img src="llama.jpg" data-align="left foobar" />'; - $expected = '<img src="llama.jpg" />'; + $expected = '<img src="llama.jpg">'; $this->assertSame($expected, $test($input)->getProcessedText()); // Empty data-align attribute. $input = '<img src="llama.jpg" data-align="" />'; - $expected = '<img src="llama.jpg" />'; + $expected = '<img src="llama.jpg">'; $this->assertSame($expected, $test($input)->getProcessedText()); // Ensure the filter also works with uncommon yet valid attribute quoting. $input = '<img src=llama.jpg data-align=right />'; - $expected = '<img src="llama.jpg" class="align-right" />'; + $expected = '<img src="llama.jpg" class="align-right">'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); // Security test: attempt to inject an additional class. $input = '<img src="llama.jpg" data-align="center another-class-here" />'; - $expected = '<img src="llama.jpg" />'; + $expected = '<img src="llama.jpg">'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); // Security test: attempt an XSS. 
$input = '<img src="llama.jpg" data-align="center \'onclick=\'alert(foo);" />'; - $expected = '<img src="llama.jpg" />'; + $expected = '<img src="llama.jpg">'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); } @@ -125,26 +125,38 @@ public function testCaptionFilter() { // Data-caption attribute. $input = '<img src="llama.jpg" data-caption="Loquacious llama!" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // Empty data-caption attribute. $input = '<img src="llama.jpg" data-caption="" />'; - $expected = '<img src="llama.jpg" />'; + $expected = '<img src="llama.jpg">'; $this->assertSame($expected, $test($input)->getProcessedText()); // HTML entities in the caption. $input = '<img src="llama.jpg" data-caption="“Loquacious llama!”" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>“Loquacious llama!”</figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>“Loquacious llama!”</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // HTML encoded as HTML entities in data-caption attribute.
$input = '<img src="llama.jpg" data-caption="<em>Loquacious llama!</em>" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption><em>Loquacious llama!</em></figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption><em>Loquacious llama!</em></figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); @@ -153,40 +165,64 @@ public function testCaptionFilter() { // not allowed by the HTML spec, but may happen when people manually write // HTML, so we explicitly support it. $input = '<img src="llama.jpg" data-caption="<em>Loquacious llama!</em>" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption><em>Loquacious llama!</em></figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption><em>Loquacious llama!</em></figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // Security test: attempt an XSS. $input = '<img src="llama.jpg" data-caption="<script>alert(\'Loquacious llama!\')</script>" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>alert(\'Loquacious llama!\')</figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>alert(\'Loquacious llama!\')</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // Ensure the filter also works with uncommon yet valid attribute quoting. 
$input = '<img src=llama.jpg data-caption=\'Loquacious llama!\' />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // Finally, ensure that this also works on any other tag. $input = '<video src="llama.jpg" data-caption="Loquacious llama!" />'; - $expected = '<figure role="group"><video src="llama.jpg"></video><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group"> +<video src="llama.jpg"></video> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); $input = '<foobar data-caption="Loquacious llama!">baz</foobar>'; - $expected = '<figure role="group"><foobar>baz</foobar><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group"> +<foobar>baz</foobar> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // Ensure the caption filter works for linked images. $input = '<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" data-caption="Loquacious llama!" /></a>'; - $expected = '<figure role="group"><a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" /></a>' . "\n" . 
'<figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group"> +<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg"></a> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); @@ -219,46 +255,74 @@ public function testCaptionFilter() { // All the tricky cases encountered at https://www.drupal.org/node/2105841. // A plain URL preceded by text. - $input = '<img data-caption="See https://www.drupal.org" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>See https://www.drupal.org</figcaption></figure>'; + $input = '<img data-caption="See https://www.drupal.org" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>See https://www.drupal.org</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); $this->assertSame($input, $test_editor_xss_filter($input)); // An anchor. - $input = '<img data-caption="This is a <a href="https://www.drupal.org">quick</a> test…" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>This is a <a href="https://www.drupal.org">quick</a> test…</figcaption></figure>'; + $input = '<img data-caption="This is a <a href="https://www.drupal.org">quick</a> test…" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>This is a <a href="https://www.drupal.org">quick</a> test…</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); $this->assertSame($input, $test_editor_xss_filter($input)); // A plain URL surrounded by parentheses. 
- $input = '<img data-caption="(https://www.drupal.org)" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>(https://www.drupal.org)</figcaption></figure>'; + $input = '<img data-caption="(https://www.drupal.org)" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>(https://www.drupal.org)</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); $this->assertSame($input, $test_editor_xss_filter($input)); // A source being credited. - $input = '<img data-caption="Source: Wikipedia" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Source: Wikipedia</figcaption></figure>'; + $input = '<img data-caption="Source: Wikipedia" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>Source: Wikipedia</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); $this->assertSame($input, $test_editor_xss_filter($input)); // A source being credited, without a space after the colon. - $input = '<img data-caption="Source:Wikipedia" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Source:Wikipedia</figcaption></figure>'; + $input = '<img data-caption="Source:Wikipedia" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>Source:Wikipedia</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); $this->assertSame($input, $test_editor_xss_filter($input)); // A pretty crazy edge case where we have two colons. 
- $input = '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Interesting (Scope resolution operator ::)</figcaption></figure>'; + $input = '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>Interesting (Scope resolution operator ::)</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); $this->assertSame($input, $test_editor_xss_filter($input)); // An evil anchor (to ensure XSS filtering is applied to the caption also). - $input = '<img data-caption="This is an <a href="javascript:alert();">evil</a> test…" src="llama.jpg" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>This is an <a href="alert();">evil</a> test…</figcaption></figure>'; + $input = '<img data-caption="This is an <a href="javascript:alert();">evil</a> test…" src="llama.jpg">'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>This is an <a href="alert();">evil</a> test…</figcaption> +</figure> +'; $this->assertSame($expected, $test_with_html_filter($input)); - $expected_xss_filtered = '<img data-caption="This is an <a href="alert();">evil</a> test…" src="llama.jpg" />'; + $expected_xss_filtered = '<img data-caption="This is an <a href="alert();">evil</a> test…" src="llama.jpg">'; $this->assertSame($expected_xss_filtered, $test_editor_xss_filter($input)); } @@ -286,17 +350,29 @@ public function testAlignAndCaptionFilters() { // Both data-caption and data-align attributes: all 3 allowed values for the // data-align attribute. $input = '<img src="llama.jpg" data-caption="Loquacious llama!" 
data-align="left" />'; - $expected = '<figure role="group" class="align-left"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group" class="align-left"> +<img src="llama.jpg"> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="center" />'; - $expected = '<figure role="group" class="align-center"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group" class="align-center"> +<img src="llama.jpg"> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="right" />'; - $expected = '<figure role="group" class="align-right"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group" class="align-right"> +<img src="llama.jpg"> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); @@ -304,14 +380,22 @@ public function testAlignAndCaptionFilters() { // Both data-caption and data-align attributes, but a disallowed data-align // attribute value. $input = '<img src="llama.jpg" data-caption="Loquacious llama!" 
data-align="left foobar" />'; - $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group"> +<img src="llama.jpg"> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); // Ensure both filters together work for linked images. $input = '<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" data-caption="Loquacious llama!" data-align="center" /></a>'; - $expected = '<figure role="group" class="align-center"><a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" /></a>' . "\n" . '<figcaption>Loquacious llama!</figcaption></figure>'; + $expected = '<figure role="group" class="align-center"> +<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg"></a> +<figcaption>Loquacious llama!</figcaption> +</figure> +'; $output = $test($input); $this->assertSame($expected, $output->getProcessedText()); $this->assertSame($attached_library, $output->getAttachments()); @@ -455,14 +539,13 @@ public function testHtmlFilter() { $f = (string) $filter->process('<code onerror> </code>', Language::LANGCODE_NOT_SPECIFIED); $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes.'); - // Note - this string has a decoded character. 
- $this->assertSame('<code> </code>', $f); + $this->assertSame('<code> </code>', $f); $f = (string) $filter->process('<br>', Language::LANGCODE_NOT_SPECIFIED); - $this->assertNormalized($f, '<br />', 'HTML filter should allow line breaks.'); + $this->assertNormalized($f, '<br>', 'HTML filter should allow line breaks.'); $f = (string) $filter->process('<br />', Language::LANGCODE_NOT_SPECIFIED); - $this->assertNormalized($f, '<br />', 'HTML filter should allow self-closing line breaks.'); + $this->assertNormalized($f, '<br>', 'HTML filter should allow self-closing line breaks.'); // All attributes of allowed tags are stripped by default. $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED); @@ -934,9 +1017,8 @@ public function testHtmlCorrectorFilter() { $f = Html::normalize('<div id="d">content'); $this->assertEquals('<div id="d">content</div>', $f, 'HTML corrector -- unclosed tag with attribute.'); - // XHTML slash for empty elements. 
$f = Html::normalize('<hr><br>'); - $this->assertEquals('<hr /><br />', $f, 'HTML corrector -- XHTML closing slash.'); + $this->assertEquals('<hr><br>', $f, 'HTML corrector -- void element.'); $f = Html::normalize('<P>test</P>'); $this->assertEquals('<p>test</p>', $f, 'HTML corrector -- Convert uppercased tags to proper lowercased ones.'); @@ -945,37 +1027,37 @@ public function testHtmlCorrectorFilter() { $this->assertEquals('<p>test</p>', $f, 'HTML corrector -- Convert uppercased tags to proper lowercased ones.'); $f = Html::normalize('test<hr />'); - $this->assertEquals('test<hr />', $f, 'HTML corrector -- Let proper XHTML pass through.'); + $this->assertEquals('test<hr>', $f, 'HTML corrector -- convert self-closing element to HTML5 void element.'); $f = Html::normalize('test<hr/>'); - $this->assertEquals('test<hr />', $f, 'HTML corrector -- Let proper XHTML pass through, but ensure there is a single space before the closing slash.'); + $this->assertEquals('test<hr>', $f, 'HTML corrector -- convert self-closing element to HTML5 void element.'); $f = Html::normalize('test<hr />'); - $this->assertEquals('test<hr />', $f, 'HTML corrector -- Let proper XHTML pass through, but ensure there are not too many spaces before the closing slash.'); + $this->assertEquals('test<hr>', $f, 'HTML corrector -- convert self-closing element with multiple spaces to HTML5 void element.'); $f = Html::normalize('<span class="test" />'); $this->assertEquals('<span class="test"></span>', $f, 'HTML corrector -- Convert XHTML that is properly formed but that would not be compatible with typical HTML user agents.'); $f = Html::normalize('test1<br class="test">test2'); - $this->assertEquals('test1<br class="test" />test2', $f, 'HTML corrector -- Automatically close single tags.'); + $this->assertEquals('test1<br class="test">test2', $f, 'HTML corrector -- Keep self-closing tags.'); $f = Html::normalize('line1<hr>line2'); - $this->assertEquals('line1<hr />line2', $f, 'HTML corrector -- 
Automatically close single tags.'); + $this->assertEquals('line1<hr>line2', $f, 'HTML corrector -- Keep self-closing tags.'); $f = Html::normalize('line1<HR>line2'); - $this->assertEquals('line1<hr />line2', $f, 'HTML corrector -- Automatically close single tags.'); + $this->assertEquals('line1<hr>line2', $f, 'HTML corrector -- Keep self-closing tags.'); $f = Html::normalize('<img src="http://example.com/test.jpg">test</img>'); - $this->assertEquals('<img src="http://example.com/test.jpg" />test', $f, 'HTML corrector -- Automatically close single tags.'); + $this->assertEquals('<img src="http://example.com/test.jpg">test', $f, 'HTML corrector -- Fix self-closing single tags.'); $f = Html::normalize('<br></br>'); - $this->assertEquals('<br />', $f, "HTML corrector -- Transform empty tags to a single closed tag if the tag's content model is EMPTY."); + $this->assertEquals('<br><br>', $f, "HTML corrector -- Transform empty tags to a self-closed tag if the tag's content model is EMPTY."); $f = Html::normalize('<div></div>'); $this->assertEquals('<div></div>', $f, "HTML corrector -- Do not transform empty tags to a single closed tag if the tag's content model is not EMPTY."); $f = Html::normalize('<p>line1<br/><hr/>line2</p>'); - $this->assertEquals('<p>line1<br /></p><hr />line2', $f, 'HTML corrector -- Move non-inline elements outside of inline containers.'); + $this->assertEquals('<p>line1<br></p><hr>line2', $f, 'HTML corrector -- Move non-inline elements outside of inline containers.'); $f = Html::normalize('<p>line1<div>line2</div></p>'); $this->assertEquals('<p>line1</p><div>line2</div>', $f, 'HTML corrector -- Move non-inline elements outside of inline containers.'); @@ -984,7 +1066,7 @@ public function testHtmlCorrectorFilter() { $this->assertEquals('<p>test</p><p>test</p>\n', $f, 'HTML corrector -- Auto-close improperly nested tags.'); $f = Html::normalize('<p>Line1<br><STRONG>bold stuff</b>'); - $this->assertEquals('<p>Line1<br /><strong>bold 
stuff</strong></p>', $f, 'HTML corrector -- Properly close unclosed tags, and remove useless closing tags.'); + $this->assertEquals('<p>Line1<br><strong>bold stuff</strong></p>', $f, 'HTML corrector -- Properly close unclosed tags, and remove useless closing tags.'); $f = Html::normalize('test <!-- this is a comment -->'); $this->assertEquals('test <!-- this is a comment -->', $f, 'HTML corrector -- Do not touch HTML comments.'); @@ -1013,112 +1095,59 @@ public function testHtmlCorrectorFilter() { $this->assertEquals('<p>دروبال</p>', $f, 'HTML corrector -- Encoding is correctly kept.'); // cSpell:enable - $f = Html::normalize('<script>alert("test")</script>'); - $this->assertEquals('<script> -//<![CDATA[ -alert("test") -//]]> -</script>', $f, 'HTML corrector -- CDATA added to script element'); + $html = '<script>alert("test")</script>'; + $this->assertEquals($html, Html::normalize($html), 'HTML corrector -- script element'); - $f = Html::normalize('<p><script>alert("test")</script></p>'); - $this->assertEquals('<p><script> -//<![CDATA[ -alert("test") -//]]> -</script></p>', $f, 'HTML corrector -- CDATA added to a nested script element'); + $html = '<p><script>alert("test")</script></p>'; + $this->assertEquals($html, Html::normalize($html), 'HTML corrector -- nested script element'); - $f = Html::normalize('<p><style> /* Styling */ body {color:red}</style></p>'); - $this->assertEquals('<p><style> -/*<![CDATA[*/ - /* Styling */ body {color:red} -/*]]>*/ -</style></p>', $f, 'HTML corrector -- CDATA added to a style element.'); + $html = '<p><style> /* Styling */ body {color:red}</style></p>'; + $this->assertEquals($html, Html::normalize($html), 'HTML corrector -- style element.'); - $filtered_data = Html::normalize('<p><style> -/*<![CDATA[*/ -/* Styling */ -body {color:red} -/*]]>*/ -</style></p>'); - $this->assertEquals('<p><style> + $html = '<p><style> /*<![CDATA[*/ /* Styling */ body {color:red} /*]]>*/ -</style></p>', $filtered_data, - new FormattableMarkup('HTML 
corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '/*<![CDATA[*/']) - ); +</style></p>'; + $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '/*<![CDATA[*/'])); - $filtered_data = Html::normalize('<p><style> -/*<![CDATA[*/ - /* Styling */ - body {color:red} -/*]]>*/ -</style></p>'); - $this->assertEquals('<p><style> + $html = '<p><style> /*<![CDATA[*/ /* Styling */ body {color:red} /*]]>*/ -</style></p>', $filtered_data, - new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--/*--><![CDATA[/* ><!--*/']) - ); +</style></p>'; + $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--/*--><![CDATA[/* ><!--*/'])); - $filtered_data = Html::normalize('<p><script> + $html = '<p><script> //<![CDATA[ alert("test"); //]]> -</script></p>'); - $this->assertEquals('<p><script> -//<![CDATA[ - alert("test"); -//]]> -</script></p>', $filtered_data, - new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--//--><![CDATA[// ><!--']) - ); +</script></p>'; + $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--//--><![CDATA[// ><!--'])); - $filtered_data = Html::normalize('<p><script> + $html = '<p><script> // <![CDATA[ alert("test"); //]]> -</script></p>'); - $this->assertEquals('<p><script> -// <![CDATA[ - alert("test"); -//]]> -</script></p>', $filtered_data, - new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[']) - ); +</script></p>'; + $this->assertEquals($html, 
Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA['])); - $filtered_data = Html::normalize('<p><script> -// <![CDATA[![CDATA[![CDATA[ - alert("test"); -//]]]]]]> -</script></p>'); - $this->assertEquals('<p><script> + $html = '<p><script> // <![CDATA[![CDATA[![CDATA[ alert("test"); //]]]]]]> -</script></p>', $filtered_data, - new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA[']) - ); +</script></p>'; + $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA['])); // Test calling Html::normalize() twice. - $filtered_data = Html::normalize('<p><script> + $html = '<p><script> // <![CDATA[![CDATA[![CDATA[ alert("test"); //]]]]]]> -</script></p>'); - $filtered_data = Html::normalize($filtered_data); - - $this->assertEquals('<p><script> -// <![CDATA[![CDATA[![CDATA[ - alert("test"); -//]]]]]]> -</script></p>', $filtered_data, - new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA[']) - ); - +</script></p>'; + $this->assertEquals($html, Html::normalize(Html::normalize($html)), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA['])); } /** diff --git a/core/modules/filter/tests/src/Unit/FilterHtmlTest.php b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php index 56740c8b3bef..7ffb1691dd09 100644 --- a/core/modules/filter/tests/src/Unit/FilterHtmlTest.php +++ b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php @@ -56,9 +56,7 @@ public function providerFilterAttributes() { ['<p dir="rtl" />', '<p dir="rtl"></p>'], ['<p dir="bogus" />', '<p></p>'], 
['<p id="first" />', '<p></p>'], - // The addition of xml:lang isn't especially desired, but is still valid - // HTML5. See https://www.drupal.org/node/1333730. - ['<p id="first" lang="en">text</p>', '<p lang="en" xml:lang="en">text</p>'], + ['<p id="first" lang="en">text</p>', '<p lang="en">text</p>'], ['<p style="display: none;" />', '<p></p>'], ['<code class="pretty invalid">foreach ($a as $b) {}</code>', '<code class="pretty">foreach ($a as $b) {}</code>'], ['<code class="boring pretty">foreach ($a as $b) {}</code>', '<code class="boring pretty">foreach ($a as $b) {}</code>'], diff --git a/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php b/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php index 6da46a4a55fa..9686beb343a0 100644 --- a/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php +++ b/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php @@ -50,27 +50,27 @@ public function providerHtml(): array { return [ 'lazy loading attribute already added' => [ 'input' => '<p><img src="foo.png" loading="lazy"></p>', - 'output' => '<p><img src="foo.png" loading="lazy" /></p>', + 'output' => '<p><img src="foo.png" loading="lazy"></p>', ], 'eager loading attribute already added' => [ 'input' => '<p><img src="foo.png" loading="eager"/></p>', - 'output' => '<p><img src="foo.png" loading="eager" /></p>', + 'output' => '<p><img src="foo.png" loading="eager"></p>', ], 'image dimensions provided' => [ 'input' => '<p><img src="foo.png" width="200" height="200"/></p>', - '<p><img src="foo.png" width="200" height="200" loading="lazy" /></p>', + 'output' => '<p><img src="foo.png" width="200" height="200" loading="lazy"></p>', ], 'width image dimensions provided' => [ 'input' => '<p><img src="foo.png" width="200"/></p>', - '<p><img src="foo.png" width="200" /></p>', + 'output' => '<p><img src="foo.png" width="200"></p>', ], 'height image dimensions provided' => [ 'input' => '<p><img src="foo.png" height="200"/></p>', - '<p><img 
src="foo.png" height="200" /></p>', + 'output' => '<p><img src="foo.png" height="200"></p>', ], 'invalid loading attribute' => [ 'input' => '<p><img src="foo.png" width="200" height="200" loading="foo"></p>', - 'output' => '<p><img src="foo.png" width="200" height="200" loading="lazy" /></p>', + 'output' => '<p><img src="foo.png" width="200" height="200" loading="lazy"></p>', ], 'no image tag' => [ 'input' => '<p>Lorem ipsum...</p>', @@ -78,7 +78,7 @@ public function providerHtml(): array { ], 'no image dimensions provided' => [ 'input' => '<p><img src="foo.png"></p>', - 'output' => '<p><img src="foo.png" /></p>', + 'output' => '<p><img src="foo.png"></p>', ], ]; } diff --git a/core/modules/system/tests/src/Kernel/Mail/MailTest.php b/core/modules/system/tests/src/Kernel/Mail/MailTest.php index 8398fd24ebd6..3b99732d6971 100644 --- a/core/modules/system/tests/src/Kernel/Mail/MailTest.php +++ b/core/modules/system/tests/src/Kernel/Mail/MailTest.php @@ -324,7 +324,7 @@ public function testRenderedElementsUseAbsolutePaths() { '#theme' => 'image', '#uri' => $input_path, ]; - $expected_html = "<img src=\"$expected_path\" alt=\"\" />"; + $expected_html = "<img src=\"$expected_path\" alt>\n"; // Send a test message that mail_cancel_test_mail_alter should cancel. \Drupal::service('plugin.manager.mail')->mail('mail_html_test', 'render_from_message_param', 'relative_url@example.com', $language_interface->getId(), ['message' => $render]); diff --git a/core/profiles/standard/tests/src/Functional/StandardTest.php b/core/profiles/standard/tests/src/Functional/StandardTest.php index 40a04624ac4a..15ed8479bbe9 100644 --- a/core/profiles/standard/tests/src/Functional/StandardTest.php +++ b/core/profiles/standard/tests/src/Functional/StandardTest.php @@ -85,7 +85,7 @@ public function testStandard() { $this->drupalLogin($this->adminUser); $this->drupalGet('node/1'); // Verify that a line break is present. 
- $this->assertSession()->responseContains('Then she picked out two somebodies,<br />Sally and me'); + $this->assertSession()->responseContains('Then she picked out two somebodies,<br>Sally and me'); $this->submitForm([ 'subject[0][value]' => 'Barfoo', 'comment_body[0][value]' => 'Then she picked out two somebodies, Sally and me', diff --git a/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php b/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php index 3ee9af464992..b2867ec012dd 100644 --- a/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php @@ -392,7 +392,7 @@ public function providerTestTransformRootRelativeUrlsToAbsolute() { // Double-character carriage return should be normalized. $data['line break with double special character'] = ["Test without links but with\r\nsome special characters", 'http://example.com', "Test without links but with\nsome special characters"]; - $data['line break with single special character'] = ["Test without links but with \nsome special characters", 'http://example.com', FALSE]; + $data['line break with single special character'] = ["Test without links but with \nsome special characters", 'http://example.com', "Test without links but with\nsome special characters"]; $data['carriage return within html'] = ["<a\rhref='/node'>My link</a>", 'http://example.com', '<a href="http://example.com/node">My link</a>']; return $data; diff --git a/core/tests/Drupal/Tests/Component/Utility/XssTest.php b/core/tests/Drupal/Tests/Component/Utility/XssTest.php index 7965b198a8bd..e076e977b392 100644 --- a/core/tests/Drupal/Tests/Component/Utility/XssTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/XssTest.php @@ -607,6 +607,16 @@ public function providerTestFilterXssAdminNotNormalized() { ]; } + /** + * Checks that escaped HTML embedded in an attribute is not filtered. 
+ * + * @see \Drupal\Component\Utility\HtmlSerializerRules + */ + public function testFilterNormalizedHtml5() { + $input = '<span data-caption="foo <em>bar</em>"></span>'; + $this->assertEquals($input, Xss::filter(Html::normalize($input), ['span'])); + } + /** * Asserts that a text transformed to lowercase with HTML entities decoded does contain a given string. * -- GitLab