From 201ae2e35438b7d8f7c831ba8ac33bfc035bbb0a Mon Sep 17 00:00:00 2001
From: Alex Pott <alex.a.pott@googlemail.com>
Date: Wed, 4 Oct 2023 09:39:17 +0100
Subject: [PATCH] Issue #2441811 by longwave, daffie, andypost, edurenye,
 lauriii, joseph.olstad, Wim Leers, smustgrave, effulgentsia, alexpott,
 larowlan, chx, jibran: Upgrade filter system to HTML5

---
 core/lib/Drupal/Component/Utility/Html.php    |  64 ++--
 .../Component/Utility/HtmlSerializerRules.php |  39 +++
 .../Drupal/Component/Utility/composer.json    |   3 +-
 .../Core/Render/PlaceholderGenerator.php      |   6 +-
 .../modules/big_pipe/big_pipe.post_update.php |  13 +
 .../src/BigPipePlaceholderTestCases.php       |   2 +-
 .../FunctionalJavascript/CKEditor5Test.php    |   3 +-
 .../src/Kernel/WildcardHtmlSupportTest.php    |   2 +-
 .../Kernel/EditorFileReferenceFilterTest.php  |  20 +-
 .../src/Unit/EditorXssFilter/StandardTest.php |   6 +-
 .../filter/src/Plugin/Filter/FilterHtml.php   |  13 +-
 .../src/Kernel/FilterCaptionTwigDebugTest.php |   2 +-
 .../tests/src/Kernel/FilterKernelTest.php     | 291 ++++++++++--------
 .../filter/tests/src/Unit/FilterHtmlTest.php  |   4 +-
 .../src/Unit/FilterImageLazyLoadTest.php      |  14 +-
 .../system/tests/src/Kernel/Mail/MailTest.php |   2 +-
 .../tests/src/Functional/StandardTest.php     |   2 +-
 .../Tests/Component/Utility/HtmlTest.php      |   2 +-
 .../Tests/Component/Utility/XssTest.php       |  10 +
 19 files changed, 296 insertions(+), 202 deletions(-)
 create mode 100644 core/lib/Drupal/Component/Utility/HtmlSerializerRules.php
 create mode 100644 core/modules/big_pipe/big_pipe.post_update.php

diff --git a/core/lib/Drupal/Component/Utility/Html.php b/core/lib/Drupal/Component/Utility/Html.php
index 4d460e068b19..d946e23ba9aa 100644
--- a/core/lib/Drupal/Component/Utility/Html.php
+++ b/core/lib/Drupal/Component/Utility/Html.php
@@ -2,6 +2,9 @@
 
 namespace Drupal\Component\Utility;
 
+use Masterminds\HTML5;
+use Masterminds\HTML5\Serializer\Traverser;
+
 /**
  * Provides DOMDocument helpers for parsing and serializing HTML strings.
  *
@@ -146,7 +149,7 @@ public static function setIsAjax($is_ajax) {
    * This function ensures that each passed HTML ID value only exists once on
    * the page. By tracking the already returned ids, this function enables
    * forms, blocks, and other content to be output multiple times on the same
-   * page, without breaking (X)HTML validation.
+   * page, without breaking HTML validation.
    *
    * For already existing IDs, a counter is appended to the ID string.
    * Therefore, JavaScript and CSS code should not rely on any value that was
@@ -258,49 +261,39 @@ public static function normalize($html) {
   /**
    * Parses an HTML snippet and returns it as a DOM object.
    *
-   * This function loads the body part of a partial (X)HTML document and returns
-   * a full \DOMDocument object that represents this document.
+   * This function loads the body part of a partial HTML document and returns a
+   * full \DOMDocument object that represents this document.
    *
    * Use \Drupal\Component\Utility\Html::serialize() to serialize this
    * \DOMDocument back to a string.
    *
    * @param string $html
-   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
+   *   The partial HTML snippet to load. Invalid markup will be corrected on
    *   import.
    *
    * @return \DOMDocument
-   *   A \DOMDocument that represents the loaded (X)HTML snippet.
+   *   A \DOMDocument that represents the loaded HTML snippet.
    */
   public static function load($html) {
     $document = <<<EOD
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head>
-<body>!html</body>
+<!DOCTYPE html>
+<html>
+<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>
+<body>$html</body>
 </html>
 EOD;
 
-    // PHP's \DOMDocument::saveXML() encodes carriage returns as &#13; so
-    // normalize all newlines to line feeds.
-    $html = str_replace(["\r\n", "\r"], "\n", $html);
-
-    // PHP's \DOMDocument serialization adds extra whitespace when the markup
-    // of the wrapping document contains newlines, so ensure we remove all
-    // newlines before injecting the actual HTML body to be processed.
-    $document = strtr($document, ["\n" => '', '!html' => $html]);
-
-    $dom = new \DOMDocument();
-    // Ignore warnings during HTML soup loading.
-    @$dom->loadHTML($document, LIBXML_NOBLANKS);
-
-    return $dom;
+    // Instantiate the HTML5 parser, but without the HTML5 namespace being
+    // added to the DOM document.
+    $html5 = new HTML5(['disable_html_ns' => TRUE]);
+    return $html5->loadHTML($document);
   }
 
   /**
    * Converts the body of a \DOMDocument back to an HTML snippet.
    *
-   * The function serializes the body part of a \DOMDocument back to an (X)HTML
-   * snippet. The resulting (X)HTML snippet will be properly formatted to be
+   * The function serializes the body part of a \DOMDocument back to an HTML
+   * snippet. The resulting HTML snippet will be properly formatted to be
    * compatible with HTML user agents.
    *
    * @param \DOMDocument $document
@@ -308,7 +301,7 @@ public static function load($html) {
    *   node will be converted.
    *
    * @return string
-   *   A valid (X)HTML snippet, as a string.
+   *   A valid HTML snippet, as a string.
    */
   public static function serialize(\DOMDocument $document) {
     $body_node = $document->getElementsByTagName('body')->item(0);
@@ -321,10 +314,23 @@ public static function serialize(\DOMDocument $document) {
       foreach ($body_node->getElementsByTagName('style') as $node) {
         static::escapeCdataElement($node, '/*', '*/');
       }
+
+      // Serialize the body using our custom set of rules.
+      // @see \Masterminds\HTML5::saveHTML()
+      $stream = fopen('php://temp', 'wb');
+      $rules = new HtmlSerializerRules($stream);
       foreach ($body_node->childNodes as $node) {
-        $html .= $document->saveXML($node);
+        $traverser = new Traverser($node, $stream, $rules);
+        $traverser->walk();
       }
+      $rules->unsetTraverser();
+      $html = stream_get_contents($stream, -1, 0);
+      fclose($stream);
     }
+
+    // Normalize all newlines to line feeds.
+    $html = str_replace(["\r\n", "\r"], "\n", $html);
+
     return $html;
   }
 
@@ -455,13 +461,13 @@ public static function escape($text): string {
    * and email.
    *
    * @param string $html
-   *   The partial (X)HTML snippet to load. Invalid markup will be corrected on
+   *   The partial HTML snippet to load. Invalid markup will be corrected on
    *   import.
    * @param string $scheme_and_host
    *   The root URL, which has a URI scheme, host and optional port.
    *
    * @return string
-   *   The updated (X)HTML snippet.
+   *   The updated HTML snippet.
    */
   public static function transformRootRelativeUrlsToAbsolute($html, $scheme_and_host) {
     assert(empty(array_diff(array_keys(parse_url($scheme_and_host)), ["scheme", "host", "port"])), '$scheme_and_host contains scheme, host and port at most.');
diff --git a/core/lib/Drupal/Component/Utility/HtmlSerializerRules.php b/core/lib/Drupal/Component/Utility/HtmlSerializerRules.php
new file mode 100644
index 000000000000..3559850053d6
--- /dev/null
+++ b/core/lib/Drupal/Component/Utility/HtmlSerializerRules.php
@@ -0,0 +1,39 @@
+<?php
+
+declare(strict_types = 1);
+
+namespace Drupal\Component\Utility;
+
+use Masterminds\HTML5\Serializer\OutputRules;
+
+/**
+ * Drupal-specific HTML5 serializer rules.
+ *
+ * Drupal's XSS filtering cannot handle entities inside element attribute
+ * values. The XSS filtering was written based on W3C XML recommendations
+ * which stipulated that the ampersand character (&) and the angle
+ * brackets (< and >) must not appear in their literal form in attribute
+ * values. This differs from the HTML living standard which permits angle
+ * brackets.
+ *
+ * @see core/modules/ckeditor5/js/ckeditor5_plugins/drupalHtmlEngine/src/drupalhtmlbuilder.js
+ */
+class HtmlSerializerRules extends OutputRules {
+
+  /**
+   * {@inheritdoc}
+   */
+  protected function escape($text, $attribute = FALSE) {
+    $text = parent::escape($text, $attribute);
+
+    if ($attribute) {
+      $text = strtr($text, [
+        '<' => '&lt;',
+        '>' => '&gt;',
+      ]);
+    }
+
+    return $text;
+  }
+
+}
diff --git a/core/lib/Drupal/Component/Utility/composer.json b/core/lib/Drupal/Component/Utility/composer.json
index da6c4a096be8..f20959e228f8 100644
--- a/core/lib/Drupal/Component/Utility/composer.json
+++ b/core/lib/Drupal/Component/Utility/composer.json
@@ -7,7 +7,8 @@
     "homepage": "https://www.drupal.org/project/drupal",
     "license": "GPL-2.0-or-later",
     "require": {
-        "php": ">=8.1.0"
+        "php": ">=8.1.0",
+        "masterminds/html5": "^2.7"
     },
     "autoload": {
         "psr-4": {
diff --git a/core/lib/Drupal/Core/Render/PlaceholderGenerator.php b/core/lib/Drupal/Core/Render/PlaceholderGenerator.php
index f8906c449318..1cb379ea82e1 100644
--- a/core/lib/Drupal/Core/Render/PlaceholderGenerator.php
+++ b/core/lib/Drupal/Core/Render/PlaceholderGenerator.php
@@ -116,7 +116,11 @@ public function createPlaceholder(array $element) {
     $callback = $placeholder_render_array['#lazy_builder'][0];
     $arguments = UrlHelper::buildQuery($placeholder_render_array['#lazy_builder'][1]);
     $token = Crypt::hashBase64(serialize($placeholder_render_array));
-    $placeholder_markup = '<drupal-render-placeholder callback="' . Html::escape($callback) . '" arguments="' . Html::escape($arguments) . '" token="' . Html::escape($token) . '"></drupal-render-placeholder>';
+    $placeholder_markup = '<drupal-render-placeholder callback="' . Html::escape($callback) . '"';
+    if ($arguments !== '') {
+      $placeholder_markup .= ' arguments="' . Html::escape($arguments) . '"';
+    }
+    $placeholder_markup .= ' token="' . Html::escape($token) . '"></drupal-render-placeholder>';
 
     // Build the placeholder element to return.
     $placeholder_element = [];
diff --git a/core/modules/big_pipe/big_pipe.post_update.php b/core/modules/big_pipe/big_pipe.post_update.php
new file mode 100644
index 000000000000..c28880042f08
--- /dev/null
+++ b/core/modules/big_pipe/big_pipe.post_update.php
@@ -0,0 +1,13 @@
+<?php
+
+/**
+ * @file
+ * Post update functions for Big Pipe.
+ */
+
+/**
+ * Clear the render cache.
+ */
+function big_pipe_post_update_html5_placeholders() {
+  // Empty post_update hook.
+}
diff --git a/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php b/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php
index e10da9aa0c30..602e1118ed81 100644
--- a/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php
+++ b/core/modules/big_pipe/tests/modules/big_pipe_test/src/BigPipePlaceholderTestCases.php
@@ -303,7 +303,7 @@ public static function cases(ContainerInterface $container = NULL, AccountInterf
         '#lazy_builder' => ['\Drupal\big_pipe_test\BigPipeTestController::responseException', []],
         '#create_placeholder' => TRUE,
       ],
-      '<drupal-render-placeholder callback="\Drupal\big_pipe_test\BigPipeTestController::responseException" arguments="" token="' . $token . ' "></drupal-render-placeholder>',
+      '<drupal-render-placeholder callback="\Drupal\big_pipe_test\BigPipeTestController::responseException" arguments token="' . $token . ' "></drupal-render-placeholder>',
       [
         '#lazy_builder' => ['\Drupal\big_pipe_test\BigPipeTestController::responseException', []],
       ]
diff --git a/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php b/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php
index aa85bc1766c1..dc56f9bd1b9c 100644
--- a/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php
+++ b/core/modules/ckeditor5/tests/src/FunctionalJavascript/CKEditor5Test.php
@@ -739,9 +739,8 @@ public function testFilterHtmlAllowedGlobalAttributes(): void {
     $this->waitForEditor();
     $page->pressButton('Save');
 
-    // @todo Remove the expected `xml:lang` attributes in https://www.drupal.org/project/drupal/issues/1333730
     // cSpell:disable-next-line
-    $assert_session->responseContains('<p dir="ltr" lang="en" xml:lang="en">Hello World</p><p dir="rtl" lang="ar" xml:lang="ar">مرحبا بالعالم</p>');
+    $assert_session->responseContains('<p dir="ltr" lang="en">Hello World</p><p dir="rtl" lang="ar">مرحبا بالعالم</p>');
   }
 
 }
diff --git a/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php b/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php
index a5e0787f80c6..9a3023738a7c 100644
--- a/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php
+++ b/core/modules/ckeditor5/tests/src/Kernel/WildcardHtmlSupportTest.php
@@ -159,7 +159,7 @@ public function providerGhsConfiguration(): array {
         ['alignment'],
       ],
       '<$text-container> with attribute from multiple plugins' => [
-        '<p data-llama class"> <br>',
+        '<p data-llama class> <br>',
         ['<$text-container data-llama>', '<p class>'],
         [
           [
diff --git a/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php b/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php
index b764eedaef81..4c5d2555bc09 100644
--- a/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php
+++ b/core/modules/editor/tests/src/Kernel/EditorFileReferenceFilterTest.php
@@ -86,15 +86,15 @@ public function testEditorFileReferenceFilter() {
     $this->assertSame($input, $output->getProcessedText());
 
     // One data-entity-uuid attribute.
-    $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
-    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
+    $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">';
+    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">';
     $output = $test($input);
     $this->assertSame($expected_output, $output->getProcessedText());
     $this->assertEquals($cache_tag, $output->getCacheTags());
 
     // One data-entity-uuid attribute with odd capitalization.
     $input = '<img src="llama.jpg" data-entity-type="file" DATA-entity-UUID =   "' . $uuid . '" />';
-    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
+    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">';
     $output = $test($input);
     $this->assertSame($expected_output, $output->getProcessedText());
     $this->assertEquals($cache_tag, $output->getCacheTags());
@@ -107,7 +107,7 @@ public function testEditorFileReferenceFilter() {
     $this->assertEquals($cache_tag, $output->getCacheTags());
 
     // One data-entity-uuid attribute with an invalid value.
-    $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="invalid-' . $uuid . '" />';
+    $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="invalid-' . $uuid . '">';
     $output = $test($input);
     $this->assertSame($input, $output->getProcessedText());
     $this->assertEquals([], $output->getCacheTags());
@@ -115,8 +115,8 @@ public function testEditorFileReferenceFilter() {
     // Two different data-entity-uuid attributes.
     $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
     $input .= '<img src="alpaca.jpg" data-entity-type="file" data-entity-uuid="' . $uuid_2 . '" />';
-    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
-    $expected_output .= '<img src="/' . $this->siteDirectory . '/files/alpaca.jpg" data-entity-type="file" data-entity-uuid="' . $uuid_2 . '" />';
+    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">';
+    $expected_output .= '<img src="/' . $this->siteDirectory . '/files/alpaca.jpg" data-entity-type="file" data-entity-uuid="' . $uuid_2 . '">';
     $output = $test($input);
     $this->assertSame($expected_output, $output->getProcessedText());
     $this->assertEquals(Cache::mergeTags($cache_tag, $cache_tag_2), $output->getCacheTags());
@@ -124,8 +124,8 @@ public function testEditorFileReferenceFilter() {
     // Two identical  data-entity-uuid attributes.
     $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
     $input .= '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
-    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
-    $expected_output .= '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
+    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">';
+    $expected_output .= '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '">';
     $output = $test($input);
     $this->assertSame($expected_output, $output->getProcessedText());
     $this->assertEquals($cache_tag, $output->getCacheTags());
@@ -140,14 +140,14 @@ public function testEditorFileReferenceFilter() {
 
     // Image dimensions are present.
     $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" />';
-    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" ' . $dimensions . ' />';
+    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" ' . $dimensions . '>';
     $output = $test($input);
     $this->assertSame($expected_output, $output->getProcessedText());
     $this->assertEquals($cache_tag, $output->getCacheTags());
 
     // Image dimensions are set manually.
     $input = '<img src="llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '"width="41" height="21" />';
-    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" width="41" height="21" />';
+    $expected_output = '<img src="/' . $this->siteDirectory . '/files/llama.jpg" data-entity-type="file" data-entity-uuid="' . $uuid . '" width="41" height="21">';
     $output = $test($input);
     $this->assertSame($expected_output, $output->getProcessedText());
     $this->assertEquals($cache_tag, $output->getCacheTags());
diff --git a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php
index 81c4e858d83c..af2990ffbdad 100644
--- a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php
+++ b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php
@@ -517,12 +517,12 @@ public function providerTestFilterXss() {
     // @see \Drupal\editor\EditorXssFilter::filterXssDataAttributes()
 
     // The following two test cases verify that XSS attack vectors are filtered.
-    $data[] = ['<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />', '<img src="butterfly.jpg" data-caption="alert();" />'];
-    $data[] = ['<img src="butterfly.jpg" data-caption="&lt;EMBED SRC=&quot;http://ha.ckers.org/xss.swf&quot; AllowScriptAccess=&quot;always&quot;&gt;&lt;/EMBED&gt;" />', '<img src="butterfly.jpg" data-caption="" />'];
+    $data[] = ['<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />', '<img src="butterfly.jpg" data-caption="alert();">'];
+    $data[] = ['<img src="butterfly.jpg" data-caption="&lt;EMBED SRC=&quot;http://ha.ckers.org/xss.swf&quot; AllowScriptAccess=&quot;always&quot;&gt;&lt;/EMBED&gt;" />', '<img src="butterfly.jpg" data-caption>'];
 
     // When including HTML-tags as visible content, they are double-escaped.
     // This test case ensures that we leave that content unchanged.
-    $data[] = ['<img src="butterfly.jpg" data-caption="&amp;lt;script&amp;gt;alert();&amp;lt;/script&amp;gt;" />', '<img src="butterfly.jpg" data-caption="&amp;lt;script&amp;gt;alert();&amp;lt;/script&amp;gt;" />'];
+    $data[] = ['<img src="butterfly.jpg" data-caption="&amp;lt;script&amp;gt;alert();&amp;lt;/script&amp;gt;" />', '<img src="butterfly.jpg" data-caption="&amp;lt;script&amp;gt;alert();&amp;lt;/script&amp;gt;">'];
 
     return $data;
   }
diff --git a/core/modules/filter/src/Plugin/Filter/FilterHtml.php b/core/modules/filter/src/Plugin/Filter/FilterHtml.php
index 88cf3484a0aa..345ae193dea9 100644
--- a/core/modules/filter/src/Plugin/Filter/FilterHtml.php
+++ b/core/modules/filter/src/Plugin/Filter/FilterHtml.php
@@ -250,22 +250,17 @@ public function getHTMLRestrictions() {
     // Parse the allowed HTML setting, and gradually make the list of allowed
     // tags more specific.
     $restrictions = ['allowed' => []];
+    $html = $this->settings['allowed_html'];
 
-    // Make all the tags self-closing, so they will be parsed into direct
-    // children of the body tag in the DomDocument.
-    $html = str_replace('>', ' />', $this->settings['allowed_html']);
     // Protect any trailing * characters in attribute names, since DomDocument
     // strips them as invalid.
     // cSpell:disable-next-line
     $star_protector = '__zqh6vxfbk3cg__';
     $html = str_replace('*', $star_protector, $html);
-    $body_child_nodes = Html::load($html)->getElementsByTagName('body')->item(0)->childNodes;
 
-    foreach ($body_child_nodes as $node) {
-      if ($node->nodeType !== XML_ELEMENT_NODE) {
-        // Skip the empty text nodes inside tags.
-        continue;
-      }
+    $dom = Html::load($html);
+    $xpath = new \DOMXPath($dom);
+    foreach ($xpath->query('//body//*') as $node) {
       $tag = $node->tagName;
 
       // All attributes are already allowed on this tag, this is the most
diff --git a/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php b/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php
index 6ebffd1f810a..eaeca4304d25 100644
--- a/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php
+++ b/core/modules/filter/tests/src/Kernel/FilterCaptionTwigDebugTest.php
@@ -53,7 +53,7 @@ public function testCaptionFilter() {
 
     // Data-caption attribute.
     $input = '<img src="llama.jpg" data-caption="Loquacious llama!" />';
-    $expected = '<img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption>';
+    $expected = '<img src="llama.jpg">' . "\n" . '<figcaption>Loquacious llama!</figcaption>';
     $output = $test($input)->getProcessedText();
     $this->assertStringContainsString($expected, $output);
     $this->assertStringContainsString("<!-- THEME HOOK: 'filter_caption' -->", $output);
diff --git a/core/modules/filter/tests/src/Kernel/FilterKernelTest.php b/core/modules/filter/tests/src/Kernel/FilterKernelTest.php
index 0fa0d2885460..ff20df0c95fc 100644
--- a/core/modules/filter/tests/src/Kernel/FilterKernelTest.php
+++ b/core/modules/filter/tests/src/Kernel/FilterKernelTest.php
@@ -60,40 +60,40 @@ public function testAlignFilter() {
 
     // Data-align attribute: all 3 allowed values.
     $input = '<img src="llama.jpg" data-align="left" />';
-    $expected = '<img src="llama.jpg" class="align-left" />';
+    $expected = '<img src="llama.jpg" class="align-left">';
     $this->assertSame($expected, $test($input)->getProcessedText());
     $input = '<img src="llama.jpg" data-align="center" />';
-    $expected = '<img src="llama.jpg" class="align-center" />';
+    $expected = '<img src="llama.jpg" class="align-center">';
     $this->assertSame($expected, $test($input)->getProcessedText());
     $input = '<img src="llama.jpg" data-align="right" />';
-    $expected = '<img src="llama.jpg" class="align-right" />';
+    $expected = '<img src="llama.jpg" class="align-right">';
     $this->assertSame($expected, $test($input)->getProcessedText());
 
     // Data-align attribute: a disallowed value.
     $input = '<img src="llama.jpg" data-align="left foobar" />';
-    $expected = '<img src="llama.jpg" />';
+    $expected = '<img src="llama.jpg">';
     $this->assertSame($expected, $test($input)->getProcessedText());
 
     // Empty data-align attribute.
     $input = '<img src="llama.jpg" data-align="" />';
-    $expected = '<img src="llama.jpg" />';
+    $expected = '<img src="llama.jpg">';
     $this->assertSame($expected, $test($input)->getProcessedText());
 
     // Ensure the filter also works with uncommon yet valid attribute quoting.
     $input = '<img src=llama.jpg data-align=right />';
-    $expected = '<img src="llama.jpg" class="align-right" />';
+    $expected = '<img src="llama.jpg" class="align-right">';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
 
     // Security test: attempt to inject an additional class.
     $input = '<img src="llama.jpg" data-align="center another-class-here" />';
-    $expected = '<img src="llama.jpg" />';
+    $expected = '<img src="llama.jpg">';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
 
     // Security test: attempt an XSS.
     $input = '<img src="llama.jpg" data-align="center \'onclick=\'alert(foo);" />';
-    $expected = '<img src="llama.jpg" />';
+    $expected = '<img src="llama.jpg">';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
   }
@@ -125,26 +125,38 @@ public function testCaptionFilter() {
 
     // Data-caption attribute.
     $input = '<img src="llama.jpg" data-caption="Loquacious llama!" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // Empty data-caption attribute.
     $input = '<img src="llama.jpg" data-caption="" />';
-    $expected = '<img src="llama.jpg" />';
+    $expected = '<img src="llama.jpg">';
     $this->assertSame($expected, $test($input)->getProcessedText());
 
     // HTML entities in the caption.
     $input = '<img src="llama.jpg" data-caption="&ldquo;Loquacious llama!&rdquo;" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>“Loquacious llama!”</figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>“Loquacious llama!”</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // HTML encoded as HTML entities in data-caption attribute.
     $input = '<img src="llama.jpg" data-caption="&lt;em&gt;Loquacious llama!&lt;/em&gt;" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption><em>Loquacious llama!</em></figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption><em>Loquacious llama!</em></figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
@@ -153,40 +165,64 @@ public function testCaptionFilter() {
     // not allowed by the HTML spec, but may happen when people manually write
     // HTML, so we explicitly support it.
     $input = '<img src="llama.jpg" data-caption="<em>Loquacious llama!</em>" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption><em>Loquacious llama!</em></figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption><em>Loquacious llama!</em></figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // Security test: attempt an XSS.
     $input = '<img src="llama.jpg" data-caption="<script>alert(\'Loquacious llama!\')</script>" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>alert(\'Loquacious llama!\')</figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>alert(\'Loquacious llama!\')</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // Ensure the filter also works with uncommon yet valid attribute quoting.
     $input = '<img src=llama.jpg data-caption=\'Loquacious llama!\' />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // Finally, ensure that this also works on any other tag.
     $input = '<video src="llama.jpg" data-caption="Loquacious llama!" />';
-    $expected = '<figure role="group"><video src="llama.jpg"></video><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group">
+<video src="llama.jpg"></video>
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
     $input = '<foobar data-caption="Loquacious llama!">baz</foobar>';
-    $expected = '<figure role="group"><foobar>baz</foobar><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group">
+<foobar>baz</foobar>
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // Ensure the caption filter works for linked images.
     $input = '<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" data-caption="Loquacious llama!" /></a>';
-    $expected = '<figure role="group"><a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" /></a>' . "\n" . '<figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group">
+<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg"></a>
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
@@ -219,46 +255,74 @@ public function testCaptionFilter() {
 
     // All the tricky cases encountered at https://www.drupal.org/node/2105841.
     // A plain URL preceded by text.
-    $input = '<img data-caption="See https://www.drupal.org" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>See https://www.drupal.org</figcaption></figure>';
+    $input = '<img data-caption="See https://www.drupal.org" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>See https://www.drupal.org</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
     $this->assertSame($input, $test_editor_xss_filter($input));
 
     // An anchor.
-    $input = '<img data-caption="This is a &lt;a href=&quot;https://www.drupal.org&quot;&gt;quick&lt;/a&gt; test…" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>This is a <a href="https://www.drupal.org">quick</a> test…</figcaption></figure>';
+    $input = '<img data-caption="This is a &lt;a href=&quot;https://www.drupal.org&quot;&gt;quick&lt;/a&gt; test…" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>This is a <a href="https://www.drupal.org">quick</a> test…</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
     $this->assertSame($input, $test_editor_xss_filter($input));
 
     // A plain URL surrounded by parentheses.
-    $input = '<img data-caption="(https://www.drupal.org)" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>(https://www.drupal.org)</figcaption></figure>';
+    $input = '<img data-caption="(https://www.drupal.org)" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>(https://www.drupal.org)</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
     $this->assertSame($input, $test_editor_xss_filter($input));
 
     // A source being credited.
-    $input = '<img data-caption="Source: Wikipedia" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Source: Wikipedia</figcaption></figure>';
+    $input = '<img data-caption="Source: Wikipedia" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>Source: Wikipedia</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
     $this->assertSame($input, $test_editor_xss_filter($input));
 
     // A source being credited, without a space after the colon.
-    $input = '<img data-caption="Source:Wikipedia" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Source:Wikipedia</figcaption></figure>';
+    $input = '<img data-caption="Source:Wikipedia" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>Source:Wikipedia</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
     $this->assertSame($input, $test_editor_xss_filter($input));
 
     // A pretty crazy edge case where we have two colons.
-    $input = '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Interesting (Scope resolution operator ::)</figcaption></figure>';
+    $input = '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>Interesting (Scope resolution operator ::)</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
     $this->assertSame($input, $test_editor_xss_filter($input));
 
     // An evil anchor (to ensure XSS filtering is applied to the caption also).
-    $input = '<img data-caption="This is an &lt;a href=&quot;javascript:alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>This is an <a href="alert();">evil</a> test…</figcaption></figure>';
+    $input = '<img data-caption="This is an &lt;a href=&quot;javascript:alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg">';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>This is an <a href="alert();">evil</a> test…</figcaption>
+</figure>
+';
     $this->assertSame($expected, $test_with_html_filter($input));
-    $expected_xss_filtered = '<img data-caption="This is an &lt;a href=&quot;alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg" />';
+    $expected_xss_filtered = '<img data-caption="This is an &lt;a href=&quot;alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg">';
     $this->assertSame($expected_xss_filtered, $test_editor_xss_filter($input));
   }
 
@@ -286,17 +350,29 @@ public function testAlignAndCaptionFilters() {
     // Both data-caption and data-align attributes: all 3 allowed values for the
     // data-align attribute.
     $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="left" />';
-    $expected = '<figure role="group" class="align-left"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group" class="align-left">
+<img src="llama.jpg">
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
     $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="center" />';
-    $expected = '<figure role="group" class="align-center"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group" class="align-center">
+<img src="llama.jpg">
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
     $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="right" />';
-    $expected = '<figure role="group" class="align-right"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group" class="align-right">
+<img src="llama.jpg">
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
@@ -304,14 +380,22 @@ public function testAlignAndCaptionFilters() {
     // Both data-caption and data-align attributes, but a disallowed data-align
     // attribute value.
     $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="left foobar" />';
-    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group">
+<img src="llama.jpg">
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
 
     // Ensure both filters together work for linked images.
     $input = '<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" data-caption="Loquacious llama!" data-align="center" /></a>';
-    $expected = '<figure role="group" class="align-center"><a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg" /></a>' . "\n" . '<figcaption>Loquacious llama!</figcaption></figure>';
+    $expected = '<figure role="group" class="align-center">
+<a href="http://example.com/llamas/are/awesome/but/kittens/are/cool/too"><img src="llama.jpg"></a>
+<figcaption>Loquacious llama!</figcaption>
+</figure>
+';
     $output = $test($input);
     $this->assertSame($expected, $output->getProcessedText());
     $this->assertSame($attached_library, $output->getAttachments());
@@ -455,14 +539,13 @@ public function testHtmlFilter() {
 
     $f = (string) $filter->process('<code onerror>&nbsp;</code>', Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes.');
-    // Note - this string has a decoded &nbsp; character.
-    $this->assertSame('<code> </code>', $f);
+    $this->assertSame('<code>&nbsp;</code>', $f);
 
     $f = (string) $filter->process('<br>', Language::LANGCODE_NOT_SPECIFIED);
-    $this->assertNormalized($f, '<br />', 'HTML filter should allow line breaks.');
+    $this->assertNormalized($f, '<br>', 'HTML filter should allow line breaks.');
 
     $f = (string) $filter->process('<br />', Language::LANGCODE_NOT_SPECIFIED);
-    $this->assertNormalized($f, '<br />', 'HTML filter should allow self-closing line breaks.');
+    $this->assertNormalized($f, '<br>', 'HTML filter should allow self-closing line breaks.');
 
     // All attributes of allowed tags are stripped by default.
     $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
@@ -934,9 +1017,8 @@ public function testHtmlCorrectorFilter() {
     $f = Html::normalize('<div id="d">content');
     $this->assertEquals('<div id="d">content</div>', $f, 'HTML corrector -- unclosed tag with attribute.');
 
-    // XHTML slash for empty elements.
     $f = Html::normalize('<hr><br>');
-    $this->assertEquals('<hr /><br />', $f, 'HTML corrector -- XHTML closing slash.');
+    $this->assertEquals('<hr><br>', $f, 'HTML corrector -- void element.');
 
     $f = Html::normalize('<P>test</P>');
     $this->assertEquals('<p>test</p>', $f, 'HTML corrector -- Convert uppercased tags to proper lowercased ones.');
@@ -945,37 +1027,37 @@ public function testHtmlCorrectorFilter() {
     $this->assertEquals('<p>test</p>', $f, 'HTML corrector -- Convert uppercased tags to proper lowercased ones.');
 
     $f = Html::normalize('test<hr />');
-    $this->assertEquals('test<hr />', $f, 'HTML corrector -- Let proper XHTML pass through.');
+    $this->assertEquals('test<hr>', $f, 'HTML corrector -- convert self-closing element to HTML5 void element.');
 
     $f = Html::normalize('test<hr/>');
-    $this->assertEquals('test<hr />', $f, 'HTML corrector -- Let proper XHTML pass through, but ensure there is a single space before the closing slash.');
+    $this->assertEquals('test<hr>', $f, 'HTML corrector -- convert self-closing element to HTML5 void element.');
 
     $f = Html::normalize('test<hr    />');
-    $this->assertEquals('test<hr />', $f, 'HTML corrector -- Let proper XHTML pass through, but ensure there are not too many spaces before the closing slash.');
+    $this->assertEquals('test<hr>', $f, 'HTML corrector -- convert self-closing element with multiple spaces to HTML5 void element.');
 
     $f = Html::normalize('<span class="test" />');
     $this->assertEquals('<span class="test"></span>', $f, 'HTML corrector -- Convert XHTML that is properly formed but that would not be compatible with typical HTML user agents.');
 
     $f = Html::normalize('test1<br class="test">test2');
-    $this->assertEquals('test1<br class="test" />test2', $f, 'HTML corrector -- Automatically close single tags.');
+    $this->assertEquals('test1<br class="test">test2', $f, 'HTML corrector -- Keep void elements with attributes unchanged.');
 
     $f = Html::normalize('line1<hr>line2');
-    $this->assertEquals('line1<hr />line2', $f, 'HTML corrector -- Automatically close single tags.');
+    $this->assertEquals('line1<hr>line2', $f, 'HTML corrector -- Keep void elements unchanged.');
 
     $f = Html::normalize('line1<HR>line2');
-    $this->assertEquals('line1<hr />line2', $f, 'HTML corrector -- Automatically close single tags.');
+    $this->assertEquals('line1<hr>line2', $f, 'HTML corrector -- Lowercase uppercased void elements.');
 
     $f = Html::normalize('<img src="http://example.com/test.jpg">test</img>');
-    $this->assertEquals('<img src="http://example.com/test.jpg" />test', $f, 'HTML corrector -- Automatically close single tags.');
+    $this->assertEquals('<img src="http://example.com/test.jpg">test', $f, 'HTML corrector -- Drop the stray end tag of a void element.');
 
     $f = Html::normalize('<br></br>');
-    $this->assertEquals('<br />', $f, "HTML corrector -- Transform empty tags to a single closed tag if the tag's content model is EMPTY.");
+    $this->assertEquals('<br><br>', $f, 'HTML corrector -- Treat the invalid </br> end tag as a second <br> void element.');
 
     $f = Html::normalize('<div></div>');
     $this->assertEquals('<div></div>', $f, "HTML corrector -- Do not transform empty tags to a single closed tag if the tag's content model is not EMPTY.");
 
     $f = Html::normalize('<p>line1<br/><hr/>line2</p>');
-    $this->assertEquals('<p>line1<br /></p><hr />line2', $f, 'HTML corrector -- Move non-inline elements outside of inline containers.');
+    $this->assertEquals('<p>line1<br></p><hr>line2', $f, 'HTML corrector -- Move non-inline elements outside of inline containers.');
 
     $f = Html::normalize('<p>line1<div>line2</div></p>');
     $this->assertEquals('<p>line1</p><div>line2</div>', $f, 'HTML corrector -- Move non-inline elements outside of inline containers.');
@@ -984,7 +1066,7 @@ public function testHtmlCorrectorFilter() {
     $this->assertEquals('<p>test</p><p>test</p>\n', $f, 'HTML corrector -- Auto-close improperly nested tags.');
 
     $f = Html::normalize('<p>Line1<br><STRONG>bold stuff</b>');
-    $this->assertEquals('<p>Line1<br /><strong>bold stuff</strong></p>', $f, 'HTML corrector -- Properly close unclosed tags, and remove useless closing tags.');
+    $this->assertEquals('<p>Line1<br><strong>bold stuff</strong></p>', $f, 'HTML corrector -- Properly close unclosed tags, and remove useless closing tags.');
 
     $f = Html::normalize('test <!-- this is a comment -->');
     $this->assertEquals('test <!-- this is a comment -->', $f, 'HTML corrector -- Do not touch HTML comments.');
@@ -1013,112 +1095,59 @@ public function testHtmlCorrectorFilter() {
     $this->assertEquals('<p>دروبال</p>', $f, 'HTML corrector -- Encoding is correctly kept.');
     // cSpell:enable
 
-    $f = Html::normalize('<script>alert("test")</script>');
-    $this->assertEquals('<script>
-//<![CDATA[
-alert("test")
-//]]>
-</script>', $f, 'HTML corrector -- CDATA added to script element');
+    $html = '<script>alert("test")</script>';
+    $this->assertEquals($html, Html::normalize($html), 'HTML corrector -- script element');
 
-    $f = Html::normalize('<p><script>alert("test")</script></p>');
-    $this->assertEquals('<p><script>
-//<![CDATA[
-alert("test")
-//]]>
-</script></p>', $f, 'HTML corrector -- CDATA added to a nested script element');
+    $html = '<p><script>alert("test")</script></p>';
+    $this->assertEquals($html, Html::normalize($html), 'HTML corrector -- nested script element');
 
-    $f = Html::normalize('<p><style> /* Styling */ body {color:red}</style></p>');
-    $this->assertEquals('<p><style>
-/*<![CDATA[*/
- /* Styling */ body {color:red}
-/*]]>*/
-</style></p>', $f, 'HTML corrector -- CDATA added to a style element.');
+    $html = '<p><style> /* Styling */ body {color:red}</style></p>';
+    $this->assertEquals($html, Html::normalize($html), 'HTML corrector -- style element.');
 
-    $filtered_data = Html::normalize('<p><style>
-/*<![CDATA[*/
-/* Styling */
-body {color:red}
-/*]]>*/
-</style></p>');
-    $this->assertEquals('<p><style>
+    $html = '<p><style>
 /*<![CDATA[*/
 /* Styling */
 body {color:red}
 /*]]>*/
-</style></p>', $filtered_data,
-      new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '/*<![CDATA[*/'])
-    );
+</style></p>';
+    $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '/*<![CDATA[*/']));
 
-    $filtered_data = Html::normalize('<p><style>
-/*<![CDATA[*/
-  /* Styling */
-  body {color:red}
-/*]]>*/
-</style></p>');
-    $this->assertEquals('<p><style>
+    $html = '<p><style>
 /*<![CDATA[*/
   /* Styling */
   body {color:red}
 /*]]>*/
-</style></p>', $filtered_data,
-      new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--/*--><![CDATA[/* ><!--*/'])
-    );
+</style></p>';
+    $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--/*--><![CDATA[/* ><!--*/']));
 
-    $filtered_data = Html::normalize('<p><script>
+    $html = '<p><script>
 //<![CDATA[
   alert("test");
 //]]>
-</script></p>');
-    $this->assertEquals('<p><script>
-//<![CDATA[
-  alert("test");
-//]]>
-</script></p>', $filtered_data,
-      new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--//--><![CDATA[// ><!--'])
-    );
+</script></p>';
+    $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '<!--//--><![CDATA[// ><!--']));
 
-    $filtered_data = Html::normalize('<p><script>
+    $html = '<p><script>
 // <![CDATA[
   alert("test");
 //]]>
-</script></p>');
-    $this->assertEquals('<p><script>
-// <![CDATA[
-  alert("test");
-//]]>
-</script></p>', $filtered_data,
-      new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA['])
-    );
+</script></p>';
+    $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[']));
 
-    $filtered_data = Html::normalize('<p><script>
-// <![CDATA[![CDATA[![CDATA[
-  alert("test");
-//]]]]]]>
-</script></p>');
-    $this->assertEquals('<p><script>
+    $html = '<p><script>
 // <![CDATA[![CDATA[![CDATA[
   alert("test");
 //]]]]]]>
-</script></p>', $filtered_data,
-      new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA['])
-    );
+</script></p>';
+    $this->assertEquals($html, Html::normalize($html), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA[']));
 
     // Test calling Html::normalize() twice.
-    $filtered_data = Html::normalize('<p><script>
+    $html = '<p><script>
 // <![CDATA[![CDATA[![CDATA[
   alert("test");
 //]]]]]]>
-</script></p>');
-    $filtered_data = Html::normalize($filtered_data);
-
-    $this->assertEquals('<p><script>
-// <![CDATA[![CDATA[![CDATA[
-  alert("test");
-//]]]]]]>
-</script></p>', $filtered_data,
-      new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA['])
-    );
-
+</script></p>';
+    $this->assertEquals($html, Html::normalize(Html::normalize($html)), new FormattableMarkup('HTML corrector -- Existing cdata section @pattern_name properly escaped', ['@pattern_name' => '// <![CDATA[![CDATA[![CDATA[']));
   }
 
   /**
diff --git a/core/modules/filter/tests/src/Unit/FilterHtmlTest.php b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php
index 56740c8b3bef..7ffb1691dd09 100644
--- a/core/modules/filter/tests/src/Unit/FilterHtmlTest.php
+++ b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php
@@ -56,9 +56,7 @@ public function providerFilterAttributes() {
       ['<p dir="rtl" />', '<p dir="rtl"></p>'],
       ['<p dir="bogus" />', '<p></p>'],
       ['<p id="first" />', '<p></p>'],
-      // The addition of xml:lang isn't especially desired, but is still valid
-      // HTML5. See https://www.drupal.org/node/1333730.
-      ['<p id="first" lang="en">text</p>', '<p lang="en" xml:lang="en">text</p>'],
+      ['<p id="first" lang="en">text</p>', '<p lang="en">text</p>'],
       ['<p style="display: none;" />', '<p></p>'],
       ['<code class="pretty invalid">foreach ($a as $b) {}</code>', '<code class="pretty">foreach ($a as $b) {}</code>'],
       ['<code class="boring pretty">foreach ($a as $b) {}</code>', '<code class="boring pretty">foreach ($a as $b) {}</code>'],
diff --git a/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php b/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php
index 6da46a4a55fa..9686beb343a0 100644
--- a/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php
+++ b/core/modules/filter/tests/src/Unit/FilterImageLazyLoadTest.php
@@ -50,27 +50,27 @@ public function providerHtml(): array {
     return [
       'lazy loading attribute already added' => [
         'input' => '<p><img src="foo.png" loading="lazy"></p>',
-        'output' => '<p><img src="foo.png" loading="lazy" /></p>',
+        'output' => '<p><img src="foo.png" loading="lazy"></p>',
       ],
       'eager loading attribute already added' => [
         'input' => '<p><img src="foo.png" loading="eager"/></p>',
-        'output' => '<p><img src="foo.png" loading="eager" /></p>',
+        'output' => '<p><img src="foo.png" loading="eager"></p>',
       ],
       'image dimensions provided' => [
         'input' => '<p><img src="foo.png" width="200" height="200"/></p>',
-        '<p><img src="foo.png" width="200" height="200" loading="lazy" /></p>',
+        'output' => '<p><img src="foo.png" width="200" height="200" loading="lazy"></p>',
       ],
       'width image dimensions provided' => [
         'input' => '<p><img src="foo.png" width="200"/></p>',
-        '<p><img src="foo.png" width="200" /></p>',
+        'output' => '<p><img src="foo.png" width="200"></p>',
       ],
       'height image dimensions provided' => [
         'input' => '<p><img src="foo.png" height="200"/></p>',
-        '<p><img src="foo.png" height="200" /></p>',
+        'output' => '<p><img src="foo.png" height="200"></p>',
       ],
       'invalid loading attribute' => [
         'input' => '<p><img src="foo.png" width="200" height="200" loading="foo"></p>',
-        'output' => '<p><img src="foo.png" width="200" height="200" loading="lazy" /></p>',
+        'output' => '<p><img src="foo.png" width="200" height="200" loading="lazy"></p>',
       ],
       'no image tag' => [
         'input' => '<p>Lorem ipsum...</p>',
@@ -78,7 +78,7 @@ public function providerHtml(): array {
       ],
       'no image dimensions provided' => [
         'input' => '<p><img src="foo.png"></p>',
-        'output' => '<p><img src="foo.png" /></p>',
+        'output' => '<p><img src="foo.png"></p>',
       ],
     ];
   }
diff --git a/core/modules/system/tests/src/Kernel/Mail/MailTest.php b/core/modules/system/tests/src/Kernel/Mail/MailTest.php
index 8398fd24ebd6..3b99732d6971 100644
--- a/core/modules/system/tests/src/Kernel/Mail/MailTest.php
+++ b/core/modules/system/tests/src/Kernel/Mail/MailTest.php
@@ -324,7 +324,7 @@ public function testRenderedElementsUseAbsolutePaths() {
         '#theme' => 'image',
         '#uri' => $input_path,
       ];
-      $expected_html = "<img src=\"$expected_path\" alt=\"\" />";
+      $expected_html = "<img src=\"$expected_path\" alt>\n";
 
       // Send a test message that mail_cancel_test_mail_alter should cancel.
       \Drupal::service('plugin.manager.mail')->mail('mail_html_test', 'render_from_message_param', 'relative_url@example.com', $language_interface->getId(), ['message' => $render]);
diff --git a/core/profiles/standard/tests/src/Functional/StandardTest.php b/core/profiles/standard/tests/src/Functional/StandardTest.php
index 40a04624ac4a..15ed8479bbe9 100644
--- a/core/profiles/standard/tests/src/Functional/StandardTest.php
+++ b/core/profiles/standard/tests/src/Functional/StandardTest.php
@@ -85,7 +85,7 @@ public function testStandard() {
     $this->drupalLogin($this->adminUser);
     $this->drupalGet('node/1');
     // Verify that a line break is present.
-    $this->assertSession()->responseContains('Then she picked out two somebodies,<br />Sally and me');
+    $this->assertSession()->responseContains('Then she picked out two somebodies,<br>Sally and me');
     $this->submitForm([
       'subject[0][value]' => 'Barfoo',
       'comment_body[0][value]' => 'Then she picked out two somebodies, Sally and me',
diff --git a/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php b/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php
index 3ee9af464992..b2867ec012dd 100644
--- a/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php
+++ b/core/tests/Drupal/Tests/Component/Utility/HtmlTest.php
@@ -392,7 +392,7 @@ public function providerTestTransformRootRelativeUrlsToAbsolute() {
 
     // Double-character carriage return should be normalized.
     $data['line break with double special character'] = ["Test without links but with\r\nsome special characters", 'http://example.com', "Test without links but with\nsome special characters"];
-    $data['line break with single special character'] = ["Test without links but with&#13;\nsome special characters", 'http://example.com', FALSE];
+    $data['line break with single special character'] = ["Test without links but with&#13;\nsome special characters", 'http://example.com', "Test without links but with\nsome special characters"];
     $data['carriage return within html'] = ["<a\rhref='/node'>My link</a>", 'http://example.com', '<a href="http://example.com/node">My link</a>'];
 
     return $data;
diff --git a/core/tests/Drupal/Tests/Component/Utility/XssTest.php b/core/tests/Drupal/Tests/Component/Utility/XssTest.php
index 7965b198a8bd..e076e977b392 100644
--- a/core/tests/Drupal/Tests/Component/Utility/XssTest.php
+++ b/core/tests/Drupal/Tests/Component/Utility/XssTest.php
@@ -607,6 +607,16 @@ public function providerTestFilterXssAdminNotNormalized() {
     ];
   }
 
+  /**
+   * Checks that escaped HTML in an attribute survives normalize and filter.
+   *
+   * @see \Drupal\Component\Utility\HtmlSerializerRules
+   */
+  public function testFilterNormalizedHtml5() {
+    $input = '<span data-caption="foo &lt;em&gt;bar&lt;/em&gt;"></span>';
+    $this->assertSame($input, Xss::filter(Html::normalize($input), ['span']));
+  }
+
   /**
    * Asserts that a text transformed to lowercase with HTML entities decoded does contain a given string.
    *
-- 
GitLab