From 3ae37397adf8a74bcee19dd03935ea536ff2b9ac Mon Sep 17 00:00:00 2001 From: Lee Rowlands <lee.rowlands@previousnext.com.au> Date: Tue, 2 Jan 2024 08:31:58 +1000 Subject: [PATCH] Issue #3410303 by longwave, Luke.Leber, Wim Leers, quietone, dslatkin: FilterHtml data loss when iframe and/or textarea is allowed --- .../filter/src/Plugin/Filter/FilterHtml.php | 18 +++++++++++++++++- .../tests/src/Kernel/FilterKernelTest.php | 11 +++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/core/modules/filter/src/Plugin/Filter/FilterHtml.php b/core/modules/filter/src/Plugin/Filter/FilterHtml.php index 345ae193dea9..0718b73092e0 100644 --- a/core/modules/filter/src/Plugin/Filter/FilterHtml.php +++ b/core/modules/filter/src/Plugin/Filter/FilterHtml.php @@ -7,6 +7,9 @@ use Drupal\Component\Utility\Html; use Drupal\filter\FilterProcessResult; use Drupal\filter\Plugin\FilterBase; +use Masterminds\HTML5\Parser\DOMTreeBuilder; +use Masterminds\HTML5\Parser\Scanner; +use Masterminds\HTML5\Parser\Tokenizer; /** * Provides a filter to limit allowed HTML tags. @@ -258,7 +261,20 @@ public function getHTMLRestrictions() { $star_protector = '__zqh6vxfbk3cg__'; $html = str_replace('*', $star_protector, $html); - $dom = Html::load($html); + // Use HTML5 parser with a custom tokenizer to correctly parse tags that + // normally use text mode, such as iframe. + $events = new DOMTreeBuilder(FALSE, ['disable_html_ns' => TRUE]); + $scanner = new Scanner('<body>' . $html); + $parser = new class($scanner, $events) extends Tokenizer { + + public function setTextMode($textMode, $untilTag = NULL) { + // Do nothing, we never enter text mode. + } + + }; + $parser->parse(); + + $dom = $events->document(); $xpath = new \DOMXPath($dom); foreach ($xpath->query('//body//*') as $node) { $tag = $node->tagName; diff --git a/core/modules/filter/tests/src/Kernel/FilterKernelTest.php b/core/modules/filter/tests/src/Kernel/FilterKernelTest.php index 660ba730da9f..357437c33446 100644 --- a/core/modules/filter/tests/src/Kernel/FilterKernelTest.php +++ b/core/modules/filter/tests/src/Kernel/FilterKernelTest.php @@ -579,6 +579,17 @@ public function testHtmlFilter() { $this->assertNormalized($f, '<a>link</a>', 'HTML filter removes allowed attributes that have a not explicitly allowed value.'); $f = (string) $filter->process('<a href="/beautiful-animals" kitten="cute" llama="epic majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED); $this->assertSame('<a href="/beautiful-animals" llama="epic majestical">link</a>', $f, 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.'); + + // Allow iframes and check that the subsequent tags are parsed correctly. + $filter->setConfiguration([ + 'settings' => [ + 'allowed_html' => '<iframe> <a href llama>', + 'filter_html_help' => 1, + 'filter_html_nofollow' => 0, + ], + ]); + $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNormalized($f, '<a llama="awesome">link</a>'); } /** -- GitLab