diff --git a/core/lib/Drupal/Component/Utility/Xss.php b/core/lib/Drupal/Component/Utility/Xss.php index 7a06e124e6c4a07863e66ea0d47f16e4e893ac94..9081fae3fe216073435c2c250d005b1c7d3ae57e 100644 --- a/core/lib/Drupal/Component/Utility/Xss.php +++ b/core/lib/Drupal/Component/Utility/Xss.php @@ -192,6 +192,7 @@ protected static function attributes($attributes) { $mode = 0; $attribute_name = ''; $skip = FALSE; + $skip_protocol_filtering = FALSE; while (strlen($attributes) != 0) { // Was the last operation successful? @@ -203,6 +204,20 @@ protected static function attributes($attributes) { if (preg_match('/^([-a-zA-Z]+)/', $attributes, $match)) { $attribute_name = strtolower($match[1]); $skip = ($attribute_name == 'style' || substr($attribute_name, 0, 2) == 'on'); + + // Values for attributes of type URI should be filtered for + // potentially malicious protocols (for example, an href-attribute + // starting with "javascript:"). However, for some non-URI + // attributes performing this filtering causes valid and safe data + // to be mangled. We prevent this by skipping protocol filtering on + // such attributes. + // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol() + // @see http://www.w3.org/TR/html4/index/attributes.html + $skip_protocol_filtering = substr($attribute_name, 0, 5) === 'data-' || in_array($attribute_name, array( + 'title', + 'alt', + )); + $working = $mode = 1; $attributes = preg_replace('/^[-a-zA-Z]+/', '', $attributes); } @@ -228,7 +243,7 @@ protected static function attributes($attributes) { case 2: // Attribute value, a URL after href= for instance. if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) { - $thisval = UrlHelper::filterBadProtocol($match[1]); + $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]); if (!$skip) { $attributes_array[] = "$attribute_name=\"$thisval\""; @@ -240,7 +255,7 @@ protected static function attributes($attributes) { } if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) { - $thisval = UrlHelper::filterBadProtocol($match[1]); + $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]); if (!$skip) { $attributes_array[] = "$attribute_name='$thisval'"; @@ -251,7 +266,7 @@ protected static function attributes($attributes) { } if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) { - $thisval = UrlHelper::filterBadProtocol($match[1]); + $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]); if (!$skip) { $attributes_array[] = "$attribute_name=\"$thisval\""; diff --git a/core/modules/editor/src/EditorXssFilter/Standard.php b/core/modules/editor/src/EditorXssFilter/Standard.php index b34e3496bafac75178437abbb0293c417e40b133..43ef797684acc9fa592ff7059f56d5f7e83209bc 100644 --- a/core/modules/editor/src/EditorXssFilter/Standard.php +++ b/core/modules/editor/src/EditorXssFilter/Standard.php @@ -7,7 +7,9 @@ namespace Drupal\editor\EditorXssFilter; +use Drupal\Component\Utility\Html; use Drupal\Component\Utility\Xss; +use Drupal\Component\Utility\SafeMarkup; use Drupal\filter\FilterFormatInterface; use Drupal\editor\EditorXssFilterInterface; @@ -85,7 +87,39 @@ public static function filterXss($html, FilterFormatInterface $format, FilterFor // Also blacklist tags that are explicitly forbidden in either text format. $blacklisted_tags = array_merge($blacklisted_tags, $forbidden_tags); - return static::filter($html, $blacklisted_tags); + $output = static::filter($html, $blacklisted_tags); + + // Since data-attributes can contain encoded HTML markup that could be + // decoded and interpreted by editors, we need to apply XSS filtering to + // their contents. + return static::filterXssDataAttributes($output); + } + + /** + * Applies a very permissive XSS/HTML filter to data-attributes. + * + * @param string $html + * The string to apply the data-attributes filtering to. + * + * @return string + * The filtered string. + */ + protected static function filterXssDataAttributes($html) { + if (stristr($html, 'data-') !== FALSE) { + $dom = Html::load($html); + $xpath = new \DOMXPath($dom); + foreach ($xpath->query('//@*[starts-with(name(.), "data-")]') as $node) { + // The data-attributes contain an HTML-encoded value, so we need to + // decode the value, apply XSS filtering and then re-save as encoded + // value. There is no need to explicitly decode $node->value, since the + // DOMAttr::value getter returns the decoded value. + $value = Xss::filterAdmin($node->value); + $node->value = SafeMarkup::checkPlain($value); + } + $html = Html::serialize($dom); + } + + return $html; } diff --git a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php index ee25b00636e350474305a1eeeeb9fd3dbcff8679..13c831129d81689efc7cac589946b525a022dd54 100644 --- a/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php +++ b/core/modules/editor/tests/src/Unit/EditorXssFilter/StandardTest.php @@ -512,6 +512,17 @@ public function providerTestFilterXss() { // sites, it only forbids linking to any protocols other than those that are // whitelisted. + // Test XSS filtering on data-attributes. + // @see \Drupal\editor\EditorXssFilter::filterXssDataAttributes() + + // The following two test cases verify that XSS attack vectors are filtered. + $data[] = array('<img src="butterfly.jpg" data-caption="<script>alert();</script>" />', '<img src="butterfly.jpg" data-caption="alert();" />'); + $data[] = array('<img src="butterfly.jpg" data-caption="<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>" />', '<img src="butterfly.jpg" data-caption="" />'); + + // When including HTML-tags as visible content, they are double-escaped. + // This test case ensures that we leave that content unchanged. + $data[] = array('<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />', '<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />'); + return $data; } diff --git a/core/modules/filter/src/Tests/FilterUnitTest.php b/core/modules/filter/src/Tests/FilterUnitTest.php index 87eb773e3bee33cc53c66b8400318f6bf6cc9dbf..d2b515b95789316ed63ba834c361226e889324b0 100644 --- a/core/modules/filter/src/Tests/FilterUnitTest.php +++ b/core/modules/filter/src/Tests/FilterUnitTest.php @@ -9,6 +9,8 @@ use Drupal\Component\Utility\Html; use Drupal\Component\Utility\SafeMarkup; +use Drupal\editor\EditorXssFilter\Standard; +use Drupal\filter\Entity\FilterFormat; use Drupal\filter\FilterPluginCollection; use Drupal\simpletest\KernelTestBase; @@ -176,6 +178,74 @@ function testCaptionFilter() { $output = $test($input); $this->assertIdentical($expected, $output->getProcessedText()); $this->assertIdentical($attached_library, $output->getAssets()); + + // So far we've tested that the caption filter works correctly. But we also + // want to make sure that it works well in tandem with the "Limit allowed + // HTML tags" filter, which it is typically used with. + $html_filter = $this->filters['filter_html']; + $html_filter->setConfiguration(array( + 'settings' => array( + 'allowed_html' => '<img>', + 'filter_html_help' => 1, + 'filter_html_nofollow' => 0, + ) + )); + $test_with_html_filter = function ($input) use ($filter, $html_filter) { + // 1. Apply HTML filter's processing step. + $output = $html_filter->process($input, 'und'); + // 2. Apply caption filter's processing step. + $output = $filter->process($output, 'und'); + return $output->getProcessedText(); + }; + // Editor XSS filter. + $test_editor_xss_filter = function ($input) { + $dummy_filter_format = FilterFormat::create(); + return Standard::filterXss($input, $dummy_filter_format); + }; + + // All the tricky cases encountered at https://drupal.org/node/2105841. + // A plain URL preceded by text. + $input = '<img data-caption="See http://drupal.org" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>See http://drupal.org</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $this->assertIdentical($input, $test_editor_xss_filter($input)); + + // An anchor. + $input = '<img data-caption="This is a <a href="http://drupal.org">quick</a> test…" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>This is a <a href="http://drupal.org">quick</a> test…</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $this->assertIdentical($input, $test_editor_xss_filter($input)); + + // A plain URL surrounded by parentheses. + $input = '<img data-caption="(http://drupal.org)" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>(http://drupal.org)</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $this->assertIdentical($input, $test_editor_xss_filter($input)); + + // A source being credited. + $input = '<img data-caption="Source: Wikipedia" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>Source: Wikipedia</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $this->assertIdentical($input, $test_editor_xss_filter($input)); + + // A source being credited, without a space after the colon. + $input = '<img data-caption="Source:Wikipedia" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>Source:Wikipedia</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $this->assertIdentical($input, $test_editor_xss_filter($input)); + + // A pretty crazy edge case where we have two colons. + $input = '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>Interesting (Scope resolution operator ::)</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $this->assertIdentical($input, $test_editor_xss_filter($input)); + + // An evil anchor (to ensure XSS filtering is applied to the caption also). + $input = '<img data-caption="This is an <a href="javascript:alert();">evil</a> test…" src="llama.jpg" />'; + $expected = '<figure><img src="llama.jpg" /><figcaption>This is an <a href="alert();">evil</a> test…</figcaption></figure>'; + $this->assertIdentical($expected, $test_with_html_filter($input)); + $expected_xss_filtered = '<img data-caption="This is an <a href="alert();">evil</a> test…" src="llama.jpg" />'; + $this->assertIdentical($expected_xss_filtered, $test_editor_xss_filter($input)); } /** diff --git a/core/tests/Drupal/Tests/Component/Utility/XssTest.php b/core/tests/Drupal/Tests/Component/Utility/XssTest.php index 1ffdfe0de6ea873cf95d7466000c2d4a7d7bd2b8..e704538bf0b471396a9dad1e0bbb401553a46e79 100644 --- a/core/tests/Drupal/Tests/Component/Utility/XssTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/XssTest.php @@ -488,6 +488,37 @@ public function testQuestionSign() { $this->assertTrue(stripos($value, '<?xml') === FALSE, 'HTML tag stripping evasion -- starting with a question sign (processing instructions).'); } + /** + * Check that strings in HTML attributes are are correctly processed. + * + * @covers ::attributes + * @dataProvider providerTestAttributes + */ + public function testAttribute($value, $expected, $message, $allowed_tags = NULL) { + $value = Xss::filter($value, $allowed_tags); + $this->assertEquals($expected, $value, $message); + } + + /** + * Data provider for testFilterXssAdminNotNormalized(). + */ + public function providerTestAttributes() { + return array( + array( + '<img src="http://example.com/foo.jpg" title="Example: title" alt="Example: alt">', + '<img src="http://example.com/foo.jpg" title="Example: title" alt="Example: alt">', + 'Image tag with alt and title attribute', + array('img') + ), + array( + '<img src="http://example.com/foo.jpg" data-caption="Drupal 8: The best release ever.">', + '<img src="http://example.com/foo.jpg" data-caption="Drupal 8: The best release ever.">', + 'Image tag with data attribute', + array('img') + ), + ); + } + /** * Checks that \Drupal\Component\Utility\Xss::filterAdmin() correctly strips unallowed tags. */