From c5d96cb883bba5b9e2c29eb3fe0453d6ce67637c Mon Sep 17 00:00:00 2001 From: Alex Pott <alex.a.pott@googlemail.com> Date: Sun, 20 Sep 2015 18:43:02 +0100 Subject: [PATCH] Issue #2565895 by dawehner, stefan.r, googletorp, effulgentsia, lauriii, jcnventura, David_Rothstein, catch, xjm, joelpittet, pwolanin, plach, larowlan, cilefen: Add a new :placeholder to SafeMarkup::format() for URLs that handles bad protocols --- .../Component/Utility/PlaceholderTrait.php | 8 + .../Drupal/Component/Utility/SafeMarkup.php | 34 +++- .../Drupal/Component/Utility/UrlHelper.php | 22 ++- .../Utility/SafeMarkupKernelTest.php | 153 ++++++++++++++++++ .../Component/Utility/SafeMarkupTest.php | 27 ++++ 5 files changed, 238 insertions(+), 6 deletions(-) create mode 100644 core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php diff --git a/core/lib/Drupal/Component/Utility/PlaceholderTrait.php b/core/lib/Drupal/Component/Utility/PlaceholderTrait.php index f4b1e3876da9..5922b3f8d9c6 100644 --- a/core/lib/Drupal/Component/Utility/PlaceholderTrait.php +++ b/core/lib/Drupal/Component/Utility/PlaceholderTrait.php @@ -48,6 +48,14 @@ protected static function placeholderFormat($string, array $args, &$safe = TRUE) $args[$key] = '<em class="placeholder">' . $value . '</em>'; break; + case ':': + // URL attributes must be escaped unconditionally (even if they were + // already marked safe) since content that has been filtered for XSS + // can still contain characters that are unsafe for use in attributes. + // @todo decide what to do about non-URL attribute values (#2570431) + $args[$key] = Html::escape(UrlHelper::stripDangerousProtocols($value)); + break; + case '!': // Pass-through. if (!SafeMarkup::isSafe($value)) { diff --git a/core/lib/Drupal/Component/Utility/SafeMarkup.php b/core/lib/Drupal/Component/Utility/SafeMarkup.php index 00b09f257434..c1c86f56d654 100644 --- a/core/lib/Drupal/Component/Utility/SafeMarkup.php +++ b/core/lib/Drupal/Component/Utility/SafeMarkup.php @@ -162,12 +162,19 @@ public static function checkPlain($text) { /** * Formats a string for HTML display by replacing variable placeholders. * - * This function replaces variable placeholders in a string with the requested + * This method replaces variable placeholders in a string with the requested * values and escapes the values so they can be safely displayed as HTML. It * should be used on any unknown text that is intended to be printed to an * HTML page (especially text that may have come from untrusted users, since * in that case it prevents cross-site scripting and other security problems). * + * This method is not intended for passing arbitrary user input into any + * HTML attribute value, as only URL attributes such as "src" and "href" are + * supported (using ":variable"). Never use this method on unsafe HTML + * attributes such as "on*" and "style" and take care when using this with + * unsupported attributes such as "title" or "alt" as this can lead to + * unexpected output. + * * In most cases, you should use t() rather than calling this function * directly, since it will translate the text (on non-English-only sites) in * addition to formatting it. @@ -180,13 +187,27 @@ public static function checkPlain($text) { * any key in $args are replaced with the corresponding value, after * optional sanitization and formatting. The type of sanitization and * formatting depends on the first character of the key: - * - @variable: Escaped to HTML using self::escape(). Use this as the - * default choice for anything displayed on a page on the site. - * - %variable: Escaped to HTML wrapped in <em> tags, which makes the - * following HTML code: + * - @variable: Escaped to HTML using Html::escape() unless the value is + * already HTML-safe. Use this as the default choice for anything + * displayed on a page on the site, but not within HTML attributes. + * - %variable: Escaped to HTML just like @variable, but also wrapped in + * <em> tags, which makes the following HTML code: * @code * <em class="placeholder">text output here.</em> * @endcode + * As with @variable, do not use this within HTML attributes. + * - :variable: Escaped to HTML using Html::escape() and filtered for + * dangerous protocols using UrlHelper::stripDangerousProtocols(). Use + * this when passing in a URL, such as when using the "src" or "href" + * attributes, ensuring the value is always wrapped in quotes: + * - Secure: <a href=":variable">@variable</a> + * - Insecure: <a href=:variable>@variable</a> + * When ":variable" comes from arbitrary user input, the result is secure, + * but not guaranteed to be a valid URL (which means the resulting output + * could fail HTML validation). To guarantee a valid URL, use + * Url::fromUri($user_input)->toString() (which either throws an exception + * or returns a well-formed URL) before passing the result into a + * ":variable" placeholder. * - !variable: Inserted as is, with no sanitization or formatting. Only * use this when the resulting string is being generated for one of: * - Non-HTML usage, such as a plain-text email. @@ -202,6 +223,9 @@ public static function checkPlain($text) { * @ingroup sanitization * * @see t() + * @see \Drupal\Component\Utility\Html::escape() + * @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() + * @see \Drupal\Core\Url::fromUri() */ public static function format($string, array $args) { $safe = TRUE; diff --git a/core/lib/Drupal/Component/Utility/UrlHelper.php b/core/lib/Drupal/Component/Utility/UrlHelper.php index 2f26b26b5ea4..1763c23611c3 100644 --- a/core/lib/Drupal/Component/Utility/UrlHelper.php +++ b/core/lib/Drupal/Component/Utility/UrlHelper.php @@ -304,7 +304,22 @@ public static function setAllowedProtocols(array $protocols = array()) { * \Drupal\Component\Utility\Xss::filter(), but those functions return an * HTML-encoded string, so this function can be called independently when the * output needs to be a plain-text string for passing to functions that will - * call \Drupal\Component\Utility\Html::escape() separately. + * call Html::escape() separately. The exact behavior depends on the value: + * - If the value is a well-formed (per RFC 3986) relative URL or + * absolute URL that does not use a dangerous protocol (like + * "javascript:"), then the URL remains unchanged. This includes all + * URLs generated via Url::toString() and UrlGeneratorTrait::url(). + * - If the value is a well-formed absolute URL with a dangerous protocol, + * the protocol is stripped. This process is repeated on the remaining URL + * until it is stripped down to a safe protocol. + * - If the value is not a well-formed URL, the same sanitization behavior as + * for well-formed URLs will be invoked, which strips most substrings that + * precede a ":". The result can be used in URL attributes such as "href" + * or "src" (only after calling Html::escape() separately), but this may not + * produce valid HTML (e.g., malformed URLs within "href" attributes fail + * HTML validation). This can be avoided by using + * Url::fromUri($possibly_not_a_url)->toString(), which either throws an + * exception or returns a well-formed URL. * * @param string $uri * A plain-text URI that might contain dangerous protocols. @@ -314,6 +329,11 @@ public static function setAllowedProtocols(array $protocols = array()) { * strings, this return value must not be output to an HTML page without * being sanitized first. However, it can be passed to functions * expecting plain-text strings. + * + * @see \Drupal\Component\Utility\Html::escape() + * @see \Drupal\Core\Url::toString() + * @see \Drupal\Core\Routing\UrlGeneratorTrait::url() + * @see \Drupal\Core\Url::fromUri() */ public static function stripDangerousProtocols($uri) { $allowed_protocols = array_flip(static::$allowedProtocols); diff --git a/core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php b/core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php new file mode 100644 index 000000000000..273a240faea3 --- /dev/null +++ b/core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php @@ -0,0 +1,153 @@ +<?php + +/** + * @file + * Contains \Drupal\KernelTests\Component\Utility\SafeMarkupKernelTest. + */ + +namespace Drupal\KernelTests\Component\Utility; + +use Drupal\Component\FileCache\FileCacheFactory; +use Drupal\Component\Utility\SafeMarkup; +use Drupal\Core\Site\Settings; +use Drupal\Core\Url; +use Drupal\KernelTests\KernelTestBase; + +/** + * Provides a test covering integration of SafeMarkup with other systems. + * + * @group Utility +*/ +class SafeMarkupKernelTest extends KernelTestBase { + + /** + * {@inheritdoc} + */ + public static $modules = ['system']; + + /** + * {@inheritdoc} + */ + protected function setUp() { + parent::setUp(); + + $this->installSchema('system', 'router'); + $this->container->get('router.builder')->rebuild(); + } + + /** + * Gets arguments for SafeMarkup::format() based on Url::fromUri() parameters. + * + * @param string $uri + * The URI of the resource. + * + * @param array $options + * The options to pass to Url::fromUri(). + * + * @return array + * Array containing: + * - ':url': A URL string. + */ + protected static function getSafeMarkupUriArgs($uri, $options = []) { + $args[':url'] = Url::fromUri($uri, $options)->toString(); + return $args; + } + + /** + * Tests URL ":placeholders" in SafeMarkup::format(). + * + * @dataProvider providerTestSafeMarkupUri + */ + public function testSafeMarkupUri($string, $uri, $options, $expected) { + $args = self::getSafeMarkupUriArgs($uri, $options); + $this->assertEquals($expected, SafeMarkup::format($string, $args)); + } + + /** + * @return array + */ + public function providerTestSafeMarkupUri() { + $data = []; + $data['routed-url'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'route:system.admin', + [], + 'Hey giraffe <a href="/admin">MUUUH</a>', + ]; + $data['routed-with-query'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'route:system.admin', + ['query' => ['bar' => 'baz#']], + 'Hey giraffe <a href="/admin?bar=baz%23">MUUUH</a>', + ]; + $data['routed-with-fragment'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'route:system.admin', + ['fragment' => 'bar<'], + 'Hey giraffe <a href="/admin#bar&lt;">MUUUH</a>', + ]; + $data['unrouted-url'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'base://foo', + [], + 'Hey giraffe <a href="/foo">MUUUH</a>', + ]; + $data['unrouted-with-query'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'base://foo', + ['query' => ['bar' => 'baz#']], + 'Hey giraffe <a href="/foo?bar=baz%23">MUUUH</a>', + ]; + $data['unrouted-with-fragment'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'base://foo', + ['fragment' => 'bar<'], + 'Hey giraffe <a href="/foo#bar&lt;">MUUUH</a>', + ]; + $data['mailto-protocol'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + 'mailto:test@example.com', + [], + 'Hey giraffe <a href="mailto:test@example.com">MUUUH</a>', + ]; + + return $data; + } + + /** + * @dataProvider providerTestSafeMarkupUriWithException + * @expectedException \InvalidArgumentException + */ + public function testSafeMarkupUriWithExceptionUri($string, $uri) { + // Should throw an \InvalidArgumentException, due to Uri::toString(). + $args = self::getSafeMarkupUriArgs($uri); + + SafeMarkup::format($string, $args); + } + + /** + * @return array + */ + public function providerTestSafeMarkupUriWithException() { + $data = []; + $data['js-protocol'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + "javascript:alert('xss')", + ]; + $data['js-with-fromCharCode'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + "javascript:alert(String.fromCharCode(88,83,83))", + ]; + $data['non-url-with-colon'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + "llamas: they are not URLs", + ]; + $data['non-url-with-html'] = [ + 'Hey giraffe <a href=":url">MUUUH</a>', + '<span>not a url</span>', + ]; + + return $data; + } + +} diff --git a/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php b/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php index fae15bb55c07..12f201256f90 100644 --- a/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php +++ b/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php @@ -10,6 +10,7 @@ use Drupal\Component\Utility\SafeMarkup; use Drupal\Component\Utility\SafeStringInterface; use Drupal\Component\Utility\SafeStringTrait; +use Drupal\Component\Utility\UrlHelper; use Drupal\Tests\UnitTestCase; /** @@ -20,6 +21,16 @@ */ class SafeMarkupTest extends UnitTestCase { + /** + * {@inheritdoc} + */ + protected function tearDown() { + parent::tearDown(); + + UrlHelper::setAllowedProtocols(['http', 'https']); + } + + /** * Helper function to add a string to the safe list for testing. * @@ -198,6 +209,8 @@ function providerCheckPlain() { * Whether the result is expected to be safe for HTML display. */ public function testFormat($string, array $args, $expected, $message, $expected_is_safe) { + UrlHelper::setAllowedProtocols(['http', 'https', 'mailto']); + $result = SafeMarkup::format($string, $args); $this->assertEquals($expected, $result, $message); $this->assertEquals($expected_is_safe, SafeMarkup::isSafe($result), 'SafeMarkup::format correctly sets the result as safe or not safe.'); @@ -221,6 +234,20 @@ function providerFormat() { $tests[] = array('Verbatim text: !value', array('!value' => '<script>'), 'Verbatim text: <script>', 'SafeMarkup::format replaces verbatim string as-is.', FALSE); $tests[] = array('Verbatim text: !value', array('!value' => SafeMarkupTestSafeString::create('<span>Safe HTML</span>')), 'Verbatim text: <span>Safe HTML</span>', 'SafeMarkup::format replaces verbatim string as-is.', TRUE); + $tests['javascript-protocol-url'] = ['Simple text <a href=":url">giraffe</a>', [':url' => 'javascript://example.com?foo&bar'], 'Simple text <a href="//example.com?foo&bar">giraffe</a>', 'Support for filtering bad protocols', TRUE]; + $tests['external-url'] = ['Simple text <a href=":url">giraffe</a>', [':url' => 'http://example.com?foo&bar'], 'Simple text <a href="http://example.com?foo&bar">giraffe</a>', 'Support for filtering bad protocols', TRUE]; + $tests['relative-url'] = ['Simple text <a href=":url">giraffe</a>', [':url' => '/node/1?foo&bar'], 'Simple text <a href="/node/1?foo&bar">giraffe</a>', 'Support for filtering bad protocols', TRUE]; + $tests['fragment-with-special-chars'] = ['Simple text <a href=":url">giraffe</a>', [':url' => 'http://example.com/#<'], 'Simple text <a href="http://example.com/#&lt;">giraffe</a>', 'Support for filtering bad protocols', TRUE]; + $tests['mailto-protocol'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => 'mailto:test@example.com'], 'Hey giraffe <a href="mailto:test@example.com">MUUUH</a>', '', TRUE]; + $tests['js-with-fromCharCode'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => "javascript:alert(String.fromCharCode(88,83,83))"], 'Hey giraffe <a href="alert(String.fromCharCode(88,83,83))">MUUUH</a>', '', TRUE]; + + // Test some "URL" values that are not RFC 3986 compliant URLs. The result + // of SafeMarkup::format() should still be valid HTML (other than the + // value of the "href" attribute not being a valid URL), and not + // vulnerable to XSS. + $tests['non-url-with-colon'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => "llamas: they are not URLs"], 'Hey giraffe <a href=" they are not URLs">MUUUH</a>', '', TRUE]; + $tests['non-url-with-html'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => "<span>not a url</span>"], 'Hey giraffe <a href="<span>not a url</span>">MUUUH</a>', '', TRUE]; + return $tests; } -- GitLab