From c5d96cb883bba5b9e2c29eb3fe0453d6ce67637c Mon Sep 17 00:00:00 2001
From: Alex Pott <alex.a.pott@googlemail.com>
Date: Sun, 20 Sep 2015 18:43:02 +0100
Subject: [PATCH] Issue #2565895 by dawehner, stefan.r, googletorp,
 effulgentsia, lauriii, jcnventura, David_Rothstein, catch, xjm, joelpittet,
 pwolanin, plach, larowlan, cilefen: Add a new :placeholder to
 SafeMarkup::format() for URLs that handles bad protocols

---
 .../Component/Utility/PlaceholderTrait.php    |   8 +
 .../Drupal/Component/Utility/SafeMarkup.php   |  34 +++-
 .../Drupal/Component/Utility/UrlHelper.php    |  22 ++-
 .../Utility/SafeMarkupKernelTest.php          | 153 ++++++++++++++++++
 .../Component/Utility/SafeMarkupTest.php      |  27 ++++
 5 files changed, 238 insertions(+), 6 deletions(-)
 create mode 100644 core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php

diff --git a/core/lib/Drupal/Component/Utility/PlaceholderTrait.php b/core/lib/Drupal/Component/Utility/PlaceholderTrait.php
index f4b1e3876da9..5922b3f8d9c6 100644
--- a/core/lib/Drupal/Component/Utility/PlaceholderTrait.php
+++ b/core/lib/Drupal/Component/Utility/PlaceholderTrait.php
@@ -48,6 +48,14 @@ protected static function placeholderFormat($string, array $args, &$safe = TRUE)
           $args[$key] = '<em class="placeholder">' . $value . '</em>';
           break;
 
+        case ':':
+          // URL attributes must be escaped unconditionally (even if they were
+          // already marked safe) since content that has been filtered for XSS
+          // can still contain characters that are unsafe for use in attributes.
+          // @todo decide what to do about non-URL attribute values (#2570431)
+          $args[$key] = Html::escape(UrlHelper::stripDangerousProtocols($value));
+          break;
+
         case '!':
           // Pass-through.
           if (!SafeMarkup::isSafe($value)) {
diff --git a/core/lib/Drupal/Component/Utility/SafeMarkup.php b/core/lib/Drupal/Component/Utility/SafeMarkup.php
index 00b09f257434..c1c86f56d654 100644
--- a/core/lib/Drupal/Component/Utility/SafeMarkup.php
+++ b/core/lib/Drupal/Component/Utility/SafeMarkup.php
@@ -162,12 +162,19 @@ public static function checkPlain($text) {
   /**
    * Formats a string for HTML display by replacing variable placeholders.
    *
-   * This function replaces variable placeholders in a string with the requested
+   * This method replaces variable placeholders in a string with the requested
    * values and escapes the values so they can be safely displayed as HTML. It
    * should be used on any unknown text that is intended to be printed to an
    * HTML page (especially text that may have come from untrusted users, since
    * in that case it prevents cross-site scripting and other security problems).
    *
+   * This method is not intended for passing arbitrary user input into any
+   * HTML attribute value, as only URL attributes such as "src" and "href" are
+   * supported (using ":variable"). Never use this method on unsafe HTML
+   * attributes such as "on*" and "style" and take care when using this with
+   * unsupported attributes such as "title" or "alt" as this can lead to
+   * unexpected output.
+   *
    * In most cases, you should use t() rather than calling this function
    * directly, since it will translate the text (on non-English-only sites) in
    * addition to formatting it.
@@ -180,13 +187,27 @@ public static function checkPlain($text) {
    *   any key in $args are replaced with the corresponding value, after
    *   optional sanitization and formatting. The type of sanitization and
    *   formatting depends on the first character of the key:
-   *   - @variable: Escaped to HTML using self::escape(). Use this as the
-   *     default choice for anything displayed on a page on the site.
-   *   - %variable: Escaped to HTML wrapped in <em> tags, which makes the
-   *     following HTML code:
+   *   - @variable: Escaped to HTML using Html::escape() unless the value is
+   *     already HTML-safe. Use this as the default choice for anything
+   *     displayed on a page on the site, but not within HTML attributes.
+   *   - %variable: Escaped to HTML just like @variable, but also wrapped in
+   *     <em> tags, which makes the following HTML code:
    *     @code
    *       <em class="placeholder">text output here.</em>
    *     @endcode
+   *     As with @variable, do not use this within HTML attributes.
+   *   - :variable: Escaped to HTML using Html::escape() and filtered for
+   *     dangerous protocols using UrlHelper::stripDangerousProtocols(). Use
+   *     this when passing in a URL, such as when using the "src" or "href"
+   *     attributes, ensuring the value is always wrapped in quotes:
+   *     - Secure: <a href=":variable">@variable</a>
+   *     - Insecure: <a href=:variable>@variable</a>
+   *     When ":variable" comes from arbitrary user input, the result is secure,
+   *     but not guaranteed to be a valid URL (which means the resulting output
+   *     could fail HTML validation). To guarantee a valid URL, use
+   *     Url::fromUri($user_input)->toString() (which either throws an exception
+   *     or returns a well-formed URL) before passing the result into a
+   *     ":variable" placeholder.
    *   - !variable: Inserted as is, with no sanitization or formatting. Only
    *     use this when the resulting string is being generated for one of:
    *     - Non-HTML usage, such as a plain-text email.
@@ -202,6 +223,9 @@ public static function checkPlain($text) {
    * @ingroup sanitization
    *
    * @see t()
+   * @see \Drupal\Component\Utility\Html::escape()
+   * @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols()
+   * @see \Drupal\Core\Url::fromUri()
    */
   public static function format($string, array $args) {
     $safe = TRUE;
diff --git a/core/lib/Drupal/Component/Utility/UrlHelper.php b/core/lib/Drupal/Component/Utility/UrlHelper.php
index 2f26b26b5ea4..1763c23611c3 100644
--- a/core/lib/Drupal/Component/Utility/UrlHelper.php
+++ b/core/lib/Drupal/Component/Utility/UrlHelper.php
@@ -304,7 +304,22 @@ public static function setAllowedProtocols(array $protocols = array()) {
    * \Drupal\Component\Utility\Xss::filter(), but those functions return an
    * HTML-encoded string, so this function can be called independently when the
    * output needs to be a plain-text string for passing to functions that will
-   * call \Drupal\Component\Utility\Html::escape() separately.
+   * call Html::escape() separately. The exact behavior depends on the value:
+   * - If the value is a well-formed (per RFC 3986) relative URL or
+   *   absolute URL that does not use a dangerous protocol (like
+   *   "javascript:"), then the URL remains unchanged. This includes all
+   *   URLs generated via Url::toString() and UrlGeneratorTrait::url().
+   * - If the value is a well-formed absolute URL with a dangerous protocol,
+   *   the protocol is stripped. This process is repeated on the remaining URL
+   *   until it is stripped down to a safe protocol.
+   * - If the value is not a well-formed URL, the same sanitization behavior as
+   *   for well-formed URLs will be invoked, which strips most substrings that
+   *   precede a ":". The result can be used in URL attributes such as "href"
+   *   or "src" (only after calling Html::escape() separately), but this may not
+   *   produce valid HTML (e.g., malformed URLs within "href" attributes fail
+   *   HTML validation). This can be avoided by using
+   *   Url::fromUri($possibly_not_a_url)->toString(), which either throws an
+   *   exception or returns a well-formed URL.
    *
    * @param string $uri
    *   A plain-text URI that might contain dangerous protocols.
@@ -314,6 +329,11 @@ public static function setAllowedProtocols(array $protocols = array()) {
    *   strings, this return value must not be output to an HTML page without
    *   being sanitized first. However, it can be passed to functions
    *   expecting plain-text strings.
+   *
+   * @see \Drupal\Component\Utility\Html::escape()
+   * @see \Drupal\Core\Url::toString()
+   * @see \Drupal\Core\Routing\UrlGeneratorTrait::url()
+   * @see \Drupal\Core\Url::fromUri()
    */
   public static function stripDangerousProtocols($uri) {
     $allowed_protocols = array_flip(static::$allowedProtocols);
diff --git a/core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php b/core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php
new file mode 100644
index 000000000000..273a240faea3
--- /dev/null
+++ b/core/tests/Drupal/KernelTests/Component/Utility/SafeMarkupKernelTest.php
@@ -0,0 +1,153 @@
+<?php
+
+/**
+ * @file
+ * Contains \Drupal\KernelTests\Component\Utility\SafeMarkupKernelTest.
+ */
+
+namespace Drupal\KernelTests\Component\Utility;
+
+use Drupal\Component\FileCache\FileCacheFactory;
+use Drupal\Component\Utility\SafeMarkup;
+use Drupal\Core\Site\Settings;
+use Drupal\Core\Url;
+use Drupal\KernelTests\KernelTestBase;
+
+/**
+ * Provides a test covering integration of SafeMarkup with other systems.
+ *
+ * @group Utility
+*/
+class SafeMarkupKernelTest extends KernelTestBase {
+
+  /**
+   * {@inheritdoc}
+   */
+  public static $modules = ['system'];
+
+  /**
+   * {@inheritdoc}
+   */
+  protected function setUp() {
+    parent::setUp();
+
+    $this->installSchema('system', 'router');
+    $this->container->get('router.builder')->rebuild();
+  }
+
+  /**
+   * Gets arguments for SafeMarkup::format() based on Url::fromUri() parameters.
+   *
+   * @param string $uri
+   *   The URI of the resource.
+   *
+   * @param array $options
+   *   The options to pass to Url::fromUri().
+   *
+   * @return array
+   *   Array containing:
+   *   - ':url': A URL string.
+   */
+  protected static function getSafeMarkupUriArgs($uri, $options = []) {
+    $args[':url'] = Url::fromUri($uri, $options)->toString();
+    return $args;
+  }
+
+  /**
+   * Tests URL ":placeholders" in SafeMarkup::format().
+   *
+   * @dataProvider providerTestSafeMarkupUri
+   */
+  public function testSafeMarkupUri($string, $uri, $options, $expected) {
+    $args = self::getSafeMarkupUriArgs($uri, $options);
+    $this->assertEquals($expected, SafeMarkup::format($string, $args));
+  }
+
+  /**
+   * @return array
+   */
+  public function providerTestSafeMarkupUri() {
+    $data = [];
+    $data['routed-url'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'route:system.admin',
+      [],
+      'Hey giraffe <a href="/admin">MUUUH</a>',
+    ];
+    $data['routed-with-query'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'route:system.admin',
+      ['query' => ['bar' => 'baz#']],
+      'Hey giraffe <a href="/admin?bar=baz%23">MUUUH</a>',
+    ];
+    $data['routed-with-fragment'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'route:system.admin',
+      ['fragment' => 'bar&lt;'],
+      'Hey giraffe <a href="/admin#bar&amp;lt;">MUUUH</a>',
+    ];
+    $data['unrouted-url'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'base://foo',
+      [],
+      'Hey giraffe <a href="/foo">MUUUH</a>',
+    ];
+    $data['unrouted-with-query'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'base://foo',
+      ['query' => ['bar' => 'baz#']],
+      'Hey giraffe <a href="/foo?bar=baz%23">MUUUH</a>',
+    ];
+    $data['unrouted-with-fragment'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'base://foo',
+      ['fragment' => 'bar&lt;'],
+      'Hey giraffe <a href="/foo#bar&amp;lt;">MUUUH</a>',
+    ];
+    $data['mailto-protocol'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      'mailto:test@example.com',
+      [],
+      'Hey giraffe <a href="mailto:test@example.com">MUUUH</a>',
+    ];
+
+    return $data;
+  }
+
+  /**
+   * @dataProvider providerTestSafeMarkupUriWithException
+   * @expectedException \InvalidArgumentException
+   */
+  public function testSafeMarkupUriWithExceptionUri($string, $uri) {
+    // Should throw an \InvalidArgumentException, due to Uri::toString().
+    $args = self::getSafeMarkupUriArgs($uri);
+
+    SafeMarkup::format($string, $args);
+  }
+
+  /**
+   * @return array
+   */
+  public function providerTestSafeMarkupUriWithException() {
+    $data = [];
+    $data['js-protocol'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      "javascript:alert('xss')",
+    ];
+    $data['js-with-fromCharCode'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      "javascript:alert(String.fromCharCode(88,83,83))",
+    ];
+    $data['non-url-with-colon'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      "llamas: they are not URLs",
+    ];
+    $data['non-url-with-html'] = [
+      'Hey giraffe <a href=":url">MUUUH</a>',
+      '<span>not a url</span>',
+    ];
+
+    return $data;
+  }
+
+}
diff --git a/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php b/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php
index fae15bb55c07..12f201256f90 100644
--- a/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php
+++ b/core/tests/Drupal/Tests/Component/Utility/SafeMarkupTest.php
@@ -10,6 +10,7 @@
 use Drupal\Component\Utility\SafeMarkup;
 use Drupal\Component\Utility\SafeStringInterface;
 use Drupal\Component\Utility\SafeStringTrait;
+use Drupal\Component\Utility\UrlHelper;
 use Drupal\Tests\UnitTestCase;
 
 /**
@@ -20,6 +21,16 @@
  */
 class SafeMarkupTest extends UnitTestCase {
 
+  /**
+   * {@inheritdoc}
+   */
+  protected function tearDown() {
+    parent::tearDown();
+
+    UrlHelper::setAllowedProtocols(['http', 'https']);
+  }
+
+
   /**
    * Helper function to add a string to the safe list for testing.
    *
@@ -198,6 +209,8 @@ function providerCheckPlain() {
    *   Whether the result is expected to be safe for HTML display.
    */
   public function testFormat($string, array $args, $expected, $message, $expected_is_safe) {
+    UrlHelper::setAllowedProtocols(['http', 'https', 'mailto']);
+
     $result = SafeMarkup::format($string, $args);
     $this->assertEquals($expected, $result, $message);
     $this->assertEquals($expected_is_safe, SafeMarkup::isSafe($result), 'SafeMarkup::format correctly sets the result as safe or not safe.');
@@ -221,6 +234,20 @@ function providerFormat() {
     $tests[] = array('Verbatim text: !value', array('!value' => '<script>'), 'Verbatim text: <script>', 'SafeMarkup::format replaces verbatim string as-is.', FALSE);
     $tests[] = array('Verbatim text: !value', array('!value' => SafeMarkupTestSafeString::create('<span>Safe HTML</span>')), 'Verbatim text: <span>Safe HTML</span>', 'SafeMarkup::format replaces verbatim string as-is.', TRUE);
 
+    $tests['javascript-protocol-url'] = ['Simple text <a href=":url">giraffe</a>', [':url' => 'javascript://example.com?foo&bar'], 'Simple text <a href="//example.com?foo&amp;bar">giraffe</a>', 'Support for filtering bad protocols', TRUE];
+    $tests['external-url'] = ['Simple text <a href=":url">giraffe</a>', [':url' => 'http://example.com?foo&bar'], 'Simple text <a href="http://example.com?foo&amp;bar">giraffe</a>', 'Support for filtering bad protocols', TRUE];
+    $tests['relative-url'] = ['Simple text <a href=":url">giraffe</a>', [':url' => '/node/1?foo&bar'], 'Simple text <a href="/node/1?foo&amp;bar">giraffe</a>', 'Support for filtering bad protocols', TRUE];
+    $tests['fragment-with-special-chars'] = ['Simple text <a href=":url">giraffe</a>', [':url' => 'http://example.com/#&lt;'], 'Simple text <a href="http://example.com/#&amp;lt;">giraffe</a>', 'Support for filtering bad protocols', TRUE];
+    $tests['mailto-protocol'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => 'mailto:test@example.com'], 'Hey giraffe <a href="mailto:test@example.com">MUUUH</a>', '', TRUE];
+    $tests['js-with-fromCharCode'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => "javascript:alert(String.fromCharCode(88,83,83))"], 'Hey giraffe <a href="alert(String.fromCharCode(88,83,83))">MUUUH</a>', '', TRUE];
+
+    // Test some "URL" values that are not RFC 3986 compliant URLs. The result
+    // of SafeMarkup::format() should still be valid HTML (other than the
+    // value of the "href" attribute not being a valid URL), and not
+    // vulnerable to XSS.
+    $tests['non-url-with-colon'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => "llamas: they are not URLs"], 'Hey giraffe <a href=" they are not URLs">MUUUH</a>', '', TRUE];
+    $tests['non-url-with-html'] = ['Hey giraffe <a href=":url">MUUUH</a>', [':url' => "<span>not a url</span>"], 'Hey giraffe <a href="&lt;span&gt;not a url&lt;/span&gt;">MUUUH</a>', '', TRUE];
+
     return $tests;
   }
 
-- 
GitLab