Commit 2febbfff authored by Alex Pott's avatar Alex Pott
Browse files

Issue #2105841 by mr.baileys, Wim Leers, cs_shadow, sanduhrs, chx, webflo:...

Issue #2105841 by mr.baileys, Wim Leers, cs_shadow, sanduhrs, chx, webflo: Xss::filter() ignores malicious content in data-attributes and mangles image captions
parent e7d2cd56
Loading
Loading
Loading
Loading
+18 −3
Original line number Diff line number Diff line
@@ -192,6 +192,7 @@ protected static function attributes($attributes) {
    $mode = 0;
    $attribute_name = '';
    $skip = FALSE;
    $skip_protocol_filtering = FALSE;

    while (strlen($attributes) != 0) {
      // Was the last operation successful?
@@ -203,6 +204,20 @@ protected static function attributes($attributes) {
          if (preg_match('/^([-a-zA-Z]+)/', $attributes, $match)) {
            $attribute_name = strtolower($match[1]);
            $skip = ($attribute_name == 'style' || substr($attribute_name, 0, 2) == 'on');

            // Values for attributes of type URI should be filtered for
            // potentially malicious protocols (for example, an href-attribute
            // starting with "javascript:"). However, for some non-URI
            // attributes performing this filtering causes valid and safe data
            // to be mangled. We prevent this by skipping protocol filtering on
            // such attributes.
            // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
            // @see http://www.w3.org/TR/html4/index/attributes.html
            $skip_protocol_filtering = substr($attribute_name, 0, 5) === 'data-' || in_array($attribute_name, array(
              'title',
              'alt',
            ));

            $working = $mode = 1;
            $attributes = preg_replace('/^[-a-zA-Z]+/', '', $attributes);
          }
@@ -228,7 +243,7 @@ protected static function attributes($attributes) {
        case 2:
          // Attribute value, a URL after href= for instance.
          if (preg_match('/^"([^"]*)"(\s+|$)/', $attributes, $match)) {
            $thisval = UrlHelper::filterBadProtocol($match[1]);
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name=\"$thisval\"";
@@ -240,7 +255,7 @@ protected static function attributes($attributes) {
          }

          if (preg_match("/^'([^']*)'(\s+|$)/", $attributes, $match)) {
            $thisval = UrlHelper::filterBadProtocol($match[1]);
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name='$thisval'";
@@ -251,7 +266,7 @@ protected static function attributes($attributes) {
          }

          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attributes, $match)) {
            $thisval = UrlHelper::filterBadProtocol($match[1]);
            $thisval = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);

            if (!$skip) {
              $attributes_array[] = "$attribute_name=\"$thisval\"";
+35 −1
Original line number Diff line number Diff line
@@ -7,7 +7,9 @@

namespace Drupal\editor\EditorXssFilter;

use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\Xss;
use Drupal\Component\Utility\SafeMarkup;
use Drupal\filter\FilterFormatInterface;
use Drupal\editor\EditorXssFilterInterface;

@@ -85,7 +87,39 @@ public static function filterXss($html, FilterFormatInterface $format, FilterFor
    // Also blacklist tags that are explicitly forbidden in either text format.
    $blacklisted_tags = array_merge($blacklisted_tags, $forbidden_tags);

    return static::filter($html, $blacklisted_tags);
    $output = static::filter($html, $blacklisted_tags);

    // Since data-attributes can contain encoded HTML markup that could be
    // decoded and interpreted by editors, we need to apply XSS filtering to
    // their contents.
    return static::filterXssDataAttributes($output);
  }

  /**
   * Applies a very permissive XSS/HTML filter to data-attributes.
   *
   * @param string $html
   *   The string to apply the data-attributes filtering to.
   *
   * @return string
   *   The filtered string.
   */
  protected static function filterXssDataAttributes($html) {
    if (stristr($html, 'data-') !== FALSE) {
      $dom = Html::load($html);
      $xpath = new \DOMXPath($dom);
      foreach ($xpath->query('//@*[starts-with(name(.), "data-")]') as $node) {
        // The data-attributes contain an HTML-encoded value, so we need to
        // decode the value, apply XSS filtering and then re-save as encoded
        // value. There is no need to explicitly decode $node->value, since the
        // DOMAttr::value getter returns the decoded value.
        $value = Xss::filterAdmin($node->value);
        $node->value = SafeMarkup::checkPlain($value);
      }
      $html = Html::serialize($dom);
    }

    return $html;
  }


+11 −0
Original line number Diff line number Diff line
@@ -512,6 +512,17 @@ public function providerTestFilterXss() {
    // sites, it only forbids linking to any protocols other than those that are
    // whitelisted.

    // Test XSS filtering on data-attributes.
    // @see \Drupal\editor\EditorXssFilter::filterXssDataAttributes()

    // The following two test cases verify that XSS attack vectors are filtered.
    $data[] = array('<img src="butterfly.jpg" data-caption="&lt;script&gt;alert();&lt;/script&gt;" />', '<img src="butterfly.jpg" data-caption="alert();" />');
    $data[] = array('<img src="butterfly.jpg" data-caption="&lt;EMBED SRC=&quot;http://ha.ckers.org/xss.swf&quot; AllowScriptAccess=&quot;always&quot;&gt;&lt;/EMBED&gt;" />', '<img src="butterfly.jpg" data-caption="" />');

    // When including HTML-tags as visible content, they are double-escaped.
    // This test case ensures that we leave that content unchanged.
    $data[] = array('<img src="butterfly.jpg" data-caption="&amp;lt;script&amp;gt;alert();&amp;lt;/script&amp;gt;" />', '<img src="butterfly.jpg" data-caption="&amp;lt;script&amp;gt;alert();&amp;lt;/script&amp;gt;" />');

    return $data;
  }

+70 −0
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@

use Drupal\Component\Utility\Html;
use Drupal\Component\Utility\SafeMarkup;
use Drupal\editor\EditorXssFilter\Standard;
use Drupal\filter\Entity\FilterFormat;
use Drupal\filter\FilterPluginCollection;
use Drupal\simpletest\KernelTestBase;

@@ -176,6 +178,74 @@ function testCaptionFilter() {
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAssets());

    // So far we've tested that the caption filter works correctly. But we also
    // want to make sure that it works well in tandem with the "Limit allowed
    // HTML tags" filter, which it is typically used with.
    $html_filter = $this->filters['filter_html'];
    $html_filter->setConfiguration(array(
      'settings' => array(
        'allowed_html' => '<img>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 0,
      )
    ));
    $test_with_html_filter = function ($input) use ($filter, $html_filter) {
      // 1. Apply HTML filter's processing step.
      $output = $html_filter->process($input, 'und');
      // 2. Apply caption filter's processing step.
      $output = $filter->process($output, 'und');
      return $output->getProcessedText();
    };
    // Editor XSS filter.
    $test_editor_xss_filter = function ($input) {
      $dummy_filter_format = FilterFormat::create();
      return Standard::filterXss($input, $dummy_filter_format);
    };

    // All the tricky cases encountered at https://drupal.org/node/2105841.
    // A plain URL preceded by text.
    $input = '<img data-caption="See http://drupal.org" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>See http://drupal.org</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $this->assertIdentical($input, $test_editor_xss_filter($input));

    // An anchor.
    $input = '<img data-caption="This is a &lt;a href=&quot;http://drupal.org&quot;&gt;quick&lt;/a&gt; test…" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>This is a <a href="http://drupal.org">quick</a> test…</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $this->assertIdentical($input, $test_editor_xss_filter($input));

    // A plain URL surrounded by parentheses.
    $input = '<img data-caption="(http://drupal.org)" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>(http://drupal.org)</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $this->assertIdentical($input, $test_editor_xss_filter($input));

    // A source being credited.
    $input = '<img data-caption="Source: Wikipedia" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>Source: Wikipedia</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $this->assertIdentical($input, $test_editor_xss_filter($input));

    // A source being credited, without a space after the colon.
    $input = '<img data-caption="Source:Wikipedia" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>Source:Wikipedia</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $this->assertIdentical($input, $test_editor_xss_filter($input));

    // A pretty crazy edge case where we have two colons.
    $input = '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>Interesting (Scope resolution operator ::)</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $this->assertIdentical($input, $test_editor_xss_filter($input));

    // An evil anchor (to ensure XSS filtering is applied to the caption also).
    $input = '<img data-caption="This is an &lt;a href=&quot;javascript:alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg" />';
    $expected = '<figure><img src="llama.jpg" /><figcaption>This is an <a href="alert();">evil</a> test…</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $expected_xss_filtered = '<img data-caption="This is an &lt;a href=&quot;alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg" />';
    $this->assertIdentical($expected_xss_filtered, $test_editor_xss_filter($input));
  }

  /**
+31 −0
Original line number Diff line number Diff line
@@ -488,6 +488,37 @@ public function testQuestionSign() {
    $this->assertTrue(stripos($value, '<?xml') === FALSE, 'HTML tag stripping evasion -- starting with a question sign (processing instructions).');
  }

  /**
   * Check that strings in HTML attributes are are correctly processed.
   *
   * @covers ::attributes
   * @dataProvider providerTestAttributes
   */
  public function testAttribute($value, $expected, $message, $allowed_tags = NULL) {
    $value = Xss::filter($value, $allowed_tags);
    $this->assertEquals($expected, $value, $message);
  }

  /**
   * Data provider for testFilterXssAdminNotNormalized().
   */
  public function providerTestAttributes() {
    return array(
      array(
        '<img src="http://example.com/foo.jpg" title="Example: title" alt="Example: alt">',
        '<img src="http://example.com/foo.jpg" title="Example: title" alt="Example: alt">',
        'Image tag with alt and title attribute',
        array('img')
      ),
      array(
        '<img src="http://example.com/foo.jpg" data-caption="Drupal 8: The best release ever.">',
        '<img src="http://example.com/foo.jpg" data-caption="Drupal 8: The best release ever.">',
        'Image tag with data attribute',
        array('img')
      ),
    );
  }

  /**
   * Checks that \Drupal\Component\Utility\Xss::filterAdmin() correctly strips unallowed tags.
   */