From f09046af0199a78f05e5aad068b60ac92cbccd74 Mon Sep 17 00:00:00 2001
From: Nathaniel Catchpole <catch@35733.no-reply.drupal.org>
Date: Wed, 30 Sep 2015 11:46:49 +0100
Subject: [PATCH] Issue #2549077 by pwolanin, Wim Leers, phenaproxima, lauriii:
 Allow the "Limit allowed HTML tags" filter to also restrict HTML attributes,
 and only allow a small whitelist of attributes by default

---
 core/lib/Drupal/Component/Utility/Html.php    |   6 +-
 .../ckeditor/js/views/ControllerView.js       |   7 +-
 .../src/Plugin/CKEditorPlugin/Internal.php    |  10 +
 .../ckeditor/src/Tests/CKEditorTest.php       |  40 +--
 core/modules/editor/js/editor.admin.js        |  33 ++-
 .../filter/filter.filter_html.admin.js        | 192 ++++++++++---
 core/modules/filter/filter.module             |  20 --
 .../migration_templates/d6_filter_format.yml  |   4 +-
 .../migration_templates/d7_filter_format.yml  |   4 +-
 .../filter/src/Plugin/Filter/FilterHtml.php   | 260 +++++++++++++++++-
 .../Plugin/migrate/process/FilterSettings.php |  55 ++++
 .../filter/src/Tests/FilterAPITest.php        |  21 +-
 .../src/Tests/FilterHtmlImageSecureTest.php   |   2 +-
 .../filter/src/Tests/FilterUnitTest.php       |  96 +++++--
 .../Migrate/d6/MigrateFilterFormatTest.php    |   2 +-
 .../Migrate/d7/MigrateFilterFormatTest.php    |   2 +-
 .../install/filter.format.filtered_html.yml   |   2 +-
 .../filter/tests/src/Unit/FilterHtmlTest.php  |  87 ++++++
 .../src/Tests/Table/d7/Filter.php             |   5 +-
 .../src/Tests/Update/FilterHtmlUpdateTest.php |  56 ++++
 core/modules/system/system.install            |  39 +++
 .../install/filter.format.basic_html.yml      |   8 +-
 .../install/filter.format.restricted_html.yml |   8 +-
 23 files changed, 811 insertions(+), 148 deletions(-)
 create mode 100644 core/modules/filter/src/Plugin/migrate/process/FilterSettings.php
 create mode 100644 core/modules/filter/tests/src/Unit/FilterHtmlTest.php
 create mode 100644 core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php

diff --git a/core/lib/Drupal/Component/Utility/Html.php b/core/lib/Drupal/Component/Utility/Html.php
index 69adbebe7960..0c811f28522f 100644
--- a/core/lib/Drupal/Component/Utility/Html.php
+++ b/core/lib/Drupal/Component/Utility/Html.php
@@ -262,9 +262,9 @@ public static function load($html) {
 <body>!html</body>
 </html>
 EOD;
-    // PHP's \DOMDocument serialization adds straw whitespace in case the markup
-    // of the wrapping document contains newlines, so ensure to remove all
-    // newlines before injecting the actual HTML body to process.
+    // PHP's \DOMDocument serialization adds extra whitespace when the markup
+    // of the wrapping document contains newlines, so ensure we remove all
+    // newlines before injecting the actual HTML body to be processed.
     $document = strtr($document, array("\n" => '', '!html' => $html));
 
     $dom = new \DOMDocument();
diff --git a/core/modules/ckeditor/js/views/ControllerView.js b/core/modules/ckeditor/js/views/ControllerView.js
index f1c4ddfbb7c4..7cfaeeed0cc8 100644
--- a/core/modules/ckeditor/js/views/ControllerView.js
+++ b/core/modules/ckeditor/js/views/ControllerView.js
@@ -221,6 +221,11 @@
       // the feature that was just added or removed. Not every feature has
       // such metadata.
       var featureName = this.model.get('buttonsToFeatures')[button.toLowerCase()];
+      // Features without an associated command do not have a 'feature name' by
+      // default, so we use the lowercased button name instead.
+      if (!featureName) {
+        featureName = button.toLowerCase();
+      }
       var featuresMetadata = this.model.get('featuresMetadata');
       if (!featuresMetadata[featureName]) {
         featuresMetadata[featureName] = new Drupal.EditorFeature(featureName);
@@ -301,7 +306,6 @@
     broadcastConfigurationChanges: function ($ckeditorToolbar) {
       var view = this;
       var hiddenEditorConfig = this.model.get('hiddenEditorConfig');
-      var featuresMetadata = this.model.get('featuresMetadata');
       var getFeatureForButton = this.getFeatureForButton.bind(this);
       var getCKEditorFeatures = this.getCKEditorFeatures.bind(this);
       $ckeditorToolbar
@@ -335,6 +339,7 @@
           getCKEditorFeatures(hiddenEditorConfig, function (features) {
             // Trigger a standardized text editor configuration event for each
             // feature that was modified by the configuration changes.
+            var featuresMetadata = view.model.get('featuresMetadata');
             for (var name in features) {
               if (features.hasOwnProperty(name)) {
                 var feature = features[name];
diff --git a/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php b/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php
index 7b4afcdd50dd..412af45350bb 100644
--- a/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php
+++ b/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php
@@ -516,6 +516,13 @@ protected function generateACFSettings(Editor $editor) {
         }
         // Tell CKEditor the tag is allowed, along with some tags.
         elseif (is_array($attributes)) {
+          // Set defaults (these will be overridden below if more specific
+          // values are present).
+          $allowed[$tag] = array(
+            'attributes' => FALSE,
+            'styles' => FALSE,
+            'classes' => FALSE,
+          );
           // Configure allowed attributes, allowed "style" attribute values and
           // allowed "class" attribute values.
           // CKEditor only allows specific values for the "class" and "style"
@@ -580,6 +587,9 @@ protected function generateACFSettings(Editor $editor) {
         }
       }
 
+      ksort($allowed);
+      ksort($disallowed);
+
       return array($allowed, $disallowed);
     }
   }
diff --git a/core/modules/ckeditor/src/Tests/CKEditorTest.php b/core/modules/ckeditor/src/Tests/CKEditorTest.php
index f84286233c6a..dd9a7bf99f70 100644
--- a/core/modules/ckeditor/src/Tests/CKEditorTest.php
+++ b/core/modules/ckeditor/src/Tests/CKEditorTest.php
@@ -54,7 +54,7 @@ protected function setUp() {
         'filter_html' => array(
           'status' => 1,
           'settings' => array(
-            'allowed_html' => '<h2> <h3> <h4> <h5> <h6> <p> <br> <strong> <a>',
+            'allowed_html' => '<h2 id> <h3> <h4> <h5> <h6> <p> <br> <strong> <a href hreflang>',
           )
         ),
       ),
@@ -96,6 +96,7 @@ function testGetJSSettings() {
     );
     $expected_config = $this->castSafeStrings($expected_config);
     ksort($expected_config);
+    ksort($expected_config['allowedContent']);
     $this->assertIdentical($expected_config, $this->castSafeStrings($this->ckeditor->getJSSettings($editor)), 'Generated JS settings are correct for default configuration.');
 
     // Customize the configuration: add button, have two contextually enabled
@@ -122,12 +123,13 @@ function testGetJSSettings() {
     // Change the allowed HTML tags; the "allowedContent" and "format_tags"
     // settings for CKEditor should automatically be updated as well.
     $format = $editor->getFilterFormat();
-    $format->filters('filter_html')->settings['allowed_html'] .= '<pre> <h3>';
+    $format->filters('filter_html')->settings['allowed_html'] .= '<pre> <h1>';
     $format->save();
 
-    $expected_config['allowedContent']['pre'] = array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE);
-    $expected_config['allowedContent']['h3'] = array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE);
-    $expected_config['format_tags'] = 'p;h2;h3;h4;h5;h6;pre';
+    $expected_config['allowedContent']['pre'] = array('attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE);
+    $expected_config['allowedContent']['h1'] = array('attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE);
+    $expected_config['format_tags'] = 'p;h1;h2;h3;h4;h5;h6;pre';
+    ksort($expected_config['allowedContent']);
     $this->assertIdentical($expected_config, $this->castSafeStrings($this->ckeditor->getJSSettings($editor)), 'Generated JS settings are correct for customized configuration.');
 
     // Disable the filter_html filter: allow *all *tags.
@@ -179,14 +181,17 @@ function testGetJSSettings() {
       ),
       'a' => array(
         'attributes' => 'href,rel,class,target',
+        'styles' => FALSE,
         'classes' => 'external',
       ),
       'span' => array(
         'attributes' => 'class,property,rel,style',
         'styles' => 'font-size',
+        'classes' => FALSE,
       ),
       '*' => array(
         'attributes' => 'class,data-*',
+        'styles' => FALSE,
         'classes' => 'is-a-hipster-llama,and-more',
       ),
       'del' => array(
@@ -206,6 +211,8 @@ function testGetJSSettings() {
     );
     $expected_config['format_tags'] = 'p';
     ksort($expected_config);
+    ksort($expected_config['allowedContent']);
+    ksort($expected_config['disallowedContent']);
     $this->assertIdentical($expected_config, $this->castSafeStrings($this->ckeditor->getJSSettings($editor)), 'Generated JS settings are correct for customized configuration.');
   }
 
@@ -421,17 +428,18 @@ protected function getDefaultInternalConfig() {
   }
 
   protected function getDefaultAllowedContentConfig() {
-    return array(
-      'h2' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'h3' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'h4' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'h5' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'h6' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'p' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'br' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'strong' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-      'a' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE),
-    );
+    return [
+      'h2' => ['attributes' => 'id', 'styles' => FALSE, 'classes' => FALSE],
+      'h3' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'h4' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'h5' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'h6' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'p' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'br' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'strong' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE],
+      'a' => ['attributes' => 'href,hreflang', 'styles' => FALSE, 'classes' => FALSE],
+      '*' => ['attributes' => 'lang,dir', 'styles' => FALSE, 'classes' => FALSE],
+    ];
   }
 
   protected function getDefaultDisallowedContentConfig() {
diff --git a/core/modules/editor/js/editor.admin.js b/core/modules/editor/js/editor.admin.js
index 14599d06ce1e..bc7ebccee770 100644
--- a/core/modules/editor/js/editor.admin.js
+++ b/core/modules/editor/js/editor.admin.js
@@ -821,17 +821,32 @@
    * @see Drupal.FilterStatus
    */
   Drupal.FilterHTMLRule = function () {
-    return {
-      // Allow or forbid tags.
+    // Allow or forbid tags.
+    this.tags = [];
+    this.allow = null;
+
+    // Apply restrictions to properties set on tags.
+    this.restrictedTags = {
       tags: [],
-      allow: null,
-      // Apply restrictions to properties set on tags.
-      restrictedTags: {
-        tags: [],
-        allowed: {attributes: [], styles: [], classes: []},
-        forbidden: {attributes: [], styles: [], classes: []}
-      }
+      allowed: {attributes: [], styles: [], classes: []},
+      forbidden: {attributes: [], styles: [], classes: []}
     };
+
+    return this;
+  };
+
+  Drupal.FilterHTMLRule.prototype.clone = function () {
+    var clone = new Drupal.FilterHTMLRule();
+    clone.tags = this.tags.slice(0);
+    clone.allow = this.allow;
+    clone.restrictedTags.tags = this.restrictedTags.tags.slice(0);
+    clone.restrictedTags.allowed.attributes = this.restrictedTags.allowed.attributes.slice(0);
+    clone.restrictedTags.allowed.styles = this.restrictedTags.allowed.styles.slice(0);
+    clone.restrictedTags.allowed.classes = this.restrictedTags.allowed.classes.slice(0);
+    clone.restrictedTags.forbidden.attributes = this.restrictedTags.forbidden.attributes.slice(0);
+    clone.restrictedTags.forbidden.styles = this.restrictedTags.forbidden.styles.slice(0);
+    clone.restrictedTags.forbidden.classes = this.restrictedTags.forbidden.classes.slice(0);
+    return clone;
   };
 
   /**
diff --git a/core/modules/filter/filter.filter_html.admin.js b/core/modules/filter/filter.filter_html.admin.js
index a52edd18c8f0..5de2d11d5896 100644
--- a/core/modules/filter/filter.filter_html.admin.js
+++ b/core/modules/filter/filter.filter_html.admin.js
@@ -23,23 +23,15 @@
        */
       getRules: function () {
         var currentValue = $('#edit-filters-filter-html-settings-allowed-html').val();
-        var rules = [];
-        var rule;
+        var rules = Drupal.behaviors.filterFilterHtmlUpdating._parseSetting(currentValue);
 
         // Build a FilterHTMLRule that reflects the hard-coded behavior that
         // strips all "style" attribute and all "on*" attributes.
-        rule = new Drupal.FilterHTMLRule();
+        var rule = new Drupal.FilterHTMLRule();
         rule.restrictedTags.tags = ['*'];
         rule.restrictedTags.forbidden.attributes = ['style', 'on*'];
         rules.push(rule);
 
-        // Build a FilterHTMLRule that reflects the current settings.
-        rule = new Drupal.FilterHTMLRule();
-        var behavior = Drupal.behaviors.filterFilterHtmlUpdating;
-        rule.allow = true;
-        rule.tags = behavior._parseSetting(currentValue);
-        rules.push(rule);
-
         return rules;
       }
     };
@@ -63,8 +55,12 @@
     // The description for the "Allowed HTML tags" field.
     $allowedHTMLDescription: null,
 
-    // The user-entered tag list of $allowedHTMLFormItem.
-    userTags: null,
+    /**
+     * The parsed, user-entered tag list of $allowedHTMLFormItem
+     *
+     * @var {Object.<string, Drupal.FilterHTMLRule>}
+     */
+    userTags: {},
 
     // The auto-created tag list thus far added.
     autoTags: null,
@@ -116,9 +112,10 @@
       this.$allowedHTMLDescription.find('.editor-update-message').remove();
 
       // If any auto-created tags: insert message and update form item.
-      if (this.autoTags.length > 0) {
+      if (!_.isEmpty(this.autoTags)) {
         this.$allowedHTMLDescription.append(Drupal.theme('filterFilterHTMLUpdateMessage', this.autoTags));
-        this.$allowedHTMLFormItem.val(this._generateSetting(this.userTags) + ' ' + this._generateSetting(this.autoTags));
+        var userTagsWithoutOverrides = _.omit(this.userTags, _.keys(this.autoTags));
+        this.$allowedHTMLFormItem.val(this._generateSetting(userTagsWithoutOverrides) + ' ' + this._generateSetting(this.autoTags));
       }
       // Restore to original state.
       else {
@@ -142,45 +139,170 @@
      *   A list of new allowed tags.
      */
     _calculateAutoAllowedTags: function (userAllowedTags, newFeatures) {
-      return _
-        .chain(newFeatures)
-        // Reduce multiple features' rules.
-        .reduce(function (memo, featureRules) {
-          // Reduce a single features' rules' required tags.
-          return _.union(memo, _.reduce(featureRules, function (memo, featureRule) {
-            return _.union(memo, featureRule.required.tags);
-          }, []));
-        }, [])
-        // All new features' required tags are "new allowed tags", except
-        // for those that are already allowed in the original allowed tags.
-        .difference(userAllowedTags)
-        .value();
+      var featureName;
+      var feature;
+      var featureRule;
+      var filterRule;
+      var tag;
+      var editorRequiredTags = {};
+      // Map the newly added Text Editor features to Drupal.FilterHtmlRule
+      // objects (to allow comparing userTags with autoTags).
+      for (featureName in newFeatures) {
+        if (newFeatures.hasOwnProperty(featureName)) {
+          feature = newFeatures[featureName];
+          for (var f = 0; f < feature.length; f++) {
+            featureRule = feature[f];
+            for (var t = 0; t < featureRule.required.tags.length; t++) {
+              tag = featureRule.required.tags[t];
+              if (!_.has(editorRequiredTags, tag)) {
+                filterRule = new Drupal.FilterHTMLRule();
+                filterRule.restrictedTags.tags = [tag];
+                // @todo Neither Drupal.FilterHtmlRule nor
+                //   Drupal.EditorFeatureHTMLRule allow for generic attribute
+                //   value restrictions, only for the "class" and "style"
+                //   attribute's values to be restricted. The filter_html filter
+                //   always disallows the "style" attribute, so we only need to
+                //   support "class" attribute value restrictions. Fix once
+                //   https://www.drupal.org/node/2567801 lands.
+                filterRule.restrictedTags.allowed.attributes = featureRule.required.attributes.slice(0);
+                filterRule.restrictedTags.allowed.classes = featureRule.required.classes.slice(0);
+                editorRequiredTags[tag] = filterRule;
+              }
+              // The tag is already allowed, add any additionally allowed
+              // attributes.
+              else {
+                filterRule = editorRequiredTags[tag];
+                filterRule.restrictedTags.allowed.attributes = _.union(filterRule.restrictedTags.allowed.attributes, featureRule.required.attributes);
+                filterRule.restrictedTags.allowed.classes = _.union(filterRule.restrictedTags.allowed.classes, featureRule.required.classes);
+              }
+            }
+          }
+        }
+      }
+
+      // Now compare userAllowedTags with editorRequiredTags, and build
+      // autoAllowedTags, which contains:
+      // - any tags in editorRequiredTags but not in userAllowedTags (i.e. tags
+      //   that are additionally going to be allowed)
+      // - any tags in editorRequiredTags that already exists in userAllowedTags
+      //   but does not allow all attributes or attribute values
+      var autoAllowedTags = {};
+      for (tag in editorRequiredTags) {
+        // If userAllowedTags does not contain a rule for this editor-required
+        // tag, then add it to the list of automatically allowed tags.
+        if (!_.has(userAllowedTags, tag)) {
+          autoAllowedTags[tag] = editorRequiredTags[tag];
+        }
+        // Otherwise, if userAllowedTags already allows this tag, then check if
+        // additional attributes and classes on this tag are required by the
+        // editor.
+        else {
+          var requiredAttributes = editorRequiredTags[tag].restrictedTags.allowed.attributes;
+          var allowedAttributes = userAllowedTags[tag].restrictedTags.allowed.attributes;
+          var needsAdditionalAttributes = requiredAttributes.length && _.difference(requiredAttributes, allowedAttributes).length;
+          var requiredClasses = editorRequiredTags[tag].restrictedTags.allowed.classes;
+          var allowedClasses = userAllowedTags[tag].restrictedTags.allowed.classes;
+          var needsAdditionalClasses = requiredClasses.length && _.difference(requiredClasses, allowedClasses).length;
+          if (needsAdditionalAttributes || needsAdditionalClasses) {
+            autoAllowedTags[tag] = userAllowedTags[tag].clone();
+          }
+          if (needsAdditionalAttributes) {
+            autoAllowedTags[tag].restrictedTags.allowed.attributes = _.union(allowedAttributes, requiredAttributes);
+          }
+          if (needsAdditionalClasses) {
+            autoAllowedTags[tag].restrictedTags.allowed.classes = _.union(allowedClasses, requiredClasses);
+          }
+        }
+      }
+
+      return autoAllowedTags;
     },
 
     /**
      * Parses the value of this.$allowedHTMLFormItem.
      *
      * @param {string} setting
-     *   The string representation of the setting. e.g. "<p> <br> <a>"
+     *   The string representation of the setting. For example:
+     *     <p class="callout"> <br> <a href hreflang>
      *
-     * @return {Array}
-     *   The array representation of the setting. e.g. ['p', 'br', 'a']
+     * @return {Object.<string, Drupal.FilterHTMLRule>}
+     *   The corresponding text filter HTML rule objects, one per tag, keyed by
+     *   tag name.
      */
     _parseSetting: function (setting) {
-      return setting.length ? setting.substring(1, setting.length - 1).split('> <') : [];
+      var node;
+      var tag;
+      var rule;
+      var attributes;
+      var attribute;
+      var allowedTags = setting.match(/(<[^>]+>)/g);
+      var sandbox = document.createElement('div');
+      var rules = {};
+      for (var t = 0; t < allowedTags.length; t++) {
+        // Let the browser do the parsing work for us.
+        sandbox.innerHTML = allowedTags[t];
+        node = sandbox.firstChild;
+        tag = node.tagName.toLowerCase();
+
+        // Build the Drupal.FilterHtmlRule object.
+        rule = new Drupal.FilterHTMLRule();
+        // We create one rule per allowed tag, so always one tag.
+        rule.restrictedTags.tags = [tag];
+        // Add the attribute restrictions.
+        attributes = node.attributes;
+        for (var i = 0; i < attributes.length; i++) {
+          attribute = attributes.item(i);
+          var attributeName = attribute.nodeName;
+          // @todo Drupal.FilterHtmlRule does not allow for generic attribute
+          //   value restrictions, only for the "class" and "style" attribute's
+          //   values. The filter_html filter always disallows the "style"
+          //   attribute, so we only need to support "class" attribute value
+          //   restrictions. Fix once https://www.drupal.org/node/2567801 lands.
+          if (attributeName === 'class') {
+            var attributeValue = attribute.textContent;
+            rule.restrictedTags.allowed.classes = attributeValue.split(' ');
+          }
+          else {
+            rule.restrictedTags.allowed.attributes.push(attributeName);
+          }
+        }
+
+        rules[tag] = rule;
+      }
+      return rules;
     },
 
     /**
      * Generates the value of this.$allowedHTMLFormItem.
      *
-     * @param {Array} tags
-     *   The array representation of the setting. e.g. ['p', 'br', 'a']
+     * @param {Object.<string, Drupal.FilterHTMLRule>} tags
+     *   The parsed representation of the setting.
      *
      * @return {Array}
      *   The string representation of the setting. e.g. "<p> <br> <a>"
      */
     _generateSetting: function (tags) {
-      return tags.length ? '<' + tags.join('> <') + '>' : '';
+      return _.reduce(tags, function (setting, rule, tag) {
+        if (setting.length) {
+          setting += ' ';
+        }
+
+        setting += '<' + tag;
+        if (rule.restrictedTags.allowed.attributes.length) {
+          setting += ' ' + rule.restrictedTags.allowed.attributes.join(' ');
+        }
+        // @todo Drupal.FilterHtmlRule does not allow for generic attribute
+        //   value restrictions, only for the "class" and "style" attribute's
+        //   values. The filter_html filter always disallows the "style"
+        //   attribute, so we only need to support "class" attribute value
+        //   restrictions. Fix once https://www.drupal.org/node/2567801 lands.
+        if (rule.restrictedTags.allowed.classes.length) {
+          setting += ' class="' + rule.restrictedTags.allowed.classes.join(' ') + '"';
+        }
+
+        setting += '>';
+        return setting;
+      }, '');
     }
 
   };
@@ -196,7 +318,7 @@
    */
   Drupal.theme.filterFilterHTMLUpdateMessage = function (tags) {
     var html = '';
-    var tagList = '<' + tags.join('> <') + '>';
+    var tagList = Drupal.behaviors.filterFilterHtmlUpdating._generateSetting(tags);
     html += '<p class="editor-update-message">';
     html += Drupal.t('Based on the text editor configuration, these tags have automatically been added: <strong>@tag-list</strong>.', {'@tag-list': tagList});
     html += '</p>';
diff --git a/core/modules/filter/filter.module b/core/modules/filter/filter.module
index 46da1f0f5238..6c4db0a56f7d 100644
--- a/core/modules/filter/filter.module
+++ b/core/modules/filter/filter.module
@@ -13,7 +13,6 @@
 use Drupal\Core\Routing\RouteMatchInterface;
 use Drupal\Core\Session\AccountInterface;
 use Drupal\Core\Template\Attribute;
-use Drupal\filter\Entity\FilterFormat;
 use Drupal\filter\FilterFormatInterface;
 
 /**
@@ -450,25 +449,6 @@ function template_preprocess_filter_tips(&$variables) {
  * Filters implemented by the Filter module.
  */
 
-/**
- * Provides filtering of input into accepted HTML.
- */
-function _filter_html($text, $filter) {
-  $allowed_tags = preg_split('/\s+|<|>/', $filter->settings['allowed_html'], -1, PREG_SPLIT_NO_EMPTY);
-  $text = Xss::filter($text, $allowed_tags);
-
-  if ($filter->settings['filter_html_nofollow']) {
-    $html_dom = Html::load($text);
-    $links = $html_dom->getElementsByTagName('a');
-    foreach ($links as $link) {
-      $link->setAttribute('rel', 'nofollow');
-    }
-    $text = Html::serialize($html_dom);
-  }
-
-  return trim($text);
-}
-
 /**
  * Converts text into hyperlinks automatically.
  *
diff --git a/core/modules/filter/migration_templates/d6_filter_format.yml b/core/modules/filter/migration_templates/d6_filter_format.yml
index df8faa5471ee..ffcbbb51b90d 100644
--- a/core/modules/filter/migration_templates/d6_filter_format.yml
+++ b/core/modules/filter/migration_templates/d6_filter_format.yml
@@ -34,7 +34,9 @@ process:
             - filter_url
             - filter_htmlcorrector
             - filter_html_escape
-      settings: settings
+      settings:
+        plugin: filter_settings
+        source: settings
       status:
         plugin: default_value
         default_value: true
diff --git a/core/modules/filter/migration_templates/d7_filter_format.yml b/core/modules/filter/migration_templates/d7_filter_format.yml
index 617611d4af67..c671b75efc91 100644
--- a/core/modules/filter/migration_templates/d7_filter_format.yml
+++ b/core/modules/filter/migration_templates/d7_filter_format.yml
@@ -19,7 +19,9 @@ process:
         source: name
         map:
           php_code: filter_null
-      settings: settings
+      settings:
+        plugin: filter_settings
+        source: settings
       status:
         plugin: default_value
         default_value: true
diff --git a/core/modules/filter/src/Plugin/Filter/FilterHtml.php b/core/modules/filter/src/Plugin/Filter/FilterHtml.php
index c282f0aeed41..38d798a7a148 100644
--- a/core/modules/filter/src/Plugin/Filter/FilterHtml.php
+++ b/core/modules/filter/src/Plugin/Filter/FilterHtml.php
@@ -7,6 +7,7 @@
 
 namespace Drupal\filter\Plugin\Filter;
 
+use Drupal\Component\Utility\Xss;
 use Drupal\Core\Form\FormStateInterface;
 use Drupal\Component\Utility\Html;
 use Drupal\filter\FilterProcessResult;
@@ -15,12 +16,16 @@
 /**
  * Provides a filter to limit allowed HTML tags.
  *
+ * The attributes in the annotation show examples of allowing all attributes
+ * by only having the attribute name, or allowing a fixed list of values, or
+ * allowing a value with a wildcard prefix.
+ *
  * @Filter(
  *   id = "filter_html",
- *   title = @Translation("Limit allowed HTML tags"),
+ *   title = @Translation("Limit allowed HTML tags and correct faulty HTML"),
  *   type = Drupal\filter\Plugin\FilterInterface::TYPE_HTML_RESTRICTOR,
  *   settings = {
- *     "allowed_html" = "<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h2> <h3> <h4> <h5> <h6>",
+ *     "allowed_html" = "<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type='1 A I'> <li> <dl> <dt> <dd> <h2 id='jump-*'> <h3 id> <h4 id> <h5 id> <h6 id>",
  *     "filter_html_help" = TRUE,
  *     "filter_html_nofollow" = FALSE
  *   },
@@ -29,6 +34,13 @@
  */
 class FilterHtml extends FilterBase {
 
+  /**
+   * The processed HTML restrictions.
+   *
+   * @var array
+   */
+  protected $restrictions;
+
   /**
    * {@inheritdoc}
    */
@@ -39,6 +51,7 @@ public function settingsForm(array $form, FormStateInterface $form_state) {
       '#default_value' => $this->settings['allowed_html'],
       '#maxlength' => 1024,
       '#description' => $this->t('A list of HTML tags that can be used. JavaScript event attributes, JavaScript URLs, and CSS are always stripped.'),
+      '#size' => 250,
       '#attached' => array(
         'library' => array(
           'filter/drupal.filter.filter_html.admin',
@@ -58,25 +71,252 @@ public function settingsForm(array $form, FormStateInterface $form_state) {
     return $form;
   }
 
+  /**
+   * {@inheritdoc}
+   */
+  public function setConfiguration(array $configuration) {
+    parent::setConfiguration($configuration);
+    // Force restrictions to be calculated again.
+    $this->restrictions = NULL;
+  }
+
   /**
    * {@inheritdoc}
    */
   public function process($text, $langcode) {
-    return new FilterProcessResult(_filter_html($text, $this));
+    $restrictions = $this->getHtmlRestrictions();
+    // Split the work into two parts. For filtering HTML tags out of the content
+    // we rely on the well-tested Xss::filter() code. Since there is no '*' tag
+    // that needs to be removed from the list.
+    unset($restrictions['allowed']['*']);
+    $text = Xss::filter($text, array_keys($restrictions['allowed']));
+    // After we've done tag filtering, we do attribute and attribute value
+    // filtering as the second part.
+    return new FilterProcessResult($this->filterAttributes($text));
+  }
+
+  /**
+   * Provides filtering of tag attributes into accepted HTML.
+   *
+   * @param string $text
+   *   The HTML text string to be filtered.
+   *
+   * @return string
+   *   Filtered HTML with attributes filtered according to the settings.
+   */
+  public function filterAttributes($text) {
+    $restrictions = $this->getHTMLRestrictions();
+    $global_allowed_attributes = array_filter($restrictions['allowed']['*']);
+    unset($restrictions['allowed']['*']);
+
+    // Apply attribute restrictions to tags.
+    $html_dom = Html::load($text);
+    $xpath = new \DOMXPath($html_dom);
+    foreach ($restrictions['allowed'] as $allowed_tag => $tag_attributes) {
+      // By default, no attributes are allowed for a tag, but due to the
+      // globally whitelisted attributes, it is impossible for a tag to actually
+      // completely disallow attributes.
+      if ($tag_attributes === FALSE) {
+        $tag_attributes = [];
+      }
+      $allowed_attributes = ['exact' => [], 'prefix' => []];
+      foreach (($global_allowed_attributes + $tag_attributes) as $name => $values) {
+        // A trailing * indicates wildcard, but it must have some prefix.
+        if (substr($name, -1) === '*' && $name[0] !== '*') {
+          $allowed_attributes['prefix'][str_replace('*', '', $name)] = $this->prepareAttributeValues($values);
+        }
+        else {
+          $allowed_attributes['exact'][$name] = $this->prepareAttributeValues($values);
+        }
+      }
+      krsort($allowed_attributes['prefix']);
+
+      // Find all matching elements that have any attributes and filter the
+      // attributes by name and value.
+      foreach ($xpath->query('//' . $allowed_tag . '[@*]') as $element) {
+        $this->filterElementAttributes($element, $allowed_attributes);
+      }
+    }
+
+    if ($this->settings['filter_html_nofollow']) {
+      $links = $html_dom->getElementsByTagName('a');
+      foreach ($links as $link) {
+        $link->setAttribute('rel', 'nofollow');
+      }
+    }
+    $text = Html::serialize($html_dom);
+
+    return trim($text);
+  }
+
+  /**
+   * Filter attributes on an element by name and value according to a whitelist.
+   *
+   * @param \DOMElement $element
+   *   The element to be processed.
+   * @param array $allowed_attributes
+   *   The attributes whitelist as an array of names and values.
+   */
+  protected function filterElementAttributes(\DOMElement $element, array $allowed_attributes) {
+    $modified_attributes = [];
+    foreach($element->attributes as $name => $attribute) {
+      // Remove attributes not in the whitelist.
+      $allowed_value = $this->findAllowedValue($allowed_attributes, $name);
+      if (empty($allowed_value)) {
+        $modified_attributes[$name] = FALSE;
+      }
+      elseif ($allowed_value !== TRUE) {
+        // Check the attribute values whitelist.
+        $attribute_values = preg_split('/\s+/', $attribute->value, -1, PREG_SPLIT_NO_EMPTY);
+        $modified_attributes[$name] = [];
+        foreach ($attribute_values as $value) {
+          if ($this->findAllowedValue($allowed_value, $value)) {
+            $modified_attributes[$name][] = $value;
+          }
+        }
+      }
+    }
+    // If the $allowed_value was TRUE for an attribute name, it does not
+    // appear in this array so the value on the DOM element is left unchanged.
+    foreach ($modified_attributes as $name => $values) {
+      if ($values) {
+        $element->setAttribute($name, implode(' ', $values));
+      }
+      else {
+        $element->removeAttribute($name);
+      }
+    }
+  }
+
+  /**
+   * Helper function to handle prefix matching.
+   *
+   * @param array $allowed
+   *   Array of allowed names and prefixes.
+   * @param string $name
+   *   The name to find or match against a prefix.
+   *
+   * @return bool|array
+   */
+  protected function findAllowedValue(array $allowed, $name) {
+    if (isset($allowed['exact'][$name])) {
+      return $allowed['exact'][$name];
+    }
+    // Handle prefix (wildcard) matches.
+    foreach ($allowed['prefix'] as $prefix => $value) {
+      if (strpos($name, $prefix) === 0) {
+        return $value;
+      }
+    }
+    return FALSE;
+  }
+
+  /**
+   * Helper function to prepare attribute values including wildcards.
+   *
+   * Splits the values into two lists, one for values that must match exactly
+   * and the other for values that are wildcard prefixes.
+   *
+   * @param bool|array $attribute_values
+   *   TRUE, FALSE, or an array of allowed values.
+   *
+   * @return bool|array
+   */
+  protected function prepareAttributeValues($attribute_values) {
+    if ($attribute_values === TRUE || $attribute_values === FALSE) {
+      return $attribute_values;
+    }
+    $result = ['exact' => [], 'prefix' => []];
+    foreach ($attribute_values as $name => $allowed) {
+      // A trailing * indicates wildcard, but it must have some prefix.
+      if (substr($name, -1) === '*' && $name[0] !== '*') {
+        $result['prefix'][str_replace('*', '', $name)] = $allowed;
+      }
+      else {
+        $result['exact'][$name] = $allowed;
+      }
+    }
+    krsort($result['prefix']);
+    return $result;
   }
 
   /**
    * {@inheritdoc}
    */
   public function getHTMLRestrictions() {
-    $restrictions = array('allowed' => array());
-    $tags = preg_split('/\s+|<|>/', $this->settings['allowed_html'], -1, PREG_SPLIT_NO_EMPTY);
-    // List the allowed HTML tags.
-    foreach ($tags as $tag) {
-      $restrictions['allowed'][$tag] = TRUE;
+    if ($this->restrictions) {
+      return $this->restrictions;
     }
-    // The 'style' and 'on*' ('onClick' etc.) attributes are always forbidden.
-    $restrictions['allowed']['*'] = array('style' => FALSE, 'on*' => FALSE);
+
+    // Parse the allowed HTML setting, and gradually make the whitelist more
+    // specific.
+    $restrictions = ['allowed' => []];
+
+    // Make all the tags self-closing, so they will be parsed into direct
+    // children of the body tag in the DomDocument.
+    $html = str_replace('>', ' />', $this->settings['allowed_html']);
+    // Protect any trailing * characters in attribute names, since DomDocument
+    // strips them as invalid.
+    $star_protector = '__zqh6vxfbk3cg__';
+    $html = str_replace('*', $star_protector, $html);
+    $body_child_nodes = Html::load($html)->getElementsByTagName('body')->item(0)->childNodes;
+
+    foreach ($body_child_nodes as $node) {
+      if ($node->nodeType !== XML_ELEMENT_NODE) {
+        // Skip the empty text nodes inside tags.
+        continue;
+      }
+      $tag = $node->tagName;
+      if ($node->hasAttributes()) {
+        // Mark the tag as allowed, assigning TRUE for each attribute name if
+        // all values are allowed, or an array of specific allowed values.
+        $restrictions['allowed'][$tag] = [];
+        // Iterate over any attributes, and mark them as allowed.
+        foreach ($node->attributes as $name => $attribute) {
+          // Put back any trailing * on wildcard attribute name.
+          $name = str_replace($star_protector, '*', $name);
+          if ($attribute->value === '') {
+            // If the value is the empty string all values are allowed.
+            $restrictions['allowed'][$tag][$name] = TRUE;
+          }
+          else {
+            // A non-empty attribute value is assigned, mark each of the
+            // specified attribute values as allowed.
+            foreach (preg_split('/\s+/', $attribute->value, -1, PREG_SPLIT_NO_EMPTY) as $value) {
+              // Put back any trailing * on wildcard attribute value.
+              $value = str_replace($star_protector, '*', $value);
+              $restrictions['allowed'][$tag][$name][$value] = TRUE;
+            }
+          }
+        }
+      }
+      else {
+        // Mark the tag as allowed, but with no attributes allowed.
+        $restrictions['allowed'][$tag] = FALSE;
+      }
+    }
+
+    // The 'style' and 'on*' ('onClick' etc.) attributes are always forbidden,
+    // and are removed by Xss::filter().
+    // The 'lang', and 'dir' attributes apply to all elements and are always
+    // allowed. The value whitelist for the 'dir' attribute is enforced by
+    // self::filterAttributes().  Note that those two attributes are in the
+    // short list of globally usable attributes in HTML5. They are always
+    // allowed since the correct values of lang and dir may only be known to
+    // the content author. Of the other global attributes, they are not usually
+    // added by hand to content, and especially the class attribute can have
+    // undesired visual effects by allowing content authors to apply any
+    // available style, so specific values should be explicitly whitelisted.
+    // @see http://www.w3.org/TR/html5/dom.html#global-attributes
+    $restrictions['allowed']['*'] = [
+      'style' => FALSE,
+      'on*' => FALSE,
+      'lang' => TRUE,
+      'dir' => ['ltr' => TRUE, 'rtl' => TRUE],
+    ];
+    // Save this calculated result for re-use.
+    $this->restrictions = $restrictions;
+
     return $restrictions;
   }
 
diff --git a/core/modules/filter/src/Plugin/migrate/process/FilterSettings.php b/core/modules/filter/src/Plugin/migrate/process/FilterSettings.php
new file mode 100644
index 000000000000..4a7e45b5076c
--- /dev/null
+++ b/core/modules/filter/src/Plugin/migrate/process/FilterSettings.php
@@ -0,0 +1,55 @@
+<?php
+
+/**
+ * @file
+ * Contains \Drupal\migrate\Plugin\migrate\process\FilterSettings.
+ */
+
+namespace Drupal\filter\Plugin\migrate\process;
+
+use Drupal\migrate\ProcessPluginBase;
+use Drupal\migrate\MigrateExecutableInterface;
+use Drupal\migrate\Row;
+
+/**
+ * Adds the default allowed attributes to filter_html's allowed_html setting.
+ *
+ * E.g. map '<a>' to '<a href hreflang dir>'.
+ *
+ * @MigrateProcessPlugin(
+ *   id = "filter_settings",
+ *   handle_multiples = TRUE
+ * )
+ */
+class FilterSettings extends ProcessPluginBase {
+
+  /**
+   * Default attributes for migrating filter_html's 'allowed_html' setting.
+   *
+   * @var string[]
+   */
+  protected $allowedHtmlDefaultAttributes = [
+    '<a>' => '<a href hreflang>',
+    '<blockquote>' => '<blockquote cite>',
+    '<ol>' => '<ol start type>',
+    '<ul>' => '<ul type>',
+    '<img>' => '<img src alt height width>',
+    '<h2>' => '<h2 id>',
+    '<h3>' => '<h3 id>',
+    '<h4>' => '<h4 id>',
+    '<h5>' => '<h5 id>',
+    '<h6>' => '<h6 id>',
+  ];
+
+  /**
+   * {@inheritdoc}
+   */
+  public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) {
+    // Only the filter_html filter's settings have a changed format.
+    if ($row->getDestinationProperty('id') === 'filter_html') {
+      $value['allowed_html'] = str_replace(array_keys($this->allowedHtmlDefaultAttributes), array_values($this->allowedHtmlDefaultAttributes), $value['allowed_html']);
+    }
+    return $value;
+  }
+
+}
diff --git a/core/modules/filter/src/Tests/FilterAPITest.php b/core/modules/filter/src/Tests/FilterAPITest.php
index 263b31ee48d6..b8a93f1ddf9a 100644
--- a/core/modules/filter/src/Tests/FilterAPITest.php
+++ b/core/modules/filter/src/Tests/FilterAPITest.php
@@ -108,7 +108,15 @@ function testFilterFormatAPI() {
     $filtered_html_format = entity_load('filter_format', 'filtered_html');
     $this->assertIdentical(
       $filtered_html_format->getHtmlRestrictions(),
-      array('allowed' => array('p' => TRUE, 'br' => TRUE, 'strong' => TRUE, 'a' => TRUE, '*' => array('style' => FALSE, 'on*' => FALSE))),
+      array(
+        'allowed' => array(
+          'p' => FALSE,
+          'br' => FALSE,
+          'strong' => FALSE,
+          'a' => array('href' => TRUE, 'hreflang' => TRUE),
+          '*' => array('style' => FALSE, 'on*' => FALSE, 'lang' => TRUE, 'dir' => array('ltr' => TRUE, 'rtl' => TRUE)),
+        ),
+      ),
       'FilterFormatInterface::getHtmlRestrictions() works as expected for the filtered_html format.'
     );
     $this->assertIdentical(
@@ -164,7 +172,7 @@ function testFilterFormatAPI() {
         'filter_html' => array(
           'status' => 1,
           'settings' => array(
-            'allowed_html' => '<p> <br> <a> <strong>',
+            'allowed_html' => '<p> <br> <a href> <strong>',
           ),
         ),
         'filter_test_restrict_tags_and_attributes' => array(
@@ -185,7 +193,14 @@ function testFilterFormatAPI() {
     $very_restricted_html_format->save();
     $this->assertIdentical(
       $very_restricted_html_format->getHtmlRestrictions(),
-      array('allowed' => array('p' => TRUE, 'br' => FALSE, 'a' => array('href' => TRUE), '*' => array('style' => FALSE, 'on*' => FALSE))),
+      array(
+        'allowed' => array(
+          'p' => FALSE,
+          'br' => FALSE,
+          'a' => array('href' => TRUE),
+          '*' => array('style' => FALSE, 'on*' => FALSE, 'lang' => TRUE, 'dir' => array('ltr' => TRUE, 'rtl' => TRUE)),
+        ),
+      ),
       'FilterFormatInterface::getHtmlRestrictions() works as expected for the very_restricted_html format.'
     );
     $this->assertIdentical(
diff --git a/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php b/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php
index e320af3e9b47..9d96c5f3b0f9 100644
--- a/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php
+++ b/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php
@@ -45,7 +45,7 @@ protected function setUp() {
         'filter_html' => array(
           'status' => 1,
           'settings' => array(
-            'allowed_html' => '<img> <a>',
+            'allowed_html' => '<img src testattribute> <a>',
           ),
         ),
         'filter_autop' => array(
diff --git a/core/modules/filter/src/Tests/FilterUnitTest.php b/core/modules/filter/src/Tests/FilterUnitTest.php
index f441342dff42..1e3d3337f897 100644
--- a/core/modules/filter/src/Tests/FilterUnitTest.php
+++ b/core/modules/filter/src/Tests/FilterUnitTest.php
@@ -9,6 +9,7 @@
 
 use Drupal\Component\Utility\Html;
 use Drupal\Component\Utility\SafeMarkup;
+use Drupal\Core\Language\Language;
 use Drupal\Core\Render\RenderContext;
 use Drupal\editor\EditorXssFilter\Standard;
 use Drupal\filter\Entity\FilterFormat;
@@ -190,7 +191,7 @@ function testCaptionFilter() {
     $html_filter = $this->filters['filter_html'];
     $html_filter->setConfiguration(array(
       'settings' => array(
-        'allowed_html' => '<img>',
+        'allowed_html' => '<img src data-align data-caption>',
         'filter_html_help' => 1,
         'filter_html_nofollow' => 0,
       )
@@ -399,7 +400,7 @@ function testHtmlFilter() {
     $filter = $this->filters['filter_html'];
     $filter->setConfiguration(array(
       'settings' => array(
-        'allowed_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
+        'allowed_html' => '<a> <p> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
         'filter_html_help' => 1,
         'filter_html_nofollow' => 0,
       )
@@ -407,41 +408,78 @@ function testHtmlFilter() {
 
     // HTML filter is not able to secure some tags, these should never be
     // allowed.
-    $f = _filter_html('<script />', $filter);
-    $this->assertNoNormalized($f, 'script', 'HTML filter should always remove script tags.');
+    $f = (string) $filter->process('<script />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '', 'HTML filter should remove script tags.');
 
-    $f = _filter_html('<iframe />', $filter);
-    $this->assertNoNormalized($f, 'iframe', 'HTML filter should always remove iframe tags.');
+    $f = (string) $filter->process('<iframe />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '', 'HTML filter should remove iframe tags.');
 
-    $f = _filter_html('<object />', $filter);
-    $this->assertNoNormalized($f, 'object', 'HTML filter should always remove object tags.');
+    $f = (string) $filter->process('<object />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '', 'HTML filter should remove object tags.');
 
-    $f = _filter_html('<style />', $filter);
-    $this->assertNoNormalized($f, 'style', 'HTML filter should always remove style tags.');
+    $f = (string) $filter->process('<style />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '', 'HTML filter should remove style tags.');
 
     // Some tags make CSRF attacks easier, let the user take the risk herself.
-    $f = _filter_html('<img />', $filter);
-    $this->assertNoNormalized($f, 'img', 'HTML filter should remove img tags on default.');
+    $f = (string) $filter->process('<img />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '', 'HTML filter should remove img tags by default.');
 
-    $f = _filter_html('<input />', $filter);
-    $this->assertNoNormalized($f, 'img', 'HTML filter should remove input tags on default.');
+    $f = (string) $filter->process('<input />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '', 'HTML filter should remove input tags by default.');
 
     // Filtering content of some attributes is infeasible, these shouldn't be
     // allowed too.
-    $f = _filter_html('<p style="display: none;" />', $filter);
-    $this->assertNoNormalized($f, 'style', 'HTML filter should remove style attribute on default.');
+    $f = (string) $filter->process('<p style="display: none;" />', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNoNormalized($f, 'style', 'HTML filter should remove style attributes.');
+    $this->assertIdentical($f, '<p></p>');
 
-    $f = _filter_html('<p onerror="alert(0);" />', $filter);
-    $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove on* attributes on default.');
+    $f = (string) $filter->process('<p onerror="alert(0);"></p>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove on* attributes.');
+    $this->assertIdentical($f, '<p></p>');
 
-    $f = _filter_html('<code onerror>&nbsp;</code>', $filter);
-    $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes on default.');
+    $f = (string) $filter->process('<code onerror>&nbsp;</code>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes.');
+    // Note - this string has a decoded &nbsp; character.
+    $this->assertIdentical($f, '<code> </code>');
 
-    $f = _filter_html('<br>', $filter);
-    $this->assertNormalized($f, '<br>', 'HTML filter should allow line breaks.');
+    $f = (string) $filter->process('<br>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNormalized($f, '<br />', 'HTML filter should allow line breaks.');
 
-    $f = _filter_html('<br />', $filter);
+    $f = (string) $filter->process('<br />', Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNormalized($f, '<br />', 'HTML filter should allow self-closing line breaks.');
+
+    // All attributes of whitelisted tags are stripped by default.
+    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNormalized($f, '<a>link</a>', 'HTML filter should remove attributes that are not explicitly allowed.');
+
+    // Now whitelist the "llama" attribute on <a>.
+    $filter->setConfiguration(array(
+      'settings' => array(
+        'allowed_html' => '<a href llama> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
+        'filter_html_help' => 1,
+        'filter_html_nofollow' => 0,
+      )
+    ));
+    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNormalized($f, '<a llama="awesome">link</a>', 'HTML filter keeps explicitly allowed attributes, and removes attributes that are not explicitly allowed.');
+
+    // Restrict the whitelisted "llama" attribute on <a> to only allow the value
+    // "majestical", or "epic".
+    $filter->setConfiguration(array(
+      'settings' => array(
+        'allowed_html' => '<a href llama="majestical epic"> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
+        'filter_html_help' => 1,
+        'filter_html_nofollow' => 0,
+      )
+    ));
+    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '<a>link</a>', 'HTML filter removes allowed attributes that do not have an explicitly allowed value.');
+    $f = (string) $filter->process('<a kitten="cute" llama="majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '<a llama="majestical">link</a>', 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.');
+    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertNormalized($f, '<a>link</a>', 'HTML filter removes allowed attributes that have a not explicitly allowed value.');
+    $f = (string) $filter->process('<a href="/beautiful-animals" kitten="cute" llama="epic majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED);
+    $this->assertIdentical($f, '<a href="/beautiful-animals" llama="epic majestical">link</a>', 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.');
   }
 
   /**
@@ -452,7 +490,7 @@ function testNoFollowFilter() {
     $filter = $this->filters['filter_html'];
     $filter->setConfiguration(array(
       'settings' => array(
-        'allowed_html' => '<a>',
+        'allowed_html' => '<a href>',
         'filter_html_help' => 1,
         'filter_html_nofollow' => 1,
       )
@@ -460,19 +498,19 @@ function testNoFollowFilter() {
 
     // Test if the rel="nofollow" attribute is added, even if we try to prevent
     // it.
-    $f = _filter_html('<a href="http://www.example.com/">text</a>', $filter);
+    $f = (string) $filter->process('<a href="http://www.example.com/">text</a>', Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent -- no evasion.');
 
-    $f = _filter_html('<A href="http://www.example.com/">text</a>', $filter);
+    $f = (string) $filter->process('<A href="http://www.example.com/">text</a>', Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- capital A.');
 
-    $f = _filter_html("<a/href=\"http://www.example.com/\">text</a>", $filter);
+    $f = (string) $filter->process("<a/href=\"http://www.example.com/\">text</a>", Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- non whitespace character after tag name.');
 
-    $f = _filter_html("<\0a\0 href=\"http://www.example.com/\">text</a>", $filter);
+    $f = (string) $filter->process("<\0a\0 href=\"http://www.example.com/\">text</a>", Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- some nulls.');
 
-    $f = _filter_html('<a href="http://www.example.com/" rel="follow">text</a>', $filter);
+    $f = (string) $filter->process('<a href="http://www.example.com/" rel="follow">text</a>', Language::LANGCODE_NOT_SPECIFIED);
     $this->assertNoNormalized($f, 'rel="follow"', 'Spam deterrent evasion -- with rel set - rel="follow" removed.');
     $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- with rel set - rel="nofollow" added.');
   }
diff --git a/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php b/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php
index e070632b5684..2a3de4eba835 100644
--- a/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php
+++ b/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php
@@ -44,7 +44,7 @@ public function testFilterFormat() {
     $this->assertFalse(isset($filters['filter_html_image_secure']));
 
     // Check variables migrated into filter.
-    $this->assertIdentical('<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>', $filters['filter_html']['settings']['allowed_html']);
+    $this->assertIdentical('<a href hreflang> <em> <strong> <cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd>', $filters['filter_html']['settings']['allowed_html']);
     $this->assertIdentical(TRUE, $filters['filter_html']['settings']['filter_html_help']);
     $this->assertIdentical(FALSE, $filters['filter_html']['settings']['filter_html_nofollow']);
     $this->assertIdentical(72, $filters['filter_url']['settings']['filter_url_length']);
diff --git a/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php b/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php
index 0b84b8be5484..b96b99a2e46e 100644
--- a/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php
+++ b/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php
@@ -68,7 +68,7 @@ public function testFilterFormat() {
     /** @var \Drupal\filter\FilterFormatInterface $format */
     $format = FilterFormat::load('filtered_html');
     $config = $format->filters('filter_html')->getConfiguration();
-    $this->assertIdentical('<div> <span> <ul> <li>', $config['settings']['allowed_html']);
+    $this->assertIdentical('<div> <span> <ul type> <li> <ol start type> <a href hreflang> <img src alt height width>', $config['settings']['allowed_html']);
     $config = $format->filters('filter_url')->getConfiguration();
     $this->assertIdentical(128, $config['settings']['filter_url_length']);
 
diff --git a/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml b/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml
index a1a38d786006..ba403db3bf6d 100644
--- a/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml
+++ b/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml
@@ -12,4 +12,4 @@ filters:
     provider: filter
     status: true
     settings:
-      allowed_html: '<p> <br> <strong> <a>'
+      allowed_html: '<p> <br> <strong> <a href hreflang>'
diff --git a/core/modules/filter/tests/src/Unit/FilterHtmlTest.php b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php
new file mode 100644
index 000000000000..8c07f4b23765
--- /dev/null
+++ b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php
@@ -0,0 +1,87 @@
+<?php
+
+/**
+ * @file
+ * Contains \Drupal\Tests\filter\Unit\FilterHtmlTest.
+ */
+
+namespace Drupal\Tests\filter\Unit;
+
+use Drupal\Tests\UnitTestCase;
+use Drupal\filter\Plugin\Filter\FilterHtml;
+
+/**
+ * @coversDefaultClass \Drupal\filter\Plugin\Filter\FilterHtml
+ * @group filter
+ */
+class FilterHtmlTest extends UnitTestCase {
+
+  /**
+   * @var \Drupal\filter\Plugin\Filter\FilterHtml
+   */
+  protected $filter;
+
+  /**
+   * {@inheritdoc}
+   */
+  protected function setUp() {
+    parent::setUp();
+    $configuration['settings'] = [
+      'allowed_html' => '<a href> <p> <em> <strong> <cite> <blockquote> <code class="pretty boring align-*"> <ul alpaca-*="wooly-* strong"> <ol llama-*> <li> <dl> <dt> <dd> <br> <h3 id>',
+      'filter_html_help' => 1,
+      'filter_html_nofollow' => 0,
+    ];
+    $this->filter = new FilterHtml($configuration, 'filter_html', ['provider' => 'test']);
+    $this->filter->setStringTranslation($this->getStringTranslationStub());
+  }
+
+  /**
+   * @covers ::filterAttributes
+   *
+   * @dataProvider providerFilterAttributes
+   *
+   * @param string $html
+   *   Input HTML.
+   * @param array $expected
+   *   The expected output string.
+   */
+  public function testfilterAttributes($html, $expected) {
+    $this->assertSame($expected, $this->filter->filterAttributes($html));
+  }
+
+  /**
+   * Provides data for testfilterAttributes.
+   *
+   * @return array
+   *   An array of test data.
+   */
+  public function providerFilterAttributes() {
+    return [
+      ['<a href="/blog" title="Blog">Blog</a>', '<a href="/blog">Blog</a>'],
+      ['<p dir="rtl" />', '<p dir="rtl"></p>'],
+      ['<p dir="bogus" />', '<p></p>'],
+      ['<p id="first" />', '<p></p>'],
+      // The addition of xml:lang isn't especially desired, but is still valid
+      // HTML5. See https://www.drupal.org/node/1333730.
+      ['<p id="first" lang="en">text</p>', '<p lang="en" xml:lang="en">text</p>'],
+      ['<p style="display: none;" />', '<p></p>'],
+      ['<code class="pretty invalid">foreach ($a as $b) {}</code>', '<code class="pretty">foreach ($a as $b) {}</code>'],
+      ['<code class="boring pretty">foreach ($a as $b) {}</code>', '<code class="boring pretty">foreach ($a as $b) {}</code>'],
+      ['<code class="boring    pretty ">foreach ($a as $b) {}</code>', '<code class="boring pretty">foreach ($a as $b) {}</code>'],
+      ['<code class="invalid alpaca">foreach ($a as $b) {}</code>', '<code>foreach ($a as $b) {}</code>'],
+      ['<h3 class="big">a heading</h3>', '<h3>a heading</h3>'],
+      ['<h3 id="first">a heading</h3>', '<h3 id="first">a heading</h3>'],
+      // Wilcard value. Case matters, so upper case doesn't match.
+      ['<code class="align-left bold">foreach ($a as $b) {}</code>', '<code class="align-left">foreach ($a as $b) {}</code>'],
+      ['<code class="align-right ">foreach ($a as $b) {}</code>', '<code class="align-right">foreach ($a as $b) {}</code>'],
+      ['<code class="Align-right ">foreach ($a as $b) {}</code>', '<code>foreach ($a as $b) {}</code>'],
+      // Wilcard name, case is ignored.
+      ['<ol style="display: none;" llama-wim="noble majestic"></ol>', '<ol llama-wim="noble majestic"></ol>'],
+      ['<ol style="display: none;" LlamA-Wim="majestic"></ol>', '<ol llama-wim="majestic"></ol>'],
+      ['<ol style="display: none;" llama-="noble majestic"></ol>', '<ol llama-="noble majestic"></ol>'],
+      // Both wildcard names and values.
+      ['<ul style="display: none;" alpaca-wool="wooly-warm strong majestic"></ul>', '<ul alpaca-wool="wooly-warm strong"></ul>'],
+    ];
+  }
+
+}
diff --git a/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php b/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php
index 03661b3b2e97..d8b660d53dea 100644
--- a/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php
+++ b/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php
@@ -119,7 +119,7 @@ public function load() {
       'name' => 'filter_html',
       'weight' => '1',
       'status' => '1',
-      'settings' => 'a:3:{s:12:"allowed_html";s:22:"<div> <span> <ul> <li>";s:16:"filter_html_help";i:1;s:20:"filter_html_nofollow";i:0;}',
+      'settings' => 'a:3:{s:12:"allowed_html";s:37:"<div> <span> <ul> <li> <ol> <a> <img>";s:16:"filter_html_help";i:1;s:20:"filter_html_nofollow";i:0;}',
     ))->values(array(
       'format' => 'filtered_html',
       'module' => 'filter',
@@ -257,4 +257,5 @@ public function load() {
   }
 
 }
-#7f8ea668d5deed8ce2d6c782dad2bcd5
+
+#e0bd772d07df589752fa9372705aaa9d
diff --git a/core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php b/core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php
new file mode 100644
index 000000000000..c9585920fa61
--- /dev/null
+++ b/core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php
@@ -0,0 +1,56 @@
+<?php
+
+/**
+ * @file
+ * Contains \Drupal\system\Tests\Update\FilterHtmlUpdateTest.
+ */
+
+namespace Drupal\system\Tests\Update;
+
+use Drupal\filter\Entity\FilterFormat;
+use Drupal\system\Tests\Update\UpdatePathTestBase;
+
+/**
+ * Tests that the allowed html configutations are updated with attributes.
+ *
+ * @group Entity
+ */
+class FilterHtmlUpdateTest extends UpdatePathTestBase {
+
+  /**
+   * {@inheritdoc}
+   */
+  public function setDatabaseDumpFiles() {
+    $this->databaseDumpFiles = [
+      __DIR__ . '/../../../../system/tests/fixtures/update/drupal-8.bare.standard.php.gz',
+    ];
+  }
+
+  /**
+   * Tests system_update_8009().
+   */
+  public function testAllowedHtmlUpdate() {
+    // Make sure we have the expected values before the update.
+    $filters_before = [
+      'basic_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h4> <h5> <h6> <p> <br> <span> <img>',
+      'restricted_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h4> <h5> <h6>',
+    ];
+    foreach ($filters_before as $name => $before) {
+      $config = FilterFormat::load($name)->toArray();
+      $this->assertIdentical($before, $config['filters']['filter_html']['settings']['allowed_html']);
+    }
+
+    $this->runUpdates();
+
+    // Make sure we have the expected values after the update.
+    $filters_after = [
+      'basic_html' => '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h4 id> <h5 id> <h6 id> <p> <br> <span> <img src alt height width data-align data-caption>',
+      'restricted_html' => '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h4 id> <h5 id> <h6 id>',
+    ];
+    foreach ($filters_after as $name => $after) {
+      $config = FilterFormat::load($name)->toArray();
+      $this->assertIdentical($after, $config['filters']['filter_html']['settings']['allowed_html']);
+    }
+  }
+
+}
diff --git a/core/modules/system/system.install b/core/modules/system/system.install
index f391829669de..cb758fe82977 100644
--- a/core/modules/system/system.install
+++ b/core/modules/system/system.install
@@ -1662,6 +1662,45 @@ function system_update_8008() {
   }
 }
 
+/**
+ * Add allowed attributes to existing html filters.
+ */
+function system_update_8009() {
+  $default_mapping = [
+    '<a>' => '<a href hreflang>',
+    '<blockquote>' => '<blockquote cite>',
+    '<ol>' => '<ol start type>',
+    '<ul>' => '<ul type>',
+    '<img>' => '<img src alt height width>',
+    '<h2>' => '<h2 id>',
+    '<h3>' => '<h3 id>',
+    '<h4>' => '<h4 id>',
+    '<h5>' => '<h5 id>',
+    '<h6>' => '<h6 id>',
+  ];
+  $config_factory = \Drupal::configFactory();
+  foreach ($config_factory->listAll('filter.format') as $name) {
+    $allowed_html_mapping = $default_mapping;
+    $config = $config_factory->getEditable($name);
+    // The image alignment filter needs the data-align attribute.
+    $align_enabled = $config->get('filters.filter_align.status');
+    if ($align_enabled) {
+      $allowed_html_mapping['<img>'] = str_replace('>', ' data-align>', $allowed_html_mapping['<img>']);
+    }
+    // The image caption filter needs the data-caption attribute.
+    $caption_enabled = $config->get('filters.filter_caption.status');
+    if ($caption_enabled) {
+      $allowed_html_mapping['<img>'] = str_replace('>', ' data-caption>', $allowed_html_mapping['<img>']);
+    }
+    $allowed_html = $config->get('filters.filter_html.settings.allowed_html');
+    if (!empty($allowed_html)) {
+      $allowed_html = strtr($allowed_html, $allowed_html_mapping);
+      $config->set('filters.filter_html.settings.allowed_html', $allowed_html);
+      $config->save();
+    }
+  }
+}
+
 /**
  * @} End of "addtogroup updates-8.0.0-beta".
  */
diff --git a/core/profiles/standard/config/install/filter.format.basic_html.yml b/core/profiles/standard/config/install/filter.format.basic_html.yml
index a2562d26b54c..92224c23cf9d 100644
--- a/core/profiles/standard/config/install/filter.format.basic_html.yml
+++ b/core/profiles/standard/config/install/filter.format.basic_html.yml
@@ -15,7 +15,7 @@ filters:
     status: true
     weight: -10
     settings:
-      allowed_html: '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h2> <h3> <h4> <h5> <h6> <p> <br> <span> <img>'
+      allowed_html: '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h2 id> <h3 id> <h4 id> <h5 id> <h6 id> <p> <br> <span> <img src alt height width data-entity-type data-entity-uuid data-align data-caption>'
       filter_html_help: false
       filter_html_nofollow: false
   filter_align:
@@ -36,12 +36,6 @@ filters:
     status: true
     weight: 9
     settings: {  }
-  filter_htmlcorrector:
-    id: filter_htmlcorrector
-    provider: filter
-    status: true
-    weight: 10
-    settings: {  }
   editor_file_reference:
     id: editor_file_reference
     provider: editor
diff --git a/core/profiles/standard/config/install/filter.format.restricted_html.yml b/core/profiles/standard/config/install/filter.format.restricted_html.yml
index cbd40c067ec9..323d07774b7f 100644
--- a/core/profiles/standard/config/install/filter.format.restricted_html.yml
+++ b/core/profiles/standard/config/install/filter.format.restricted_html.yml
@@ -13,7 +13,7 @@ filters:
     status: true
     weight: -10
     settings:
-      allowed_html: '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h2> <h3> <h4> <h5> <h6>'
+      allowed_html: '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h2 id> <h3 id> <h4 id> <h5 id> <h6 id>'
       filter_html_help: true
       filter_html_nofollow: false
   filter_autop:
@@ -29,9 +29,3 @@ filters:
     weight: 0
     settings:
       filter_url_length: 72
-  filter_htmlcorrector:
-    id: filter_htmlcorrector
-    provider: filter
-    status: true
-    weight: 10
-    settings: {  }
-- 
GitLab