From f09046af0199a78f05e5aad068b60ac92cbccd74 Mon Sep 17 00:00:00 2001 From: Nathaniel Catchpole <catch@35733.no-reply.drupal.org> Date: Wed, 30 Sep 2015 11:46:49 +0100 Subject: [PATCH] Issue #2549077 by pwolanin, Wim Leers, phenaproxima, lauriii: Allow the "Limit allowed HTML tags" filter to also restrict HTML attributes, and only allow a small whitelist of attributes by default --- core/lib/Drupal/Component/Utility/Html.php | 6 +- .../ckeditor/js/views/ControllerView.js | 7 +- .../src/Plugin/CKEditorPlugin/Internal.php | 10 + .../ckeditor/src/Tests/CKEditorTest.php | 40 +-- core/modules/editor/js/editor.admin.js | 33 ++- .../filter/filter.filter_html.admin.js | 192 ++++++++++--- core/modules/filter/filter.module | 20 -- .../migration_templates/d6_filter_format.yml | 4 +- .../migration_templates/d7_filter_format.yml | 4 +- .../filter/src/Plugin/Filter/FilterHtml.php | 260 +++++++++++++++++- .../Plugin/migrate/process/FilterSettings.php | 55 ++++ .../filter/src/Tests/FilterAPITest.php | 21 +- .../src/Tests/FilterHtmlImageSecureTest.php | 2 +- .../filter/src/Tests/FilterUnitTest.php | 96 +++++-- .../Migrate/d6/MigrateFilterFormatTest.php | 2 +- .../Migrate/d7/MigrateFilterFormatTest.php | 2 +- .../install/filter.format.filtered_html.yml | 2 +- .../filter/tests/src/Unit/FilterHtmlTest.php | 87 ++++++ .../src/Tests/Table/d7/Filter.php | 5 +- .../src/Tests/Update/FilterHtmlUpdateTest.php | 56 ++++ core/modules/system/system.install | 39 +++ .../install/filter.format.basic_html.yml | 8 +- .../install/filter.format.restricted_html.yml | 8 +- 23 files changed, 811 insertions(+), 148 deletions(-) create mode 100644 core/modules/filter/src/Plugin/migrate/process/FilterSettings.php create mode 100644 core/modules/filter/tests/src/Unit/FilterHtmlTest.php create mode 100644 core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php diff --git a/core/lib/Drupal/Component/Utility/Html.php b/core/lib/Drupal/Component/Utility/Html.php index 69adbebe7960..0c811f28522f 100644 --- a/core/lib/Drupal/Component/Utility/Html.php +++ b/core/lib/Drupal/Component/Utility/Html.php @@ -262,9 +262,9 @@ public static function load($html) { <body>!html</body> </html> EOD; - // PHP's \DOMDocument serialization adds straw whitespace in case the markup - // of the wrapping document contains newlines, so ensure to remove all - // newlines before injecting the actual HTML body to process. + // PHP's \DOMDocument serialization adds extra whitespace when the markup + // of the wrapping document contains newlines, so ensure we remove all + // newlines before injecting the actual HTML body to be processed. $document = strtr($document, array("\n" => '', '!html' => $html)); $dom = new \DOMDocument(); diff --git a/core/modules/ckeditor/js/views/ControllerView.js b/core/modules/ckeditor/js/views/ControllerView.js index f1c4ddfbb7c4..7cfaeeed0cc8 100644 --- a/core/modules/ckeditor/js/views/ControllerView.js +++ b/core/modules/ckeditor/js/views/ControllerView.js @@ -221,6 +221,11 @@ // the feature that was just added or removed. Not every feature has // such metadata. var featureName = this.model.get('buttonsToFeatures')[button.toLowerCase()]; + // Features without an associated command do not have a 'feature name' by + // default, so we use the lowercased button name instead. + if (!featureName) { + featureName = button.toLowerCase(); + } var featuresMetadata = this.model.get('featuresMetadata'); if (!featuresMetadata[featureName]) { featuresMetadata[featureName] = new Drupal.EditorFeature(featureName); @@ -301,7 +306,6 @@ broadcastConfigurationChanges: function ($ckeditorToolbar) { var view = this; var hiddenEditorConfig = this.model.get('hiddenEditorConfig'); - var featuresMetadata = this.model.get('featuresMetadata'); var getFeatureForButton = this.getFeatureForButton.bind(this); var getCKEditorFeatures = this.getCKEditorFeatures.bind(this); $ckeditorToolbar @@ -335,6 +339,7 @@ getCKEditorFeatures(hiddenEditorConfig, function (features) { // Trigger a standardized text editor configuration event for each // feature that was modified by the configuration changes. + var featuresMetadata = view.model.get('featuresMetadata'); for (var name in features) { if (features.hasOwnProperty(name)) { var feature = features[name]; diff --git a/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php b/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php index 7b4afcdd50dd..412af45350bb 100644 --- a/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php +++ b/core/modules/ckeditor/src/Plugin/CKEditorPlugin/Internal.php @@ -516,6 +516,13 @@ protected function generateACFSettings(Editor $editor) { } // Tell CKEditor the tag is allowed, along with some tags. elseif (is_array($attributes)) { + // Set defaults (these will be overridden below if more specific + // values are present). + $allowed[$tag] = array( + 'attributes' => FALSE, + 'styles' => FALSE, + 'classes' => FALSE, + ); // Configure allowed attributes, allowed "style" attribute values and // allowed "class" attribute values. // CKEditor only allows specific values for the "class" and "style" @@ -580,6 +587,9 @@ protected function generateACFSettings(Editor $editor) { } } + ksort($allowed); + ksort($disallowed); + return array($allowed, $disallowed); } } diff --git a/core/modules/ckeditor/src/Tests/CKEditorTest.php b/core/modules/ckeditor/src/Tests/CKEditorTest.php index f84286233c6a..dd9a7bf99f70 100644 --- a/core/modules/ckeditor/src/Tests/CKEditorTest.php +++ b/core/modules/ckeditor/src/Tests/CKEditorTest.php @@ -54,7 +54,7 @@ protected function setUp() { 'filter_html' => array( 'status' => 1, 'settings' => array( - 'allowed_html' => '<h2> <h3> <h4> <h5> <h6> <p> <br> <strong> <a>', + 'allowed_html' => '<h2 id> <h3> <h4> <h5> <h6> <p> <br> <strong> <a href hreflang>', ) ), ), @@ -96,6 +96,7 @@ function testGetJSSettings() { ); $expected_config = $this->castSafeStrings($expected_config); ksort($expected_config); + ksort($expected_config['allowedContent']); $this->assertIdentical($expected_config, $this->castSafeStrings($this->ckeditor->getJSSettings($editor)), 'Generated JS settings are correct for default configuration.'); // Customize the configuration: add button, have two contextually enabled @@ -122,12 +123,13 @@ function testGetJSSettings() { // Change the allowed HTML tags; the "allowedContent" and "format_tags" // settings for CKEditor should automatically be updated as well. $format = $editor->getFilterFormat(); - $format->filters('filter_html')->settings['allowed_html'] .= '<pre> <h3>'; + $format->filters('filter_html')->settings['allowed_html'] .= '<pre> <h1>'; $format->save(); - $expected_config['allowedContent']['pre'] = array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE); - $expected_config['allowedContent']['h3'] = array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE); - $expected_config['format_tags'] = 'p;h2;h3;h4;h5;h6;pre'; + $expected_config['allowedContent']['pre'] = array('attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE); + $expected_config['allowedContent']['h1'] = array('attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE); + $expected_config['format_tags'] = 'p;h1;h2;h3;h4;h5;h6;pre'; + ksort($expected_config['allowedContent']); $this->assertIdentical($expected_config, $this->castSafeStrings($this->ckeditor->getJSSettings($editor)), 'Generated JS settings are correct for customized configuration.'); // Disable the filter_html filter: allow *all *tags. @@ -179,14 +181,17 @@ function testGetJSSettings() { ), 'a' => array( 'attributes' => 'href,rel,class,target', + 'styles' => FALSE, 'classes' => 'external', ), 'span' => array( 'attributes' => 'class,property,rel,style', 'styles' => 'font-size', + 'classes' => FALSE, ), '*' => array( 'attributes' => 'class,data-*', + 'styles' => FALSE, 'classes' => 'is-a-hipster-llama,and-more', ), 'del' => array( @@ -206,6 +211,8 @@ function testGetJSSettings() { ); $expected_config['format_tags'] = 'p'; ksort($expected_config); + ksort($expected_config['allowedContent']); + ksort($expected_config['disallowedContent']); $this->assertIdentical($expected_config, $this->castSafeStrings($this->ckeditor->getJSSettings($editor)), 'Generated JS settings are correct for customized configuration.'); } @@ -421,17 +428,18 @@ protected function getDefaultInternalConfig() { } protected function getDefaultAllowedContentConfig() { - return array( - 'h2' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'h3' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'h4' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'h5' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'h6' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'p' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'br' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'strong' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - 'a' => array('attributes' => TRUE, 'styles' => FALSE, 'classes' => TRUE), - ); + return [ + 'h2' => ['attributes' => 'id', 'styles' => FALSE, 'classes' => FALSE], + 'h3' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'h4' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'h5' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'h6' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'p' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'br' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'strong' => ['attributes' => FALSE, 'styles' => FALSE, 'classes' => FALSE], + 'a' => ['attributes' => 'href,hreflang', 'styles' => FALSE, 'classes' => FALSE], + '*' => ['attributes' => 'lang,dir', 'styles' => FALSE, 'classes' => FALSE], + ]; } protected function getDefaultDisallowedContentConfig() { diff --git a/core/modules/editor/js/editor.admin.js b/core/modules/editor/js/editor.admin.js index 14599d06ce1e..bc7ebccee770 100644 --- a/core/modules/editor/js/editor.admin.js +++ b/core/modules/editor/js/editor.admin.js @@ -821,17 +821,32 @@ * @see Drupal.FilterStatus */ Drupal.FilterHTMLRule = function () { - return { - // Allow or forbid tags. + // Allow or forbid tags. + this.tags = []; + this.allow = null; + + // Apply restrictions to properties set on tags. + this.restrictedTags = { tags: [], - allow: null, - // Apply restrictions to properties set on tags. - restrictedTags: { - tags: [], - allowed: {attributes: [], styles: [], classes: []}, - forbidden: {attributes: [], styles: [], classes: []} - } + allowed: {attributes: [], styles: [], classes: []}, + forbidden: {attributes: [], styles: [], classes: []} }; + + return this; + }; + + Drupal.FilterHTMLRule.prototype.clone = function () { + var clone = new Drupal.FilterHTMLRule(); + clone.tags = this.tags.slice(0); + clone.allow = this.allow; + clone.restrictedTags.tags = this.restrictedTags.tags.slice(0); + clone.restrictedTags.allowed.attributes = this.restrictedTags.allowed.attributes.slice(0); + clone.restrictedTags.allowed.styles = this.restrictedTags.allowed.styles.slice(0); + clone.restrictedTags.allowed.classes = this.restrictedTags.allowed.classes.slice(0); + clone.restrictedTags.forbidden.attributes = this.restrictedTags.forbidden.attributes.slice(0); + clone.restrictedTags.forbidden.styles = this.restrictedTags.forbidden.styles.slice(0); + clone.restrictedTags.forbidden.classes = this.restrictedTags.forbidden.classes.slice(0); + return clone; }; /** diff --git a/core/modules/filter/filter.filter_html.admin.js b/core/modules/filter/filter.filter_html.admin.js index a52edd18c8f0..5de2d11d5896 100644 --- a/core/modules/filter/filter.filter_html.admin.js +++ b/core/modules/filter/filter.filter_html.admin.js @@ -23,23 +23,15 @@ */ getRules: function () { var currentValue = $('#edit-filters-filter-html-settings-allowed-html').val(); - var rules = []; - var rule; + var rules = Drupal.behaviors.filterFilterHtmlUpdating._parseSetting(currentValue); // Build a FilterHTMLRule that reflects the hard-coded behavior that // strips all "style" attribute and all "on*" attributes. - rule = new Drupal.FilterHTMLRule(); + var rule = new Drupal.FilterHTMLRule(); rule.restrictedTags.tags = ['*']; rule.restrictedTags.forbidden.attributes = ['style', 'on*']; rules.push(rule); - // Build a FilterHTMLRule that reflects the current settings. - rule = new Drupal.FilterHTMLRule(); - var behavior = Drupal.behaviors.filterFilterHtmlUpdating; - rule.allow = true; - rule.tags = behavior._parseSetting(currentValue); - rules.push(rule); - return rules; } }; @@ -63,8 +55,12 @@ // The description for the "Allowed HTML tags" field. $allowedHTMLDescription: null, - // The user-entered tag list of $allowedHTMLFormItem. - userTags: null, + /** + * The parsed, user-entered tag list of $allowedHTMLFormItem + * + * @var {Object.<string, Drupal.FilterHTMLRule>} + */ + userTags: {}, // The auto-created tag list thus far added. autoTags: null, @@ -116,9 +112,10 @@ this.$allowedHTMLDescription.find('.editor-update-message').remove(); // If any auto-created tags: insert message and update form item. - if (this.autoTags.length > 0) { + if (!_.isEmpty(this.autoTags)) { this.$allowedHTMLDescription.append(Drupal.theme('filterFilterHTMLUpdateMessage', this.autoTags)); - this.$allowedHTMLFormItem.val(this._generateSetting(this.userTags) + ' ' + this._generateSetting(this.autoTags)); + var userTagsWithoutOverrides = _.omit(this.userTags, _.keys(this.autoTags)); + this.$allowedHTMLFormItem.val(this._generateSetting(userTagsWithoutOverrides) + ' ' + this._generateSetting(this.autoTags)); } // Restore to original state. else { @@ -142,45 +139,170 @@ * A list of new allowed tags. */ _calculateAutoAllowedTags: function (userAllowedTags, newFeatures) { - return _ - .chain(newFeatures) - // Reduce multiple features' rules. - .reduce(function (memo, featureRules) { - // Reduce a single features' rules' required tags. - return _.union(memo, _.reduce(featureRules, function (memo, featureRule) { - return _.union(memo, featureRule.required.tags); - }, [])); - }, []) - // All new features' required tags are "new allowed tags", except - // for those that are already allowed in the original allowed tags. - .difference(userAllowedTags) - .value(); + var featureName; + var feature; + var featureRule; + var filterRule; + var tag; + var editorRequiredTags = {}; + // Map the newly added Text Editor features to Drupal.FilterHtmlRule + // objects (to allow comparing userTags with autoTags). + for (featureName in newFeatures) { + if (newFeatures.hasOwnProperty(featureName)) { + feature = newFeatures[featureName]; + for (var f = 0; f < feature.length; f++) { + featureRule = feature[f]; + for (var t = 0; t < featureRule.required.tags.length; t++) { + tag = featureRule.required.tags[t]; + if (!_.has(editorRequiredTags, tag)) { + filterRule = new Drupal.FilterHTMLRule(); + filterRule.restrictedTags.tags = [tag]; + // @todo Neither Drupal.FilterHtmlRule nor + // Drupal.EditorFeatureHTMLRule allow for generic attribute + // value restrictions, only for the "class" and "style" + // attribute's values to be restricted. The filter_html filter + // always disallows the "style" attribute, so we only need to + // support "class" attribute value restrictions. Fix once + // https://www.drupal.org/node/2567801 lands. + filterRule.restrictedTags.allowed.attributes = featureRule.required.attributes.slice(0); + filterRule.restrictedTags.allowed.classes = featureRule.required.classes.slice(0); + editorRequiredTags[tag] = filterRule; + } + // The tag is already allowed, add any additionally allowed + // attributes. + else { + filterRule = editorRequiredTags[tag]; + filterRule.restrictedTags.allowed.attributes = _.union(filterRule.restrictedTags.allowed.attributes, featureRule.required.attributes); + filterRule.restrictedTags.allowed.classes = _.union(filterRule.restrictedTags.allowed.classes, featureRule.required.classes); + } + } + } + } + } + + // Now compare userAllowedTags with editorRequiredTags, and build + // autoAllowedTags, which contains: + // - any tags in editorRequiredTags but not in userAllowedTags (i.e. tags + // that are additionally going to be allowed) + // - any tags in editorRequiredTags that already exists in userAllowedTags + // but does not allow all attributes or attribute values + var autoAllowedTags = {}; + for (tag in editorRequiredTags) { + // If userAllowedTags does not contain a rule for this editor-required + // tag, then add it to the list of automatically allowed tags. + if (!_.has(userAllowedTags, tag)) { + autoAllowedTags[tag] = editorRequiredTags[tag]; + } + // Otherwise, if userAllowedTags already allows this tag, then check if + // additional attributes and classes on this tag are required by the + // editor. + else { + var requiredAttributes = editorRequiredTags[tag].restrictedTags.allowed.attributes; + var allowedAttributes = userAllowedTags[tag].restrictedTags.allowed.attributes; + var needsAdditionalAttributes = requiredAttributes.length && _.difference(requiredAttributes, allowedAttributes).length; + var requiredClasses = editorRequiredTags[tag].restrictedTags.allowed.classes; + var allowedClasses = userAllowedTags[tag].restrictedTags.allowed.classes; + var needsAdditionalClasses = requiredClasses.length && _.difference(requiredClasses, allowedClasses).length; + if (needsAdditionalAttributes || needsAdditionalClasses) { + autoAllowedTags[tag] = userAllowedTags[tag].clone(); + } + if (needsAdditionalAttributes) { + autoAllowedTags[tag].restrictedTags.allowed.attributes = _.union(allowedAttributes, requiredAttributes); + } + if (needsAdditionalClasses) { + autoAllowedTags[tag].restrictedTags.allowed.classes = _.union(allowedClasses, requiredClasses); + } + } + } + + return autoAllowedTags; }, /** * Parses the value of this.$allowedHTMLFormItem. * * @param {string} setting - * The string representation of the setting. e.g. "<p> <br> <a>" + * The string representation of the setting. For example: + * <p class="callout"> <br> <a href hreflang> * - * @return {Array} - * The array representation of the setting. e.g. ['p', 'br', 'a'] + * @return {Object.<string, Drupal.FilterHTMLRule>} + * The corresponding text filter HTML rule objects, one per tag, keyed by + * tag name. */ _parseSetting: function (setting) { - return setting.length ? setting.substring(1, setting.length - 1).split('> <') : []; + var node; + var tag; + var rule; + var attributes; + var attribute; + var allowedTags = setting.match(/(<[^>]+>)/g); + var sandbox = document.createElement('div'); + var rules = {}; + for (var t = 0; t < allowedTags.length; t++) { + // Let the browser do the parsing work for us. + sandbox.innerHTML = allowedTags[t]; + node = sandbox.firstChild; + tag = node.tagName.toLowerCase(); + + // Build the Drupal.FilterHtmlRule object. + rule = new Drupal.FilterHTMLRule(); + // We create one rule per allowed tag, so always one tag. + rule.restrictedTags.tags = [tag]; + // Add the attribute restrictions. + attributes = node.attributes; + for (var i = 0; i < attributes.length; i++) { + attribute = attributes.item(i); + var attributeName = attribute.nodeName; + // @todo Drupal.FilterHtmlRule does not allow for generic attribute + // value restrictions, only for the "class" and "style" attribute's + // values. The filter_html filter always disallows the "style" + // attribute, so we only need to support "class" attribute value + // restrictions. Fix once https://www.drupal.org/node/2567801 lands. + if (attributeName === 'class') { + var attributeValue = attribute.textContent; + rule.restrictedTags.allowed.classes = attributeValue.split(' '); + } + else { + rule.restrictedTags.allowed.attributes.push(attributeName); + } + } + + rules[tag] = rule; + } + return rules; }, /** * Generates the value of this.$allowedHTMLFormItem. * - * @param {Array} tags - * The array representation of the setting. e.g. ['p', 'br', 'a'] + * @param {Object.<string, Drupal.FilterHTMLRule>} tags + * The parsed representation of the setting. * * @return {Array} * The string representation of the setting. e.g. "<p> <br> <a>" */ _generateSetting: function (tags) { - return tags.length ? '<' + tags.join('> <') + '>' : ''; + return _.reduce(tags, function (setting, rule, tag) { + if (setting.length) { + setting += ' '; + } + + setting += '<' + tag; + if (rule.restrictedTags.allowed.attributes.length) { + setting += ' ' + rule.restrictedTags.allowed.attributes.join(' '); + } + // @todo Drupal.FilterHtmlRule does not allow for generic attribute + // value restrictions, only for the "class" and "style" attribute's + // values. The filter_html filter always disallows the "style" + // attribute, so we only need to support "class" attribute value + // restrictions. Fix once https://www.drupal.org/node/2567801 lands. + if (rule.restrictedTags.allowed.classes.length) { + setting += ' class="' + rule.restrictedTags.allowed.classes.join(' ') + '"'; + } + + setting += '>'; + return setting; + }, ''); } }; @@ -196,7 +318,7 @@ */ Drupal.theme.filterFilterHTMLUpdateMessage = function (tags) { var html = ''; - var tagList = '<' + tags.join('> <') + '>'; + var tagList = Drupal.behaviors.filterFilterHtmlUpdating._generateSetting(tags); html += '<p class="editor-update-message">'; html += Drupal.t('Based on the text editor configuration, these tags have automatically been added: <strong>@tag-list</strong>.', {'@tag-list': tagList}); html += '</p>'; diff --git a/core/modules/filter/filter.module b/core/modules/filter/filter.module index 46da1f0f5238..6c4db0a56f7d 100644 --- a/core/modules/filter/filter.module +++ b/core/modules/filter/filter.module @@ -13,7 +13,6 @@ use Drupal\Core\Routing\RouteMatchInterface; use Drupal\Core\Session\AccountInterface; use Drupal\Core\Template\Attribute; -use Drupal\filter\Entity\FilterFormat; use Drupal\filter\FilterFormatInterface; /** @@ -450,25 +449,6 @@ function template_preprocess_filter_tips(&$variables) { * Filters implemented by the Filter module. */ -/** - * Provides filtering of input into accepted HTML. - */ -function _filter_html($text, $filter) { - $allowed_tags = preg_split('/\s+|<|>/', $filter->settings['allowed_html'], -1, PREG_SPLIT_NO_EMPTY); - $text = Xss::filter($text, $allowed_tags); - - if ($filter->settings['filter_html_nofollow']) { - $html_dom = Html::load($text); - $links = $html_dom->getElementsByTagName('a'); - foreach ($links as $link) { - $link->setAttribute('rel', 'nofollow'); - } - $text = Html::serialize($html_dom); - } - - return trim($text); -} - /** * Converts text into hyperlinks automatically. * diff --git a/core/modules/filter/migration_templates/d6_filter_format.yml b/core/modules/filter/migration_templates/d6_filter_format.yml index df8faa5471ee..ffcbbb51b90d 100644 --- a/core/modules/filter/migration_templates/d6_filter_format.yml +++ b/core/modules/filter/migration_templates/d6_filter_format.yml @@ -34,7 +34,9 @@ process: - filter_url - filter_htmlcorrector - filter_html_escape - settings: settings + settings: + plugin: filter_settings + source: settings status: plugin: default_value default_value: true diff --git a/core/modules/filter/migration_templates/d7_filter_format.yml b/core/modules/filter/migration_templates/d7_filter_format.yml index 617611d4af67..c671b75efc91 100644 --- a/core/modules/filter/migration_templates/d7_filter_format.yml +++ b/core/modules/filter/migration_templates/d7_filter_format.yml @@ -19,7 +19,9 @@ process: source: name map: php_code: filter_null - settings: settings + settings: + plugin: filter_settings + source: settings status: plugin: default_value default_value: true diff --git a/core/modules/filter/src/Plugin/Filter/FilterHtml.php b/core/modules/filter/src/Plugin/Filter/FilterHtml.php index c282f0aeed41..38d798a7a148 100644 --- a/core/modules/filter/src/Plugin/Filter/FilterHtml.php +++ b/core/modules/filter/src/Plugin/Filter/FilterHtml.php @@ -7,6 +7,7 @@ namespace Drupal\filter\Plugin\Filter; +use Drupal\Component\Utility\Xss; use Drupal\Core\Form\FormStateInterface; use Drupal\Component\Utility\Html; use Drupal\filter\FilterProcessResult; @@ -15,12 +16,16 @@ /** * Provides a filter to limit allowed HTML tags. * + * The attributes in the annotation show examples of allowing all attributes + * by only having the attribute name, or allowing a fixed list of values, or + * allowing a value with a wildcard prefix. + * * @Filter( * id = "filter_html", - * title = @Translation("Limit allowed HTML tags"), + * title = @Translation("Limit allowed HTML tags and correct faulty HTML"), * type = Drupal\filter\Plugin\FilterInterface::TYPE_HTML_RESTRICTOR, * settings = { - * "allowed_html" = "<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h2> <h3> <h4> <h5> <h6>", + * "allowed_html" = "<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type='1 A I'> <li> <dl> <dt> <dd> <h2 id='jump-*'> <h3 id> <h4 id> <h5 id> <h6 id>", * "filter_html_help" = TRUE, * "filter_html_nofollow" = FALSE * }, @@ -29,6 +34,13 @@ */ class FilterHtml extends FilterBase { + /** + * The processed HTML restrictions. + * + * @var array + */ + protected $restrictions; + /** * {@inheritdoc} */ @@ -39,6 +51,7 @@ public function settingsForm(array $form, FormStateInterface $form_state) { '#default_value' => $this->settings['allowed_html'], '#maxlength' => 1024, '#description' => $this->t('A list of HTML tags that can be used. JavaScript event attributes, JavaScript URLs, and CSS are always stripped.'), + '#size' => 250, '#attached' => array( 'library' => array( 'filter/drupal.filter.filter_html.admin', @@ -58,25 +71,252 @@ public function settingsForm(array $form, FormStateInterface $form_state) { return $form; } + /** + * {@inheritdoc} + */ + public function setConfiguration(array $configuration) { + parent::setConfiguration($configuration); + // Force restrictions to be calculated again. + $this->restrictions = NULL; + } + /** * {@inheritdoc} */ public function process($text, $langcode) { - return new FilterProcessResult(_filter_html($text, $this)); + $restrictions = $this->getHtmlRestrictions(); + // Split the work into two parts. For filtering HTML tags out of the content + // we rely on the well-tested Xss::filter() code. Since there is no '*' tag + // that needs to be removed from the list. + unset($restrictions['allowed']['*']); + $text = Xss::filter($text, array_keys($restrictions['allowed'])); + // After we've done tag filtering, we do attribute and attribute value + // filtering as the second part. + return new FilterProcessResult($this->filterAttributes($text)); + } + + /** + * Provides filtering of tag attributes into accepted HTML. + * + * @param string $text + * The HTML text string to be filtered. + * + * @return string + * Filtered HTML with attributes filtered according to the settings. + */ + public function filterAttributes($text) { + $restrictions = $this->getHTMLRestrictions(); + $global_allowed_attributes = array_filter($restrictions['allowed']['*']); + unset($restrictions['allowed']['*']); + + // Apply attribute restrictions to tags. + $html_dom = Html::load($text); + $xpath = new \DOMXPath($html_dom); + foreach ($restrictions['allowed'] as $allowed_tag => $tag_attributes) { + // By default, no attributes are allowed for a tag, but due to the + // globally whitelisted attributes, it is impossible for a tag to actually + // completely disallow attributes. + if ($tag_attributes === FALSE) { + $tag_attributes = []; + } + $allowed_attributes = ['exact' => [], 'prefix' => []]; + foreach (($global_allowed_attributes + $tag_attributes) as $name => $values) { + // A trailing * indicates wildcard, but it must have some prefix. + if (substr($name, -1) === '*' && $name[0] !== '*') { + $allowed_attributes['prefix'][str_replace('*', '', $name)] = $this->prepareAttributeValues($values); + } + else { + $allowed_attributes['exact'][$name] = $this->prepareAttributeValues($values); + } + } + krsort($allowed_attributes['prefix']); + + // Find all matching elements that have any attributes and filter the + // attributes by name and value. + foreach ($xpath->query('//' . $allowed_tag . '[@*]') as $element) { + $this->filterElementAttributes($element, $allowed_attributes); + } + } + + if ($this->settings['filter_html_nofollow']) { + $links = $html_dom->getElementsByTagName('a'); + foreach ($links as $link) { + $link->setAttribute('rel', 'nofollow'); + } + } + $text = Html::serialize($html_dom); + + return trim($text); + } + + /** + * Filter attributes on an element by name and value according to a whitelist. + * + * @param \DOMElement $element + * The element to be processed. + * @param array $allowed_attributes + * The attributes whitelist as an array of names and values. + */ + protected function filterElementAttributes(\DOMElement $element, array $allowed_attributes) { + $modified_attributes = []; + foreach($element->attributes as $name => $attribute) { + // Remove attributes not in the whitelist. + $allowed_value = $this->findAllowedValue($allowed_attributes, $name); + if (empty($allowed_value)) { + $modified_attributes[$name] = FALSE; + } + elseif ($allowed_value !== TRUE) { + // Check the attribute values whitelist. + $attribute_values = preg_split('/\s+/', $attribute->value, -1, PREG_SPLIT_NO_EMPTY); + $modified_attributes[$name] = []; + foreach ($attribute_values as $value) { + if ($this->findAllowedValue($allowed_value, $value)) { + $modified_attributes[$name][] = $value; + } + } + } + } + // If the $allowed_value was TRUE for an attribute name, it does not + // appear in this array so the value on the DOM element is left unchanged. + foreach ($modified_attributes as $name => $values) { + if ($values) { + $element->setAttribute($name, implode(' ', $values)); + } + else { + $element->removeAttribute($name); + } + } + } + + /** + * Helper function to handle prefix matching. + * + * @param array $allowed + * Array of allowed names and prefixes. + * @param string $name + * The name to find or match against a prefix. + * + * @return bool|array + */ + protected function findAllowedValue(array $allowed, $name) { + if (isset($allowed['exact'][$name])) { + return $allowed['exact'][$name]; + } + // Handle prefix (wildcard) matches. + foreach ($allowed['prefix'] as $prefix => $value) { + if (strpos($name, $prefix) === 0) { + return $value; + } + } + return FALSE; + } + + /** + * Helper function to prepare attribute values including wildcards. + * + * Splits the values into two lists, one for values that must match exactly + * and the other for values that are wildcard prefixes. + * + * @param bool|array $attribute_values + * TRUE, FALSE, or an array of allowed values. + * + * @return bool|array + */ + protected function prepareAttributeValues($attribute_values) { + if ($attribute_values === TRUE || $attribute_values === FALSE) { + return $attribute_values; + } + $result = ['exact' => [], 'prefix' => []]; + foreach ($attribute_values as $name => $allowed) { + // A trailing * indicates wildcard, but it must have some prefix. + if (substr($name, -1) === '*' && $name[0] !== '*') { + $result['prefix'][str_replace('*', '', $name)] = $allowed; + } + else { + $result['exact'][$name] = $allowed; + } + } + krsort($result['prefix']); + return $result; } /** * {@inheritdoc} */ public function getHTMLRestrictions() { - $restrictions = array('allowed' => array()); - $tags = preg_split('/\s+|<|>/', $this->settings['allowed_html'], -1, PREG_SPLIT_NO_EMPTY); - // List the allowed HTML tags. - foreach ($tags as $tag) { - $restrictions['allowed'][$tag] = TRUE; + if ($this->restrictions) { + return $this->restrictions; } - // The 'style' and 'on*' ('onClick' etc.) attributes are always forbidden. - $restrictions['allowed']['*'] = array('style' => FALSE, 'on*' => FALSE); + + // Parse the allowed HTML setting, and gradually make the whitelist more + // specific. + $restrictions = ['allowed' => []]; + + // Make all the tags self-closing, so they will be parsed into direct + // children of the body tag in the DomDocument. + $html = str_replace('>', ' />', $this->settings['allowed_html']); + // Protect any trailing * characters in attribute names, since DomDocument + // strips them as invalid. + $star_protector = '__zqh6vxfbk3cg__'; + $html = str_replace('*', $star_protector, $html); + $body_child_nodes = Html::load($html)->getElementsByTagName('body')->item(0)->childNodes; + + foreach ($body_child_nodes as $node) { + if ($node->nodeType !== XML_ELEMENT_NODE) { + // Skip the empty text nodes inside tags. + continue; + } + $tag = $node->tagName; + if ($node->hasAttributes()) { + // Mark the tag as allowed, assigning TRUE for each attribute name if + // all values are allowed, or an array of specific allowed values. + $restrictions['allowed'][$tag] = []; + // Iterate over any attributes, and mark them as allowed. + foreach ($node->attributes as $name => $attribute) { + // Put back any trailing * on wildcard attribute name. + $name = str_replace($star_protector, '*', $name); + if ($attribute->value === '') { + // If the value is the empty string all values are allowed. + $restrictions['allowed'][$tag][$name] = TRUE; + } + else { + // A non-empty attribute value is assigned, mark each of the + // specified attribute values as allowed. + foreach (preg_split('/\s+/', $attribute->value, -1, PREG_SPLIT_NO_EMPTY) as $value) { + // Put back any trailing * on wildcard attribute value. + $value = str_replace($star_protector, '*', $value); + $restrictions['allowed'][$tag][$name][$value] = TRUE; + } + } + } + } + else { + // Mark the tag as allowed, but with no attributes allowed. + $restrictions['allowed'][$tag] = FALSE; + } + } + + // The 'style' and 'on*' ('onClick' etc.) attributes are always forbidden, + // and are removed by Xss::filter(). + // The 'lang', and 'dir' attributes apply to all elements and are always + // allowed. The value whitelist for the 'dir' attribute is enforced by + // self::filterAttributes(). Note that those two attributes are in the + // short list of globally usable attributes in HTML5. They are always + // allowed since the correct values of lang and dir may only be known to + // the content author. Of the other global attributes, they are not usually + // added by hand to content, and especially the class attribute can have + // undesired visual effects by allowing content authors to apply any + // available style, so specific values should be explicitly whitelisted. + // @see http://www.w3.org/TR/html5/dom.html#global-attributes + $restrictions['allowed']['*'] = [ + 'style' => FALSE, + 'on*' => FALSE, + 'lang' => TRUE, + 'dir' => ['ltr' => TRUE, 'rtl' => TRUE], + ]; + // Save this calculated result for re-use. + $this->restrictions = $restrictions; + return $restrictions; } diff --git a/core/modules/filter/src/Plugin/migrate/process/FilterSettings.php b/core/modules/filter/src/Plugin/migrate/process/FilterSettings.php new file mode 100644 index 000000000000..4a7e45b5076c --- /dev/null +++ b/core/modules/filter/src/Plugin/migrate/process/FilterSettings.php @@ -0,0 +1,55 @@ +<?php + +/** + * @file + * Contains \Drupal\migrate\Plugin\migrate\process\FilterSettings. + */ + +namespace Drupal\filter\Plugin\migrate\process; + +use Drupal\migrate\ProcessPluginBase; +use Drupal\migrate\MigrateExecutableInterface; +use Drupal\migrate\Row; + +/** + * Adds the default allowed attributes to filter_html's allowed_html setting. + * + * E.g. map '<a>' to '<a href hreflang dir>'. + * + * @MigrateProcessPlugin( + * id = "filter_settings", + * handle_multiples = TRUE + * ) + */ +class FilterSettings extends ProcessPluginBase { + + /** + * Default attributes for migrating filter_html's 'allowed_html' setting. + * + * @var string[] + */ + protected $allowedHtmlDefaultAttributes = [ + '<a>' => '<a href hreflang>', + '<blockquote>' => '<blockquote cite>', + '<ol>' => '<ol start type>', + '<ul>' => '<ul type>', + '<img>' => '<img src alt height width>', + '<h2>' => '<h2 id>', + '<h3>' => '<h3 id>', + '<h4>' => '<h4 id>', + '<h5>' => '<h5 id>', + '<h6>' => '<h6 id>', + ]; + + /** + * {@inheritdoc} + */ + public function transform($value, MigrateExecutableInterface $migrate_executable, Row $row, $destination_property) { + // Only the filter_html filter's settings have a changed format. + if ($row->getDestinationProperty('id') === 'filter_html') { + $value['allowed_html'] = str_replace(array_keys($this->allowedHtmlDefaultAttributes), array_values($this->allowedHtmlDefaultAttributes), $value['allowed_html']); + } + return $value; + } + +} diff --git a/core/modules/filter/src/Tests/FilterAPITest.php b/core/modules/filter/src/Tests/FilterAPITest.php index 263b31ee48d6..b8a93f1ddf9a 100644 --- a/core/modules/filter/src/Tests/FilterAPITest.php +++ b/core/modules/filter/src/Tests/FilterAPITest.php @@ -108,7 +108,15 @@ function testFilterFormatAPI() { $filtered_html_format = entity_load('filter_format', 'filtered_html'); $this->assertIdentical( $filtered_html_format->getHtmlRestrictions(), - array('allowed' => array('p' => TRUE, 'br' => TRUE, 'strong' => TRUE, 'a' => TRUE, '*' => array('style' => FALSE, 'on*' => FALSE))), + array( + 'allowed' => array( + 'p' => FALSE, + 'br' => FALSE, + 'strong' => FALSE, + 'a' => array('href' => TRUE, 'hreflang' => TRUE), + '*' => array('style' => FALSE, 'on*' => FALSE, 'lang' => TRUE, 'dir' => array('ltr' => TRUE, 'rtl' => TRUE)), + ), + ), 'FilterFormatInterface::getHtmlRestrictions() works as expected for the filtered_html format.' ); $this->assertIdentical( @@ -164,7 +172,7 @@ function testFilterFormatAPI() { 'filter_html' => array( 'status' => 1, 'settings' => array( - 'allowed_html' => '<p> <br> <a> <strong>', + 'allowed_html' => '<p> <br> <a href> <strong>', ), ), 'filter_test_restrict_tags_and_attributes' => array( @@ -185,7 +193,14 @@ function testFilterFormatAPI() { $very_restricted_html_format->save(); $this->assertIdentical( $very_restricted_html_format->getHtmlRestrictions(), - array('allowed' => array('p' => TRUE, 'br' => FALSE, 'a' => array('href' => TRUE), '*' => array('style' => FALSE, 'on*' => FALSE))), + array( + 'allowed' => array( + 'p' => FALSE, + 'br' => FALSE, + 'a' => array('href' => TRUE), + '*' => array('style' => FALSE, 'on*' => FALSE, 'lang' => TRUE, 'dir' => array('ltr' => TRUE, 'rtl' => TRUE)), + ), + ), 'FilterFormatInterface::getHtmlRestrictions() works as expected for the very_restricted_html format.' ); $this->assertIdentical( diff --git a/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php b/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php index e320af3e9b47..9d96c5f3b0f9 100644 --- a/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php +++ b/core/modules/filter/src/Tests/FilterHtmlImageSecureTest.php @@ -45,7 +45,7 @@ protected function setUp() { 'filter_html' => array( 'status' => 1, 'settings' => array( - 'allowed_html' => '<img> <a>', + 'allowed_html' => '<img src testattribute> <a>', ), ), 'filter_autop' => array( diff --git a/core/modules/filter/src/Tests/FilterUnitTest.php b/core/modules/filter/src/Tests/FilterUnitTest.php index f441342dff42..1e3d3337f897 100644 --- a/core/modules/filter/src/Tests/FilterUnitTest.php +++ b/core/modules/filter/src/Tests/FilterUnitTest.php @@ -9,6 +9,7 @@ use Drupal\Component\Utility\Html; use Drupal\Component\Utility\SafeMarkup; +use Drupal\Core\Language\Language; use Drupal\Core\Render\RenderContext; use Drupal\editor\EditorXssFilter\Standard; use Drupal\filter\Entity\FilterFormat; @@ -190,7 +191,7 @@ function testCaptionFilter() { $html_filter = $this->filters['filter_html']; $html_filter->setConfiguration(array( 'settings' => array( - 'allowed_html' => '<img>', + 'allowed_html' => '<img src data-align data-caption>', 'filter_html_help' => 1, 'filter_html_nofollow' => 0, ) @@ -399,7 +400,7 @@ function testHtmlFilter() { $filter = $this->filters['filter_html']; $filter->setConfiguration(array( 'settings' => array( - 'allowed_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>', + 'allowed_html' => '<a> <p> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>', 'filter_html_help' => 1, 'filter_html_nofollow' => 0, ) @@ -407,41 +408,78 @@ function testHtmlFilter() { // HTML filter is not able to secure some tags, these should never be // allowed. - $f = _filter_html('<script />', $filter); - $this->assertNoNormalized($f, 'script', 'HTML filter should always remove script tags.'); + $f = (string) $filter->process('<script />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '', 'HTML filter should remove script tags.'); - $f = _filter_html('<iframe />', $filter); - $this->assertNoNormalized($f, 'iframe', 'HTML filter should always remove iframe tags.'); + $f = (string) $filter->process('<iframe />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '', 'HTML filter should remove iframe tags.'); - $f = _filter_html('<object />', $filter); - $this->assertNoNormalized($f, 'object', 'HTML filter should always remove object tags.'); + $f = (string) $filter->process('<object />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '', 'HTML filter should remove object tags.'); - $f = _filter_html('<style />', $filter); - $this->assertNoNormalized($f, 'style', 'HTML filter should always remove style tags.'); + $f = (string) $filter->process('<style />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '', 'HTML filter should remove style tags.'); // Some tags make CSRF attacks easier, let the user take the risk herself. - $f = _filter_html('<img />', $filter); - $this->assertNoNormalized($f, 'img', 'HTML filter should remove img tags on default.'); + $f = (string) $filter->process('<img />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '', 'HTML filter should remove img tags by default.'); - $f = _filter_html('<input />', $filter); - $this->assertNoNormalized($f, 'img', 'HTML filter should remove input tags on default.'); + $f = (string) $filter->process('<input />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '', 'HTML filter should remove input tags by default.'); // Filtering content of some attributes is infeasible, these shouldn't be // allowed too. - $f = _filter_html('<p style="display: none;" />', $filter); - $this->assertNoNormalized($f, 'style', 'HTML filter should remove style attribute on default.'); + $f = (string) $filter->process('<p style="display: none;" />', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNoNormalized($f, 'style', 'HTML filter should remove style attributes.'); + $this->assertIdentical($f, '<p></p>'); - $f = _filter_html('<p onerror="alert(0);" />', $filter); - $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove on* attributes on default.'); + $f = (string) $filter->process('<p onerror="alert(0);"></p>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove on* attributes.'); + $this->assertIdentical($f, '<p></p>'); - $f = _filter_html('<code onerror> </code>', $filter); - $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes on default.'); + $f = (string) $filter->process('<code onerror> </code>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes.'); + // Note - this string has a decoded character. + $this->assertIdentical($f, '<code>Â </code>'); - $f = _filter_html('<br>', $filter); - $this->assertNormalized($f, '<br>', 'HTML filter should allow line breaks.'); + $f = (string) $filter->process('<br>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNormalized($f, '<br />', 'HTML filter should allow line breaks.'); - $f = _filter_html('<br />', $filter); + $f = (string) $filter->process('<br />', Language::LANGCODE_NOT_SPECIFIED); $this->assertNormalized($f, '<br />', 'HTML filter should allow self-closing line breaks.'); + + // All attributes of whitelisted tags are stripped by default. + $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNormalized($f, '<a>link</a>', 'HTML filter should remove attributes that are not explicitly allowed.'); + + // Now whitelist the "llama" attribute on <a>. + $filter->setConfiguration(array( + 'settings' => array( + 'allowed_html' => '<a href llama> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>', + 'filter_html_help' => 1, + 'filter_html_nofollow' => 0, + ) + )); + $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNormalized($f, '<a llama="awesome">link</a>', 'HTML filter keeps explicitly allowed attributes, and removes attributes that are not explicitly allowed.'); + + // Restrict the whitelisted "llama" attribute on <a> to only allow the value + // "majestical", or "epic". + $filter->setConfiguration(array( + 'settings' => array( + 'allowed_html' => '<a href llama="majestical epic"> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>', + 'filter_html_help' => 1, + 'filter_html_nofollow' => 0, + ) + )); + $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '<a>link</a>', 'HTML filter removes allowed attributes that do not have an explicitly allowed value.'); + $f = (string) $filter->process('<a kitten="cute" llama="majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '<a llama="majestical">link</a>', 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.'); + $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertNormalized($f, '<a>link</a>', 'HTML filter removes allowed attributes that have a not explicitly allowed value.'); + $f = (string) $filter->process('<a href="/beautiful-animals" kitten="cute" llama="epic majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED); + $this->assertIdentical($f, '<a href="/beautiful-animals" llama="epic majestical">link</a>', 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.'); } /** @@ -452,7 +490,7 @@ function testNoFollowFilter() { $filter = $this->filters['filter_html']; $filter->setConfiguration(array( 'settings' => array( - 'allowed_html' => '<a>', + 'allowed_html' => '<a href>', 'filter_html_help' => 1, 'filter_html_nofollow' => 1, ) @@ -460,19 +498,19 @@ function testNoFollowFilter() { // Test if the rel="nofollow" attribute is added, even if we try to prevent // it. - $f = _filter_html('<a href="http://www.example.com/">text</a>', $filter); + $f = (string) $filter->process('<a href="http://www.example.com/">text</a>', Language::LANGCODE_NOT_SPECIFIED); $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent -- no evasion.'); - $f = _filter_html('<A href="http://www.example.com/">text</a>', $filter); + $f = (string) $filter->process('<A href="http://www.example.com/">text</a>', Language::LANGCODE_NOT_SPECIFIED); $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- capital A.'); - $f = _filter_html("<a/href=\"http://www.example.com/\">text</a>", $filter); + $f = (string) $filter->process("<a/href=\"http://www.example.com/\">text</a>", Language::LANGCODE_NOT_SPECIFIED); $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- non whitespace character after tag name.'); - $f = _filter_html("<\0a\0 href=\"http://www.example.com/\">text</a>", $filter); + $f = (string) $filter->process("<\0a\0 href=\"http://www.example.com/\">text</a>", Language::LANGCODE_NOT_SPECIFIED); $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- some nulls.'); - $f = _filter_html('<a href="http://www.example.com/" rel="follow">text</a>', $filter); + $f = (string) $filter->process('<a href="http://www.example.com/" rel="follow">text</a>', Language::LANGCODE_NOT_SPECIFIED); $this->assertNoNormalized($f, 'rel="follow"', 'Spam deterrent evasion -- with rel set - rel="follow" removed.'); $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- with rel set - rel="nofollow" added.'); } diff --git a/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php b/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php index e070632b5684..2a3de4eba835 100644 --- a/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php +++ b/core/modules/filter/src/Tests/Migrate/d6/MigrateFilterFormatTest.php @@ -44,7 +44,7 @@ public function testFilterFormat() { $this->assertFalse(isset($filters['filter_html_image_secure'])); // Check variables migrated into filter. - $this->assertIdentical('<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>', $filters['filter_html']['settings']['allowed_html']); + $this->assertIdentical('<a href hreflang> <em> <strong> <cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd>', $filters['filter_html']['settings']['allowed_html']); $this->assertIdentical(TRUE, $filters['filter_html']['settings']['filter_html_help']); $this->assertIdentical(FALSE, $filters['filter_html']['settings']['filter_html_nofollow']); $this->assertIdentical(72, $filters['filter_url']['settings']['filter_url_length']); diff --git a/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php b/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php index 0b84b8be5484..b96b99a2e46e 100644 --- a/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php +++ b/core/modules/filter/src/Tests/Migrate/d7/MigrateFilterFormatTest.php @@ -68,7 +68,7 @@ public function testFilterFormat() { /** @var \Drupal\filter\FilterFormatInterface $format */ $format = FilterFormat::load('filtered_html'); $config = $format->filters('filter_html')->getConfiguration(); - $this->assertIdentical('<div> <span> <ul> <li>', $config['settings']['allowed_html']); + $this->assertIdentical('<div> <span> <ul type> <li> <ol start type> <a href hreflang> <img src alt height width>', $config['settings']['allowed_html']); $config = $format->filters('filter_url')->getConfiguration(); $this->assertIdentical(128, $config['settings']['filter_url_length']); diff --git a/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml b/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml index a1a38d786006..ba403db3bf6d 100644 --- a/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml +++ b/core/modules/filter/tests/filter_test/config/install/filter.format.filtered_html.yml @@ -12,4 +12,4 @@ filters: provider: filter status: true settings: - allowed_html: '<p> <br> <strong> <a>' + allowed_html: '<p> <br> <strong> <a href hreflang>' diff --git a/core/modules/filter/tests/src/Unit/FilterHtmlTest.php b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php new file mode 100644 index 000000000000..8c07f4b23765 --- /dev/null +++ b/core/modules/filter/tests/src/Unit/FilterHtmlTest.php @@ -0,0 +1,87 @@ +<?php + +/** + * @file + * Contains \Drupal\Tests\filter\Unit\FilterHtmlTest. + */ + +namespace Drupal\Tests\filter\Unit; + +use Drupal\Tests\UnitTestCase; +use Drupal\filter\Plugin\Filter\FilterHtml; + +/** + * @coversDefaultClass \Drupal\filter\Plugin\Filter\FilterHtml + * @group filter + */ +class FilterHtmlTest extends UnitTestCase { + + /** + * @var \Drupal\filter\Plugin\Filter\FilterHtml + */ + protected $filter; + + /** + * {@inheritdoc} + */ + protected function setUp() { + parent::setUp(); + $configuration['settings'] = [ + 'allowed_html' => '<a href> <p> <em> <strong> <cite> <blockquote> <code class="pretty boring align-*"> <ul alpaca-*="wooly-* strong"> <ol llama-*> <li> <dl> <dt> <dd> <br> <h3 id>', + 'filter_html_help' => 1, + 'filter_html_nofollow' => 0, + ]; + $this->filter = new FilterHtml($configuration, 'filter_html', ['provider' => 'test']); + $this->filter->setStringTranslation($this->getStringTranslationStub()); + } + + /** + * @covers ::filterAttributes + * + * @dataProvider providerFilterAttributes + * + * @param string $html + * Input HTML. + * @param array $expected + * The expected output string. + */ + public function testfilterAttributes($html, $expected) { + $this->assertSame($expected, $this->filter->filterAttributes($html)); + } + + /** + * Provides data for testfilterAttributes. + * + * @return array + * An array of test data. + */ + public function providerFilterAttributes() { + return [ + ['<a href="/blog" title="Blog">Blog</a>', '<a href="/blog">Blog</a>'], + ['<p dir="rtl" />', '<p dir="rtl"></p>'], + ['<p dir="bogus" />', '<p></p>'], + ['<p id="first" />', '<p></p>'], + // The addition of xml:lang isn't especially desired, but is still valid + // HTML5. See https://www.drupal.org/node/1333730. + ['<p id="first" lang="en">text</p>', '<p lang="en" xml:lang="en">text</p>'], + ['<p style="display: none;" />', '<p></p>'], + ['<code class="pretty invalid">foreach ($a as $b) {}</code>', '<code class="pretty">foreach ($a as $b) {}</code>'], + ['<code class="boring pretty">foreach ($a as $b) {}</code>', '<code class="boring pretty">foreach ($a as $b) {}</code>'], + ['<code class="boring pretty ">foreach ($a as $b) {}</code>', '<code class="boring pretty">foreach ($a as $b) {}</code>'], + ['<code class="invalid alpaca">foreach ($a as $b) {}</code>', '<code>foreach ($a as $b) {}</code>'], + ['<h3 class="big">a heading</h3>', '<h3>a heading</h3>'], + ['<h3 id="first">a heading</h3>', '<h3 id="first">a heading</h3>'], + // Wilcard value. Case matters, so upper case doesn't match. + ['<code class="align-left bold">foreach ($a as $b) {}</code>', '<code class="align-left">foreach ($a as $b) {}</code>'], + ['<code class="align-right ">foreach ($a as $b) {}</code>', '<code class="align-right">foreach ($a as $b) {}</code>'], + ['<code class="Align-right ">foreach ($a as $b) {}</code>', '<code>foreach ($a as $b) {}</code>'], + // Wilcard name, case is ignored. + ['<ol style="display: none;" llama-wim="noble majestic"></ol>', '<ol llama-wim="noble majestic"></ol>'], + ['<ol style="display: none;" LlamA-Wim="majestic"></ol>', '<ol llama-wim="majestic"></ol>'], + ['<ol style="display: none;" llama-="noble majestic"></ol>', '<ol llama-="noble majestic"></ol>'], + // Both wildcard names and values. + ['<ul style="display: none;" alpaca-wool="wooly-warm strong majestic"></ul>', '<ul alpaca-wool="wooly-warm strong"></ul>'], + ]; + } + +} diff --git a/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php b/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php index 03661b3b2e97..d8b660d53dea 100644 --- a/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php +++ b/core/modules/migrate_drupal/src/Tests/Table/d7/Filter.php @@ -119,7 +119,7 @@ public function load() { 'name' => 'filter_html', 'weight' => '1', 'status' => '1', - 'settings' => 'a:3:{s:12:"allowed_html";s:22:"<div> <span> <ul> <li>";s:16:"filter_html_help";i:1;s:20:"filter_html_nofollow";i:0;}', + 'settings' => 'a:3:{s:12:"allowed_html";s:37:"<div> <span> <ul> <li> <ol> <a> <img>";s:16:"filter_html_help";i:1;s:20:"filter_html_nofollow";i:0;}', ))->values(array( 'format' => 'filtered_html', 'module' => 'filter', @@ -257,4 +257,5 @@ public function load() { } } -#7f8ea668d5deed8ce2d6c782dad2bcd5 + +#e0bd772d07df589752fa9372705aaa9d diff --git a/core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php b/core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php new file mode 100644 index 000000000000..c9585920fa61 --- /dev/null +++ b/core/modules/system/src/Tests/Update/FilterHtmlUpdateTest.php @@ -0,0 +1,56 @@ +<?php + +/** + * @file + * Contains \Drupal\system\Tests\Update\FilterHtmlUpdateTest. + */ + +namespace Drupal\system\Tests\Update; + +use Drupal\filter\Entity\FilterFormat; +use Drupal\system\Tests\Update\UpdatePathTestBase; + +/** + * Tests that the allowed html configutations are updated with attributes. + * + * @group Entity + */ +class FilterHtmlUpdateTest extends UpdatePathTestBase { + + /** + * {@inheritdoc} + */ + public function setDatabaseDumpFiles() { + $this->databaseDumpFiles = [ + __DIR__ . '/../../../../system/tests/fixtures/update/drupal-8.bare.standard.php.gz', + ]; + } + + /** + * Tests system_update_8009(). + */ + public function testAllowedHtmlUpdate() { + // Make sure we have the expected values before the update. + $filters_before = [ + 'basic_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h4> <h5> <h6> <p> <br> <span> <img>', + 'restricted_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h4> <h5> <h6>', + ]; + foreach ($filters_before as $name => $before) { + $config = FilterFormat::load($name)->toArray(); + $this->assertIdentical($before, $config['filters']['filter_html']['settings']['allowed_html']); + } + + $this->runUpdates(); + + // Make sure we have the expected values after the update. + $filters_after = [ + 'basic_html' => '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h4 id> <h5 id> <h6 id> <p> <br> <span> <img src alt height width data-align data-caption>', + 'restricted_html' => '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h4 id> <h5 id> <h6 id>', + ]; + foreach ($filters_after as $name => $after) { + $config = FilterFormat::load($name)->toArray(); + $this->assertIdentical($after, $config['filters']['filter_html']['settings']['allowed_html']); + } + } + +} diff --git a/core/modules/system/system.install b/core/modules/system/system.install index f391829669de..cb758fe82977 100644 --- a/core/modules/system/system.install +++ b/core/modules/system/system.install @@ -1662,6 +1662,45 @@ function system_update_8008() { } } +/** + * Add allowed attributes to existing html filters. + */ +function system_update_8009() { + $default_mapping = [ + '<a>' => '<a href hreflang>', + '<blockquote>' => '<blockquote cite>', + '<ol>' => '<ol start type>', + '<ul>' => '<ul type>', + '<img>' => '<img src alt height width>', + '<h2>' => '<h2 id>', + '<h3>' => '<h3 id>', + '<h4>' => '<h4 id>', + '<h5>' => '<h5 id>', + '<h6>' => '<h6 id>', + ]; + $config_factory = \Drupal::configFactory(); + foreach ($config_factory->listAll('filter.format') as $name) { + $allowed_html_mapping = $default_mapping; + $config = $config_factory->getEditable($name); + // The image alignment filter needs the data-align attribute. + $align_enabled = $config->get('filters.filter_align.status'); + if ($align_enabled) { + $allowed_html_mapping['<img>'] = str_replace('>', ' data-align>', $allowed_html_mapping['<img>']); + } + // The image caption filter needs the data-caption attribute. + $caption_enabled = $config->get('filters.filter_caption.status'); + if ($caption_enabled) { + $allowed_html_mapping['<img>'] = str_replace('>', ' data-caption>', $allowed_html_mapping['<img>']); + } + $allowed_html = $config->get('filters.filter_html.settings.allowed_html'); + if (!empty($allowed_html)) { + $allowed_html = strtr($allowed_html, $allowed_html_mapping); + $config->set('filters.filter_html.settings.allowed_html', $allowed_html); + $config->save(); + } + } +} + /** * @} End of "addtogroup updates-8.0.0-beta". */ diff --git a/core/profiles/standard/config/install/filter.format.basic_html.yml b/core/profiles/standard/config/install/filter.format.basic_html.yml index a2562d26b54c..92224c23cf9d 100644 --- a/core/profiles/standard/config/install/filter.format.basic_html.yml +++ b/core/profiles/standard/config/install/filter.format.basic_html.yml @@ -15,7 +15,7 @@ filters: status: true weight: -10 settings: - allowed_html: '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h2> <h3> <h4> <h5> <h6> <p> <br> <span> <img>' + allowed_html: '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h2 id> <h3 id> <h4 id> <h5 id> <h6 id> <p> <br> <span> <img src alt height width data-entity-type data-entity-uuid data-align data-caption>' filter_html_help: false filter_html_nofollow: false filter_align: @@ -36,12 +36,6 @@ filters: status: true weight: 9 settings: { } - filter_htmlcorrector: - id: filter_htmlcorrector - provider: filter - status: true - weight: 10 - settings: { } editor_file_reference: id: editor_file_reference provider: editor diff --git a/core/profiles/standard/config/install/filter.format.restricted_html.yml b/core/profiles/standard/config/install/filter.format.restricted_html.yml index cbd40c067ec9..323d07774b7f 100644 --- a/core/profiles/standard/config/install/filter.format.restricted_html.yml +++ b/core/profiles/standard/config/install/filter.format.restricted_html.yml @@ -13,7 +13,7 @@ filters: status: true weight: -10 settings: - allowed_html: '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <h2> <h3> <h4> <h5> <h6>' + allowed_html: '<a href hreflang> <em> <strong> <cite> <blockquote cite> <code> <ul type> <ol start type> <li> <dl> <dt> <dd> <h2 id> <h3 id> <h4 id> <h5 id> <h6 id>' filter_html_help: true filter_html_nofollow: false filter_autop: @@ -29,9 +29,3 @@ filters: weight: 0 settings: filter_url_length: 72 - filter_htmlcorrector: - id: filter_htmlcorrector - provider: filter - status: true - weight: 10 - settings: { } -- GitLab