diff --git a/core/includes/mail.inc b/core/includes/mail.inc index 7b08ad3376bcb4e3064376d2b14422aacaf03e98..0209bd501f74af9ab934531b37b0716ce83f0eae 100644 --- a/core/includes/mail.inc +++ b/core/includes/mail.inc @@ -5,6 +5,9 @@ * API functions for processing and sending e-mail. */ +use Drupal\Component\Utility\Html; +use Drupal\Component\Utility\Xss; + /** * Composes and optionally sends an e-mail message. * @@ -288,7 +291,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { $allowed_tags = isset($allowed_tags) ? array_intersect($supported_tags, $allowed_tags) : $supported_tags; // Make sure tags, entities and attributes are well-formed and properly nested. - $string = _filter_htmlcorrector(filter_xss($string, $allowed_tags)); + $string = Html::normalize(Xss::filter($string, $allowed_tags)); // Apply inline styles. $string = preg_replace('! +)[^>]*)?>!i', '/', $string); diff --git a/core/lib/Drupal/Component/Utility/Html.php b/core/lib/Drupal/Component/Utility/Html.php new file mode 100644 index 0000000000000000000000000000000000000000..bc0a91621433fb10589ff125854248cb2877e254 --- /dev/null +++ b/core/lib/Drupal/Component/Utility/Html.php @@ -0,0 +1,137 @@ + + + +!html + +EOD; + // PHP's \DOMDocument serialization adds stray whitespace in case the markup + // of the wrapping document contains newlines, so ensure to remove all + // newlines before injecting the actual HTML body to process. + $document = strtr($document, array("\n" => '', '!html' => $html)); + + $dom = new \DOMDocument(); + // Ignore warnings during HTML soup loading. + @$dom->loadHTML($document); + + return $dom; + } + + /** + * Converts the body of a \DOMDocument back to an HTML snippet. + * + * The function serializes the body part of a \DOMDocument back to an (X)HTML + * snippet. The resulting (X)HTML snippet will be properly formatted to be + * compatible with HTML user agents. 
+ * + * @param \DOMDocument $document + * A \DOMDocument object to serialize, only the tags below the first + * node will be converted. + * + * @return string + * A valid (X)HTML snippet, as a string. + */ + public static function serialize(\DOMDocument $document) { + $body_node = $document->getElementsByTagName('body')->item(0); + $html = ''; + + foreach ($body_node->getElementsByTagName('script') as $node) { + static::escapeCdataElement($node); + } + foreach ($body_node->getElementsByTagName('style') as $node) { + static::escapeCdataElement($node, '/*', '*/'); + } + foreach ($body_node->childNodes as $node) { + $html .= $document->saveXML($node); + } + return $html; + } + + /** + * Adds comments around a childNodes as $child_node) { + if ($child_node instanceof \DOMCdataSection) { + $embed_prefix = "\n{$comment_end}\n"; + + // Prevent invalid cdata escaping as this would throw a DOM error. + // This is the same behavior as found in libxml2. + // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection + // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting + $data = str_replace(']]>', ']]]]>', $child_node->data); + + $fragment = $node->ownerDocument->createDocumentFragment(); + $fragment->appendXML($embed_prefix . $data . $embed_suffix); + $node->appendChild($fragment); + $node->removeChild($child_node); + } + } + } + +} diff --git a/core/modules/editor/editor.module b/core/modules/editor/editor.module index a0bf85ea1f955dd7592bd3fc46e34e8b9c6a0750..544ff2e32afc6204635dc70a46195134e0b65715 100644 --- a/core/modules/editor/editor.module +++ b/core/modules/editor/editor.module @@ -5,6 +5,7 @@ * Adds bindings for client-side "text editors" to text formats. */ +use Drupal\Component\Utility\Html; use Drupal\Core\Entity\ContentEntityInterface; use Drupal\editor\Entity\Editor; use Drupal\Component\Utility\NestedArray; @@ -661,7 +662,7 @@ function _editor_get_processed_text_fields(ContentEntityInterface $entity) { * An array of all found UUIDs. 
*/ function _editor_parse_file_uuids($text) { - $dom = filter_dom_load($text); + $dom = Html::load($text); $xpath = new \DOMXPath($dom); $uuids = array(); foreach ($xpath->query('//*[@data-editor-file-uuid]') as $node) { diff --git a/core/modules/field/field.module b/core/modules/field/field.module index 6a8b941125447bc85e32b34cf043f41a03e657ba..2dd3469c61eea2e409546f7d34688e225a5cd002 100644 --- a/core/modules/field/field.module +++ b/core/modules/field/field.module @@ -4,6 +4,8 @@ * Attach custom data fields to Drupal entities. */ +use Drupal\Component\Utility\Html; +use Drupal\Component\Utility\Xss; use Drupal\Core\Entity\ContentEntityInterface; use Drupal\Core\Template\Attribute; use Drupal\entity\Entity\EntityViewDisplay; @@ -304,7 +306,7 @@ function field_cache_clear() { * UTF-8. */ function field_filter_xss($string) { - return _filter_htmlcorrector(filter_xss($string, _field_filter_xss_allowed_tags())); + return Html::normalize(Xss::filter($string, _field_filter_xss_allowed_tags())); } /** diff --git a/core/modules/file/file.field.inc b/core/modules/file/file.field.inc index 3252cecf78f971d4032bb94dd4e7c856589b78c3..8a643322f96715f158f53ef9db4a90e60c1fbb1b 100644 --- a/core/modules/file/file.field.inc +++ b/core/modules/file/file.field.inc @@ -5,6 +5,7 @@ * Field module functionality for the File module. 
*/ +use Drupal\Component\Utility\Html; use Drupal\field\FieldInterface; /** @@ -181,8 +182,8 @@ function theme_file_upload_help($variables) { $descriptions = array(); - if (strlen($description)) { - $descriptions[] = _filter_htmlcorrector($description); + if (!empty($description)) { + $descriptions[] = Html::normalize($description); } if (isset($cardinality)) { if ($cardinality == -1) { diff --git a/core/modules/filter/filter.module b/core/modules/filter/filter.module index 38af864a61372b4e4d01d540575cfe7ce55e9cac..e9c04d43fb349910fcfca8426ac94c9f08fd0818 100644 --- a/core/modules/filter/filter.module +++ b/core/modules/filter/filter.module @@ -5,6 +5,7 @@ * Framework for handling the filtering of content. */ +use Drupal\Component\Utility\Html; use Drupal\Component\Utility\String; use Drupal\Core\Cache\Cache; use Drupal\Core\Language\Language; @@ -693,103 +694,6 @@ function _filter_tips($format_id, $long = FALSE) { return $tips; } -/** - * Parses an HTML snippet and returns it as a DOM object. - * - * This function loads the body part of a partial (X)HTML document and returns - * a full DOMDocument object that represents this document. You can use - * filter_dom_serialize() to serialize this DOMDocument back to a XHTML - * snippet. - * - * @param $text - * The partial (X)HTML snippet to load. Invalid markup will be corrected on - * import. - * - * @return - * A DOMDocument that represents the loaded (X)HTML snippet. - */ -function filter_dom_load($text) { - $dom_document = new DOMDocument(); - // Ignore warnings during HTML soup loading. - @$dom_document->loadHTML('' . $text . ''); - - return $dom_document; -} - -/** - * Converts a DOM object back to an HTML snippet. - * - * The function serializes the body part of a DOMDocument back to an XHTML - * snippet. The resulting XHTML snippet will be properly formatted to be - * compatible with HTML user agents. 
- * - * @param $dom_document - * A DOMDocument object to serialize, only the tags below - * the first node will be converted. - * - * @return - * A valid (X)HTML snippet, as a string. - */ -function filter_dom_serialize($dom_document) { - $body_node = $dom_document->getElementsByTagName('body')->item(0); - $body_content = ''; - - foreach ($body_node->getElementsByTagName('script') as $node) { - filter_dom_serialize_escape_cdata_element($dom_document, $node); - } - - foreach ($body_node->getElementsByTagName('style') as $node) { - filter_dom_serialize_escape_cdata_element($dom_document, $node, '/*', '*/'); - } - - foreach ($body_node->childNodes as $child_node) { - $body_content .= $dom_document->saveXML($child_node); - } - return $body_content; -} - -/** - * Adds comments around the childNodes as $node) { - if (get_class($node) == 'DOMCdataSection') { - // See drupal_get_js(). This code is more or less duplicated there. - $embed_prefix = "\n{$comment_end}\n"; - - // Prevent invalid cdata escaping as this would throw a DOM error. - // This is the same behavior as found in libxml2. - // Related W3C standard: http://www.w3.org/TR/REC-xml/#dt-cdsection - // Fix explanation: http://en.wikipedia.org/wiki/CDATA#Nesting - $data = str_replace(']]>', ']]]]>', $node->data); - - $fragment = $dom_document->createDocumentFragment(); - $fragment->appendXML($embed_prefix . $data . $embed_suffix); - $dom_element->appendChild($fragment); - $dom_element->removeChild($node); - } - } -} - /** * Prepares variables for text format guideline templates. 
* @@ -878,12 +782,12 @@ function _filter_html($text, $filter) { $text = filter_xss($text, $allowed_tags); if ($filter->settings['filter_html_nofollow']) { - $html_dom = filter_dom_load($text); + $html_dom = Html::load($text); $links = $html_dom->getElementsByTagName('a'); foreach ($links as $link) { $link->setAttribute('rel', 'nofollow'); } - $text = filter_dom_serialize($html_dom); + $text = Html::serialize($html_dom); } return trim($text); @@ -1127,13 +1031,6 @@ function _filter_url_trim($text, $length = NULL) { return $text; } -/** - * Scans the input and makes sure that HTML tags are properly closed. - */ -function _filter_htmlcorrector($text) { - return filter_dom_serialize(filter_dom_load($text)); -} - /** * Converts line breaks into

and
in an intelligent fashion. * @@ -1219,7 +1116,7 @@ function _filter_html_image_secure_process($text) { // Find the directory on the server where index.php resides. $local_dir = DRUPAL_ROOT . '/'; - $html_dom = filter_dom_load($text); + $html_dom = Html::load($text); $images = $html_dom->getElementsByTagName('img'); foreach ($images as $image) { $src = $image->getAttribute('src'); @@ -1245,7 +1142,7 @@ function _filter_html_image_secure_process($text) { // indicator. See filter_filter_secure_image_alter(). \Drupal::moduleHandler()->alter('filter_secure_image', $image); } - $text = filter_dom_serialize($html_dom); + $text = Html::serialize($html_dom); return $text; } diff --git a/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterCaption.php b/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterCaption.php index 5b5ce632edcc6880ecece728a7e1f4b5d385d2b0..51cb70a37b62f5a0440f283cf731a3206f134e0f 100644 --- a/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterCaption.php +++ b/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterCaption.php @@ -7,6 +7,7 @@ namespace Drupal\filter\Plugin\Filter; +use Drupal\Component\Utility\Html; use Drupal\Component\Utility\String; use Drupal\Component\Utility\Unicode; use Drupal\Component\Utility\Xss; @@ -30,7 +31,7 @@ class FilterCaption extends FilterBase { public function process($text, $langcode, $cache, $cache_id) { if (stristr($text, 'data-caption') !== FALSE || stristr($text, 'data-align') !== FALSE) { - $dom = filter_dom_load($text); + $dom = Html::load($text); $xpath = new \DOMXPath($dom); foreach ($xpath->query('//*[@data-caption or @data-align]') as $node) { $caption = NULL; @@ -82,7 +83,7 @@ public function process($text, $langcode, $cache, $cache_id) { $altered_html = drupal_render($filter_caption); // Load the altered HTML into a new DOMDocument and retrieve the element. 
- $updated_node = filter_dom_load($altered_html)->getElementsByTagName('body') + $updated_node = Html::load($altered_html)->getElementsByTagName('body') ->item(0) ->childNodes ->item(0); @@ -94,7 +95,7 @@ public function process($text, $langcode, $cache, $cache_id) { $node->parentNode->replaceChild($updated_node, $node); } - return filter_dom_serialize($dom); + return Html::serialize($dom); } return $text; diff --git a/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterHtmlCorrector.php b/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterHtmlCorrector.php index 8d9b8e0abbbbc756ca3e2acfe6aa6b708bd58f46..bc0a036f51ccf3458be22e321d08c607ce6d9133 100644 --- a/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterHtmlCorrector.php +++ b/core/modules/filter/lib/Drupal/filter/Plugin/Filter/FilterHtmlCorrector.php @@ -7,6 +7,7 @@ namespace Drupal\filter\Plugin\Filter; +use Drupal\Component\Utility\Html; use Drupal\filter\Plugin\FilterBase; /** @@ -25,7 +26,7 @@ class FilterHtmlCorrector extends FilterBase { * {@inheritdoc} */ public function process($text, $langcode, $cache, $cache_id) { - return _filter_htmlcorrector($text); + return Html::normalize($text); } } diff --git a/core/modules/filter/lib/Drupal/filter/Tests/FilterUnitTest.php b/core/modules/filter/lib/Drupal/filter/Tests/FilterUnitTest.php index f686cee49ff979442d8892bdebbe5f1406bd5b78..a51d6b9bd1c72a8f337d860435448e522831ebe0 100644 --- a/core/modules/filter/lib/Drupal/filter/Tests/FilterUnitTest.php +++ b/core/modules/filter/lib/Drupal/filter/Tests/FilterUnitTest.php @@ -7,6 +7,7 @@ namespace Drupal\filter\Tests; +use Drupal\Component\Utility\Html; use Drupal\simpletest\DrupalUnitTestBase; use Drupal\filter\FilterBag; @@ -741,117 +742,117 @@ function testUrlFilterContent() { */ function testHtmlCorrectorFilter() { // Tag closing. - $f = _filter_htmlcorrector('

text'); + $f = Html::normalize('

text'); $this->assertEqual($f, '

text

', 'HTML corrector -- tag closing at the end of input.'); - $f = _filter_htmlcorrector('

text

text'); + $f = Html::normalize('

text

text'); $this->assertEqual($f, '

text

text

', 'HTML corrector -- tag closing.'); - $f = _filter_htmlcorrector("