Commit 9d912261 authored by Dries

- Patch #559584 by tic2000, sun: filter_xss() and Line break filter break HTML...

- Patch #559584 by tic2000, sun: filter_xss() and Line break filter break HTML comments. Also added tests.
parent 9502260e
......@@ -1355,6 +1355,8 @@ function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite',
(
<(?=[^a-zA-Z!/]) # a lone <
| # or
<!--.*?--> # a comment
| # or
<[^>]*(>|$) # a string that starts with a <, up until the > or the end of the string
| # or
> # just a >
......@@ -1393,7 +1395,7 @@ function _filter_xss_split($m, $store = FALSE) {
return '&lt;';
}
if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?|(<!--.*?-->)$%', $string, $matches)) {
// Seriously malformed
return '';
}
......@@ -1401,12 +1403,21 @@ function _filter_xss_split($m, $store = FALSE) {
$slash = trim($matches[1]);
$elem = &$matches[2];
$attrlist = &$matches[3];
$comment = &$matches[4];
if ($comment) {
$elem = '!--';
}
if (!isset($allowed_html[strtolower($elem)])) {
// Disallowed HTML element
return '';
}
if ($comment) {
return $comment;
}
if ($slash != '') {
return "</$elem>";
}
......
......@@ -1535,11 +1535,11 @@ function _filter_autop($text) {
// All block level tags
$block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';
// Split at <pre>, <script>, <style> and </pre>, </script>, </style> tags.
// Split at opening and closing PRE, SCRIPT, STYLE, OBJECT tags and comments.
// We don't apply any processing to the contents of these tags to avoid messing
// up code. We look for matched pairs and allow basic nesting. For example:
// "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
$chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
$chunks = preg_split('@(<!--.*?-->|</?(?:pre|script|style|object|!--)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
// Note: PHP ensures the array consists of alternating delimiters and literals
// and begins and ends with a literal (inserting NULL as required).
$ignore = FALSE;
......@@ -1548,7 +1548,8 @@ function _filter_autop($text) {
foreach ($chunks as $i => $chunk) {
if ($i % 2) {
// Opening or closing tag?
$open = ($chunk[1] != '/');
$open = ($chunk[1] != '/' || $chunk[1] != '!');
$comment = (substr($chunk, 0, 4) == '<!--');
list($tag) = preg_split('/[ >]/', substr($chunk, 2 - $open), 2);
if (!$ignore) {
if ($open) {
......@@ -1557,7 +1558,7 @@ function _filter_autop($text) {
}
}
// Only allow a matching tag to close it.
elseif (!$open && $ignoretag == $tag) {
elseif ((!$open && $ignoretag == $tag) || $comment) {
$ignore = FALSE;
$ignoretag = '';
}
......
This diff is collapsed.
......@@ -365,6 +365,9 @@ class CommonXssUnitTest extends DrupalUnitTestCase {
// Ignore PHP 5.3+ invalid multibyte sequence warning.
$text = @check_plain("Foo\xC0barbaz");
$this->assertEqual($text, '', 'check_plain() rejects invalid sequence "Foo\xC0barbaz"');
// Ignore PHP 5.3+ invalid multibyte sequence warning.
$text = @check_plain("\xc2\"");
$this->assertEqual($text, '', 'check_plain() rejects invalid sequence "\xc2\""');
$text = check_plain("Fooÿñ");
$this->assertEqual($text, "Fooÿñ", 'check_plain() accepts valid sequence "Fooÿñ"');
$text = filter_xss("Foo\xC0barbaz");
......@@ -379,6 +382,8 @@ class CommonXssUnitTest extends DrupalUnitTestCase {
function testEscaping() {
$text = check_plain("<script>");
$this->assertEqual($text, '&lt;script&gt;', 'check_plain() escapes &lt;script&gt;');
$text = check_plain('<>&"\'');
$this->assertEqual($text, '&lt;&gt;&amp;&quot;&#039;', 'check_plain() escapes reserved HTML characters.');
}
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment