Commit 7132e7d5 authored by Steven Wittens
Browse files

#14757: Fix XSS vulnerability due to entity usage.

parent 11358d74
...@@ -587,6 +587,9 @@ function valid_input_data($data) { ...@@ -587,6 +587,9 @@ function valid_input_data($data) {
else { else {
// Detect dangerous input data. // Detect dangerous input data.
// Decode all normal character entities.
$data = decode_entities($data, array('<', '&', '"'));
// Check strings: // Check strings:
$match = preg_match('/\Wjavascript\s*:/i', $data); $match = preg_match('/\Wjavascript\s*:/i', $data);
$match += preg_match('/\Wexpression\s*\(/i', $data); $match += preg_match('/\Wexpression\s*\(/i', $data);
...@@ -1675,47 +1678,59 @@ function mime_header_encode($string, $charset = 'UTF-8') { ...@@ -1675,47 +1678,59 @@ function mime_header_encode($string, $charset = 'UTF-8') {
/**
 * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
 *
 * Named and numerical entities are decoded together in a single pass, so the
 * output of one replacement is never scanned again. This prevents a
 * double-escaped entity such as "&amp;#60;" from being decoded twice.
 *
 * @param $text
 *   The text to decode entities in.
 * @param $exclude
 *   An array of characters which should not be decoded. For example,
 *   array('<', '&', '"'). This affects both named and numerical entities.
 * @return
 *   The text with every recognised entity replaced by its UTF-8 character.
 *   Unknown named entities, malformed numerical entities and entities for
 *   excluded characters are left exactly as they were written.
 */
function decode_entities($text, $exclude = array()) {
  static $table;
  // We store named entities in a table for quick processing.
  if (!isset($table)) {
    // Get all named HTML entities, keyed by entity ('&lt;' => '<'). The
    // encoding is requested explicitly: the default changed from ISO-8859-1
    // to UTF-8 in PHP 5.4, so converting the result afterwards would corrupt
    // every non-ASCII character.
    $table = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8'));
  }
  // Drop the named entities whose decoded character must stay encoded.
  $named = array_diff($table, $exclude);

  // The hex marker is matched case-insensitively: browsers accept "&#X3C;" as
  // well as "&#x3c;", so both forms have to be decoded before input is checked.
  return preg_replace_callback('/&(#?)(x?)([A-Za-z0-9]+);/i', function ($match) use ($named, $exclude) {
    list($entity, $hash, $hex, $digits) = $match;

    if ($hash === '') {
      // Named entity: names are case-sensitive, so look up the literal match.
      return isset($named[$entity]) ? $named[$entity] : $entity;
    }

    // Numerical entity: only pass well-formed digit strings to the helper.
    $valid = ($hex !== '') ? '/^[0-9A-Fa-f]+$/D' : '/^[0-9]+$/D';
    if (!preg_match($valid, $digits)) {
      return $entity;
    }
    return _decode_entities($hex, $digits, $entity, $exclude);
  }, $text);
}
/**
 * Helper function for decode_entities().
 *
 * Converts one numerical entity into the UTF-8 byte sequence of its code point.
 *
 * @param $hex
 *   A non-empty string (the "x" marker) when $codepoint is written in
 *   hexadecimal, or an empty string when it is written in decimal.
 * @param $codepoint
 *   The digits of the entity, without the leading "&#" / "&#x" and the
 *   trailing ";".
 * @param $original
 *   The complete entity as it appeared in the text, e.g. "&#60;".
 * @param $exclude
 *   An array of characters which should not be decoded.
 * @return
 *   The UTF-8 encoded character, or $original when the digits are malformed,
 *   the code point is not a valid Unicode scalar value, or the decoded
 *   character is listed in $exclude.
 */
function _decode_entities($hex, $codepoint, $original, $exclude) {
  $digits = (string) $codepoint;
  $is_hex = ($hex != '');

  // Reject anything that is not a pure digit string in the announced base.
  if (!preg_match($is_hex ? '/^[0-9A-Fa-f]+$/D' : '/^[0-9]+$/D', $digits)) {
    return $original;
  }

  // hexdec() and the float cast both survive absurdly long digit strings; the
  // range check below then discards them.
  $value = $is_hex ? hexdec($digits) : (float) $digits;

  // Values above U+10FFFF and UTF-16 surrogates cannot be expressed as valid
  // UTF-8, so the entity is left untouched instead of emitting broken bytes.
  if ($value > 0x10FFFF || ($value >= 0xD800 && $value <= 0xDFFF)) {
    return $original;
  }
  $value = (int) $value;

  if ($value < 0x80) {
    $str = chr($value);
  }
  else if ($value < 0x800) {
    $str = chr(0xC0 | ($value >> 6))
         . chr(0x80 | ($value & 0x3F));
  }
  else if ($value < 0x10000) {
    $str = chr(0xE0 | ( $value >> 12))
         . chr(0x80 | (($value >> 6) & 0x3F))
         . chr(0x80 | ( $value       & 0x3F));
  }
  else {
    $str = chr(0xF0 | ( $value >> 18))
         . chr(0x80 | (($value >> 12) & 0x3F))
         . chr(0x80 | (($value >> 6)  & 0x3F))
         . chr(0x80 | ( $value        & 0x3F));
  }

  // Excluded characters keep their entity form.
  return in_array($str, $exclude, TRUE) ? $original : $str;
}
/** /**
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment