Commit d0755828 authored by Steven Wittens

#14757: Fix XSS vulnerability due to entity usage.

parent c825c00e
......@@ -583,6 +583,9 @@ function valid_input_data($data) {
else {
// Detect dangerous input data.
// Decode all normal character entities.
$data = decode_entities($data, array('<', '&', '"'));
// Check strings:
$match = preg_match('/\Wjavascript\s*:/i', $data);
$match += preg_match('/\Wexpression\s*\(/i', $data);
......@@ -1800,6 +1803,63 @@ function mime_header_encode($string, $charset = 'UTF-8') {
return $string;
}
/**
 * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
 *
 * Named and numerical entities are replaced in one single pass, so text that
 * results from decoding an entity is never scanned a second time (for example
 * "&amp;#60;" becomes "&#60;" and not "<").
 *
 * @param $text
 *   The text to decode entities in.
 * @param $exclude
 *   An array of characters which should not be decoded. For example,
 *   array('<', '&', '"'). This affects both named and numerical entities.
 * @return
 *   The text with every recognized, non-excluded entity replaced by its
 *   UTF-8 character. Unrecognized named entities are left untouched.
 */
function decode_entities($text, $exclude = array()) {
  static $table;
  // We store named entities in a table for quick processing.
  if (!isset($table)) {
    // Get all named HTML entities, keyed by entity ("&amp;" => "&"). The
    // flags and encoding are passed explicitly: the table is requested
    // directly in UTF-8, so no ISO-8859-1 conversion step is needed.
    $table = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
  }
  // Drop the named entities whose decoded character must stay encoded.
  $named = array_diff($table, $exclude);

  // Group 1 is "#", "#x" or "#X" for numerical entities and empty for named
  // ones; group 2 is the entity name or the digits. A callback is used
  // because the /e modifier is no longer supported by PCRE in PHP 7+.
  return preg_replace_callback(
    '/&(#[xX]?)?([A-Za-z0-9]+);/',
    function ($match) use ($named, $exclude) {
      if ($match[1] === '') {
        // Named entity: decode it only when it is known and not excluded.
        return isset($named[$match[0]]) ? $named[$match[0]] : $match[0];
      }
      // Numerical entity: anything after the "#" marks hexadecimal notation.
      $hex = (strlen($match[1]) > 1) ? 'x' : '';
      return _decode_entities($hex, $match[2], $match[0], $exclude);
    },
    $text
  );
}
/**
 * Helper function for decode_entities: decodes one numerical entity.
 *
 * @param $hex
 *   Non-empty string when $codepoint is written in hexadecimal, empty string
 *   when it is written in decimal.
 * @param $codepoint
 *   The digits of the entity, without the leading "&#" / "&#x" and the
 *   trailing ";".
 * @param $original
 *   The complete entity as it appeared in the text. It is returned unchanged
 *   whenever the entity cannot or must not be decoded.
 * @param $exclude
 *   An array of characters which should not be decoded.
 * @return
 *   The UTF-8 encoding of the code point, or $original when the digits are
 *   malformed, the code point is not a valid Unicode scalar value, or the
 *   decoded character is listed in $exclude.
 */
function _decode_entities($hex, $codepoint, $original, $exclude) {
  $digits = (string) $codepoint;
  $is_hex = ((string) $hex !== '');

  // Reject malformed digit strings instead of letting PHP coerce them.
  $pattern = $is_hex ? '/^[0-9A-Fa-f]+\z/' : '/^[0-9]+\z/';
  if (!preg_match($pattern, $digits)) {
    return $original;
  }

  // Leading zeros carry no value. Once they are stripped, anything longer
  // than the digits of 0x10FFFF (6 hex / 7 decimal) is out of range, and
  // bailing out here also keeps the conversion below within integer range.
  $digits = ltrim($digits, '0');
  if (strlen($digits) > ($is_hex ? 6 : 7)) {
    return $original;
  }
  $codepoint = $is_hex ? hexdec($digits) : (int) $digits;

  // Surrogates and values above U+10FFFF have no valid UTF-8 encoding.
  if ($codepoint > 0x10FFFF || ($codepoint >= 0xD800 && $codepoint <= 0xDFFF)) {
    return $original;
  }

  // Encode the code point as 1 to 4 UTF-8 bytes.
  if ($codepoint < 0x80) {
    $str = chr($codepoint);
  }
  else if ($codepoint < 0x800) {
    $str = chr(0xC0 | ($codepoint >> 6))
         . chr(0x80 | ($codepoint & 0x3F));
  }
  else if ($codepoint < 0x10000) {
    $str = chr(0xE0 | ( $codepoint >> 12))
         . chr(0x80 | (($codepoint >> 6) & 0x3F))
         . chr(0x80 | ( $codepoint       & 0x3F));
  }
  else {
    $str = chr(0xF0 | ( $codepoint >> 18))
         . chr(0x80 | (($codepoint >> 12) & 0x3F))
         . chr(0x80 | (($codepoint >> 6)  & 0x3F))
         . chr(0x80 | ( $codepoint        & 0x3F));
  }

  // Keep the entity encoded when its character is on the exclusion list.
  if (in_array($str, $exclude, TRUE)) {
    return $original;
  }
  return $str;
}
/**
* Evaluate a string of PHP code.
*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment