diff --git a/includes/common.inc b/includes/common.inc
index 097ab84c24880dadef0df77895fe4fd2ff4cb4f4..4a029dcc31c3b2da00e5556c7f8078221168b938 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -587,6 +587,9 @@ function valid_input_data($data) {
   else {
     // Detect dangerous input data.
 
+    // Decode all normal character entities.
+    $data = decode_entities($data, array('<', '&', '"'));
+
     // Check strings:
     $match  = preg_match('/\Wjavascript\s*:/i', $data);
     $match += preg_match('/\Wexpression\s*\(/i', $data);
@@ -1675,47 +1678,86 @@ function mime_header_encode($string, $charset = 'UTF-8') {
 
 /**
  * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
+ *
+ * @param $text
+ *   The text to decode entities in.
+ * @param $exclude
+ *   An array of characters which should not be decoded. For example,
+ *   array('<', '&', '"'). This affects both named and numerical entities.
+ * @return
+ *   The text with entities decoded. Entities that are excluded, malformed or
+ *   too large to encode are left untouched.
  */
-function decode_entities($text) {
+function decode_entities($text, $exclude = array()) {
   static $table;
   // We store named entities in a table for quick processing.
   if (!isset($table)) {
     // Get all named HTML entities.
-    $table = array_flip(get_html_translation_table(HTML_ENTITIES, $special));
-    // PHP gives us Windows-1252/ISO-8859-1 data, we need UTF-8.
+    $table = array_flip(get_html_translation_table(HTML_ENTITIES));
+    // PHP gives us ISO-8859-1 data, we need UTF-8.
     $table = array_map('utf8_encode', $table);
   }
-  $text = strtr($text, $table);
+  $text = strtr($text, array_diff($table, $exclude));
 
   // Any remaining entities are numerical. Use a regexp to replace them.
-  return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2")', $text);
+  // The /e replacement is evaluated as PHP code; this is safe here because
+  // the pattern only lets "&", "#", ";" and alphanumerics into $0, $1 and $2.
+  return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $exclude)', $text);
 }
 
 /**
  * Helper function for decode_entities
+ *
+ * @param $hex
+ *   'x' when $codepoint is written in hexadecimal, '' when it is decimal.
+ * @param $codepoint
+ *   The digits of the numerical entity.
+ * @param $original
+ *   The complete entity as it appeared in the text.
+ * @param $exclude
+ *   An array of characters which should not be decoded.
+ * @return
+ *   The UTF-8 encoding of the code point, or $original when the entity is
+ *   malformed, too large to encode or decodes to an excluded character.
  */
-function _decode_entities($hex, $codepoint) {
+function _decode_entities($hex, $codepoint, $original, $exclude) {
+  // The caller's pattern accepts any alphanumerics; reject digits that are
+  // not valid for the base rather than let PHP coerce them to a bogus value.
+  if (!preg_match($hex != '' ? '/^[0-9A-Fa-f]+$/' : '/^[0-9]+$/', $codepoint)) {
+    return $original;
+  }
   if ($hex != '') {
     $codepoint = base_convert($codepoint, 16, 10);
   }
   if ($codepoint < 0x80) {
-    return chr($codepoint);
+    $str = chr($codepoint);
   }
   else if ($codepoint < 0x800) {
-    return chr(0xC0 | ($codepoint >> 6))
+    $str = chr(0xC0 | ($codepoint >> 6))
          . chr(0x80 | ($codepoint & 0x3F));
   }
   else if ($codepoint < 0x10000) {
-    return chr(0xE0 | ( $codepoint >> 12))
+    $str = chr(0xE0 | ( $codepoint >> 12))
          . chr(0x80 | (($codepoint >> 6) & 0x3F))
          . chr(0x80 | ( $codepoint & 0x3F));
   }
   else if ($codepoint < 0x200000) {
-    return chr(0xF0 | ( $codepoint >> 18))
+    $str = chr(0xF0 | ( $codepoint >> 18))
          . chr(0x80 | (($codepoint >> 12) & 0x3F))
          . chr(0x80 | (($codepoint >> 6) & 0x3F))
          . chr(0x80 | ( $codepoint & 0x3F));
   }
+  else {
+    // Too large to encode: keep the entity as written rather than fall
+    // through with $str undefined.
+    return $original;
+  }
+  if (in_array($str, $exclude, TRUE)) {
+    return $original;
+  }
+  else {
+    return $str;
+  }
 }
 
 /**