Skip to content
Snippets Groups Projects
Commit 7132e7d5 authored by Steven Wittens
Browse files

#14757: Fix XSS vulnerability due to entity usage.

parent 11358d74
No related branches found
No related tags found
2 merge requests: !7452 (Issue #1797438. HTML5 validation is preventing form submit and not fully...), !789 (Issue #3210310: Adjust Database API to remove deprecated Drupal 9 code in Drupal 10)
......@@ -587,6 +587,9 @@ function valid_input_data($data) {
else {
// Detect dangerous input data.
// Decode all normal character entities.
$data = decode_entities($data, array('<', '&', '"'));
// Check strings:
$match = preg_match('/\Wjavascript\s*:/i', $data);
$match += preg_match('/\Wexpression\s*\(/i', $data);
......@@ -1675,47 +1678,59 @@ function mime_header_encode($string, $charset = 'UTF-8') {
/**
 * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
 *
 * Named and numerical entities are decoded in a single pass, so text that was
 * escaped twice (for example "&amp;#60;") is only unescaped once.
 *
 * @param $text
 *   The text to decode entities in.
 * @param $exclude
 *   An array of characters which should not be decoded. For example,
 *   array('<', '&', '"'). This affects both named and numerical entities.
 *
 * @return
 *   The text with all non-excluded entities replaced by UTF-8 characters.
 *   Unknown or malformed entities are left untouched.
 */
function decode_entities($text, $exclude = array()) {
  static $table;
  // We store named entities in a table for quick processing.
  if (!isset($table)) {
    // Get all named HTML entities, including the quote entities, and ask PHP
    // for UTF-8 values directly instead of re-encoding ISO-8859-1 data.
    $table = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES, 'UTF-8'));
  }
  // Drop the named entities whose decoded character must stay encoded.
  $named = array_diff($table, $exclude);

  // Match numerical entities (groups 1 and 2) and named entities together so
  // the output of one replacement is never scanned for entities again.
  return preg_replace_callback(
    '/&(?:#([xX]?)([A-Za-z0-9]+)|[A-Za-z][A-Za-z0-9]*);/',
    function ($match) use ($named, $exclude) {
      if (isset($match[2]) && $match[2] !== '') {
        // Numerical entity: the helper validates it and honours $exclude.
        return _decode_entities($match[1], $match[2], $match[0], $exclude);
      }
      // Named entity: decode when known and not excluded, else keep as-is.
      return isset($named[$match[0]]) ? $named[$match[0]] : $match[0];
    },
    $text
  );
}
/**
 * Helper function for decode_entities.
 *
 * Converts one numerical entity to its UTF-8 byte sequence.
 *
 * @param $hex
 *   Non-empty (the "x" marker) when $codepoint is hexadecimal, '' when it is
 *   decimal.
 * @param $codepoint
 *   The digits of the entity, without the leading "&#" / "&#x" and the
 *   trailing ";".
 * @param $original
 *   The complete entity as it appeared in the text. It is returned unchanged
 *   when the entity must not or cannot be decoded.
 * @param $exclude
 *   An array of characters which should not be decoded.
 *
 * @return
 *   The UTF-8 encoded character, or $original when the digits are malformed,
 *   the code point cannot be represented in UTF-8, or the decoded character is
 *   listed in $exclude.
 */
function _decode_entities($hex, $codepoint, $original, $exclude) {
  $is_hex = ($hex != '');
  // Refuse malformed digit strings instead of letting them collapse to 0.
  if (!preg_match($is_hex ? '/^[0-9A-Fa-f]+$/' : '/^[0-9]+$/', $codepoint)) {
    return $original;
  }
  // Both conversions may yield a float for absurdly long inputs; the range
  // check below rejects those before any bit arithmetic happens.
  $value = $is_hex ? hexdec($codepoint) : $codepoint + 0;
  // UTF-8 stops at U+10FFFF and cannot represent UTF-16 surrogates.
  if ($value > 0x10FFFF || ($value >= 0xD800 && $value <= 0xDFFF)) {
    return $original;
  }
  $value = (int) $value;

  if ($value < 0x80) {
    $str = chr($value);
  }
  else if ($value < 0x800) {
    $str = chr(0xC0 | ($value >> 6))
         . chr(0x80 | ($value & 0x3F));
  }
  else if ($value < 0x10000) {
    $str = chr(0xE0 | ( $value >> 12))
         . chr(0x80 | (($value >> 6) & 0x3F))
         . chr(0x80 | ( $value       & 0x3F));
  }
  else {
    $str = chr(0xF0 | ( $value >> 18))
         . chr(0x80 | (($value >> 12) & 0x3F))
         . chr(0x80 | (($value >> 6)  & 0x3F))
         . chr(0x80 | ( $value        & 0x3F));
  }

  // Leave the entity encoded when its character is on the exclusion list.
  if (in_array($str, $exclude, TRUE)) {
    return $original;
  }
  return $str;
}
/**
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment