Commit 0f41706a authored by Steven Wittens

- #19874: Fix aggregator escaping after check_plain bug

- Fix bug in decode_entities() with double-escaped entities.
parent 7d3d5532
......@@ -1736,6 +1736,7 @@ function mime_header_encode($string, $charset = 'UTF-8') {
/**
* Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
* Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<").
*
* @param $text
* The text to decode entities in.
......@@ -1751,20 +1752,33 @@ function decode_entities($text, $exclude = array()) {
$table = array_flip(get_html_translation_table(HTML_ENTITIES));
// PHP gives us ISO-8859-1 data, we need UTF-8.
$table = array_map('utf8_encode', $table);
// Add apostrophe (XML)
$table['&apos;'] = "'";
}
$text = strtr($text, array_diff($table, $exclude));
$newtable = array_diff($table, $exclude);
// Any remaining entities are numerical. Use a regexp to replace them.
return preg_replace('/&#(x?)([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $exclude)', $text);
// Use a regexp to select all entities in one pass, to avoid decoding double-escaped entities twice.
return preg_replace('/&(#x?)?([A-Za-z0-9]+);/e', '_decode_entities("$1", "$2", "$0", $newtable, $exclude)', $text);
}
/**
* Helper function for decode_entities
*/
function _decode_entities($hex, $codepoint, $original, $exclude) {
if ($hex != '') {
function _decode_entities($prefix, $codepoint, $original, &$table, &$exclude) {
// Named entity
if (!$prefix) {
if (isset($table[$original])) {
return $table[$original];
}
else {
return $original;
}
}
// Hexadecimal numerical entity
if ($prefix == '#x') {
$codepoint = base_convert($codepoint, 16, 10);
}
// Encode codepoint as UTF-8 bytes
if ($codepoint < 0x80) {
$str = chr($codepoint);
}
......@@ -1783,6 +1797,7 @@ function _decode_entities($hex, $codepoint, $original, $exclude) {
. chr(0x80 | (($codepoint >> 6) & 0x3F))
. chr(0x80 | ( $codepoint & 0x3F));
}
// Check for excluded characters
if (in_array($str, $exclude)) {
return $original;
}
......
......@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
}
xml_parser_free($xml_parser);
// initialize the translation table:
$tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
$tt['&apos;'] = "'";
/*
** We reverse the array such that we store the first item last,
** and the last item first. In the database, the newest item
......@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
// Prepare the item:
foreach ($item as $key => $value) {
// TODO: Make handling of aggregated HTML more flexible/configurable.
$value = strtr(trim($value), $tt);
$value = decode_entities(trim($value));
$value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
$value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
$value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
......@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
$output .= " <div class=\"body\">\n";
$output .= ' <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n";
if ($item->description) {
$output .= ' <div class="description">'. check_plain($item->description) ."</div>\n";
$output .= ' <div class="description">'. $item->description ."</div>\n";
}
if ($item->ftitle && $item->fid) {
$output .= ' <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n";
......
......@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
}
xml_parser_free($xml_parser);
// initialize the translation table:
$tt = array_flip(get_html_translation_table(HTML_SPECIALCHARS));
$tt['&apos;'] = "'";
/*
** We reverse the array such that we store the first item last,
** and the last item first. In the database, the newest item
......@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
// Prepare the item:
foreach ($item as $key => $value) {
// TODO: Make handling of aggregated HTML more flexible/configurable.
$value = strtr(trim($value), $tt);
$value = decode_entities(trim($value));
$value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
$value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
$value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
......@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
$output .= " <div class=\"body\">\n";
$output .= ' <div class="title"><a href="'. check_url($item->link) .'">'. check_plain($item->title) ."</a></div>\n";
if ($item->description) {
$output .= ' <div class="description">'. check_plain($item->description) ."</div>\n";
$output .= ' <div class="description">'. $item->description ."</div>\n";
}
if ($item->ftitle && $item->fid) {
$output .= ' <div class="source">'. t('Source') .': '. l($item->ftitle, "aggregator/sources/$item->fid") ."</div>\n";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment