Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
project
drupal
Commits
0f41706a
Commit
0f41706a
authored
Apr 07, 2005
by
Steven Wittens
Browse files
-
#19874
: Fix aggregator escaping after check_plain bug
- Fix bug in decode_entities() with double-escaped entities.
parent
7d3d5532
Changes
3
Hide whitespace changes
Inline
Side-by-side
includes/common.inc
View file @
0f41706a
...
...
@@ -1736,6 +1736,7 @@ function mime_header_encode($string, $charset = 'UTF-8') {
/**
* Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
* Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<").
*
* @param $text
* The text to decode entities in.
...
...
@@ -1751,20 +1752,33 @@ function decode_entities($text, $exclude = array()) {
$table
=
array_flip
(
get_html_translation_table
(
HTML_ENTITIES
));
// PHP gives us ISO-8859-1 data, we need UTF-8.
$table
=
array_map
(
'utf8_encode'
,
$table
);
// Add apostrophe (XML)
$table
[
'&apos;'
]
=
"'"
;
}
$
text
=
strtr
(
$text
,
array_diff
(
$table
,
$exclude
)
)
;
$
newtable
=
array_diff
(
$table
,
$exclude
);
//
Any remaining entities are numerical. Use a regexp to replace them
.
return
preg_replace
(
'/&
#
(x?)([A-Za-z0-9]+);/e'
,
'_decode_entities("$1", "$2", "$0", $exclude)'
,
$text
);
//
Use a regexp to select all entities in one pass, to avoid decoding double-escaped entities twice
.
return
preg_replace
(
'/&(
#
x?)
?
([A-Za-z0-9]+);/e'
,
'_decode_entities("$1", "$2", "$0",
$newtable,
$exclude)'
,
$text
);
}
/**
* Helper function for decode_entities
*/
function
_decode_entities
(
$hex
,
$codepoint
,
$original
,
$exclude
)
{
if
(
$hex
!=
''
)
{
function
_decode_entities
(
$prefix
,
$codepoint
,
$original
,
&
$table
,
&
$exclude
)
{
// Named entity
if
(
!
$prefix
)
{
if
(
isset
(
$table
[
$original
]))
{
return
$table
[
$original
];
}
else
{
return
$original
;
}
}
// Hexadecimal numerical entity
if
(
$prefix
==
'#x'
)
{
$codepoint
=
base_convert
(
$codepoint
,
16
,
10
);
}
// Encode codepoint as UTF-8 bytes
if
(
$codepoint
<
0x80
)
{
$str
=
chr
(
$codepoint
);
}
...
...
@@ -1783,6 +1797,7 @@ function _decode_entities($hex, $codepoint, $original, $exclude) {
.
chr
(
0x80
|
((
$codepoint
>>
6
)
&
0x3F
))
.
chr
(
0x80
|
(
$codepoint
&
0x3F
));
}
// Check for excluded characters
if
(
in_array
(
$str
,
$exclude
))
{
return
$original
;
}
...
...
modules/aggregator.module
View file @
0f41706a
...
...
@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
}
xml_parser_free
(
$xml_parser
);
// initialize the translation table:
$tt
=
array_flip
(
get_html_translation_table
(
HTML_SPECIALCHARS
));
$tt
[
'&apos;'
]
=
"'"
;
/*
** We reverse the array such that we store the first item last,
** and the last item first. In the database, the newest item
...
...
@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
// Prepare the item:
foreach
(
$item
as
$key
=>
$value
)
{
// TODO: Make handling of aggregated HTML more flexible/configurable.
$value
=
strtr
(
trim
(
$value
)
,
$tt
);
$value
=
decode_entities
(
trim
(
$value
));
$value
=
strip_tags
(
$value
,
'<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>'
);
$value
=
preg_replace
(
'/\Wstyle\s*=[^>]+?>/i'
,
'>'
,
$value
);
$value
=
preg_replace
(
'/\Won[a-z]+\s*=[^>]+?>/i'
,
'>'
,
$value
);
...
...
@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
$output
.
=
" <div class=
\"
body
\"
>
\n
"
;
$output
.
=
' <div class="title"><a href="'
.
check_url
(
$item
->
link
)
.
'">'
.
check_plain
(
$item
->
title
)
.
"</a></div>
\n
"
;
if
(
$item
->
description
)
{
$output
.
=
' <div class="description">'
.
check_plain
(
$item
->
description
)
.
"</div>
\n
"
;
$output
.
=
' <div class="description">'
.
$item
->
description
.
"</div>
\n
"
;
}
if
(
$item
->
ftitle
&&
$item
->
fid
)
{
$output
.
=
' <div class="source">'
.
t
(
'Source'
)
.
': '
.
l
(
$item
->
ftitle
,
"aggregator/sources/
$item->fid
"
)
.
"</div>
\n
"
;
...
...
modules/aggregator/aggregator.module
View file @
0f41706a
...
...
@@ -468,10 +468,6 @@ function aggregator_parse_feed(&$data, $feed) {
}
xml_parser_free
(
$xml_parser
);
// initialize the translation table:
$tt
=
array_flip
(
get_html_translation_table
(
HTML_SPECIALCHARS
));
$tt
[
'&apos;'
]
=
"'"
;
/*
** We reverse the array such that we store the first item last,
** and the last item first. In the database, the newest item
...
...
@@ -486,7 +482,7 @@ function aggregator_parse_feed(&$data, $feed) {
// Prepare the item:
foreach
(
$item
as
$key
=>
$value
)
{
// TODO: Make handling of aggregated HTML more flexible/configurable.
$value
=
strtr
(
trim
(
$value
)
,
$tt
);
$value
=
decode_entities
(
trim
(
$value
));
$value
=
strip_tags
(
$value
,
'<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>'
);
$value
=
preg_replace
(
'/\Wstyle\s*=[^>]+?>/i'
,
'>'
,
$value
);
$value
=
preg_replace
(
'/\Won[a-z]+\s*=[^>]+?>/i'
,
'>'
,
$value
);
...
...
@@ -1112,7 +1108,7 @@ function theme_aggregator_page_item($item) {
$output
.
=
" <div class=
\"
body
\"
>
\n
"
;
$output
.
=
' <div class="title"><a href="'
.
check_url
(
$item
->
link
)
.
'">'
.
check_plain
(
$item
->
title
)
.
"</a></div>
\n
"
;
if
(
$item
->
description
)
{
$output
.
=
' <div class="description">'
.
check_plain
(
$item
->
description
)
.
"</div>
\n
"
;
$output
.
=
' <div class="description">'
.
$item
->
description
.
"</div>
\n
"
;
}
if
(
$item
->
ftitle
&&
$item
->
fid
)
{
$output
.
=
' <div class="source">'
.
t
(
'Source'
)
.
': '
.
l
(
$item
->
ftitle
,
"aggregator/sources/
$item->fid
"
)
.
"</div>
\n
"
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment