Commit 97fe7df1 authored by webchick
Browse files

Issue #1130198 by pillarsdotnet, Damien Tournoud: Fixed Regression:...

Issue #1130198 by pillarsdotnet, Damien Tournoud: Fixed Regression: line-breaks are mangled by drupal_html_to_text().
parent 9c832e4c
......@@ -430,7 +430,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
$indent[] = count($lists) ? ' "' : '>';
break;
case 'li':
$indent[] = is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
$indent[] = isset($lists[0]) && is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
break;
case 'dd':
$indent[] = ' ';
......@@ -509,7 +509,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
$chunk = $casing($chunk);
}
// Format it and apply the current indentation.
$output .= drupal_wrap_mail($chunk, implode('', $indent));
$output .= drupal_wrap_mail($chunk, implode('', $indent)) . MAIL_LINE_ENDINGS;
// Remove non-quotation markers from indentation.
$indent = array_map('_drupal_html_to_text_clean', $indent);
}
......
<?php
/**
* @file
* Test the Drupal mailing system.
*/
class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
......@@ -63,3 +64,355 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
}
}
/**
* Unit tests for drupal_html_to_text().
*/
class DrupalHtmlToTextTestCase extends DrupalWebTestCase {
public static function getInfo() {
  // Metadata for this test case: its name, a short description and its group.
  $info = array();
  $info['name'] = 'HTML to text conversion';
  $info['description'] = 'Tests drupal_html_to_text().';
  $info['group'] = 'Mail';
  return $info;
}
/**
* Converts a string to its PHP source equivalent for display in test messages.
*
* @param $text
* The text string to convert.
*
* @return
* An HTML representation of the text string that, when displayed in a
* browser, represents the PHP source code equivalent of $text.
*/
function stringToHtml($text) {
  // Characters that would otherwise be invisible in a browser, mapped to a
  // visible stand-in: linefeeds become a literal backslash-n and spaces
  // become non-breaking space entities.
  $visible = array(
    "\n" => '\n',
    ' ' => '&nbsp;',
  );
  // Escape the text first, then substitute on the escaped result.
  $escaped = check_plain($text);
  $rendered = str_replace(array_keys($visible), array_values($visible), $escaped);
  // Wrap in double quotes so the value reads like a PHP string literal.
  return '"' . $rendered . '"';
}
/**
* Helper function for testing drupal_html_to_text().
*
* @param $html
* The source HTML string to be converted.
* @param $text
* The expected result of converting $html to text.
* @param $message
* A text message to display in the assertion message.
* @param $allowed_tags
* (optional) An array of allowed tags, or NULL to default to the full
* set of tags supported by drupal_html_to_text().
*/
function assertHtmlToText($html, $text, $message, $allowed_tags = NULL) {
  // Collect the distinct tag names that open in the (lower-cased) input and
  // append them to the assertion message.
  $tag_matches = array();
  preg_match_all('/<([a-z0-6]+)/', drupal_strtolower($html), $tag_matches);
  $message .= ' (' . implode(', ', array_unique($tag_matches[1])) . ')';

  // Convert and compare against the expected text.
  $result = drupal_html_to_text($html, $allowed_tags);
  $pass = $this->assertEqual($result, $text, check_plain($message));

  // Emit the input, the actual result and the expected result in escaped
  // form as verbose output.
  $sections = array(
    'html = <pre>' . $this->stringToHtml($html) . '</pre>',
    'result = <pre>' . $this->stringToHtml($result) . '</pre>',
    'expected = <pre>' . $this->stringToHtml($text) . '</pre>',
  );
  $verbose = implode('<br />', $sections);
  $this->verbose($verbose);

  if ($pass) {
    return;
  }
  // On failure, repeat the verbose details as an extra message in the
  // results.
  $this->pass("Previous test verbose info:<br />$verbose");
}
/**
* Test all supported tags of drupal_html_to_text().
*/
function testTags() {
// $base_path and $base_url are interpolated into the link test cases below.
global $base_path, $base_url;
// Map of HTML input (key) to the plain text that drupal_html_to_text() is
// expected to return for it (value). The @todo notes mark expectations that
// record current behavior rather than the desired behavior.
$tests = array(
// @todo Trailing linefeeds should be trimmed.
'<a href = "http://drupal.org">Drupal.org</a>' => "Drupal.org [1]\n\n[1] http://drupal.org\n",
// @todo Footer urls should be absolute.
"<a href = \"$base_path\">Homepage</a>" => "Homepage [1]\n\n[1] $base_url/\n",
'<address>Drupal</address>' => "Drupal\n",
// @todo The <address> tag is currently not supported.
'<address>Drupal</address><address>Drupal</address>' => "DrupalDrupal\n",
'<b>Drupal</b>' => "*Drupal*\n",
// @todo There should be a space between the '>' and the text.
'<blockquote>Drupal</blockquote>' => ">Drupal\n",
'<blockquote>Drupal</blockquote><blockquote>Drupal</blockquote>' => ">Drupal\n>Drupal\n",
'<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\nDrupal\n",
'<br/>Drupal<br/>Drupal<br/><br/>Drupal' => "Drupal\nDrupal\nDrupal\n",
// @todo There should be two line breaks before the paragraph.
'<br/>Drupal<br/>Drupal<br/><br/>Drupal<p>Drupal</p>' => "Drupal\nDrupal\nDrupal\nDrupal\n\n",
'<div>Drupal</div>' => "Drupal\n",
// @todo The <div> tag is currently not supported.
'<div>Drupal</div><div>Drupal</div>' => "DrupalDrupal\n",
'<em>Drupal</em>' => "/Drupal/\n",
'<h1>Drupal</h1>' => "======== DRUPAL ==============================================================\n\n",
'<h1>Drupal</h1><p>Drupal</p>' => "======== DRUPAL ==============================================================\n\nDrupal\n\n",
'<h2>Drupal</h2>' => "-------- DRUPAL --------------------------------------------------------------\n\n",
'<h2>Drupal</h2><p>Drupal</p>' => "-------- DRUPAL --------------------------------------------------------------\n\nDrupal\n\n",
'<h3>Drupal</h3>' => ".... Drupal\n\n",
'<h3>Drupal</h3><p>Drupal</p>' => ".... Drupal\n\nDrupal\n\n",
'<h4>Drupal</h4>' => ".. Drupal\n\n",
'<h4>Drupal</h4><p>Drupal</p>' => ".. Drupal\n\nDrupal\n\n",
'<h5>Drupal</h5>' => "Drupal\n\n",
'<h5>Drupal</h5><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
'<h6>Drupal</h6>' => "Drupal\n\n",
'<h6>Drupal</h6><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
'<hr />Drupal<hr />' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
'<hr/>Drupal<hr/>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\n",
'<hr/>Drupal<hr/><p>Drupal</p>' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n\n",
'<i>Drupal</i>' => "/Drupal/\n",
'<p>Drupal</p>' => "Drupal\n\n",
'<p>Drupal</p><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
'<strong>Drupal</strong>' => "*Drupal*\n",
// @todo Tables are currently not supported.
'<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "DrupalDrupalDrupalDrupal\n",
'<table><tr><td>Drupal</td></tr></table><p>Drupal</p>' => "Drupal\nDrupal\n\n",
// @todo The <u> tag is currently not supported.
'<u>Drupal</u>' => "Drupal\n",
'<ul><li>Drupal</li></ul>' => " * Drupal\n\n",
'<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => " * Drupal /Drupal/ Drupal\n\n",
// @todo Lines containing nothing but spaces should be trimmed.
'<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => " * Drupal\n * 1) Drupal\n 2) Drupal\n \n\n",
'<ul><li>Drupal</li><li><ol><li>Drupal</li></ol></li><li>Drupal</li></ul>' => " * Drupal\n * 1) Drupal\n \n * Drupal\n\n",
'<ul><li>Drupal</li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n\n",
'<ul><li>Drupal</li></ul><p>Drupal</p>' => " * Drupal\n\nDrupal\n\n",
'<ol><li>Drupal</li></ol>' => " 1) Drupal\n\n",
'<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => " 1) Drupal\n 2) * Drupal\n * Drupal\n \n\n",
'<ol><li>Drupal</li><li>Drupal</li></ol>' => " 1) Drupal\n 2) Drupal\n\n",
'<ol>Drupal</ol>' => "Drupal\n\n",
'<ol><li>Drupal</li></ol><p>Drupal</p>' => " 1) Drupal\n\nDrupal\n\n",
'<dl><dt>Drupal</dt></dl>' => "Drupal\n\n",
'<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\n\n",
'<dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n Drupal\nDrupal\n Drupal\n\n",
'<dl><dt>Drupal</dt><dd>Drupal</dd></dl><p>Drupal</p>' => "Drupal\n Drupal\n\nDrupal\n\n",
'<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n Drupal\n\n",
'<dl><dt>Drupal</dt></dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
// @todo Again, lines containing only spaces should be trimmed.
'<ul><li>Drupal</li><li><dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl></li><li>Drupal</li></ul>' => " * Drupal\n * Drupal\n Drupal\n Drupal\n Drupal\n \n * Drupal\n\n",
// Tests malformed HTML tags.
'<br>Drupal<br>Drupal' => "Drupal\nDrupal\n",
'<hr>Drupal<hr>Drupal' => "------------------------------------------------------------------------------\nDrupal\n------------------------------------------------------------------------------\nDrupal\n",
'<ol><li>Drupal<li>Drupal</ol>' => " 1) Drupal\n 2) Drupal\n\n",
'<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => " * Drupal /Drupal/ Drupal\n\n",
'<ul><li>Drupal<li>Drupal</ol>' => " * Drupal\n * Drupal\n\n",
'<ul><li>Drupal<li>Drupal</ul>' => " * Drupal\n * Drupal\n\n",
'<ul>Drupal</ul>' => "Drupal\n\n",
'Drupal</ul></ol></dl><li>Drupal' => "Drupal\n * Drupal\n",
'<dl>Drupal</dl>' => "Drupal\n\n",
'<dl>Drupal</dl><p>Drupal</p>' => "Drupal\n\nDrupal\n\n",
'<dt>Drupal</dt>' => "Drupal\n",
// Tests some unsupported HTML tags.
'<html>Drupal</html>' => "Drupal\n",
// @todo Perhaps the contents of <script> tags should be dropped.
'<script type="text/javascript">Drupal</script>' => "Drupal\n",
);
// Run every pair through the shared helper; no $allowed_tags argument is
// passed, so the helper's NULL default applies.
foreach ($tests as $html => $text) {
$this->assertHtmlToText($html, $text, 'Supported tags');
}
}
/**
* Test $allowed_tags argument of drupal_html_to_text().
*/
function testDrupalHtmlToTextArgs() {
  // The second parameter of drupal_html_to_text() overrules the allowed tags.
  // Each case lists: HTML input, expected text, assertion message and the
  // allowed-tags array handed to the conversion.
  $cases = array(
    array(
      'Drupal <b>Drupal</b> Drupal',
      "Drupal *Drupal* Drupal\n",
      'Allowed <b> tag found',
      array('b'),
    ),
    array(
      'Drupal <h1>Drupal</h1> Drupal',
      "Drupal Drupal Drupal\n",
      'Disallowed <h1> tag not found',
      array('b'),
    ),
    array(
      'Drupal <p><em><b>Drupal</b></em><p> Drupal',
      "Drupal Drupal Drupal\n",
      'Disallowed <p>, <em>, and <b> tags not found',
      array('a', 'br', 'h1'),
    ),
    array(
      '<html><body>Drupal</body></html>',
      "Drupal\n",
      'Unsupported <html> and <body> tags not found',
      array('html', 'body'),
    ),
  );
  foreach ($cases as $case) {
    list($html, $expected, $message, $allowed_tags) = $case;
    $this->assertHtmlToText($html, $expected, $message, $allowed_tags);
  }
}
/**
* Test that whitespace is collapsed.
*/
function testDrupalHtmltoTextCollapsesWhitespace() {
  // Only <p> is passed as an allowed tag, so <pre> is outside the allowed
  // set for this conversion.
  $allowed = array('p');
  $html = "<p>Drupal Drupal\n\nDrupal<pre>Drupal Drupal\n\nDrupal</pre>Drupal Drupal\n\nDrupal</p>";
  // @todo The whitespace should be collapsed.
  $expected = "Drupal Drupal\n\nDrupalDrupal Drupal\n\nDrupalDrupal Drupal\n\nDrupal\n\n";
  $this->assertHtmlToText($html, $expected, 'Whitespace is collapsed', $allowed);
}
/**
* Test that text separated by block-level tags in HTML get separated by
* (at least) a newline in the plaintext version.
*/
function testDrupalHtmlToTextBlockTagToNewline() {
  // Every piece of text is a bracketed marker, so two markers sharing an
  // output line show up as "]" followed by "[" without a linefeed between.
  $fragments = array(
    '[text]',
    '<blockquote>[blockquote]</blockquote>',
    '<br />[br]',
    '<dl><dt>[dl-dt]</dt>',
    '<dt>[dt]</dt>',
    '<dd>[dd]</dd>',
    '<dd>[dd-dl]</dd></dl>',
    '<h1>[h1]</h1>',
    '<h2>[h2]</h2>',
    '<h3>[h3]</h3>',
    '<h4>[h4]</h4>',
    '<h5>[h5]</h5>',
    '<h6>[h6]</h6>',
    '<hr />[hr]',
    '<ol><li>[ol-li]</li>',
    '<li>[li]</li>',
    '<li>[li-ol]</li></ol>',
    '<p>[p]</p>',
    '<ul><li>[ul-li]</li>',
    '<li>[li-ul]</li></ul>',
    '[text]',
  );
  $input = implode('', $fragments);
  $output = drupal_html_to_text($input);

  // Fails when any two markers end up on the same line.
  $markers_share_line = preg_match('/\][^\n]*\[/s', $output);
  if (!$this->assertFalse($markers_share_line, 'Block-level HTML tags should force newlines')) {
    $this->verbose($this->stringToHtml($output));
  }

  // Converting the upper-cased input must yield the upper-cased version of
  // the original conversion.
  $output_upper = drupal_strtoupper($output);
  $upper_output = drupal_html_to_text(drupal_strtoupper($input));
  if (!$this->assertEqual($upper_output, $output_upper, 'Tag recognition should be case-insensitive')) {
    $this->verbose($upper_output . '<br />should be equal to <br />' . $output_upper);
  }
}
/**
* Test that headers are properly separated from surrounding text.
*/
function testHeaderSeparation() {
  // Each case pairs an <h1> surrounded by bare text and/or paragraphs with
  // the text currently expected from the conversion.
  $cases = array(
    array(
      'html' => 'Drupal<h1>Drupal</h1>Drupal',
      // @todo There should be more space above the header than below it.
      'text' => "Drupal\n======== DRUPAL ==============================================================\n\nDrupal\n",
      'message' => 'Text before and after <h1> tag',
    ),
    array(
      'html' => '<p>Drupal</p><h1>Drupal</h1>Drupal',
      // @todo There should be more space above the header than below it.
      'text' => "Drupal\n\n======== DRUPAL ==============================================================\n\nDrupal\n",
      'message' => 'Paragraph before and text after <h1> tag',
    ),
    array(
      'html' => 'Drupal<h1>Drupal</h1><p>Drupal</p>',
      // @todo There should be more space above the header than below it.
      'text' => "Drupal\n======== DRUPAL ==============================================================\n\nDrupal\n\n",
      'message' => 'Text before and paragraph after <h1> tag',
    ),
    array(
      'html' => '<p>Drupal</p><h1>Drupal</h1><p>Drupal</p>',
      'text' => "Drupal\n\n======== DRUPAL ==============================================================\n\nDrupal\n\n",
      'message' => 'Paragraph before and after <h1> tag',
    ),
  );
  foreach ($cases as $case) {
    $this->assertHtmlToText($case['html'], $case['text'], $case['message']);
  }
}
/**
* Test that footnote references are properly generated.
*/
function testFootnoteReferences() {
  global $base_path, $base_url;
  // Four links separated by line breaks: absolute with a path, absolute
  // without a path, rooted at $base_path, and relative.
  $source_parts = array(
    '<a href="http://www.example.com/node/1">Host and path</a>',
    '<br /><a href="http://www.example.com">Host, no path</a>',
    '<br /><a href="' . $base_path . 'node/1">Path, no host</a>',
    '<br /><a href="node/1">Relative path</a>',
  );
  $source = implode('', $source_parts);
  // @todo Footnote urls should be absolute.
  $expected_parts = array(
    "Host and path [1]",
    "\nHost, no path [2]",
    // @todo The following two references should be combined.
    "\nPath, no host [3]",
    "\nRelative path [4]",
    "\n",
    "\n[1] http://www.example.com/node/1",
    "\n[2] http://www.example.com",
    // @todo The following two references should be combined.
    "\n[3] $base_url/node/1",
    "\n[4] node/1\n",
  );
  $expected = implode('', $expected_parts);
  $this->assertHtmlToText($source, $expected, 'Footnotes');
}
/**
* Test that combinations of paragraph breaks, line breaks, linefeeds,
* and spaces are properly handled.
*/
function testDrupalHtmlToTextParagraphs() {
  // Pairs of HTML input and the text currently expected for it.
  $cases = array(
    array(
      'html' => "<p>line 1<br />\nline 2<br />line 3\n<br />line 4</p><p>paragraph</p>",
      // @todo Trailing line breaks should be trimmed.
      'text' => "line 1\nline 2\nline 3\nline 4\n\nparagraph\n\n",
    ),
    array(
      'html' => "<p>line 1<br /> line 2</p> <p>line 4<br /> line 5</p> <p>0</p>",
      // @todo Trailing line breaks should be trimmed.
      'text' => "line 1\nline 2\n\nline 4\nline 5\n\n0\n\n",
    ),
  );
  foreach ($cases as $case) {
    $this->assertHtmlToText($case['html'], $case['text'], 'Paragraph breaks');
  }
}
/**
* Tests that drupal_html_to_text() wraps before 1000 characters.
*
* RFC 3676 says, "The Text/Plain media type is the lowest common
* denominator of Internet email, with lines of no more than 998 characters."
*
* RFC 2046 says, "SMTP [RFC-821] allows a maximum of 998 octets before the
* next CRLF sequence."
*
* RFC 821 says, "The maximum total length of a text line including the
* <CRLF> is 1000 characters."
*/
function testVeryLongLineWrap() {
  // Feed a 2100-character unbroken run through the conversion and report the
  // longest resulting line.
  // NOTE(review): the closing tag below is written as '</>' rather than
  // '</p>' -- confirm whether the malformed markup is intentional before
  // changing it, since it may affect the converted output.
  $input = 'Drupal<br /><p>' . str_repeat('x', 2100) . '</><br />Drupal';
  $output = drupal_html_to_text($input);
  // This awkward construct comes from includes/mail.inc lines 8-13.
  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
  // Measure each line together with its terminator. We must use strlen()
  // rather than drupal_strlen() in order to count octets rather than
  // characters. (A previous revision also computed an unused
  // $line_length_limit here via drupal_strlen(), contradicting this rule;
  // it has been dropped.)
  $maximum_line_length = 0;
  foreach (explode($eol, $output) as $line) {
    $maximum_line_length = max($maximum_line_length, strlen($line . $eol));
  }
  $verbose = 'Maximum line length found was ' . $maximum_line_length . ' octets.';
  // @todo This should assert that $maximum_line_length <= 1000. The measured
  //   length already includes the line ending, so the limit to compare
  //   against is the full 1000 octets.
  $this->pass($verbose);
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment