Commit 667bf953 authored by Dries

- Patch #54833 by Steven: added an HTML corrector.

parent d78badcc
......@@ -40,6 +40,9 @@ Drupal 6.0, xxxx-xx-xx (development version)
* Added form to provide initial site information during installation.
* Added ability to provide extra installation steps programmatically.
* Made it possible to import interface translations at install time.
- Added the HTML corrector filter:
* Fixes faulty and chopped off HTML in postings.
* Tags are now automatically closed at the end of the teaser.
Drupal 5.0, 2007-01-15
----------------------
......
......@@ -955,7 +955,7 @@ function theme_filter_tips_more_info() {
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
switch ($op) {
case 'list':
return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'));
return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
case 'description':
switch ($delta) {
......@@ -965,6 +965,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
return t('Converts line breaks into HTML (i.e. <br> and <p> tags).');
case 2:
return t('Turns web and e-mail addresses into clickable links.');
case 3:
return t('Corrects faulty and chopped off HTML in postings.');
default:
return;
}
......@@ -977,6 +979,8 @@ function filter_filter($op, $delta = 0, $format = -1, $text = '') {
return _filter_autop($text);
case 2:
return _filter_url($text, $format);
case 3:
return _filter_htmlcorrector($text);
default:
return $text;
}
......@@ -1098,6 +1102,80 @@ function _filter_url($text, $format) {
return $text;
}
/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * Text between tags is passed through unchanged, except that a '<' which is
 * not followed by a letter or a slash is converted to '&lt;'. Closing tags
 * without a matching open tag are discarded, single-use tags (br, img, ...)
 * are written in self-closed form, and any tags still open at the end of the
 * input are closed in reverse order of opening.
 *
 * @param $text
 *   The HTML string to correct.
 * @return
 *   The corrected HTML string.
 */
function _filter_htmlcorrector($text) {
  // Prepare tag lists. Only key membership is tested (with isset()), so a
  // flipped list of names is sufficient.
  static $no_nesting, $single_use;
  if (!isset($no_nesting)) {
    // Tags which cannot be nested but are typically left unclosed.
    $no_nesting = array_flip(array('li', 'p'));
    // Single use tags in HTML4
    $single_use = array_flip(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
  }

  // Properly entify angles.
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: with PREG_SPLIT_DELIM_CAPTURE the array consists of alternating
  // literals and captured tag contents, and begins and ends with a literal
  // (an empty string where there is no text).

  $tag = false; // Odd/even counter. Tag or no tag.
  $stack = array();
  $output = '';
  foreach ($split as $value) {
    // Process HTML tags.
    if ($tag) {
      // The tag name ends at the first whitespace character of any kind, not
      // only at a space, so that attributes on a new line are not mistaken
      // for part of the name.
      $tagname = strtolower($value);
      $tagname = substr($tagname, 0, strcspn($tagname, " \t\r\n"));

      // Closing tag
      if ($tagname[0] == '/') {
        // The cast guards against substr() returning FALSE for '</>' on
        // older PHP versions.
        $tagname = (string) substr($tagname, 1);
        // Discard XHTML closing tags for single use tags.
        if (!isset($single_use[$tagname])) {
          // See if we possibly have a matching opening tag on the stack.
          if (in_array($tagname, $stack, true)) {
            // Close other tags lingering first.
            do {
              $output .= '</'. $stack[0] .'>';
            } while (array_shift($stack) !== $tagname);
          }
          // Otherwise, discard it.
        }
      }
      // Opening tag
      else {
        // Drop the slash of compact self-closing tags such as <br/> so the
        // name can be recognised; the slash is re-added below when needed.
        $tagname = rtrim($tagname, '/');
        // See if we have an identical 'no nesting' tag already open and close it if found.
        if (count($stack) && ($stack[0] === $tagname) && isset($no_nesting[$stack[0]])) {
          $output .= '</'. array_shift($stack) .'>';
        }
        // Push non-single-use tags onto the stack
        if (!isset($single_use[$tagname])) {
          array_unshift($stack, $tagname);
        }
        // Add trailing slash to single-use tags as per X(HT)ML.
        else {
          $value = rtrim($value, ' /') . ' /';
        }
        $output .= '<'. $value .'>';
      }
    }
    else {
      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }

  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</'. array_shift($stack) .'>';
  }
  return $output;
}
/**
* Make links out of absolute URLs.
*/
......
......@@ -1209,12 +1209,16 @@ function system_install() {
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 0, 1)");
// Line break filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 1, 2)");
// HTML corrector filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (1, 'filter', 3, 10)");
// Full HTML:
// URL filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 2, 0)");
// Line break filter.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 1, 1)");
// HTML corrector filter. This belongs to the Full HTML format (2), like the
// other filters in this section.
db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (2, 'filter', 3, 10)");
db_query("INSERT INTO {variable} (name,value) VALUES ('filter_html_1','i:1;')");
......@@ -4062,6 +4066,30 @@ function system_update_6017() {
return $ret;
}
/**
 * Add HTML corrector to HTML formats or replace the old module if it was in use.
 *
 * If the contributed htmlcorrector module is enabled, it is disabled and its
 * filter assignments are pointed at the core filter (module 'filter',
 * delta 3). Otherwise the core filter is appended to every input format whose
 * name contains 'HTML'.
 *
 * @return
 *   An array of result entries, each with 'success' and 'query' keys, in the
 *   form update_sql() returns.
 */
function system_update_6018() {
  $ret = array();

  // Disable htmlcorrector.module, if it exists and replace its filter.
  if (module_exists('htmlcorrector')) {
    module_disable(array('htmlcorrector'));
    // Filter assignments (module, delta) are stored in {filters};
    // {filter_formats} only describes the formats themselves.
    $ret[] = update_sql("UPDATE {filters} SET module = 'filter', delta = 3 WHERE module = 'htmlcorrector'");
    // Report the message in the same array shape as the update_sql() results
    // so the caller can treat every entry of $ret alike.
    $ret[] = array('success' => TRUE, 'query' => t('HTML Corrector module was disabled; this functionality has now been added to core.'));
    return $ret;
  }

  // Otherwise, find any format with 'HTML' in its name and add the filter at the end.
  $result = db_query("SELECT format FROM {filter_formats} WHERE name LIKE '%HTML%'");
  while ($format = db_fetch_object($result)) {
    // Place the corrector after every filter already in the format, and at
    // weight 10 or higher.
    $weight = db_result(db_query("SELECT MAX(weight) FROM {filters} WHERE format = %d", $format->format));
    db_query("INSERT INTO {filters} (format, module, delta, weight) VALUES (%d, '%s', %d, %d)", $format->format, 'filter', 3, max(10, $weight + 1));
  }

  return $ret;
}
/**
* @} End of "defgroup updates-5.x-to-6.x"
* The next series of updates should start at 7000.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment