filter.module 46.4 KB
Newer Older
1 2 3
<?php
// $Id$

Dries's avatar
 
Dries committed
4 5 6 7 8
/**
 * @file
 * Framework for handling filtering of content.
 */

9 10 11 12 13 14
/**
 * Format ID that stands for "whatever the default format is".
 *
 * Passing this value to the filter APIs as a format ID has the same effect as
 * not passing an explicit format at all.
 */
define('FILTER_FORMAT_DEFAULT', 0);

Dries's avatar
Dries committed
17 18 19
/**
 * Implementation of hook_help().
 *
 * @param $path
 *   The path help is requested for. Only the filter administration paths
 *   listed in the switch below produce output.
 * @param $arg
 *   Array of path components; $arg[3] is used as the format ID when building
 *   links for the per-format pages.
 *
 * @return
 *   A string of translated help markup, or NULL for any other path.
 */
function filter_help($path, $arg) {
  switch ($path) {
    case 'admin/help#filter':
      $output = '<p>' . t("The filter module allows administrators to configure text formats for use on your site. A text format defines the HTML tags, codes, and other input allowed in both content and comments, and is a key feature in guarding against potentially damaging input from malicious users. Two formats included by default are <em>Filtered HTML</em> (which allows only an administrator-approved subset of HTML tags) and <em>Full HTML</em> (which allows the full set of HTML tags). Additional formats may be created by an administrator.") . '</p>';
      $output .= '<p>' . t('Each text format uses filters to manipulate text, and most formats apply several different filters to text in a specific order. Each filter is designed for a specific purpose, and generally either adds, removes or transforms elements within user-entered text before it is displayed. A filter does not change the actual content of a post, but instead, modifies it temporarily before it is displayed. A filter may remove unapproved HTML tags, for instance, while another automatically adds HTML to make links referenced in text clickable.') . '</p>';
      $output .= '<p>' . t('Users with access to more than one text format can use the <em>Text format</em> fieldset to choose between available text formats when creating or editing multi-line content. Administrators determine the text formats available to each user role, select a default text format, and control the order of formats listed in the <em>Text format</em> fieldset.') . '</p>';
      $output .= '<p>' . t('For more information, see the online handbook entry for <a href="@filter">Filter module</a>.', array('@filter' => 'http://drupal.org/handbook/modules/filter/')) . '</p>';
      return $output;

    case 'admin/settings/filter':
      $output = '<p>' . t('Use the list below to review the text formats available to each user role, to select a default text format, and to control the order of formats listed in the <em>Text format</em> fieldset. (The <em>Text format</em> fieldset is displayed below textareas when users with access to more than one text format create multi-line content.) The text format selected as <em>Default</em> is available to all users and, unless another format is selected, is applied to all content. All text formats are available to users in roles with the "administer filters" permission.') . '</p>';
      // Grammar fix: this string previously read "an text format".
      $output .= '<p>' . t('Since text formats, if available, are presented in the same order as the list below, it may be helpful to arrange the formats in descending order of your preference for their use. To change the order of a text format, grab a drag-and-drop handle under the <em>Name</em> column and drag to a new location in the list. (Grab a handle by clicking and holding the mouse while hovering over a handle icon.) Remember that your changes will not be saved until you click the <em>Save changes</em> button at the bottom of the page.') . '</p>';
      return $output;

    case 'admin/settings/filter/%':
      return '<p>' . t('Every <em>filter</em> performs one particular change on the user input, for example stripping out malicious HTML or making URLs clickable. Choose which filters you want to apply to text in this format. If you notice some filters are causing conflicts in the output, you can <a href="@rearrange">rearrange them</a>.', array('@rearrange' => url('admin/settings/filter/' . $arg[3] . '/order'))) . '</p>';

    case 'admin/settings/filter/%/configure':
      return '<p>' . t('If you cannot find the settings for a certain filter, make sure you have enabled it on the <a href="@url">edit tab</a> first.', array('@url' => url('admin/settings/filter/' . $arg[3]))) . '</p>';

    case 'admin/settings/filter/%/order':
      $output = '<p>' . t('Because of the flexible filtering system, you might encounter a situation where one filter prevents another from doing its job. For example: a word in an URL gets converted into a glossary term, before the URL can be converted to a clickable link. When this happens, rearrange the order of the filters.') . '</p>';
      $output .= '<p>' . t("Filters are executed from top-to-bottom. To change the order of the filters, modify the values in the <em>Weight</em> column or grab a drag-and-drop handle under the <em>Name</em> column and drag filters to new locations in the list. (Grab a handle by clicking and holding the mouse while hovering over a handle icon.) Remember that your changes will not be saved until you click the <em>Save configuration</em> button at the bottom of the page.") . '</p>';
      return $output;
  }
}

43
/**
 * Implementation of hook_theme().
 *
 * @return
 *   An array of theme hook definitions keyed by hook name. Each definition
 *   lists the default 'arguments' and, where the implementation lives outside
 *   this file, the include 'file' that holds it.
 */
function filter_theme() {
  $hooks = array();

  // Hooks whose implementation is declared to live in filter.admin.inc.
  $hooks['filter_admin_overview'] = array(
    'arguments' => array('form' => NULL),
    'file' => 'filter.admin.inc',
  );
  $hooks['filter_admin_order'] = array(
    'arguments' => array('form' => NULL),
    'file' => 'filter.admin.inc',
  );

  // Hook whose implementation is declared to live in filter.pages.inc.
  $hooks['filter_tips'] = array(
    'arguments' => array('tips' => NULL, 'long' => FALSE),
    'file' => 'filter.pages.inc',
  );

  // Hooks without a separate include file.
  $hooks['filter_tips_more_info'] = array(
    'arguments' => array(),
  );
  $hooks['filter_guidelines'] = array(
    'arguments' => array('format' => NULL),
  );

  return $hooks;
}

69 70 71
/**
 * Implementation of hook_menu().
 *
 * @return
 *   An array of menu item definitions keyed by path: the text format
 *   administration pages under admin/settings/filter, plus the
 *   filter/tips page, which declares 'access callback' => TRUE.
 */
function filter_menu() {
  // Every administrative item is guarded by the same permission.
  $admin_access = array('administer filters');
  $items = array();

  // Overview of all text formats and its default tab.
  $items['admin/settings/filter'] = array(
    'title' => 'Text formats',
    'description' => 'Configure how content input by users is filtered, including allowed HTML tags. Also allows enabling of module-provided filters.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_overview'),
    'access arguments' => $admin_access,
  );
  $items['admin/settings/filter/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );

  // Adding and deleting formats.
  $items['admin/settings/filter/add'] = array(
    'title' => 'Add text format',
    'page callback' => 'filter_admin_format_page',
    'access arguments' => $admin_access,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items['admin/settings/filter/delete'] = array(
    'title' => 'Delete text format',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_delete'),
    'access arguments' => $admin_access,
    'type' => MENU_CALLBACK,
  );

  // Compose tips page.
  $items['filter/tips'] = array(
    'title' => 'Compose tips',
    'page callback' => 'filter_tips_long',
    'access callback' => TRUE,
    'type' => MENU_SUGGESTED_ITEM,
  );

  // Per-format pages; path component 3 is the %filter_format wildcard.
  $items['admin/settings/filter/%filter_format'] = array(
    'type' => MENU_CALLBACK,
    'title callback' => 'filter_admin_format_title',
    'title arguments' => array(3),
    'page callback' => 'filter_admin_format_page',
    'page arguments' => array(3),
    'access arguments' => $admin_access,
  );
  $items['admin/settings/filter/%filter_format/edit'] = array(
    'title' => 'Edit',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => 0,
  );
  $items['admin/settings/filter/%filter_format/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'filter_admin_configure_page',
    'page arguments' => array(3),
    'access arguments' => $admin_access,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
  );
  $items['admin/settings/filter/%filter_format/order'] = array(
    'title' => 'Rearrange',
    'page callback' => 'filter_admin_order_page',
    'page arguments' => array(3),
    'access arguments' => $admin_access,
    'type' => MENU_LOCAL_TASK,
    'weight' => 2,
  );

  return $items;
}

136 137 138 139
/**
 * Load a single text format by ID.
 *
 * NOTE(review): presumably the loader behind the %filter_format wildcard used
 * in filter_menu() - confirm against the menu system's naming convention.
 *
 * @param $arg
 *   The format ID to look up.
 *
 * @return
 *   Whatever filter_formats() returns for that ID: the format object when it
 *   is among the formats available to the current user, FALSE otherwise.
 */
function filter_format_load($arg) {
  $format = filter_formats($arg);
  return $format;
}

140
/**
 * Title callback: use a text format's name as the page title.
 *
 * @param $format
 *   A text format object with a 'name' property.
 *
 * @return
 *   The value of $format->name, unmodified.
 */
function filter_admin_format_title($format) {
  $title = $format->name;
  return $title;
}

147 148 149 150
/**
 * Implementation of hook_perm().
 *
 * @return
 *   An array with the single permission 'administer filters', carrying a
 *   translated title and description.
 */
function filter_perm() {
  $title = t('Administer filters');
  // The warning is translated on its own and substituted into the description.
  $warning = t('Warning: Give to trusted roles only; this permission has security implications.');
  $description = t('Manage text formats and filters, and select which roles may use them. %warning', array('%warning' => $warning));

  $permissions = array();
  $permissions['administer filters'] = array(
    'title' => $title,
    'description' => $description,
  );
  return $permissions;
}

159 160 161 162 163 164 165 166 167
/**
 * Implementation of hook_cron().
 *
 * Expires outdated entries from the filter cache by calling
 * cache_clear_all() with a NULL cache ID on the 'cache_filter' table.
 */
function filter_cron() {
  $table = 'cache_filter';
  cache_clear_all(NULL, $table);
}

168
/**
 * Implementation of hook_filter_tips().
 *
 * @param $delta
 *   Which of this module's filters to describe. The numbering follows the
 *   list returned by filter_filter('list'): 0 = limit allowed HTML tags,
 *   1 = convert line breaks, 2 = convert URLs into links, 4 = escape all
 *   HTML. No tip is produced for any other delta.
 * @param $format
 *   The format ID; used to look up the per-format "allowed_html_" and
 *   "filter_html_help_" variables.
 * @param $long
 *   FALSE (default) for the short tip, TRUE for the long help text.
 *
 * @return
 *   The translated tip as a string, or NULL when there is nothing to show.
 */
function filter_filter_tips($delta, $format, $long = FALSE) {
  global $base_url;

  switch ($delta) {
    case 0:
      if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
        switch ($long) {
          case 0:
            return t('Allowed HTML tags: @tags', array('@tags' => $allowed_html));

          case 1:
            $output = '<p>' . t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)) . '</p>';
            // The extended help can be switched off per format.
            if (!variable_get("filter_html_help_$format", 1)) {
              return $output;
            }

            $output .= '<p>' . t('This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.') . '</p>';
            $output .= '<p>' . t('For more information see W3C\'s <a href="@html-specifications">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.', array('@html-specifications' => 'http://www.w3.org/TR/html/')) . '</p>';

            // Tag name => array(description, example markup). A NULL entry
            // marks a tag that is already explained by its parent tag's row.
            $tips = array(
              'a' => array(t('Anchors are used to make links to other pages.'), '<a href="' . $base_url . '">' . variable_get('site_name', 'Drupal') . '</a>'),
              'br' => array(t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
              'p' => array(t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>' . t('Paragraph one.') . '</p> <p>' . t('Paragraph two.') . '</p>'),
              'strong' => array(t('Strong'), '<strong>' . t('Strong') . '</strong>'),
              'em' => array(t('Emphasized'), '<em>' . t('Emphasized') . '</em>'),
              'cite' => array(t('Cited'), '<cite>' . t('Cited') . '</cite>'),
              'code' => array(t('Coded text used to show programming source code'), '<code>' . t('Coded') . '</code>'),
              'b' => array(t('Bolded'), '<b>' . t('Bolded') . '</b>'),
              'u' => array(t('Underlined'), '<u>' . t('Underlined') . '</u>'),
              'i' => array(t('Italicized'), '<i>' . t('Italicized') . '</i>'),
              'sup' => array(t('Superscripted'), t('<sup>Super</sup>scripted')),
              'sub' => array(t('Subscripted'), t('<sub>Sub</sub>scripted')),
              'pre' => array(t('Preformatted'), '<pre>' . t('Preformatted') . '</pre>'),
              'abbr' => array(t('Abbreviation'), t('<abbr title="Abbreviation">Abbrev.</abbr>')),
              'acronym' => array(t('Acronym'), t('<acronym title="Three-Letter Acronym">TLA</acronym>')),
              'blockquote' => array(t('Block quoted'), '<blockquote>' . t('Block quoted') . '</blockquote>'),
              'q' => array(t('Quoted inline'), '<q>' . t('Quoted inline') . '</q>'),
              // Assumes and describes tr, td, th.
              'table' => array(t('Table'), '<table> <tr><th>' . t('Table header') . '</th></tr> <tr><td>' . t('Table cell') . '</td></tr> </table>'),
              'tr' => NULL, 'td' => NULL, 'th' => NULL,
              'del' => array(t('Deleted'), '<del>' . t('Deleted') . '</del>'),
              'ins' => array(t('Inserted'), '<ins>' . t('Inserted') . '</ins>'),
              // Assumes and describes li.
              'ol' => array(t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>' . t('First item') . '</li> <li>' . t('Second item') . '</li> </ol>'),
              'ul' => array(t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>' . t('First item') . '</li> <li>' . t('Second item') . '</li> </ul>'),
              'li' => NULL,
              // Assumes and describes dt and dd.
              'dl' => array(t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>' . t('First term') . '</dt> <dd>' . t('First definition') . '</dd> <dt>' . t('Second term') . '</dt> <dd>' . t('Second definition') . '</dd> </dl>'),
              'dt' => NULL, 'dd' => NULL,
              'h1' => array(t('Heading'), '<h1>' . t('Title') . '</h1>'),
              'h2' => array(t('Heading'), '<h2>' . t('Subtitle') . '</h2>'),
              'h3' => array(t('Heading'), '<h3>' . t('Subtitle three') . '</h3>'),
              'h4' => array(t('Heading'), '<h4>' . t('Subtitle four') . '</h4>'),
              'h5' => array(t('Heading'), '<h5>' . t('Subtitle five') . '</h5>'),
              'h6' => array(t('Heading'), '<h6>' . t('Subtitle six') . '</h6>')
            );
            $header = array(t('Tag Description'), t('You Type'), t('You Get'));
            // Start from an empty list: when every allowed tag maps to a NULL
            // tip (e.g. only <li>), no row is added and $rows would otherwise
            // be undefined when handed to theme('table').
            $rows = array();
            preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
            foreach ($out[1] as $tag) {
              if (array_key_exists($tag, $tips)) {
                if ($tips[$tag]) {
                  $rows[] = array(
                    array('data' => $tips[$tag][0], 'class' => 'description'),
                    array('data' => '<code>' . check_plain($tips[$tag][1]) . '</code>', 'class' => 'type'),
                    array('data' => $tips[$tag][1], 'class' => 'get')
                  );
                }
              }
              else {
                $rows[] = array(
                  array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3),
                );
              }
            }
            $output .= theme('table', $header, $rows);

            $output .= '<p>' . t('Most unusual characters can be directly entered without any problems.') . '</p>';
            $output .= '<p>' . t('If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="@html-entities">entities</a> page. Some of the available characters include:', array('@html-entities' => 'http://www.w3.org/TR/html4/sgml/entities.html')) . '</p>';

            $entities = array(
              array(t('Ampersand'), '&amp;'),
              array(t('Greater than'), '&gt;'),
              array(t('Less than'), '&lt;'),
              array(t('Quotation mark'), '&quot;'),
            );
            $header = array(t('Character Description'), t('You Type'), t('You Get'));
            // Reset the row list for the second table.
            $rows = array();
            foreach ($entities as $entity) {
              $rows[] = array(
                array('data' => $entity[0], 'class' => 'description'),
                array('data' => '<code>' . check_plain($entity[1]) . '</code>', 'class' => 'type'),
                array('data' => $entity[1], 'class' => 'get')
              );
            }
            $output .= theme('table', $header, $rows);
            return $output;
        }
      }
      break;

    case 1:
      switch ($long) {
        case 0:
          return t('Lines and paragraphs break automatically.');

        case 1:
          return t('Lines and paragraphs are automatically recognized. The &lt;br /&gt; line break, &lt;p&gt; paragraph and &lt;/p&gt; close paragraph tags are inserted automatically. If paragraphs are not recognized simply add a couple blank lines.');
      }
      break;

    case 2:
      return t('Web page addresses and e-mail addresses turn into links automatically.');

    case 4:
      return t('No HTML tags allowed');
  }
}

Dries's avatar
Dries committed
288
/**
 * Retrieve a list of text formats.
 *
 * The list is loaded once per request and kept in a static variable. Users
 * with the 'administer filters' permission get every format; everyone else
 * gets the site default format plus the formats assigned to one of their
 * roles.
 *
 * @param $index
 *   Optional format ID. When given, only that format is returned.
 *
 * @return
 *   Without $index: an array of format objects keyed by format ID, ordered by
 *   weight. With $index: the matching format object, or FALSE when that
 *   format is not in the list.
 */
function filter_formats($index = NULL) {
  global $user;
  static $formats;

  // Administrators can always use all text formats.
  $all = user_access('administer filters');

  if (!isset($formats)) {
    $formats = array();

    $query = db_select('filter_format', 'f');
    $query->addField('f', 'format', 'format');
    $query->addField('f', 'name', 'name');
    $query->addField('f', 'roles', 'roles');
    $query->addField('f', 'cache', 'cache');
    $query->addField('f', 'weight', 'weight');
    $query->orderBy('weight');

    // Build query for selecting the format(s) based on the user's roles.
    if (!$all) {
      $or = db_or()->condition('format', variable_get('filter_default_format', 1));
      foreach ($user->roles as $rid => $role) {
        // Match the role ID as a whole list entry. An unanchored '%1%'
        // pattern would also match role IDs such as 11 or 21 and grant
        // access to formats the user's roles were never given.
        // NOTE(review): assumes the roles column stores a comma-wrapped list
        // such as ',1,2,' - confirm against the code that writes it.
        $or->condition('roles', '%,' . (int) $rid . ',%', 'LIKE');
      }
      $query->condition($or);
    }

    $formats = $query->execute()->fetchAllAssoc('format');
  }

  if (isset($index)) {
    return isset($formats[$index]) ? $formats[$index] : FALSE;
  }
  return $formats;
}
325

326 327 328 329 330 331
/**
 * Build a list of all filters.
 *
 * Asks every module that implements hook_filter() for its 'list' of filters.
 *
 * @return
 *   An array of filter objects (properties: module, delta, name) keyed by
 *   "module/delta" and sorted by filter name.
 */
function filter_list_all() {
  $filters = array();

  foreach (module_implements('filter') as $module) {
    $callback = $module . '_filter';
    $names = $callback('list');
    // Ignore modules that do not hand back a proper list.
    if (!is_array($names)) {
      continue;
    }
    foreach ($names as $delta => $name) {
      $key = $module . '/' . $delta;
      $filters[$key] = (object) array('module' => $module, 'delta' => $delta, 'name' => $name);
    }
  }

  uasort($filters, '_filter_list_cmp');

  return $filters;
}

/**
 * Comparison callback: order two filter objects by their name.
 *
 * @param $a
 *   First filter object, with a 'name' property.
 * @param $b
 *   Second filter object, with a 'name' property.
 *
 * @return
 *   The strcmp() result for the two names: negative, zero or positive.
 */
function _filter_list_cmp($a, $b) {
  $left = $a->name;
  $right = $b->name;
  return strcmp($left, $right);
}

354 355 356 357 358 359
/**
 * Resolve a format ID, mapping the "default" placeholder to a real format.
 *
 * @param $format
 *   A format ID, or FILTER_FORMAT_DEFAULT.
 *
 * @return
 *   The 'filter_default_format' variable (falling back to 1) when $format
 *   equals FILTER_FORMAT_DEFAULT, otherwise $format unchanged.
 */
function filter_resolve_format($format) {
  if ($format == FILTER_FORMAT_DEFAULT) {
    return variable_get('filter_default_format', 1);
  }
  return $format;
}
Dries's avatar
Dries committed
360
/**
 * Check if text in a certain text format is allowed to be cached.
 *
 * The answer is read from the 'cache' column of {filter_format} and kept in a
 * static variable, so each format is queried at most once per request.
 *
 * @param $format
 *   A format ID; FILTER_FORMAT_DEFAULT is resolved first.
 *
 * @return
 *   The stored 'cache' value for the format, as returned by db_result().
 */
function filter_format_allowcache($format) {
  static $cache = array();

  $format = filter_resolve_format($format);
  if (isset($cache[$format])) {
    return $cache[$format];
  }

  $result = db_query('SELECT cache FROM {filter_format} WHERE format = %d', $format);
  $cache[$format] = db_result($result);
  return $cache[$format];
}

/**
 * Retrieve a list of filters for a certain format.
 *
 * Results are kept in a static variable per format. A stored filter is only
 * included when its module still lists that delta in hook_filter('list').
 *
 * @param $format
 *   The format ID.
 *
 * @return
 *   An array of filter objects (rows of {filter} with a 'name' property
 *   added) keyed by "module/delta", ordered by weight, module and delta.
 */
function filter_list_format($format) {
  static $filters = array();

  if (isset($filters[$format])) {
    return $filters[$format];
  }

  $filters[$format] = array();
  $result = db_query("SELECT * FROM {filter} WHERE format = %d ORDER BY weight, module, delta", $format);
  while ($filter = db_fetch_object($result)) {
    $names = module_invoke($filter->module, 'filter', 'list');
    // Skip filters whose module no longer lists this delta.
    if (!is_array($names) || !isset($names[$filter->delta])) {
      continue;
    }
    $filter->name = $names[$filter->delta];
    $key = $filter->module . '/' . $filter->delta;
    $filters[$format][$key] = $filter;
  }

  return $filters[$format];
}

393 394
/**
 * @name Filtering functions
 * @{
 * Modules which need to have content filtered can use these functions to
 * interact with the filter system.
 *
 * For more info, see the hook_filter() documentation.
 *
 * Note: because filters can inject JavaScript or execute PHP code, security is
 * vital here. When a user supplies a $format, you should validate it with
 * filter_access($format) before accepting/using it. This is normally done in
 * the validation stage of the node system. You should for example never make a
 * preview of content in a disallowed format.
 */

Dries's avatar
Dries committed
408 409
/**
 * Run all the enabled filters on a piece of text.
 *
 * The text first goes through every filter's 'prepare' operation and then
 * through every filter's 'process' operation, in the order returned by
 * filter_list_format(). Results are served from and stored in the
 * 'cache_filter' cache when the format allows caching.
 *
 * @param $text
 *    The text to be filtered.
 * @param $format
 *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT for
 *    the default format.
 * @param $langcode
 *    Optional: the language code of the text to be filtered, e.g. 'en' for
 *    English. This allows filters to be language aware so language specific
 *    text replacement can be implemented.
 * @param $check
 *    Whether to check the $format with filter_access() first. Defaults to TRUE.
 *    Note that this will check the permissions of the current user, so you
 *    should specify $check = FALSE when viewing other people's content. When
 *    showing content that is not (yet) stored in the database (eg. upon preview),
 *    set to TRUE so the user's permissions are checked.
 *
 * @return
 *    The filtered text, or the translated string 'n/a' when $text is not set
 *    or the access check fails.
 */
function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $langcode = '', $check = TRUE) {
  // Bail out when there is no text, or when $check = TRUE and the current
  // user may not use $format.
  if (!isset($text) || ($check && !filter_access($format))) {
    return t('n/a');
  }

  $format = filter_resolve_format($format);

  // Check for a cached version of this piece of text.
  $cache_id = $format . ':' . $langcode . ':' . md5($text);
  $cached = cache_get($cache_id, 'cache_filter');
  if ($cached) {
    return $cached->data;
  }

  // Convert all Windows and Mac newlines to a single newline,
  // so filters only need to deal with one possibility.
  $text = str_replace(array("\r\n", "\r"), "\n", $text);

  // Get a complete list of filters, ordered properly.
  $filters = filter_list_format($format);

  // First pass ('prepare') gives filters the chance to escape HTML-like data
  // such as code or formulas; second pass ('process') performs the filtering.
  foreach (array('prepare', 'process') as $op) {
    foreach ($filters as $filter) {
      $text = module_invoke($filter->module, 'filter', $op, $filter->delta, $format, $text, $langcode, $cache_id);
    }
  }

  // Store in cache with a minimum expiration time of 1 day.
  if (filter_format_allowcache($format)) {
    cache_set($cache_id, $text, 'cache_filter', REQUEST_TIME + (60 * 60 * 24));
  }

  return $text;
}

/**
 * Generate a selector for choosing a format in a form.
 *
 * @ingroup forms
 * @see filter_form_validate()
 * @param $value
 *   The ID of the format that is currently selected.
 * @param $weight
 *   The weight of the returned fieldset within its parent form.
 * @param $parents
 *   Required when defining multiple text formats on a single node or having a different parent than 'format'.
 * @return
 *   A form array: a fieldset holding the format selector, the per-format
 *   guidelines and a link to the long filter tips. The selector is hidden
 *   ('#access' FALSE) unless more than one format is available.
 */
function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL, $parents = array('format')) {
  $value = filter_resolve_format($value);
  $formats = filter_formats();

  drupal_add_js('misc/form.js');
  drupal_add_css(drupal_get_path('module', 'filter') . '/filter.css');
  $element_id = form_clean_id('edit-' . implode('-', $parents));

  $form = array(
    '#type' => 'fieldset',
    '#weight' => $weight,
    '#attributes' => array('class' => 'filter-wrapper'),
  );
  $form['format_guidelines'] = array(
    '#prefix' => '<div id="' . $element_id . '-guidelines" class="filter-guidelines">',
    '#suffix' => '</div>',
    '#weight' => 2,
  );
  // Start from an empty option list so the select element below still gets a
  // defined '#options' value when no format is available to the user.
  $options = array();
  foreach ($formats as $format) {
    $options[$format->format] = $format->name;
    $form['format_guidelines'][$format->format] = array(
      '#markup' => theme('filter_guidelines', $format),
    );
  }
  $form['format'] = array(
    '#type' => 'select',
    '#title' => t('Text format'),
    '#options' => $options,
    '#default_value' => $value,
    '#parents' => $parents,
    // A selector with a single choice is pointless; hide it in that case.
    '#access' => count($formats) > 1,
    '#id' => $element_id,
    '#attributes' => array('class' => 'filter-list'),
  );
  $form['format_help'] = array(
    '#prefix' => '<div id="' . $element_id . '-help" class="filter-help">',
    '#markup' => theme('filter_tips_more_info'),
    '#suffix' => '</div>',
    '#weight' => 1,
  );

  return $form;
}

/**
 * Returns TRUE if the user is allowed to access this format.
 *
 * Access is granted when the current user has the 'administer filters'
 * permission, when the format is the site default, or when the format is in
 * the list returned by filter_formats().
 *
 * @param $format
 *   A format ID; FILTER_FORMAT_DEFAULT is resolved first.
 *
 * @return
 *   TRUE when the format may be used, FALSE otherwise.
 */
function filter_access($format) {
  $format = filter_resolve_format($format);

  if (user_access('administer filters')) {
    return TRUE;
  }
  if ($format == variable_get('filter_default_format', 1)) {
    return TRUE;
  }

  $available = filter_formats();
  return isset($available[$format]);
}
538

Dries's avatar
Dries committed
539 540 541 542 543 544 545 546
/**
 * @} End of "Filtering functions".
 */


/**
 * Helper function for fetching filter tips.
 *
 * @param $format
 *   A format ID, or -1 to collect tips for every format returned by
 *   filter_formats().
 * @param $long
 *   Passed through to each filter's hook_filter_tips() implementation.
 *
 * @return
 *   An array keyed by format name. Each value is a list of arrays with the
 *   keys 'tip' (the tip text) and 'id' (the filter's "module/delta" key);
 *   filters that return no tip are left out.
 */
function _filter_tips($format, $long = FALSE) {
  $formats = ($format == -1)
    ? filter_formats()
    : array(db_fetch_object(db_query("SELECT * FROM {filter_format} WHERE format = %d", $format)));

  $tips = array();

  foreach ($formats as $item) {
    $tips[$item->name] = array();
    foreach (filter_list_format($item->format) as $id => $filter) {
      $tip = module_invoke($filter->module, 'filter_tips', $filter->delta, $item->format, $long);
      if ($tip) {
        $tips[$item->name][] = array('tip' => $tip, 'id' => $id);
      }
    }
  }

  return $tips;
}


572 573 574 575 576 577
/**
 * Format a link to the more extensive filter tips.
 *
 * @return
 *   A paragraph containing a link to the 'filter/tips' page.
 *
 * @ingroup themeable
 */
function theme_filter_tips_more_info() {
  $link = l(t('More information about text formats'), 'filter/tips');
  return '<p>' . $link . '</p>';
}

/**
 * Format guidelines for a text format.
 *
 * @param $format
 *   A text format object; 'format' is used for the wrapper ID and the tips
 *   lookup, and 'name', when set, is shown as a label.
 *
 * @return
 *   A div containing the optional label followed by the themed short tips.
 *
 * @ingroup themeable
 */
function theme_filter_guidelines($format) {
  $label = '';
  if (isset($format->name)) {
    $label = '<label>' . $format->name . ':</label>';
  }
  $tips = theme('filter_tips', _filter_tips($format->format, FALSE));

  return '<div id="filter-guidelines-' . $format->format . '" class="filter-guidelines-item">' . $label . $tips . '</div>';
}

Dries's avatar
Dries committed
591 592 593 594 595 596 597 598 599 600 601 602
/**
 * @name Standard filters
 * @{
 * Filters implemented by the filter.module.
 */

/**
 * Implementation of hook_filter(). Contains a basic set of essential filters.
 * - Delta 0, HTML filter:
 *     Validates user-supplied HTML, transforming it as necessary.
 * - Delta 1, line break converter:
 *     Converts newlines into paragraph and break tags.
 * - Delta 2, URL and e-mail address filter:
 *     Turns web and e-mail addresses into clickable links.
 * - Delta 3, HTML corrector:
 *     Corrects faulty and chopped off HTML.
 * - Delta 4, HTML escaper:
 *     Escapes all HTML tags so they are displayed instead of interpreted.
 *
 * @param $op
 *   The operation: 'list', 'description', 'process' or 'settings'. Any other
 *   operation returns $text unchanged.
 * @param $delta
 *   Which of the filters above the operation applies to.
 * @param $format
 *   The format ID the filter is being used in.
 * @param $text
 *   The text to filter; only used by 'process' and by unknown operations.
 *
 * @return
 *   Depends on $op: the array of filter names, a description string, the
 *   processed text, a settings form array, or nothing for deltas the
 *   operation does not cover.
 */
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
  if ($op == 'list') {
    return array(0 => t('Limit allowed HTML tags'), 1 => t('Convert line breaks'), 2 => t('Convert URLs into links'), 3 => t('Correct broken HTML'), 4 => t('Escape all HTML'));
  }

  if ($op == 'description') {
    switch ($delta) {
      case 0:
        return t('Allows you to restrict the HTML tags the user can use. It will also remove harmful content such as JavaScript events, JavaScript URLs and CSS styles from those tags that are not removed.');
      case 1:
        return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt;) tags.');
      case 2:
        return t('Turns web and e-mail addresses into clickable links.');
      case 3:
        return t('Corrects faulty and chopped off HTML in postings.');
      case 4:
        return t('Escapes all HTML tags, so they will be visible instead of being effective.');
    }
    return;
  }

  if ($op == 'process') {
    switch ($delta) {
      case 0:
        return _filter_html($text, $format);
      case 1:
        return _filter_autop($text);
      case 2:
        return _filter_url($text, $format);
      case 3:
        return _filter_htmlcorrector($text);
      case 4:
        return trim(check_plain($text));
    }
    // Unknown filters leave the text alone.
    return $text;
  }

  if ($op == 'settings') {
    // Only the HTML filter and the URL filter have settings.
    switch ($delta) {
      case 0:
        return _filter_html_settings($format);
      case 2:
        return _filter_url_settings($format);
    }
    return;
  }

  return $text;
}

/**
 * Settings for the HTML filter.
 *
 * @param $format
 *   The format ID; it is appended to each variable name so every format
 *   keeps its own settings.
 *
 * @return
 *   A form array with one collapsible fieldset holding the allowed-tags
 *   textfield, the "display HTML help" checkbox and the "spam link
 *   deterrent" checkbox.
 */
function _filter_html_settings($format) {
  $allowed_key = 'allowed_html_' . $format;
  $help_key = 'filter_html_help_' . $format;
  $nofollow_key = 'filter_html_nofollow_' . $format;

  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('HTML filter'),
    '#collapsible' => TRUE,
  );
  $fieldset[$allowed_key] = array(
    '#type' => 'textfield',
    '#title' => t('Allowed HTML tags'),
    '#default_value' => variable_get($allowed_key, '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd>'),
    '#size' => 64,
    '#maxlength' => 1024,
    '#description' => t('Specify a list of tags which should not be stripped. (Note that JavaScript event attributes are always stripped.)'),
  );
  $fieldset[$help_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Display HTML help'),
    '#default_value' => variable_get($help_key, 1),
    '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'),
  );
  $fieldset[$nofollow_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Spam link deterrent'),
    '#default_value' => variable_get($nofollow_key, FALSE),
    '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'),
  );

  $form = array();
  $form['filter_html'] = $fieldset;
  return $form;
}

/**
 * HTML filter. Provides filtering of input into accepted HTML.
 *
 * @param $text
 *   The text to filter.
 * @param $format
 *   ID of the text format; selects the per-format "allowed_html_*" and
 *   "filter_html_nofollow_*" variables.
 *
 * @return
 *   The text passed through filter_xss() with the format's allowed tags,
 *   optionally with rel="nofollow" added to links, and trimmed.
 */
function _filter_html($text, $format) {
  // The setting is stored as "<a> <em> ..."; split it into bare tag names.
  $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
  $text = filter_xss($text, $allowed_tags);

  if (variable_get("filter_html_nofollow_$format", FALSE)) {
    // Require whitespace right after the tag name so that only <a ...> is
    // matched, not other tags starting with "a" such as <abbr ...>,
    // <acronym ...> or <address ...>.
    $text = preg_replace('/<a(\s[^>]*)>/i', '<a\\1 rel="nofollow">', $text);
  }

  return trim($text);
}

704 705 706 707 708 709 710 711 712
/**
 * Builds the settings form elements for the URL filter.
 *
 * @param $format
 *   ID of the text format being configured; appended to the variable name.
 *
 * @return
 *   A form array holding a single 'filter_urlfilter' fieldset with the
 *   maximum link text length field.
 */
function _filter_url_settings($format) {
  $length_key = 'filter_url_length_' . $format;

  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('URL filter'),
    '#collapsible' => TRUE,
  );

  // Only the visible link text is shortened; the href keeps the full URL.
  $fieldset[$length_key] = array(
    '#type' => 'textfield',
    '#title' => t('Maximum link text length'),
    '#default_value' => variable_get($length_key, 72),
    '#maxlength' => 4,
    '#description' => t('URLs longer than this number of characters will be truncated to prevent long strings that break formatting. The link itself will be retained; just the text portion of the link will be truncated.'),
  );

  return array('filter_urlfilter' => $fieldset);
}

/**
 * URL filter. Automatically converts text web addresses (URLs, e-mail addresses,
 * ftp links, etc.) into hyperlinks.
 *
 * @param $text
 *   The text to scan for addresses.
 * @param $format
 *   ID of the text format; selects the per-format "filter_url_length_*"
 *   variable that limits the visible link text.
 *
 * @return
 *   The text with recognized addresses wrapped in <a> tags.
 */
function _filter_url($text, $format) {
  // Pass length to regexp callback: _filter_url_trim() remembers it in a
  // static variable for the callbacks invoked below.
  _filter_url_trim(NULL, variable_get('filter_url_length_' . $format, 72));

  // Pad with spaces so addresses at the very start or end of the text are
  // surrounded by a delimiter like any other; the padding is removed below.
  $text = ' ' . $text . ' ';

  // An address must be preceded by one of these delimiters...
  $lead = "(<p>|<li>|<br\s*/?>|[ \n\r\t\(])";
  // ...and followed by optional punctuation (kept outside the link) and one
  // of these delimiters (matched as a lookahead, so it is not consumed).
  $trail = "([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))";

  // Match absolute URLs.
  $text = preg_replace_callback("`" . $lead . "((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))" . $trail . "`i", '_filter_url_parse_full_links', $text);

  // Match e-mail addresses.
  $text = preg_replace("`" . $lead . "([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})" . $trail . "`i", '\1<a href="mailto:\2">\2</a>\3', $text);

  // Match www domains/addresses. Uses the same leading delimiters as the two
  // patterns above, so an address directly after a <br /> is linked as well.
  $text = preg_replace_callback("`" . $lead . "(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])" . $trail . "`i", '_filter_url_parse_partial_links', $text);

  // Strip the padding spaces added above.
  $text = substr($text, 1, -1);

  return $text;
}

746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * @param $text
 *   The HTML fragment to correct.
 *
 * @return
 *   The fragment with stray "<" characters escaped, unmatched closing tags
 *   dropped, unclosed tags closed, and single-use tags written as "<tag />".
 */
function _filter_htmlcorrector($text) {
  // Prepare tag lists. Both are used for isset() lookups only, so the tag
  // names are stored as array keys.
  static $no_nesting, $single_use;
  if (!isset($no_nesting)) {
    // Tags which cannot be nested but are typically left unclosed.
    $no_nesting = array_flip(array('li', 'p'));

    // Single use tags in HTML4
    $single_use = array_flip(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
  }

  // Properly entify angles: a "<" not followed by a letter or "/" cannot
  // start a tag.
  $text = preg_replace('!<([^a-zA-Z/])!', '&lt;\1', $text);

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting an empty string as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  // Currently open tags, innermost first.
  $stack = array();
  $output = '';
  foreach ($split as $value) {
    // Process HTML tags.
    if ($tag) {
      // The tag name ends at the first whitespace of any kind (attributes may
      // be separated by newlines or tabs, not just spaces).
      list($tagname) = preg_split('/\s+/', strtolower($value), 2);
      // Closing tag
      if ($tagname[0] == '/') {
        $tagname = (string) substr($tagname, 1);
        // Discard XHTML closing tags for single use tags.
        if (!isset($single_use[$tagname])) {
          // See if we possibly have a matching opening tag on the stack.
          if (in_array($tagname, $stack, TRUE)) {
            // Close other tags lingering first.
            do {
              $output .= '</' . $stack[0] . '>';
            } while (array_shift($stack) != $tagname);
          }
          // Otherwise, discard it.
        }
      }
      // Opening tag
      else {
        // A self-closing tag written without a space ("<br/>") carries the
        // slash in its name; strip it so the tag is recognized.
        $tagname = rtrim($tagname, '/');
        // See if we have an identical 'no nesting' tag already open and close it if found.
        if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
          $output .= '</' . array_shift($stack) . '>';
        }
        // Push non-single-use tags onto the stack
        if (!isset($single_use[$tagname])) {
          array_unshift($stack, $tagname);
        }
        // Add trailing slash to single-use tags as per X(HT)ML.
        else {
          $value = rtrim($value, ' /') . ' /';
        }
        $output .= '<' . $value . '>';
      }
    }
    else {
      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }
  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</' . array_shift($stack) . '>';
  }
  return $output;
}

820
/**
 * Make links out of absolute URLs.
 *
 * preg_replace_callback() callback used by _filter_url().
 *
 * @param $match
 *   Regex match array: [1] is the leading delimiter, [2] the complete URL and
 *   [5] any trailing punctuation that stays outside the link.
 *
 * @return
 *   The replacement string: delimiter, link, trailing punctuation.
 */
function _filter_url_parse_full_links($match) {
  $url = decode_entities($match[2]);
  // Visible text: shortened URL, escaped for HTML output.
  $caption = check_plain(_filter_url_trim($url));
  // Attribute value: the full URL, passed through check_url().
  $href = check_url($url);

  $link = '<a href="' . $href . '" title="' . $href . '">' . $caption . '</a>';
  return $match[1] . $link . $match[5];
}

/**
 * Make links out of domain names starting with "www."
 *
 * preg_replace_callback() callback used by _filter_url().
 *
 * @param $match
 *   Regex match array: [1] is the leading delimiter, [2] the "www." address
 *   and [3] any trailing punctuation that stays outside the link.
 *
 * @return
 *   The replacement string: delimiter, link (with "http://" prepended to the
 *   href), trailing punctuation.
 */
function _filter_url_parse_partial_links($match) {
  $address = decode_entities($match[2]);
  // Visible text: shortened address, escaped for HTML output.
  $caption = check_plain(_filter_url_trim($address));
  // Attribute value: the full address, escaped for HTML output.
  $escaped = check_plain($address);

  $link = '<a href="http://' . $escaped . '" title="' . $escaped . '">' . $caption . '</a>';
  return $match[1] . $link . $match[3];
}

/**
 * Shortens long URLs to http://www.example.com/long/url...
 *
 * The maximum length is remembered in a static variable, so it can be set
 * once and then applied from regular expression callbacks that only receive
 * the text.
 *
 * @param $text
 *   The URL text to shorten, or NULL when the call only sets the length.
 * @param $length
 *   (optional) New maximum length to remember for this and later calls.
 *
 * @return
 *   The text, cut to the stored length and suffixed with '...' if it was more
 *   than three bytes longer than that length; otherwise the text unchanged.
 */
function _filter_url_trim($text, $length = NULL) {
  static $_length;
  if ($length !== NULL) {
    // The value may come from a stored setting as a string; normalize it.
    $_length = (int) $length;
  }

  // Nothing to do when only the length is being set, or when no length has
  // been configured yet.
  if ($text === NULL || $_length === NULL) {
    return $text;
  }

  // Use +3 for '...' string length.
  // NOTE(review): strlen()/substr() count bytes, so a multibyte character can
  // be cut in half here; consider a UTF-8 aware variant.
  if (strlen($text) > $_length + 3) {
    $text = substr($text, 0, $_length) . '...';
  }

  return $text;
}

Dries's avatar
Dries committed
857 858 859 860 861
/**
 * Convert line breaks into <p> and <br> in an intelligent fashion.
 * Based on: http://photomatt.net/scripts/autop
 *
 * @param $text
 *   The text to convert.
 *
 * @return
 *   The text with paragraphs wrapped in <p> tags and single line breaks
 *   turned into <br />. The contents of <pre>, <script>, <style> and <object>
 *   elements are passed through untouched.
 */
function _filter_autop($text) {
  // All block level tags
  $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';

  // Split at <pre>, <script>, <style>, <object> and their closing tags.
  // We don't apply any processing to the contents of these tags to avoid messing
  // up code. We look for matched pairs and allow basic nesting. For example:
  // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
  $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting NULL as required).
  $ignore = FALSE;
  $ignoretag = '';
  $output = '';
  foreach ($chunks as $i => $chunk) {
    if ($i % 2) {
      // Opening or closing tag?
      $open = ($chunk[1] != '/');
      // Skip "<" for an opening tag and "</" for a closing tag. The split
      // above is case-insensitive, so compare tag names in lower case too;
      // otherwise "<PRE>" would never be closed by "</pre>".
      list($tag) = preg_split('/[ >]/', substr($chunk, $open ? 1 : 2), 2);
      $tag = strtolower($tag);
      if (!$ignore) {
        if ($open) {
          $ignore = TRUE;
          $ignoretag = $tag;
        }
      }
      // Only allow a matching tag to close it.
      elseif (!$open && $ignoretag == $tag) {
        $ignore = FALSE;
        $ignoretag = '';
      }
    }
    elseif (!$ignore) {
      $chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n"; // just to make things a little easier, pad the end
      $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
      $chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk); // Space things out a little
      $chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk); // Space things out a little
      $chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of duplicates
      $chunk = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $chunk); // make paragraphs, including one at the end
      $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem with nested lists
      $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
      $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
      $chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk); // under certain strange conditions it could create a P of entirely whitespace
      $chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1", $chunk);
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1", $chunk);
      $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); // make line breaks
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1", $chunk);
      $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
      // Escape ampersands that do not start an entity.
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
    }
    $output .= $chunk;
  }
  return $output;
}

915 916 917
/**
 * Very permissive XSS/HTML filter for admin-only use.
 *
 * Use only for fields where it is impractical to use the
 * whole filter system, but where some (mainly inline) mark-up
 * is desired (so check_plain() is not acceptable).
 *
 * Allows all tags that can be used inside an HTML body, save
 * for scripts and styles.
 *
 * @param $string
 *   The string with raw HTML in it.
 *
 * @return
 *   The result of filter_xss() called with the permissive tag list below.
 */
function filter_xss_admin($string) {
  $body_tags = array(
    'a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br',
    'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div',
    'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
    'ins', 'kbd', 'li', 'ol', 'p', 'pre', 'q', 'samp', 'small', 'span',
    'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
    'tr', 'tt', 'ul', 'var',
  );
  return filter_xss($string, $body_tags);
}

Dries's avatar
Dries committed
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944
/**
 * Filters XSS. Based on kses by Ulf Harnhammar, see
 * http://sourceforge.net/projects/kses
 *
 * For examples of various XSS attacks, see:
 * http://ha.ckers.org/xss.html
 *
 * This code does four things:
 * - Removes characters and constructs that can trick browsers
 * - Makes sure all HTML entities are well-formed
 * - Makes sure all HTML tags and attributes are well-formed
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that can cause
 *   an XSS attack.
Dries's avatar
Dries committed
945 946
 * @param $allowed_tags
 *   An array of allowed tags.
Dries's avatar
Dries committed
947
 */
948
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'blockquote', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
Gábor Hojtsy's avatar
Gábor Hojtsy committed
949 950 951 952 953
  // Only operate on valid UTF-8 strings. This is necessary to prevent cross
  // site scripting issues on Internet Explorer 6.
  if (!drupal_validate_utf8($string)) {
    return '';
  }
954
  // Store the text format
Dries's avatar
Dries committed
955
  _filter_xss_split($allowed_tags, TRUE);
956
  // Remove NULL characters (ignored by some browsers)
Dries's avatar
Dries committed
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist
  // Named entities
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
  // Decimal numeric entities
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
  // Hexadecimal numeric entities
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);

  return preg_replace_callback('%
    (
973
    <(?=[^a-zA-Z!/])  # a lone <
974
    |                 # or