filter.module 44.9 KB
Newer Older
1 2 3
<?php
// $Id$

Dries's avatar
 
Dries committed
4 5 6 7 8
/**
 * @file
 * Framework for handling filtering of content.
 */

9 10 11
// This is a special format ID which means "use the default format". This value
// can be passed to the filter APIs as a format ID: this is equivalent to not
// passing an explicit format at all.
12 13
define('FILTER_FORMAT_DEFAULT', 0);

14 15 16
// Modes of the HTML filter, stored per input format in the
// "filter_html_$format" variable: either strip the tags that are not
// explicitly allowed, or escape every tag so it is shown as typed.
define('FILTER_HTML_STRIP', 1);
define('FILTER_HTML_ESCAPE', 2);

Dries's avatar
Dries committed
17 18 19
/**
 * Implementation of hook_help().
 *
 * Supplies the help text for the module overview page and for each of the
 * input format administration pages.
 *
 * @param $path
 *   The path of the page help is requested for.
 * @param $arg
 *   Array of path components; $arg[3] is used as the input format part of the
 *   links built on the per-format pages.
 * @return
 *   Translated HTML help text, or nothing when the path is not handled here.
 */
function filter_help($path, $arg) {
  if ($path == 'admin/help#filter') {
    $paragraphs = array(
      t("The filter module allows administrators to configure  text input formats for the site. For example, an administrator may want a filter to strip out malicious HTML from user's comments. Administrators may also want to make URLs linkable even if they are only entered in an unlinked format."),
      t('Users can choose between the available input formats when creating or editing content. Administrators can configure which input formats are available to which user roles, as well as choose a default input format. Administrators can also create new input formats. Each input format can be configured to use a selection of filters.'),
      t('For more information please read the configuration and customization handbook <a href="@filter">Filter page</a>.', array('@filter' => 'http://drupal.org/handbook/modules/filter/')),
    );
    $help = '';
    foreach ($paragraphs as $paragraph) {
      $help .= '<p>'. $paragraph .'</p>';
    }
    return $help;
  }
  elseif ($path == 'admin/settings/filters') {
    return t('
<p><em>Input formats</em> define a way of processing user-supplied text in Drupal. Every input format has its own settings of which <em>filters</em> to apply. Possible filters include stripping out malicious HTML and making URLs clickable.</p>
<p>Users can choose between the available input formats when submitting content.</p>
<p>Below you can configure which input formats are available to which roles, as well as choose a default input format (used for imported content, for example).</p>
<p>Note that (1) the default format is always available to all roles, and (2) all input formats can always be used by roles with the "administer filters" permission even if they are not explicitly listed in the Roles column of the table below.</p>');
  }
  elseif ($path == 'admin/settings/filters/%') {
    // Link to the "Rearrange" tab of the format being edited.
    $order_url = url('admin/settings/filters/'. $arg[3] .'/order');
    return t('
<p>Every <em>filter</em> performs one particular change on the user input, for example stripping out malicious HTML or making URLs clickable. Choose which filters you want to apply to text in this input format.</p>
<p>If you notice some filters are causing conflicts in the output, you can <a href="@rearrange">rearrange them</a>.</p>', array('@rearrange' => $order_url));
  }
  elseif ($path == 'admin/settings/filters/%/configure') {
    // Link back to the main page of the format being configured.
    $format_url = url('admin/settings/filters/'. $arg[3]);
    return '<p>'. t('If you cannot find the settings for a certain filter, make sure you have enabled it on the <a href="@url">view tab</a> first.', array('@url' => $format_url)) .'</p>';
  }
  elseif ($path == 'admin/settings/filters/%/order') {
    return t('
<p>Because of the flexible filtering system, you might encounter a situation where one filter prevents another from doing its job. For example: a word in an URL gets converted into a glossary term, before the URL can be converted in a clickable link. When this happens, you will need to rearrange the order in which filters get executed.</p>
<p>Filters are executed from top-to-bottom. You can use the weight column to rearrange them: heavier filters "sink" to the bottom.</p>');
  }
}

49 50 51 52 53 54 55
/**
 * Implementation of hook_theme().
 *
 * @return
 *   An array of theme hook definitions keyed by hook name. Each definition
 *   lists the hook's arguments with their defaults and, where given, the
 *   include file named for the hook.
 */
function filter_theme() {
  // Both administrative theme hooks take a single form argument.
  $form_arguments = array('form' => NULL);

  $hooks = array();
  $hooks['filter_admin_overview'] = array(
    'arguments' => $form_arguments,
    'file' => 'filter.admin.inc',
  );
  $hooks['filter_admin_order'] = array(
    'arguments' => $form_arguments,
    'file' => 'filter.admin.inc',
  );
  $hooks['filter_tips'] = array(
    'arguments' => array('tips' => NULL, 'long' => FALSE, 'extra' => ''),
    'file' => 'filter.pages.inc',
  );
  $hooks['filter_tips_more_info'] = array(
    'arguments' => array(),
  );

  return $hooks;
}

72 73 74
/**
 * Implementation of hook_menu().
 *
 * Registers the input format administration pages and the public
 * "Compose tips" page.
 *
 * Every administrative route that has its own page callback declares the
 * 'administer filters' permission explicitly instead of depending on access
 * settings being inherited from a parent item, so that none of them can end
 * up with a different access rule than the overview page.
 *
 * @return
 *   An array of menu items keyed by path.
 */
function filter_menu() {
  $items['admin/settings/filters'] = array(
    'title' => 'Input formats',
    'description' => 'Configure how content input by users is filtered, including allowed HTML tags. Also allows enabling of module-provided filters.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_overview'),
    'access arguments' => array('administer filters'),
    'file' => 'filter.admin.inc',
  );
  $items['admin/settings/filters/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items['admin/settings/filters/add'] = array(
    'title' => 'Add input format',
    'page callback' => 'filter_admin_format_page',
    'access arguments' => array('administer filters'),
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
    'file' => 'filter.admin.inc',
  );
  $items['admin/settings/filters/delete'] = array(
    'title' => 'Delete input format',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_delete'),
    'access arguments' => array('administer filters'),
    'type' => MENU_CALLBACK,
    'file' => 'filter.admin.inc',
  );
  // The tips page is public: it only describes the available formats.
  $items['filter/tips'] = array(
    'title' => 'Compose tips',
    'page callback' => 'filter_tips_long',
    'access callback' => TRUE,
    'type' => MENU_SUGGESTED_ITEM,
    'file' => 'filter.pages.inc',
  );
  // %filter_format is resolved through filter_format_load().
  $items['admin/settings/filters/%filter_format'] = array(
    'type' => MENU_CALLBACK,
    'page callback' => 'filter_admin_format_page',
    'page arguments' => array(3),
    'access arguments' => array('administer filters'),
    'file' => 'filter.admin.inc',
  );

  $items['admin/settings/filters/%filter_format/edit'] = array(
    'title' => 'Edit',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => 0,
    'file' => 'filter.admin.inc',
  );
  $items['admin/settings/filters/%filter_format/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'filter_admin_configure_page',
    'page arguments' => array(3),
    'access arguments' => array('administer filters'),
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
    'file' => 'filter.admin.inc',
  );
  $items['admin/settings/filters/%filter_format/order'] = array(
    'title' => 'Rearrange',
    'page callback' => 'filter_admin_order_page',
    'page arguments' => array(3),
    'access arguments' => array('administer filters'),
    'type' => MENU_LOCAL_TASK,
    'weight' => 2,
    'file' => 'filter.admin.inc',
  );
  return $items;
}

142 143 144 145
/**
 * Menu argument loader for the %filter_format path component.
 *
 * @param $arg
 *   The value found in the path, passed on to filter_formats() as the index.
 * @return
 *   Whatever filter_formats() returns for that index: the matching format
 *   object, or FALSE when the format is not in the list.
 */
function filter_format_load($arg) {
  $format = filter_formats($arg);
  return $format;
}

146 147 148 149 150 151 152
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function filter_perm() {
  $permissions = array();
  $permissions[] = 'administer filters';
  return $permissions;
}

153 154 155 156 157 158 159 160 161
/**
 * Implementation of hook_cron().
 *
 * Expires outdated entries of the filter cache on every cron run.
 */
function filter_cron() {
  $filter_cache_table = 'cache_filter';
  cache_clear_all(NULL, $filter_cache_table);
}

162
/**
 * Implementation of hook_filter_tips().
 *
 * @param $delta
 *   Which of this module's filters to describe: 0 is the HTML filter, 1 the
 *   line break converter and 2 the URL filter.
 * @param $format
 *   The input format ID; used to look up the per-format HTML filter settings.
 * @param $long
 *   FALSE for the short one-line tip, TRUE for the extensive help.
 * @return
 *   The translated tip, or nothing when there is no tip for the combination
 *   (for example the HTML filter in "escape" mode).
 */
function filter_filter_tips($delta, $format, $long = FALSE) {
  global $base_url;
  switch ($delta) {
    case 0:
      if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
        if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
          switch ($long) {
            case 0:
              return t('Allowed HTML tags: @tags', array('@tags' => $allowed_html));
            case 1:
              $output = '<p>'. t('Allowed HTML tags: @tags', array('@tags' => $allowed_html)) .'</p>';
              // The per-tag help tables can be switched off per format.
              if (!variable_get("filter_html_help_$format", 1)) {
                return $output;
              }

              $output .= t('
<p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
<p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
              // Each entry is array(description, example markup). A NULL entry
              // marks a tag that is already covered by another tag's example.
              $tips = array(
                'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'Drupal') .'</a>'),
                'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
                'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
                'strong' => array( t('Strong'), '<strong>'. t('Strong') .'</strong>'),
                'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
                'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
                'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
                'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
                'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
                'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
                'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
                'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
                'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
                'abbr' => array( t('Abbreviation'), t('<abbr title="Abbreviation">Abbrev.</abbr>')),
                'acronym' => array( t('Acronym'), t('<acronym title="Three-Letter Acronym">TLA</acronym>')),
                'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
                'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
                // Assumes and describes tr, td, th.
                'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
                'tr' => NULL, 'td' => NULL, 'th' => NULL,
                'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
                'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
                // Assumes and describes li.
                'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
                'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
                'li' => NULL,
                // Assumes and describes dt and dd.
                'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
                'dt' => NULL, 'dd' => NULL,
                'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
                'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
                'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
                'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
                'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
                'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
              );
              $header = array(t('Tag Description'), t('You Type'), t('You Get'));
              // Start from an empty list so the table call below always gets
              // an array, even when no tag could be parsed out of the setting
              // or every parsed tag has a NULL tip.
              $rows = array();
              preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
              foreach ($out[1] as $tag) {
                if (array_key_exists($tag, $tips)) {
                  if ($tips[$tag]) {
                    $rows[] = array(
                      array('data' => $tips[$tag][0], 'class' => 'description'),
                      array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
                      array('data' => $tips[$tag][1], 'class' => 'get')
                    );
                  }
                }
                else {
                  $rows[] = array(
                    array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => 'description', 'colspan' => 3),
                  );
                }
              }
              $output .= theme('table', $header, $rows);

              $output .= t('
<p>Most unusual characters can be directly entered without any problems.</p>
<p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
              $entities = array(
                array( t('Ampersand'), '&amp;'),
                array( t('Greater than'), '&gt;'),
                array( t('Less than'), '&lt;'),
                array( t('Quotation mark'), '&quot;'),
              );
              $header = array(t('Character Description'), t('You Type'), t('You Get'));
              // Reset the rows for the second table.
              $rows = array();
              foreach ($entities as $entity) {
                $rows[] = array(
                  array('data' => $entity[0], 'class' => 'description'),
                  array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
                  array('data' => $entity[1], 'class' => 'get')
                );
              }
              $output .= theme('table', $header, $rows);
              return $output;
          }
        }
        else {
          return t('No HTML tags allowed');
        }
      }
      break;

    case 1:
      switch ($long) {
        case 0:
          return t('Lines and paragraphs break automatically.');
        case 1:
          return t('Lines and paragraphs are automatically recognized. The &lt;br /&gt; line break, &lt;p&gt; paragraph and &lt;/p&gt; close paragraph tags are inserted automatically. If paragraphs are not recognized simply add a couple blank lines.');
      }
      break;
    case 2:
      return t('Web page addresses and e-mail addresses turn into links automatically.');
  }
}

Dries's avatar
Dries committed
281
/**
 * Retrieve a list of input formats.
 *
 * The list is built once per request and kept in a static variable. Users
 * with the 'administer filters' permission get every format; everybody else
 * gets the formats whose roles column contains one of their role IDs, plus
 * the default format.
 *
 * @param $index
 *   Optional format ID. When given, only that format is returned.
 * @return
 *   Without $index: an array of format objects keyed by format ID. With
 *   $index: the matching format object, or FALSE when that format is not in
 *   the list.
 */
function filter_formats($index = NULL) {
  global $user;
  static $formats;

  if (!isset($formats)) {
    $formats = array();

    $query = 'SELECT * FROM {filter_formats}';

    // Build query for selecting the format(s) based on the user's roles.
    // Administrators can always use all input formats, so they get no WHERE
    // clause at all.
    $args = array();
    if (!user_access('administer filters')) {
      $where = array();
      foreach ($user->roles as $rid => $role) {
        $where[] = "roles LIKE '%%,%d,%%'";
        $args[] = $rid;
      }
      // The default format is always available. Adding it to the same list of
      // conditions keeps the SQL valid even when the user has no roles.
      $where[] = 'format = %d';
      $args[] = variable_get('filter_default_format', 1);
      $query .= ' WHERE '. implode(' OR ', $where);
    }

    $result = db_query($query, $args);
    while ($format = db_fetch_object($result)) {
      $formats[$format->format] = $format;
    }
  }
  if (isset($index)) {
    return isset($formats[$index]) ? $formats[$index] : FALSE;
  }
  return $formats;
}
318

319 320 321 322 323 324 325 326
/**
 * Build a list of all filters.
 *
 * Asks every module for its hook_filter('list') result and collects one
 * object per filter.
 *
 * @return
 *   An array keyed by "module/delta" of objects with the properties module,
 *   delta and name, sorted by filter name.
 */
function filter_list_all() {
  $all_filters = array();

  foreach (module_list() as $module_name) {
    $provided = module_invoke($module_name, 'filter', 'list');
    // Modules without filters return nothing usable here.
    if (!is_array($provided)) {
      continue;
    }
    foreach ($provided as $delta => $label) {
      $entry = new stdClass();
      $entry->module = $module_name;
      $entry->delta = $delta;
      $entry->name = $label;
      $all_filters[$module_name .'/'. $delta] = $entry;
    }
  }

  uasort($all_filters, '_filter_list_cmp');

  return $all_filters;
}

/**
 * Sort callback: orders two filter objects by their name property.
 *
 * @param $a
 *   First filter object.
 * @param $b
 *   Second filter object.
 * @return
 *   The strcmp() result for the two names: negative, zero or positive.
 */
function _filter_list_cmp($a, $b) {
  $first_name = $a->name;
  $second_name = $b->name;
  return strcmp($first_name, $second_name);
}

346 347 348 349 350 351
/**
 * Resolve a format id, including the default format.
 *
 * @param $format
 *   A format ID, or FILTER_FORMAT_DEFAULT.
 * @return
 *   The configured default format ID when FILTER_FORMAT_DEFAULT was passed,
 *   otherwise the given ID unchanged.
 */
function filter_resolve_format($format) {
  if ($format == FILTER_FORMAT_DEFAULT) {
    return variable_get('filter_default_format', 1);
  }
  return $format;
}
Dries's avatar
Dries committed
352
/**
 * Check if text in a certain input format is allowed to be cached.
 *
 * The answer is read from the cache column of the format's row and remembered
 * in a static variable.
 *
 * @param $format
 *   A format ID, or FILTER_FORMAT_DEFAULT.
 * @return
 *   The value of the cache column for the resolved format.
 */
function filter_format_allowcache($format) {
  static $cacheable = array();

  $format = filter_resolve_format($format);
  if (isset($cacheable[$format])) {
    return $cacheable[$format];
  }

  $cacheable[$format] = db_result(db_query('SELECT cache FROM {filter_formats} WHERE format = %d', $format));
  return $cacheable[$format];
}

/**
 * Retrieve a list of filters for a certain format.
 *
 * Rows are read from the filters table in ascending weight order. A row is
 * only kept when its module still lists the filter; the listed name is added
 * to the row as its name property. Results are remembered per format in a
 * static variable.
 *
 * @param $format
 *   The format ID to list the filters of.
 * @return
 *   An array of filter objects keyed by "module/delta".
 */
function filter_list_format($format) {
  static $filters = array();

  if (isset($filters[$format])) {
    return $filters[$format];
  }

  $filters[$format] = array();
  $result = db_query("SELECT * FROM {filters} WHERE format = %d ORDER BY weight ASC", $format);
  while ($row = db_fetch_object($result)) {
    $names = module_invoke($row->module, 'filter', 'list');
    // Skip rows whose module no longer provides this filter.
    if (!is_array($names) || !isset($names[$row->delta])) {
      continue;
    }
    $row->name = $names[$row->delta];
    $filters[$format][$row->module .'/'. $row->delta] = $row;
  }

  return $filters[$format];
}

385 386
/**
 * @name Filtering functions
Dries's avatar
 
Dries committed
387
 * @{
388 389
 * Modules which need to have content filtered can use these functions to
 * interact with the filter system.
390 391 392 393 394 395 396 397
 *
 * For more info, see the hook_filter() documentation.
 *
 * Note: because filters can inject JavaScript or execute PHP code, security is
 * vital here. When a user supplies a $format, you should validate it with
 * filter_access($format) before accepting/using it. This is normally done in
 * the validation stage of the node system. You should for example never make a
 * preview of content in a disallowed format.
398 399
 */

Dries's avatar
Dries committed
400 401
/**
 * Run all the enabled filters on a piece of text.
 *
 * @param $text
 *    The text to be filtered.
 * @param $format
 *    The format of the text to be filtered. Specify FILTER_FORMAT_DEFAULT for
 *    the default format.
 * @param $check
 *    Whether to check the $format with filter_access() first. Defaults to TRUE.
 *    Note that this will check the permissions of the current user, so you
 *    should specify $check = FALSE when viewing other people's content. When
 *    showing content that is not (yet) stored in the database (eg. upon preview),
 *    set to TRUE so the user's permissions are checked.
 * @return
 *    The filtered text, or the translated placeholder 'n/a' when $text is not
 *    set or the access check fails.
 */
function check_markup($text, $format = FILTER_FORMAT_DEFAULT, $check = TRUE) {
  // Nothing to filter, or a format the current user may not use.
  if (!isset($text) || ($check && !filter_access($format))) {
    return t('n/a');
  }

  $format = filter_resolve_format($format);

  // A cached copy of this exact text in this format wins.
  $cache_id = $format .':'. md5($text);
  $cached = cache_get($cache_id, 'cache_filter');
  if ($cached) {
    return $cached->data;
  }

  // Find out whether the result may be stored afterwards.
  $cacheable = filter_format_allowcache($format);

  // Normalize Windows and Mac newlines so that filters only ever see "\n".
  $text = str_replace(array("\r\n", "\r"), "\n", $text);

  // The filters of this format, in execution order.
  $filters = filter_list_format($format);

  // First pass lets filters escape HTML-like data such as code or formulas,
  // second pass performs the actual filtering.
  foreach (array('prepare', 'process') as $op) {
    foreach ($filters as $filter) {
      $text = module_invoke($filter->module, 'filter', $op, $filter->delta, $format, $text);
    }
  }

  // Keep the result for at least one day when the format allows caching.
  if ($cacheable) {
    cache_set($cache_id, $text, 'cache_filter', time() + (60 * 60 * 24));
  }

  return $text;
}

/**
 * Generate a selector for choosing a format in a form.
 *
 * With more than one format available to the current user, a collapsed
 * fieldset with one radio button per format is built. Otherwise the single
 * format is passed along as a value element and only its guidelines are shown.
 *
 * @ingroup forms
 * @see filter_form_validate().
 * @param $value
 *   The ID of the format that is currently selected.
 * @param $weight
 *   The weight of the input format.
 * @param $parents
 *   Required when defining multiple input formats on a single node or having a different parent than 'format'.
 * @return
 *   The form array for the format selector.
 */
function filter_form($value = FILTER_FORMAT_DEFAULT, $weight = NULL, $parents = array('format')) {
  $value = filter_resolve_format($value);
  $available = filter_formats();

  // Link to the long tips page, appended to every variant of the element.
  $more_info = theme('filter_tips_more_info');

  if (count($available) <= 1) {
    // Only one format available: use a hidden form item and only show tips.
    $only = array_shift($available);
    $form[$only->format] = array(
      '#type' => 'value',
      '#value' => $only->format,
      '#parents' => $parents,
    );
    $guidelines = _filter_tips(variable_get('filter_default_format', 1), FALSE);
    $form['format']['guidelines'] = array(
      '#title' => t('Formatting guidelines'),
      '#value' => theme('filter_tips', $guidelines, FALSE, $more_info),
    );
  }
  else {
    $form = array(
      '#type' => 'fieldset',
      '#title' => t('Input format'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#weight' => $weight,
      '#element_validate' => array('filter_form_validate'),
    );
    // Multiple formats available: display radio buttons with tips.
    foreach ($available as $choice) {
      $form[$choice->format] = array(
        '#type' => 'radio',
        '#title' => $choice->name,
        '#default_value' => $value,
        '#return_value' => $choice->format,
        '#parents' => $parents,
        '#description' => theme('filter_tips', _filter_tips($choice->format, FALSE)),
        // Build the id from the parents plus the format ID, the way the
        // autogenerator does, so that every radio button gets a unique id.
        '#id' => form_clean_id('edit-'. implode('-', array_merge($parents, array($choice->format)))),
      );
    }
  }

  $form[] = array('#value' => $more_info);
  return $form;
}

517 518 519 520 521 522 523
/**
 * Element validation handler for the format selector built by filter_form().
 *
 * Accepts the submission when the value of one of the format choices equals
 * that choice's return value. Otherwise a form error is set and the attempt
 * is logged.
 *
 * Only children that declare a '#return_value' are treated as choices. The
 * selector also contains a plain markup child (the "more information" link)
 * which has no return value; comparing its value against a missing return
 * value raised a notice and could wrongly accept the submission when that
 * value was empty.
 *
 * @param $form
 *   The format selector element, including its child elements.
 */
function filter_form_validate($form) {
  // The last value seen on a choice; reported in the log when nothing matches.
  $choice = NULL;

  foreach (element_children($form) as $key) {
    $child = $form[$key];
    if (!isset($child['#return_value'])) {
      continue;
    }
    $choice = isset($child['#value']) ? $child['#value'] : NULL;
    if ($choice == $child['#return_value']) {
      return;
    }
  }

  form_error($form, t('An illegal choice has been detected. Please contact the site administrator.'));
  watchdog('form', 'Illegal choice %choice in %name element.', array('%choice' => $choice, '%name' => empty($form['#title']) ? $form['#parents'][0] : $form['#title']), WATCHDOG_ERROR);
}

Dries's avatar
Dries committed
527
/**
 * Returns TRUE if the user is allowed to access this format.
 *
 * Users with the 'administer filters' permission may use every format, and
 * the default format is open to everybody. Any other format must be in the
 * list returned by filter_formats() for the current user.
 *
 * @param $format
 *   A format ID, or FILTER_FORMAT_DEFAULT.
 * @return
 *   TRUE when the current user may use the format, FALSE otherwise.
 */
function filter_access($format) {
  $format = filter_resolve_format($format);

  if (user_access('administer filters')) {
    return TRUE;
  }
  if ($format == variable_get('filter_default_format', 1)) {
    return TRUE;
  }

  $available = filter_formats();
  return isset($available[$format]);
}
540

Dries's avatar
Dries committed
541 542 543 544 545 546 547 548
/**
 * @} End of "Filtering functions".
 */


/**
 * Helper function for fetching filter tips.
 *
 * @param $format
 *   The ID of the format to collect tips for, or -1 to collect the tips of
 *   every format available to the current user.
 * @param $long
 *   Passed on to hook_filter_tips(): FALSE for short tips, TRUE for the
 *   extensive ones.
 * @return
 *   An array keyed by format name. Each value is a list of arrays with the
 *   keys 'tip' (the tip text) and 'id' (the "module/delta" key of the filter).
 *   Filters that return no tip are left out. The result is empty when the
 *   requested format does not exist.
 */
function _filter_tips($format, $long = FALSE) {
  if ($format == -1) {
    $formats = filter_formats();
  }
  else {
    // db_fetch_object() yields FALSE for an unknown format; do not treat that
    // as a format object.
    $row = db_fetch_object(db_query("SELECT * FROM {filter_formats} WHERE format = %d", $format));
    $formats = $row ? array($row) : array();
  }

  $tips = array();

  foreach ($formats as $current) {
    $filters = filter_list_format($current->format);

    $tips[$current->name] = array();
    foreach ($filters as $id => $filter) {
      if ($tip = module_invoke($filter->module, 'filter_tips', $filter->delta, $current->format, $long)) {
        $tips[$current->name][] = array('tip' => $tip, 'id' => $id);
      }
    }
  }

  return $tips;
}


574 575 576 577 578 579
/**
 * Format a link to the more extensive filter tips.
 *
 * @return
 *   A paragraph containing a link to the filter/tips page.
 *
 * @ingroup themeable
 */
function theme_filter_tips_more_info() {
  $link = l(t('More information about formatting options'), 'filter/tips');
  return '<p>'. $link .'</p>';
}

Dries's avatar
Dries committed
583 584 585 586 587 588 589 590 591 592 593 594
/**
 * @name Standard filters
 * @{
 * Filters implemented by the filter.module.
 */

/**
 * Implementation of hook_filter(). Contains a basic set of essential filters.
 * - HTML filter (delta 0):
 *     Validates user-supplied HTML, transforming it as necessary.
 * - Line break converter (delta 1):
 *     Converts newlines into paragraph and break tags.
 * - URL filter (delta 2):
 *     Turns web and e-mail addresses into clickable links.
 * - HTML corrector (delta 3):
 *     Corrects faulty and chopped off HTML.
 *
 * @param $op
 *   The operation: 'list', 'description', 'process' or 'settings'. Any other
 *   operation returns $text unchanged.
 * @param $delta
 *   Which of the filters above the operation applies to.
 * @param $format
 *   The input format ID the filter runs in.
 * @param $text
 *   The text to be processed.
 * @return
 *   Depends on $op: the array of filter names, a description, the processed
 *   text, or a settings form.
 */
function filter_filter($op, $delta = 0, $format = -1, $text = '') {
  if ($op == 'list') {
    return array(0 => t('HTML filter'), 1 => t('Line break converter'), 2 => t('URL filter'), 3 => t('HTML corrector'));
  }

  if ($op == 'description') {
    if ($delta == 0) {
      return t('Allows you to restrict whether users can post HTML and which tags to filter out. It will also remove harmful content such as JavaScript events, JavaScript URLs and CSS styles from those tags that are not removed.');
    }
    elseif ($delta == 1) {
      return t('Converts line breaks into HTML (i.e. &lt;br&gt; and &lt;p&gt; tags).');
    }
    elseif ($delta == 2) {
      return t('Turns web and e-mail addresses into clickable links.');
    }
    elseif ($delta == 3) {
      return t('Corrects faulty and chopped off HTML in postings.');
    }
    return;
  }

  if ($op == 'process') {
    if ($delta == 0) {
      return _filter_html($text, $format);
    }
    elseif ($delta == 1) {
      return _filter_autop($text);
    }
    elseif ($delta == 2) {
      return _filter_url($text, $format);
    }
    elseif ($delta == 3) {
      return _filter_htmlcorrector($text);
    }
    return $text;
  }

  if ($op == 'settings') {
    // Only the HTML filter and the URL filter have settings.
    if ($delta == 0) {
      return _filter_html_settings($format);
    }
    elseif ($delta == 2) {
      return _filter_url_settings($format);
    }
    return;
  }

  return $text;
}

/**
 * Settings for the HTML filter.
 *
 * @param $format
 *   The input format ID. It is appended to every setting name, so each format
 *   has its own set of values.
 * @return
 *   A form array with one fieldset holding the filter mode, the allowed tags,
 *   the HTML help toggle and the nofollow toggle.
 */
function _filter_html_settings($format) {
  $fieldset = array(
    '#type' => 'fieldset',
    '#title' => t('HTML filter'),
    '#collapsible' => TRUE,
  );

  // Strip or escape.
  $mode_key = "filter_html_$format";
  $fieldset[$mode_key] = array(
    '#type' => 'radios',
    '#title' => t('Filter HTML tags'),
    '#default_value' => variable_get($mode_key, FILTER_HTML_STRIP),
    '#options' => array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape all tags')),
    '#description' => t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'),
  );

  // Tags that survive stripping.
  $tags_key = "allowed_html_$format";
  $fieldset[$tags_key] = array(
    '#type' => 'textfield',
    '#title' => t('Allowed HTML tags'),
    '#default_value' => variable_get($tags_key, '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'),
    '#size' => 64,
    '#maxlength' => 255,
    '#description' => t('If "Strip disallowed tags" is selected, optionally specify tags which should not be stripped. JavaScript event attributes are always stripped.'),
  );

  // Whether the long tips include the HTML help tables.
  $help_key = "filter_html_help_$format";
  $fieldset[$help_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Display HTML help'),
    '#default_value' => variable_get($help_key, 1),
    '#description' => t('If enabled, Drupal will display some basic HTML help in the long filter tips.'),
  );

  // Whether rel="nofollow" is added to links.
  $nofollow_key = "filter_html_nofollow_$format";
  $fieldset[$nofollow_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Spam link deterrent'),
    '#default_value' => variable_get($nofollow_key, FALSE),
    '#description' => t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'),
  );

  return array('filter_html' => $fieldset);
}

/**
 * HTML filter. Provides filtering of input into accepted HTML.
 *
 * Depending on the mode configured for the format, the text is either run
 * through filter_xss() with the format's list of allowed tags, or escaped
 * completely with check_plain(). Optionally rel="nofollow" is added to links.
 *
 * @param $text
 *   The text to filter.
 * @param $format
 *   The input format ID, used to look up the per-format settings.
 * @return
 *   The filtered text, with surrounding whitespace trimmed.
 */
function _filter_html($text, $format) {
  // Read the mode once; it decides between stripping and escaping.
  $mode = variable_get("filter_html_$format", FILTER_HTML_STRIP);

  if ($mode == FILTER_HTML_STRIP) {
    // Turn "<a> <em> ..." into a plain list of tag names.
    $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
    $text = filter_xss($text, $allowed_tags);
  }

  if ($mode == FILTER_HTML_ESCAPE) {
    // Escape HTML
    $text = check_plain($text);
  }

  if (variable_get("filter_html_nofollow_$format", FALSE)) {
    // Require whitespace directly after "<a" so that only anchors are
    // matched, not other tags whose name starts with "a" such as <abbr>,
    // <acronym> or <address>.
    $text = preg_replace('/<a(\s[^>]*)>/i', '<a\\1 rel="nofollow">', $text);
  }

  return trim($text);
}

/**
 * Settings for URL filter.
 *
 * @param $format
 *   The input format ID; appended to the setting name so that every format
 *   keeps its own value.
 * @return
 *   A form array holding a collapsible fieldset with the maximum link text
 *   length field.
 */
function _filter_url_settings($format) {
  $form['filter_urlfilter'] = array(
    '#type' => 'fieldset',
    '#title' => t('URL filter'),
    '#collapsible' => TRUE,
  );
  $form['filter_urlfilter']['filter_url_length_'. $format] = array(
    '#type' => 'textfield',
    '#title' => t('Maximum link text length'),
    '#default_value' => variable_get('filter_url_length_'. $format, 72),
    '#maxlength' => 4,
    '#description' => t('URLs longer than this number of characters will be truncated to prevent long strings that break formatting. The link itself will be retained; just the text portion of the link will be truncated.'),
  );
  return $form;
}

/**
 * URL filter. Automatically converts text web addresses (URLs, e-mail addresses,
 * ftp links, etc.) into hyperlinks.
 *
 * An address is only converted when it is preceded by <p>, <li>, <br>,
 * whitespace or "(" and followed by </p>, </li>, <br>, whitespace or ")".
 * Trailing punctuation (.,?!) is kept outside of the generated link.
 *
 * @param $text
 *   The text to scan for addresses.
 * @param $format
 *   The input format ID; used to look up the maximum link text length.
 * @return
 *   The text with the recognised addresses wrapped in <a> tags.
 */
function _filter_url($text, $format) {
  // Pass length to regexp callback
  _filter_url_trim(NULL, variable_get('filter_url_length_'. $format, 72));

  // Pad with spaces so addresses at the very start or end of the text have
  // the delimiters the patterns require. The padding is removed again below.
  $text = ' '. $text .' ';

  // Match absolute URLs.
  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])((http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://)([a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-]))([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_full_links', $text);

  // Match e-mail addresses.
  $text = preg_replace("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])([A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\.[A-Za-z]{2,4})([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '\1<a href="mailto:\2">\2</a>\3', $text);

  // Match www domains/addresses. <br> is accepted as a leading delimiter here
  // too, consistent with the two patterns above.
  $text = preg_replace_callback("`(<p>|<li>|<br\s*/?>|[ \n\r\t\(])(www\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\&=/;-])([.,?!]*?)(?=(</p>|</li>|<br\s*/?>|[ \n\r\t\)]))`i", '_filter_url_parse_partial_links', $text);

  // Strip the padding spaces added above.
  $text = substr($text, 1, -1);

  return $text;
}

/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * @param $text
 *   The HTML text to correct.
 * @return
 *   The corrected text: "<" characters that do not start a tag are entified,
 *   open tags are closed, closing tags without a matching opening tag are
 *   dropped and single-use tags are written with a trailing slash.
 */
function _filter_htmlcorrector($text) {
  // Prepare tag lists. Only key lookups are performed on them.
  static $no_nesting, $single_use;
  if (!isset($no_nesting)) {
    // Tags which cannot be nested but are typically left unclosed.
    $no_nesting = array_flip(array('li', 'p'));

    // Single use tags in HTML4
    $single_use = array_flip(array('base', 'meta', 'link', 'hr', 'br', 'param', 'img', 'area', 'input', 'col', 'frame'));
  }

  // Properly entify angles, including a "<" that is the last character.
  $text = preg_replace('!<([^a-zA-Z/]|$)!', '&lt;\1', $text);

  // Split tags from text.
  $split = preg_split('/<([^>]+?)>/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  $stack = array(); // Currently open tags, innermost first.
  $output = '';
  foreach ($split as $value) {
    // Process HTML tags.
    if ($tag) {
      $lowered = strtolower($value);
      // The split pattern guarantees at least one character between < and >.
      $closing = ($lowered[0] == '/');
      // The tag name ends at the first whitespace character of any kind or at
      // a slash, so "<a\nhref=...>" yields "a" and "<br/>" yields "br".
      preg_match('!^/?([^\s/]*)!', $lowered, $name_match);
      $tagname = $name_match[1];

      // Closing tag
      if ($closing) {
        // Discard XHTML closing tags for single use tags.
        if (!isset($single_use[$tagname])) {
          // See if we possibly have a matching opening tag on the stack.
          if (in_array($tagname, $stack, TRUE)) {
            // Close other tags lingering first.
            do {
              $output .= '</'. $stack[0] .'>';
            } while (array_shift($stack) != $tagname);
          }
          // Otherwise, discard it.
        }
      }
      // Opening tag
      else {
        // See if we have an identical 'no nesting' tag already open and close it if found.
        if (count($stack) && ($stack[0] == $tagname) && isset($no_nesting[$stack[0]])) {
          $output .= '</'. array_shift($stack) .'>';
        }
        // Push non-single-use tags onto the stack
        if (!isset($single_use[$tagname])) {
          array_unshift($stack, $tagname);
        }
        // Add trailing slash to single-use tags as per X(HT)ML.
        else {
          $value = rtrim($value, ' /') .' /';
        }
        $output .= '<'. $value .'>';
      }
    }
    else {
      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }
  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</'. array_shift($stack) .'>';
  }
  return $output;
}

/**
 * Make links out of absolute URLs.
 *
 * Callback for the absolute URL pattern in _filter_url().
 *
 * @param $match
 *   The match array: [1] is the delimiter in front of the URL, [2] the
 *   complete URL including its protocol and [5] any trailing punctuation.
 * @return
 *   The leading delimiter, the generated link and the trailing punctuation.
 */
function _filter_url_parse_full_links($match) {
  $match[2] = decode_entities($match[2]);
  // Only the visible link text is shortened; href and title keep the full URL.
  $caption = check_plain(_filter_url_trim($match[2]));
  $match[2] = check_url($match[2]);
  return $match[1] .'<a href="'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[5];
}

/**
 * Make links out of domain names starting with "www."
 *
 * Callback for the "www." pattern in _filter_url().
 *
 * @param $match
 *   The match array: [1] is the delimiter in front of the address, [2] the
 *   address starting with "www." and [3] any trailing punctuation.
 * @return
 *   The leading delimiter, the generated link (with "http://" prepended to the
 *   href) and the trailing punctuation.
 */
function _filter_url_parse_partial_links($match) {
  $match[2] = decode_entities($match[2]);
  // Only the visible link text is shortened; href and title keep the full address.
  $caption = check_plain(_filter_url_trim($match[2]));
  $match[2] = check_plain($match[2]);
  return $match[1] .'<a href="http://'. $match[2] .'" title="'. $match[2] .'">'. $caption .'</a>'. $match[3];
}

/**
 * Shortens long URLs to http://www.example.com/long/url...
 *
 * The maximum length is remembered between calls, so it can be set once
 * (passing NULL as $text) before the function is used as part of a regexp
 * callback, which can only pass the text.
 *
 * @param $text
 *   The URL text to shorten, or NULL to only store $length.
 * @param $length
 *   Optional new maximum length. When NULL the stored length is kept.
 * @return
 *   The text, cut to the stored length with '...' appended when it is more
 *   than three characters longer than that length; otherwise unchanged.
 */
function _filter_url_trim($text, $length = NULL) {
  // 72 mirrors the default of the "maximum link text length" setting, so a
  // call made before any length was stored still behaves sensibly.
  static $_length = 72;
  if ($length !== NULL) {
    $_length = (int) $length;
  }

  // Nothing to shorten when the call only stores the length.
  if ($text === NULL) {
    return $text;
  }

  // Use +3 for '...' string length.
  if (strlen($text) > $_length + 3) {
    $text = substr($text, 0, $_length) .'...';
  }

  return $text;
}

/**
 * Convert line breaks into <p> and <br> in an intelligent fashion.
 * Based on: http://photomatt.net/scripts/autop
 *
 * @param $text
 *   The text to convert.
 * @return
 *   The text with paragraphs wrapped in <p> tags and single line breaks
 *   turned into <br />. The contents of <pre>, <script>, <style> and <object>
 *   elements are passed through untouched.
 */
function _filter_autop($text) {
  // All block level tags
  $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6])';

  // Split at <pre>, <script>, <style>, <object> and their closing tags.
  // We don't apply any processing to the contents of these tags to avoid messing
  // up code. We look for matched pairs and allow basic nesting. For example:
  // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
  $chunks = preg_split('@(</?(?:pre|script|style|object)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting NULL as required).
  $ignore = FALSE;
  $ignoretag = '';
  $output = '';
  foreach ($chunks as $i => $chunk) {
    if ($i % 2) {
      // Opening or closing tag?
      $open = ($chunk[1] != '/');
      // Skip "<" or "</" and take everything up to the first space or ">".
      // The name is lowercased because the split above is case-insensitive,
      // so <PRE> ... </pre> has to count as a matching pair.
      list($tag) = preg_split('/[ >]/', substr($chunk, $open ? 1 : 2), 2);
      $tag = strtolower($tag);
      if (!$ignore) {
        if ($open) {
          $ignore = TRUE;
          $ignoretag = $tag;
        }
      }
      // Only allow a matching tag to close it.
      else if (!$open && $ignoretag == $tag) {
        $ignore = FALSE;
        $ignoretag = '';
      }
    }
    else if (!$ignore) {
      $chunk = preg_replace('|\n*$|', '', $chunk) ."\n\n"; // just to make things a little easier, pad the end
      $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
      $chunk = preg_replace('!(<'. $block .'[^>]*>)!', "\n$1", $chunk); // Space things out a little
      $chunk = preg_replace('!(</'. $block .'>)!', "$1\n\n", $chunk); // Space things out a little
      $chunk = preg_replace("/\n\n+/", "\n\n", $chunk); // take care of duplicates
      $chunk = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $chunk); // make paragraphs, including one at the end
      $chunk = preg_replace('|<p>\s*</p>\n|', '', $chunk); // under certain strange conditions it could create a P of entirely whitespace
      $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk); // problem with nested lists
      $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
      $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
      $chunk = preg_replace('!<p>\s*(</?'. $block .'[^>]*>)!', "$1", $chunk);
      $chunk = preg_replace('!(</?'. $block .'[^>]*>)\s*</p>!', "$1", $chunk);
      $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk); // make line breaks
      $chunk = preg_replace('!(</?'. $block .'[^>]*>)\s*<br />!', "$1", $chunk);
      $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|th|pre|td|ul|ol)>)!', '$1', $chunk);
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
    }
    $output .= $chunk;
  }
  return $output;
}

/**
 * Very permissive XSS/HTML filter for admin-only use.
 *
 * Use only for fields where it is impractical to use the
 * whole filter system, but where some (mainly inline) mark-up
 * is desired (so check_plain() is not acceptable).
 *
 * Allows all tags that can be used inside an HTML body, save
 * for scripts and styles.
 *
 * @param $string
 *   The string with raw HTML in it.
 * @return
 *   The result of filter_xss() for $string with the permissive tag list.
 */
function filter_xss_admin($string) {
  return filter_xss($string, array('a', 'abbr', 'acronym', 'address', 'b', 'bdo', 'big', 'blockquote', 'br', 'caption', 'cite', 'code', 'col', 'colgroup', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'ins', 'kbd', 'li', 'object', 'ol', 'p', 'param', 'pre', 'q', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt', 'ul', 'var'));
}

/**
 * Filters XSS. Based on kses by Ulf Harnhammar, see
 * http://sourceforge.net/projects/kses
 *
 * For examples of various XSS attacks, see:
 * http://ha.ckers.org/xss.html
 *
 * This code does four things:
 * - Removes characters and constructs that can trick browsers
 * - Makes sure all HTML entities are well-formed
 * - Makes sure all HTML tags and attributes are well-formed
 * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
 *
 * @param $string
 *   The string with raw HTML in it. It will be stripped of everything that can cause
 *   an XSS attack.
 * @param $allowed_tags
 *   An array of allowed tags.
 * @return
 *   The filtered string.
 */
function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
  // Store the allowed tags for the regexp callback used below, which only
  // receives the match array.
  _filter_xss_split($allowed_tags, TRUE);
  // Remove NUL characters (ignored by some browsers)
  $string = str_replace(chr(0), '', $string);
  // Remove Netscape 4 JS entities
  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);

  // Defuse all HTML entities
  $string = str_replace('&', '&amp;', $string);
  // Change back only well-formed entities in our whitelist
  // Named entities
  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
  // Decimal numeric entities
  $string = preg_replace('/&amp;#([0-9]+;)/', '&#\1', $string);
  // Hexadecimal numeric entities
  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);

  return preg_replace_callback('%
    (
    <(?=[^a-zA-Z!/])  # a lone <
    |                 # or
    <[^>]*.(>|$)      # a string that starts with a <, up until the > or the end of the string
    |                 # or
    >                 # just a >
    )%x', '_filter_xss_split', $string);
}

/**
 * Processes an HTML tag.
 *
Dries's avatar
Dries committed
983 984 985 986 987 988
 * @param $m
 *   An array with various meaning depending on the value of $store.
 *   If $store is TRUE then the array contains the allowed tags.
 *   If $store is FALSE then the array has one element, the HTML tag to process.
 * @param $store
 *   Whether to store $m.
Dries's avatar
Dries committed
989
 * @return
Dries's avatar
Dries committed
990 991
 *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
 *   version of the HTML element.
Dries's avatar
Dries committed
992
 */
Dries's avatar
Dries committed
993
function _filter_xss_split($m, $store = FALSE) {
Dries's avatar
Dries committed
994 995
  static $allowed_html;

Dries's avatar
Dries committed
996 997
  if ($store) {
    $allowed_html = array_flip($m);
Dries's avatar
Dries committed
998 999 1000
    return;
  }

1001
  $string = $m[1];
Dries's avatar
Dries committed
1002 1003 1004 1005 1006

  if (substr($string, 0, 1) != '<') {
    // We matched a lone ">" character
    return '&gt;';
  }
1007 1008 1009 1010
  else if (strlen($string) == 1) {
    // We matched a lone "<" character
    return '&lt;';
  }