<?php
// $Id$

/**
 * @file
 * Framework for handling filtering of content.
 */
/**
 * Implements hook_help().
 *
 * @param $path
 *   The path for which help text is requested.
 * @param $arg
 *   Path arguments; not used by this implementation.
 *
 * @return
 *   An HTML string for the three paths handled below; nothing is returned for
 *   any other path.
 */
function filter_help($path, $arg) {
  switch ($path) {
    case 'admin/help#filter':
      $output = '';
      $output .= '<h3>' . t('About') . '</h3>';
      $output .= '<p>' . t('The Filter module allows administrators to configure text formats. A text format defines the HTML tags, codes, and other input allowed in content and comments, and is a key feature in guarding against potentially damaging input from malicious users. For more information, see the online handbook entry for <a href="@filter">Filter module</a>.', array('@filter' => 'http://drupal.org/handbook/modules/filter/')) . '</p>';
      $output .= '<h3>' . t('Uses') . '</h3>';
      $output .= '<dl>';
      $output .= '<dt>' . t('Configuring text formats') . '</dt>';
      $output .= '<dd>' . t('Configure text formats on the <a href="@formats">Text formats page</a>. <strong>Improper text format configuration is a security risk</strong>. To ensure security, untrusted users should only have access to text formats that restrict them to either plain text or a safe set of HTML tags, since certain HTML tags can allow embedding malicious links or scripts in text. More trusted registered users may be granted permission to use less restrictive text formats in order to create rich content.', array('@formats' => url('admin/config/content/formats'))) . '</dd>';
      $output .= '<dt>' . t('Applying filters to text') . '</dt>';
      $output .= '<dd>' . t('Each text format uses filters to manipulate text, and most formats apply several different filters to text in a specific order. Each filter is designed for a specific purpose, and generally either adds, removes, or transforms elements within user-entered text before it is displayed. A filter does not change the actual content, but instead, modifies it temporarily before it is displayed. One filter may remove unapproved HTML tags, while another automatically adds HTML to make URLs display as clickable links.') . '</dd>';
      $output .= '<dt>' . t('Defining text formats') . '</dt>';
      $output .= '<dd>' . t('One format is included by default: <em>Plain text</em> (which removes all HTML tags). Additional formats may be created by your installation profile when you install Drupal, and more can be created by an administrator on the <a href="@text-formats">Text formats page</a>.', array('@text-formats' => url('admin/config/content/formats'))) . '</dd>';
      $output .= '<dt>' . t('Choosing a text format') . '</dt>';
      $output .= '<dd>' . t('Users with access to more than one text format can use the <em>Text format</em> fieldset to choose between available text formats when creating or editing multi-line content. Administrators can define the text formats available to each user role, and control the order of formats listed in the <em>Text format</em> fieldset on the <a href="@text-formats">Text formats page</a>.', array('@text-formats' => url('admin/config/content/formats'))) . '</dd>';
      $output .= '</dl>';
      return $output;

    case 'admin/config/content/formats':
      $output = '<p>' . t('Text formats define the HTML tags, code, and other formatting that can be used when entering text. <strong>Improper text format configuration is a security risk</strong>. Learn more on the <a href="@filterhelp">Filter module help page</a>.', array('@filterhelp' => url('admin/help/filter'))) . '</p>';
      $output .= '<p>' . t('Text formats are presented on content editing pages in the order defined on this page.') . '</p>';
      return $output;

    case 'admin/config/content/formats/%':
      $output = '<p>' . t('A text format contains filters that change the user input, for example stripping out malicious HTML or making URLs clickable. Filters are executed from top to bottom and the order is important, since one filter may prevent another filter from doing its job. For example, when URLs are converted into links before disallowed HTML tags are removed, all links may be removed. When this happens, the order of filters may need to be re-arranged.') . '</p>';
      return $output;
  }
}

42
/**
 * Implements hook_theme().
 *
 * @return
 *   An array of theme hook definitions keyed by theme hook name. Each entry
 *   declares either a 'render element' or a 'variables' array, and optionally
 *   the include 'file' that holds the implementation.
 */
function filter_theme() {
  return array(
    'filter_admin_overview' => array(
      'render element' => 'form',
      'file' => 'filter.admin.inc',
    ),
    'filter_admin_format_filter_order' => array(
      'render element' => 'element',
      'file' => 'filter.admin.inc',
    ),
    'filter_tips' => array(
      'variables' => array('tips' => NULL, 'long' => FALSE),
      'file' => 'filter.pages.inc',
    ),
    'filter_tips_more_info' => array(
      'variables' => array(),
    ),
    'filter_guidelines' => array(
      'variables' => array('format' => NULL),
    ),
  );
}

68
/**
 * Implements hook_menu().
 *
 * @return
 *   An array of menu item definitions keyed by path: the public compose tips
 *   page plus the text format administration pages (overview, add, configure
 *   and delete).
 */
function filter_menu() {
  $items = array();

  $items['filter/tips'] = array(
    'title' => 'Compose tips',
    'page callback' => 'filter_tips_long',
    'access callback' => TRUE,
    'type' => MENU_SUGGESTED_ITEM,
    'file' => 'filter.pages.inc',
  );
  $items['admin/config/content/formats'] = array(
    'title' => 'Text formats',
    'description' => 'Configure how content input by users is filtered, including allowed HTML tags. Also allows enabling of module-provided filters.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_overview'),
    'access arguments' => array('administer filters'),
    'file' => 'filter.admin.inc',
  );
  $items['admin/config/content/formats/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items['admin/config/content/formats/add'] = array(
    'title' => 'Add text format',
    'page callback' => 'filter_admin_format_page',
    'access arguments' => array('administer filters'),
    'type' => MENU_LOCAL_ACTION,
    'weight' => 1,
    'file' => 'filter.admin.inc',
  );
  $items['admin/config/content/formats/%filter_format'] = array(
    'type' => MENU_CALLBACK,
    'title callback' => 'filter_admin_format_title',
    'title arguments' => array(4),
    'page callback' => 'filter_admin_format_page',
    'page arguments' => array(4),
    'access arguments' => array('administer filters'),
    'file' => 'filter.admin.inc',
  );
  // Deletion uses a dedicated access callback so that the fallback format
  // cannot be deleted; see _filter_delete_format_access().
  $items['admin/config/content/formats/%filter_format/delete'] = array(
    'title' => 'Delete text format',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('filter_admin_delete', 4),
    'access callback' => '_filter_delete_format_access',
    'access arguments' => array(4),
    'type' => MENU_CALLBACK,
    'file' => 'filter.admin.inc',
  );

  return $items;
}

120
121
122
123
124
125
126
127
128
129
130
131
132
133
/**
 * Menu access callback: decides whether a text format may be deleted.
 *
 * @param $format
 *   A text format object; its 'format' property is compared against the
 *   fallback format ID.
 *
 * @return
 *   TRUE if the current user has the 'administer filters' permission and the
 *   given format is not the fallback format, FALSE otherwise.
 */
function _filter_delete_format_access($format) {
  // Without the administration permission nothing can be deleted.
  if (!user_access('administer filters')) {
    return FALSE;
  }
  // The fallback format can never be deleted.
  return $format->format != filter_fallback_format();
}

134
135
136
/**
 * Load a text format object.
 *
 * The format is looked up in the list returned by filter_formats().
 *
 * @param $format_id
 *   The format ID.
 *
 * @return
 *   The text format object for the given ID, or FALSE if no format with that
 *   ID exists.
 */
function filter_format_load($format_id) {
  $formats = filter_formats();
  return isset($formats[$format_id]) ? $formats[$format_id] : FALSE;
}

148
149
150
151
/**
 * Save a text format object to the database.
 *
 * @param $format
 *   A format object using the properties:
 *   - 'name': The title of the text format.
 *   - 'format': (optional) The internal ID of the text format. If omitted, a
 *     new text format is created.
 *   - 'roles': (optional) An associative array containing the roles allowed to
 *     access/use the text format.
 *   - 'filters': (optional) An associative, multi-dimensional array of filters
 *     assigned to the text format, using the properties:
 *     - 'weight': The weight of the filter in the text format.
 *     - 'status': A boolean indicating whether the filter is enabled in the
 *       text format.
 *     - 'module': The name of the module implementing the filter.
 *     - 'settings': (optional) An array of configured settings for the filter.
 *       See hook_filter_info() for details.
 *   The object is passed by reference; its 'name', 'cache' and 'filters'
 *   properties are normalized in place.
 *
 * @return
 *   The result of drupal_write_record() when a new format was inserted
 *   (SAVED_NEW), or SAVED_UPDATED otherwise.
 */
function filter_format_save(&$format) {
  $format->name = trim($format->name);
  $format->cache = _filter_format_is_cacheable($format);

  // Add a new text format.
  if (empty($format->format)) {
    $return = drupal_write_record('filter_format', $format);
  }
  else {
    $return = drupal_write_record('filter_format', $format, 'format');
  }

  $filter_info = filter_get_filters();
  // Programmatic saves may not contain any filters.
  if (!isset($format->filters)) {
    $format->filters = array();
  }
  // Write one {filter} row per known filter, not only for those passed in.
  foreach ($filter_info as $name => $filter) {
    // Add new filters without weight to the bottom.
    if (!isset($format->filters[$name]['weight'])) {
      $format->filters[$name]['weight'] = 10;
    }
    $format->filters[$name]['status'] = isset($format->filters[$name]['status']) ? $format->filters[$name]['status'] : 0;
    $format->filters[$name]['module'] = $filter['module'];

    // If settings were passed, only ensure default settings.
    if (isset($format->filters[$name]['settings'])) {
      if (isset($filter['default settings'])) {
        $format->filters[$name]['settings'] = array_merge($filter['default settings'], $format->filters[$name]['settings']);
      }
    }
    // Otherwise, use default settings or fall back to an empty array.
    else {
      $format->filters[$name]['settings'] = isset($filter['default settings']) ? $filter['default settings'] : array();
    }

    $fields = array();
    $fields['weight'] = $format->filters[$name]['weight'];
    $fields['status'] = $format->filters[$name]['status'];
    $fields['module'] = $format->filters[$name]['module'];
    $fields['settings'] = serialize($format->filters[$name]['settings']);

    db_merge('filter')
      ->key(array(
        'format' => $format->format,
        'name' => $name,
      ))
      ->fields($fields)
      ->execute();
  }

  if ($return == SAVED_NEW) {
    module_invoke_all('filter_format_insert', $format);
  }
  else {
    module_invoke_all('filter_format_update', $format);
    // Explicitly indicate that the format was updated. We need to do this
    // since if the filters were updated but the format object itself was not,
    // the call to drupal_write_record() above would not return an indication
    // that anything had changed.
    $return = SAVED_UPDATED;

    // Clear the filter cache whenever a text format is updated.
    cache_clear_all($format->format . ':', 'cache_filter', TRUE);
  }

  filter_formats_reset();

  return $return;
}

/**
 * Delete a text format.
 *
 * Removes the format's rows from the {filter_format} and {filter} tables,
 * invokes hook_filter_format_delete() with the deleted format and the fallback
 * format, resets the static format caches and clears the format's entries from
 * the filter cache.
 *
 * @param $format
 *   The text format object to be deleted.
 */
function filter_format_delete($format) {
  db_delete('filter_format')
    ->condition('format', $format->format)
    ->execute();
  db_delete('filter')
    ->condition('format', $format->format)
    ->execute();

  // Allow modules to react on text format deletion.
  $fallback = filter_format_load(filter_fallback_format());
  module_invoke_all('filter_format_delete', $format, $fallback);

  filter_formats_reset();
  cache_clear_all($format->format . ':', 'cache_filter', TRUE);
}

260
/**
 * Display a text format form title.
 *
 * @param $format
 *   A text format object.
 *
 * @return
 *   The value of the format's 'name' property.
 */
function filter_admin_format_title($format) {
  return $format->name;
}

267
/**
 * Implements hook_permission().
 *
 * @return
 *   An array of permission definitions keyed by permission name: the global
 *   'administer filters' permission plus one permission per text format for
 *   which filter_permission_name() returns a name.
 */
function filter_permission() {
  $perms = array();

  $perms['administer filters'] = array(
    'title' => t('Administer and use any text formats and filters'),
    'description' => drupal_placeholder(array('text' => t('Warning: This permission may have security implications depending on how the text format is configured.'))),
  );

  // Generate permissions for each text format. Warn the administrator that any
  // of them are potentially unsafe.
  foreach (filter_formats() as $format) {
    $permission = filter_permission_name($format);
    if (!empty($permission)) {
      // Only link to the text format configuration page if the user who is
      // viewing this will have access to that page.
      $format_name_replacement = user_access('administer filters') ? l($format->name, 'admin/config/content/formats/' . $format->format) : drupal_placeholder(array('text' => $format->name));
      $perms[$permission] = array(
        'title' => t("Use the !text_format text format", array('!text_format' => $format_name_replacement)),
        'description' => drupal_placeholder(array('text' => t('Warning: This permission may have security implications depending on how the text format is configured.'))),
      );
    }
  }
  return $perms;
}

/**
 * Returns the machine-readable permission name for a provided text format.
 *
 * @param $format
 *   An object representing a text format.
 *
 * @return
 *   The machine-readable permission name ('use text format ' followed by the
 *   format ID), or FALSE if the provided text format is malformed or is the
 *   fallback format (which is available to all users).
 */
function filter_permission_name($format) {
  if (isset($format->format) && $format->format != filter_fallback_format()) {
    return 'use text format ' . $format->format;
  }
  return FALSE;
}

309
/**
 * Implements hook_cron().
 *
 * Expire outdated filter cache entries.
 */
function filter_cron() {
  cache_clear_all(NULL, 'cache_filter');
}

Dries's avatar
Dries committed
318
/**
 * Retrieve a list of text formats, ordered by weight.
 *
 * Results are kept in a static cache: the complete list under the 'all' key
 * and per-user lists under 'user', keyed by user ID.
 *
 * @param $account
 *   (optional) If provided, only those formats that are allowed for this user
 *   account will be returned. All formats will be returned otherwise.
 *
 * @return
 *   An array of text format objects, keyed by the format ID and ordered by
 *   weight.
 *
 * @see filter_formats_reset()
 */
function filter_formats($account = NULL) {
  $formats = &drupal_static(__FUNCTION__, array());

  // Statically cache all existing formats upfront.
  if (!isset($formats['all'])) {
    $formats['all'] = db_select('filter_format', 'ff')
      ->addTag('translatable')
      ->fields('ff')
      ->orderBy('weight')
      ->execute()
      ->fetchAllAssoc('format');
  }

  // Build a list of user-specific formats.
  if (isset($account) && !isset($formats['user'][$account->uid])) {
    $formats['user'][$account->uid] = array();
    foreach ($formats['all'] as $format) {
      if (filter_access($format, $account)) {
        $formats['user'][$account->uid][$format->format] = $format;
      }
    }
  }

  return isset($account) ? $formats['user'][$account->uid] : $formats['all'];
}
355

356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
/**
 * Resets the static caches that hold text format data.
 *
 * Clears the static storage of filter_list_format() first and then that of
 * filter_formats().
 *
 * @see filter_formats()
 */
function filter_formats_reset() {
  $static_cache_names = array('filter_list_format', 'filter_formats');
  foreach ($static_cache_names as $static_cache_name) {
    drupal_static_reset($static_cache_name);
  }
}

/**
 * Retrieves a list of roles that are allowed to use a given text format.
 *
 * @param $format
 *   An object representing the text format.
 *
 * @return
 *   An array of role names, keyed by role ID.
 */
function filter_get_roles_by_format($format) {
  // Handle the fallback format upfront (all roles have access to this format).
  if ($format->format == filter_fallback_format()) {
    return user_roles();
  }
  // Do not list any roles if the permission does not exist.
  $permission = filter_permission_name($format);
  return !empty($permission) ? user_roles(FALSE, $permission) : array();
}

/**
 * Retrieves a list of text formats that are allowed for a given role.
 *
 * @param $rid
 *   The user role ID to retrieve text formats for.
 *
 * @return
 *   An array of text format objects that are allowed for the role, keyed by
 *   the text format ID and ordered by weight.
 */
function filter_get_formats_by_role($rid) {
  $formats = array();
  foreach (filter_formats() as $format) {
    $roles = filter_get_roles_by_format($format);
    if (isset($roles[$rid])) {
      $formats[$format->format] = $format;
    }
  }
  return $formats;
}

/**
 * Returns the ID of the default text format for a particular user.
 *
 * The default text format is the first available format that the user is
 * allowed to access, when the formats are ordered by weight. It should
 * generally be used as a default choice when presenting the user with a list
 * of possible text formats (for example, in a node creation form).
 *
 * Conversely, when existing content that does not have an assigned text format
 * needs to be filtered for display, the default text format is the wrong
 * choice, because it is not guaranteed to be consistent from user to user, and
 * some trusted users may have an unsafe text format set by default, which
 * should not be used on text of unknown origin. Instead, the fallback format
 * returned by filter_fallback_format() should be used, since that is intended
 * to be a safe, consistent format that is always available to all users.
 *
 * @param $account
 *   (optional) The user account to check. Defaults to the currently logged-in
 *   user.
 *
 * @return
 *   The ID of the user's default text format.
 *
 * @see filter_fallback_format()
 */
function filter_default_format($account = NULL) {
  global $user;
  if (!isset($account)) {
    $account = $user;
  }
  // Get a list of formats for this user, ordered by weight. The first one
  // available is the user's default format.
  $formats = filter_formats($account);
  $format = reset($formats);
  return $format->format;
}

/**
 * Returns the ID of the fallback text format that all users have access to.
 *
 * @return
 *   The stored value of the 'filter_fallback_format' variable.
 */
function filter_fallback_format() {
  // The variable is set in the database for every Drupal installation. No
  // default is supplied on purpose: should the variable ever go missing,
  // there is no safe substitute, because picking an existing (and possibly
  // unsafe) text format would make it available to all users. Returning NULL
  // at least guarantees that this cannot happen.
  $fallback_format_id = variable_get('filter_fallback_format');
  return $fallback_format_id;
}

/**
 * Returns the title of the fallback text format.
 *
 * @return
 *   The title produced by filter_admin_format_title() for the loaded fallback
 *   format.
 */
function filter_fallback_format_title() {
  $fallback_format = filter_format_load(filter_fallback_format());
  return filter_admin_format_title($fallback_format);
}
459

460
/**
 * Return a list of all filters provided by modules.
 *
 * Collects hook_filter_info() from every implementing module, records the
 * implementing module on each definition, fills in default values, lets
 * modules alter the result via hook_filter_info_alter(), and sorts the list
 * by filter title. The result is statically cached.
 *
 * @return
 *   An array of filter definitions keyed by filter name.
 */
function filter_get_filters() {
  $filters = &drupal_static(__FUNCTION__, array());

  if (empty($filters)) {
    foreach (module_implements('filter_info') as $module) {
      $info = module_invoke($module, 'filter_info');
      if (isset($info) && is_array($info)) {
        // Assign the name of the module implementing the filters and ensure
        // default values.
        foreach (array_keys($info) as $name) {
          $info[$name]['module'] = $module;
          $info[$name] += array(
            'description' => '',
            'weight' => 0,
          );
        }
        $filters = array_merge($filters, $info);
      }
    }
    // Allow modules to alter filter definitions.
    drupal_alter('filter_info', $filters);

    uasort($filters, '_filter_list_cmp');
  }

  return $filters;
}

/**
 * Helper function for sorting the filter list by filter title.
 *
 * @param $a
 *   A filter definition array with a 'title' key.
 * @param $b
 *   A filter definition array with a 'title' key.
 *
 * @return
 *   The strcmp() result for the two titles: negative, zero or positive when
 *   $a sorts before, equal to, or after $b.
 */
function _filter_list_cmp($a, $b) {
  return strcmp($a['title'], $b['title']);
}

Dries's avatar
Dries committed
498
/**
 * Check if text in a certain text format is allowed to be cached.
 *
 * This function can be used to check whether the result of the filtering
 * process can be cached. A text format may allow caching depending on the
 * filters enabled.
 *
 * @param $format_id
 *   The text format ID to check.
 *
 * @return
 *   TRUE if the given text format allows caching, FALSE otherwise. FALSE is
 *   also returned when the format cannot be loaded.
 */
function filter_format_allowcache($format_id) {
  $format = filter_format_load($format_id);
  return !empty($format->cache);
}

/**
 * Helper function to determine whether the output of a given text format can be cached.
 *
 * The output of a given text format can be cached when all enabled filters in
 * the text format allow caching.
 *
 * @param $format
 *   The text format object to check.
 *
 * @return
 *   TRUE if all the filters enabled in the given text format allow caching,
 *   FALSE otherwise. A format without any filters is considered cacheable.
 *
 * @see filter_format_save()
 */
function _filter_format_is_cacheable($format) {
  if (empty($format->filters)) {
    return TRUE;
  }
  $filter_info = filter_get_filters();
  foreach ($format->filters as $name => $filter) {
    // By default, 'cache' is TRUE for all filters unless specified otherwise.
    if (!empty($filter['status']) && isset($filter_info[$name]['cache']) && !$filter_info[$name]['cache']) {
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Retrieve a list of filters for a given text format.
 *
 * Note that this function returns all associated filters regardless of whether
 * they are enabled or disabled. All functions working with the filter
 * information outside of filter administration should test for $filter->status
 * before performing actions with the filter.
 *
 * @param $format_id
 *   The format ID to retrieve filters for.
 *
 * @return
 *   An array of filter objects associated to the given text format, keyed by
 *   filter name. Stored filters without a definition in filter_get_filters()
 *   are omitted. An empty array is returned when nothing is stored for the
 *   format.
 */
function filter_list_format($format_id) {
  $filters = &drupal_static(__FUNCTION__, array());
  $filter_info = filter_get_filters();

  if (!isset($filters['all'])) {
    // Initialize first so that an empty {filter} table is still remembered
    // and does not trigger a new query on every call.
    $filters['all'] = array();
    $result = db_query('SELECT * FROM {filter} ORDER BY weight, module, name');
    foreach ($result as $record) {
      $filters['all'][$record->format][$record->name] = $record;
    }
  }

  if (!isset($filters[$format_id])) {
    $format_filters = array();
    // A format may have no stored filters at all (or may not exist); treat
    // that as an empty list rather than iterating over an undefined index.
    $stored_filters = isset($filters['all'][$format_id]) ? $filters['all'][$format_id] : array();
    foreach ($stored_filters as $name => $filter) {
      if (isset($filter_info[$name])) {
        $filter->title = $filter_info[$name]['title'];
        // Unpack stored filter settings.
        $filter->settings = (isset($filter->settings) ? unserialize($filter->settings) : array());

        $format_filters[$name] = $filter;
      }
    }
    $filters[$format_id] = $format_filters;
  }

  return isset($filters[$format_id]) ? $filters[$format_id] : array();
}

586
/**
 * Run all the enabled filters on a piece of text.
 *
 * Note: Because filters can inject JavaScript or execute PHP code, security is
 * vital here. When a user supplies a text format, you should validate it using
 * filter_access() before accepting/using it. This is normally done in the
 * validation stage of the Form API. You should for example never make a preview
 * of content in a disallowed format.
 *
 * @param $text
 *   The text to be filtered.
 * @param $format_id
 *   The format id of the text to be filtered. If no format is assigned, the
 *   fallback format will be used.
 * @param $langcode
 *   Optional: the language code of the text to be filtered, e.g. 'en' for
 *   English. This allows filters to be language aware so language specific
 *   text replacement can be implemented.
 * @param $cache
 *   Boolean whether to cache the filtered output in the {cache_filter} table.
 *   The caller may set this to FALSE when the output is already cached
 *   elsewhere to avoid duplicate cache lookups and storage.
 *
 * @return
 *   The filtered text, or an empty string if the requested text format does
 *   not exist.
 */
function check_markup($text, $format_id = NULL, $langcode = '', $cache = FALSE) {
  if (empty($format_id)) {
    $format_id = filter_fallback_format();
  }
  $format = filter_format_load($format_id);
  // filter_format_load() returns FALSE for an unknown format. Without any
  // filters to apply the text would be handed back unfiltered, so refuse to
  // output anything instead.
  if (!$format) {
    return '';
  }

  // Check for a cached version of this piece of text.
  $cache = $cache && !empty($format->cache);
  $cache_id = '';
  if ($cache) {
    $cache_id = $format->format . ':' . $langcode . ':' . md5($text);
    if ($cached = cache_get($cache_id, 'cache_filter')) {
      return $cached->data;
    }
  }

  // Convert all Windows and Mac newlines to a single newline, so filters only
  // need to deal with one possibility.
  $text = str_replace(array("\r\n", "\r"), "\n", $text);

  // Get a complete list of filters, ordered properly.
  $filters = filter_list_format($format->format);
  $filter_info = filter_get_filters();

  // Give filters the chance to escape HTML-like data such as code or formulas.
  foreach ($filters as $name => $filter) {
    if ($filter->status && isset($filter_info[$name]['prepare callback']) && function_exists($filter_info[$name]['prepare callback'])) {
      $function = $filter_info[$name]['prepare callback'];
      $text = $function($text, $filter, $format, $langcode, $cache, $cache_id);
    }
  }

  // Perform filtering.
  foreach ($filters as $name => $filter) {
    if ($filter->status && isset($filter_info[$name]['process callback']) && function_exists($filter_info[$name]['process callback'])) {
      $function = $filter_info[$name]['process callback'];
      $text = $function($text, $filter, $format, $langcode, $cache, $cache_id);
    }
  }

  // Store in cache with a minimum expiration time of 1 day.
  if ($cache) {
    cache_set($cache_id, $text, 'cache_filter', REQUEST_TIME + (60 * 60 * 24));
  }

  return $text;
}

/**
 * Generates a selector for choosing a format in a form.
 *
 * @param $selected_format
 *   The ID of the format that is currently selected; uses the default format
 *   for the current user if not provided.
 * @param $weight
 *   The weight of the form element within the form.
 * @param $parents
 *   The parents array of the element. Required when defining multiple text
 *   formats on a single form or having a different parent than 'format'.
 *
 * @return
 *   Form API array for the form element.
 *
 * @ingroup forms
 */
function filter_form($selected_format = NULL, $weight = NULL, $parents = array('format')) {
  global $user;

  // Use the default format for this user if none was selected.
  if (empty($selected_format)) {
    $selected_format = filter_default_format($user);
  }

  // Get a list of formats that the current user has access to.
  $formats = filter_formats($user);

  drupal_add_js('misc/form.js');
  drupal_add_css(drupal_get_path('module', 'filter') . '/filter.css');
  $element_id = drupal_html_id('edit-' . implode('-', $parents));

  $form = array(
    '#type' => 'fieldset',
    '#weight' => $weight,
    '#attributes' => array('class' => array('filter-wrapper')),
  );
  $form['format_guidelines'] = array(
    '#prefix' => '<div id="' . $element_id . '-guidelines" class="filter-guidelines">',
    '#suffix' => '</div>',
    '#weight' => 2,
  );
  // Start from an empty list so the select element still receives an array
  // when the user has no accessible formats.
  $options = array();
  foreach ($formats as $format) {
    $options[$format->format] = $format->name;
    $form['format_guidelines'][$format->format] = array(
      '#markup' => theme('filter_guidelines', array('format' => $format)),
    );
  }
  $form['format'] = array(
    '#type' => 'select',
    '#title' => t('Text format'),
    '#options' => $options,
    '#default_value' => $selected_format,
    '#parents' => $parents,
    // The selector is only shown when there is an actual choice to make.
    '#access' => count($formats) > 1,
    '#id' => $element_id,
    '#attributes' => array('class' => array('filter-list')),
  );
  $form['format_help'] = array(
    '#prefix' => '<div id="' . $element_id . '-help" class="filter-help">',
    '#markup' => theme('filter_tips_more_info'),
    '#suffix' => '</div>',
    '#weight' => 1,
  );

  return $form;
}

/**
 * Checks if a user has access to a particular text format.
 *
 * @param $format
 *   An object representing the text format.
 * @param $account
 *   (optional) The user account to check access for; if omitted, the currently
 *   logged-in user is used.
 *
 * @return
 *   Boolean TRUE if the user is allowed to access the given format.
 */
function filter_access($format, $account = NULL) {
  global $user;
  if (!isset($account)) {
    $account = $user;
  }
  // Handle special cases up front. All users have access to the fallback
  // format, and administrators have access to all formats.
  if (user_access('administer filters', $account) || $format->format == filter_fallback_format()) {
    return TRUE;
  }
  // Check the permission if one exists; otherwise, we have a non-existent
  // format so we return FALSE.
  $permission = filter_permission_name($format);
  return !empty($permission) && user_access($permission, $account);
}
752

Dries's avatar
Dries committed
753
754
755
/**
 * Helper function for fetching filter tips.
 *
 * @param $format_id
 *   The ID of the text format to fetch tips for, or -1 to fetch tips for all
 *   formats available to the current user.
 * @param $long
 *   Passed through to each filter's 'tips callback' as its third argument.
 *   Defaults to FALSE.
 *
 * @return
 *   An array keyed by format name; each value is an array keyed by filter
 *   name whose entries contain 'tip' (the callback result) and 'id' (the
 *   filter name). Empty if the requested format is not available to the
 *   current user.
 */
function _filter_tips($format_id, $long = FALSE) {
  global $user;

  $formats = filter_formats($user);
  $filter_info = filter_get_filters();

  $tips = array();

  // If only listing one format, extract it from the $formats array.
  if ($format_id != -1) {
    // The requested format may not exist or may not be accessible to the
    // current user; there is nothing to list in that case.
    if (!isset($formats[$format_id])) {
      return $tips;
    }
    $formats = array($formats[$format_id]);
  }

  foreach ($formats as $format) {
    $filters = filter_list_format($format->format);
    $tips[$format->name] = array();
    foreach ($filters as $name => $filter) {
      if ($filter->status && isset($filter_info[$name]['tips callback']) && function_exists($filter_info[$name]['tips callback'])) {
        $tip = $filter_info[$name]['tips callback']($filter, $format, $long);
        $tips[$format->name][$name] = array('tip' => $tip, 'id' => $name);
      }
    }
  }

  return $tips;
}

783
784
785
786
787
788
789
/**
 * Parses an HTML snippet and returns it as a DOM object.
 *
 * This function loads the body part of a partial (X)HTML document
 * and returns a full DOMDocument object that represents this document.
 * You can use filter_dom_serialize() to serialize this DOMDocument
 * back to a XHTML snippet.
 *
 * @param $text
 *   The partial (X)HTML snippet to load. Invalid mark-up
 *   will be corrected on import.
 *
 * @return
 *   A DOMDocument that represents the loaded (X)HTML snippet.
 */
function filter_dom_load($text) {
  // DOMDocument::loadHTML() is an instance method; calling it statically is
  // a fatal error on PHP 8, so create the document first.
  $dom_document = new DOMDocument();

  // The snippet is wrapped in a full document whose meta tag declares UTF-8.
  // Ignore warnings during HTML soup loading.
  @$dom_document->loadHTML('<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $text . '</body></html>');

  return $dom_document;
}

/**
 * Converts a DOM object back to an HTML snippet.
 *
 * Serializes the children of the document's first <body> element as XML and
 * then rewrites self-closing tags so that a space precedes the closing "/>",
 * which keeps the snippet compatible with HTML user agents.
 *
 * @param $dom_document
 *   A DOMDocument object to serialize, only the tags below
 *   the first <body> node will be converted.
 *
 * @return
 *   A valid (X)HTML snippet, as a string.
 */
function filter_dom_serialize($dom_document) {
  $body_node = $dom_document->getElementsByTagName('body')->item(0);

  // Comment start/end markers used to hide the CDATA declaration, per tag.
  $comment_markers = array(
    'script' => array('//', ''),
    'style' => array('/*', '*/'),
  );
  foreach ($comment_markers as $tag_name => $markers) {
    foreach ($body_node->getElementsByTagName($tag_name) as $node) {
      filter_dom_serialize_escape_cdata_element($dom_document, $node, $markers[0], $markers[1]);
    }
  }

  // Serialize each direct child of <body> and glue the pieces together.
  $pieces = array();
  foreach ($body_node->childNodes as $child_node) {
    $pieces[] = $dom_document->saveXML($child_node);
  }

  // Turn "<tag/>" into "<tag />".
  return preg_replace('|<([^>]*)/>|i', '<$1 />', implode('', $pieces));
}
Dries's avatar
Dries committed
836

837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
/**
 * Adds comments around the <![CDATA[ section in a DOM element.
 *
 * DOMDocument::loadHTML() in filter_dom_load() makes CDATA sections from the
 * contents of inline script and style tags. This can cause HTML 4 browsers to
 * throw exceptions.
 *
 * This function attempts to solve the problem by creating a DocumentFragment
 * and imitating the behavior in drupal_get_js(), commenting the CDATA tag.
 *
 * @param $dom_document
 *   The DOMDocument containing the $dom_element.
 * @param $dom_element
 *   The element potentially containing a CDATA node.
 * @param $comment_start
 *   (optional) String to use as a comment start marker to escape the CDATA
 *   declaration. Defaults to '//'.
 * @param $comment_end
 *   (optional) String to use as a comment end marker to escape the CDATA
 *   declaration. Defaults to an empty string.
 */
function filter_dom_serialize_escape_cdata_element($dom_document, $dom_element, $comment_start = '//', $comment_end = '') {
  // childNodes is a live list and the loop body appends to and removes from
  // it, so iterate over a snapshot of the children that exist right now.
  foreach (iterator_to_array($dom_element->childNodes, FALSE) as $node) {
    if ($node instanceof DOMCdataSection) {
      // @see drupal_get_js().  This code is more or less duplicated there.
      $embed_prefix = "\n<!--{$comment_start}--><![CDATA[{$comment_start} ><!--{$comment_end}\n";
      $embed_suffix = "\n{$comment_start}--><!]]>{$comment_end}\n";

      // The data is re-parsed as XML inside a CDATA section below. A literal
      // "]]>" in the data would close that section early and make
      // appendXML() fail, so split it across two adjacent CDATA sections.
      $data = str_replace(']]>', ']]]]><![CDATA[>', $node->data);

      $fragment = $dom_document->createDocumentFragment();
      $fragment->appendXML($embed_prefix . $data . $embed_suffix);
      $dom_element->appendChild($fragment);
      $dom_element->removeChild($node);
    }
  }
}

870
871
872
873
874
875
/**
 * Returns HTML for a link to the more extensive filter tips.
 *
 * @return
 *   A paragraph containing a link to the 'filter/tips' path.
 *
 * @ingroup themeable
 */
function theme_filter_tips_more_info() {
  $link = l(t('More information about text formats'), 'filter/tips');
  return '<p>' . $link . '</p>';
}

/**
 * Format guidelines for a text format.
 *
 * @param $variables
 *   An associative array containing:
 *   - format: An object representing a text format.
 *
 * @return
 *   A <div> holding an optional label with the format name followed by the
 *   themed short filter tips for the format.
 *
 * @ingroup themeable
 */
function theme_filter_guidelines($variables) {
  $format = $variables['format'];

  // The format name is free text, so escape it before placing it in markup.
  $name = isset($format->name) ? '<label>' . check_plain($format->name) . ':</label>' : '';
  return '<div id="filter-guidelines-' . $format->format . '" class="filter-guidelines-item">' . $name . theme('filter_tips', array('tips' => _filter_tips($format->format, FALSE))) . '</div>';
}

Dries's avatar
Dries committed
895
896
897
898
899
900
/**
 * @name Standard filters
 * @{
 * Filters implemented by the filter.module.
 */

901
/**
 * Implements hook_filter_info().
 *
 * Declares the filters shipped with this module: the HTML tag limiter, the
 * line break converter, the URL converter, the HTML corrector and the HTML
 * escaper.
 */
function filter_filter_info() {
  return array(
    'filter_html' => array(
      'title' => t('Limit allowed HTML tags'),
      'process callback' => '_filter_html',
      'settings callback' => '_filter_html_settings',
      'default settings' => array(
        'allowed_html' => '<a> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 0,
      ),
      'tips callback' => '_filter_html_tips',
      'weight' => -10,
    ),
    'filter_autop' => array(
      'title' => t('Convert line breaks into HTML (i.e. <code>&lt;br&gt;</code> and <code>&lt;p&gt;</code>)'),
      'process callback' => '_filter_autop',
      'tips callback' => '_filter_autop_tips',
    ),
    'filter_url' => array(
      'title' => t('Convert URLs into links'),
      'process callback' => '_filter_url',
      'settings callback' => '_filter_url_settings',
      'default settings' => array(
        'filter_url_length' => 72,
      ),
      'tips callback' => '_filter_url_tips',
    ),
    'filter_htmlcorrector' => array(
      'title' => t('Correct faulty and chopped off HTML'),
      'process callback' => '_filter_htmlcorrector',
      'weight' => 10,
    ),
    'filter_html_escape' => array(
      'title' => t('Display any HTML as plain text'),
      'process callback' => '_filter_html_escape',
      'tips callback' => '_filter_html_escape_tips',
      'weight' => -10,
    ),
  );
}

/**
 * Settings callback for the HTML filter.
 *
 * Builds the form elements for the three HTML filter settings. Each element
 * defaults to the value stored on the filter, falling back to the matching
 * entry of $defaults when the filter has no stored value.
 */
function _filter_html_settings($form, &$form_state, $filter, $format, $defaults) {
  // Resolve the value to show for every setting up front.
  $current = array();
  foreach (array('allowed_html', 'filter_html_help', 'filter_html_nofollow') as $key) {
    $current[$key] = isset($filter->settings[$key]) ? $filter->settings[$key] : $defaults[$key];
  }

  return array(
    'allowed_html' => array(
      '#type' => 'textfield',
      '#title' => t('Allowed HTML tags'),
      '#default_value' => $current['allowed_html'],
      '#maxlength' => 1024,
      '#description' => t('A list of HTML tags that can be used. JavaScript event attributes, JavaScript URLs, and CSS are always stripped.'),
    ),
    'filter_html_help' => array(
      '#type' => 'checkbox',
      '#title' => t('Display basic HTML help in long filter tips'),
      '#default_value' => $current['filter_html_help'],
    ),
    'filter_html_nofollow' => array(
      '#type' => 'checkbox',
      '#title' => t('Add rel="nofollow" to all links'),
      '#default_value' => $current['filter_html_nofollow'],
    ),
  );
}

/**
 * HTML filter. Provides filtering of input into accepted HTML.
 *
 * Runs the text through filter_xss() with the tags listed in the filter's
 * 'allowed_html' setting and, when the 'filter_html_nofollow' setting is
 * enabled, sets rel="nofollow" on every link.
 */
function _filter_html($text, $filter) {
  $settings = $filter->settings;

  // The setting looks like "<a> <em>"; split on whitespace and angle brackets
  // to obtain the bare tag names.
  $allowed_tags = preg_split('/\s+|<|>/', $settings['allowed_html'], -1, PREG_SPLIT_NO_EMPTY);
  $filtered = filter_xss($text, $allowed_tags);

  if (!$settings['filter_html_nofollow']) {
    return trim($filtered);
  }

  // Round-trip through the DOM to tag every anchor.
  $html_dom = filter_dom_load($filtered);
  foreach ($html_dom->getElementsByTagName('a') as $anchor) {
    $anchor->setAttribute('rel', 'nofollow');
  }

  return trim(filter_dom_serialize($html_dom));
}

988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
/**
 * Filter tips callback for HTML filter.
 *
 * @param $filter
 *   The filter object; its 'allowed_html' and 'filter_html_help' settings are
 *   read.
 * @param $format
 *   The text format object. Not used by this callback.
 * @param $long
 *   (optional) Whether to return the long tips. Defaults to FALSE.
 *
 * @return
 *   Nothing when no tags are allowed. Otherwise the list of allowed tags as
 *   a short string, or, when $long is TRUE, that list as a paragraph that is
 *   followed by explanatory text, a table of tag examples and a table of
 *   character entities if the 'filter_html_help' setting is enabled.
 */
function _filter_html_tips($filter, $format, $long = FALSE) {
  global $base_url;

  if (!($allowed_html = $filter->settings['allowed_html'])) {
    return;
  }
  $output = t('Allowed HTML tags: @tags', array('@tags' => $allowed_html));
  if (!$long) {
    return $output;
  }

  $output = '<p>' . $output . '</p>';
  if (!$filter->settings['filter_html_help']) {
    return $output;
  }

  $output .= '<p>' . t('This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.') . '</p>';
  $output .= '<p>' . t('For more information see W3C\'s <a href="@html-specifications">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.', array('@html-specifications' => 'http://www.w3.org/TR/html/')) . '</p>';
  // Each entry maps a tag name to array(description, example markup). A NULL
  // entry marks a tag that is already described by another entry and
  // therefore gets no row of its own.
  $tips = array(
    'a' => array(t('Anchors are used to make links to other pages.'), '<a href="' . $base_url . '">' . variable_get('site_name', 'Drupal') . '</a>'),
    'br' => array(t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
    'p' => array(t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>' . t('Paragraph one.') . '</p> <p>' . t('Paragraph two.') . '</p>'),
    'strong' => array(t('Strong'), '<strong>' . t('Strong') . '</strong>'),
    'em' => array(t('Emphasized'), '<em>' . t('Emphasized') . '</em>'),
    'cite' => array(t('Cited'), '<cite>' . t('Cited') . '</cite>'),
    'code' => array(t('Coded text used to show programming source code'), '<code>' . t('Coded') . '</code>'),
    'b' => array(t('Bolded'), '<b>' . t('Bolded') . '</b>'),
    'u' => array(t('Underlined'), '<u>' . t('Underlined') . '</u>'),
    'i' => array(t('Italicized'), '<i>' . t('Italicized') . '</i>'),
    'sup' => array(t('Superscripted'), t('<sup>Super</sup>scripted')),
    'sub' => array(t('Subscripted'), t('<sub>Sub</sub>scripted')),
    'pre' => array(t('Preformatted'), '<pre>' . t('Preformatted') . '</pre>'),
    'abbr' => array(t('Abbreviation'), t('<abbr title="Abbreviation">Abbrev.</abbr>')),
    'acronym' => array(t('Acronym'), t('<acronym title="Three-Letter Acronym">TLA</acronym>')),
    'blockquote' => array(t('Block quoted'), '<blockquote>' . t('Block quoted') . '</blockquote>'),
    'q' => array(t('Quoted inline'), '<q>' . t('Quoted inline') . '</q>'),
    // Assumes and describes tr, td, th.
    'table' => array(t('Table'), '<table> <tr><th>' . t('Table header') . '</th></tr> <tr><td>' . t('Table cell') . '</td></tr> </table>'),
    'tr' => NULL, 'td' => NULL, 'th' => NULL,
    'del' => array(t('Deleted'), '<del>' . t('Deleted') . '</del>'),
    'ins' => array(t('Inserted'), '<ins>' . t('Inserted') . '</ins>'),
    // Assumes and describes li.
    'ol' => array(t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>' . t('First item') . '</li> <li>' . t('Second item') . '</li> </ol>'),
    'ul' => array(t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>' . t('First item') . '</li> <li>' . t('Second item') . '</li> </ul>'),
    'li' => NULL,
    // Assumes and describes dt and dd.
    'dl' => array(t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>' . t('First term') . '</dt> <dd>' . t('First definition') . '</dd> <dt>' . t('Second term') . '</dt> <dd>' . t('Second definition') . '</dd> </dl>'),
    'dt' => NULL, 'dd' => NULL,
    'h1' => array(t('Heading'), '<h1>' . t('Title') . '</h1>'),
    'h2' => array(t('Heading'), '<h2>' . t('Subtitle') . '</h2>'),
    'h3' => array(t('Heading'), '<h3>' . t('Subtitle three') . '</h3>'),
    'h4' => array(t('Heading'), '<h4>' . t('Subtitle four') . '</h4>'),
    'h5' => array(t('Heading'), '<h5>' . t('Subtitle five') . '</h5>'),
    'h6' => array(t('Heading'), '<h6>' . t('Subtitle six') . '</h6>')
  );
  $header = array(t('Tag Description'), t('You Type'), t('You Get'));
  // Start from an empty list so the table below still receives an array when
  // the setting contains no "<tag>" token at all.
  $rows = array();
  preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
  foreach ($out[1] as $tag) {
    if (array_key_exists($tag, $tips)) {
      if ($tips[$tag]) {
        $rows[] = array(
          array('data' => $tips[$tag][0], 'class' => array('description')),
          array('data' => '<code>' . check_plain($tips[$tag][1]) . '</code>', 'class' => array('type')),
          array('data' => $tips[$tag][1], 'class' => array('get'))
        );
      }
    }
    else {
      $rows[] = array(
        array('data' => t('No help provided for tag %tag.', array('%tag' => $tag)), 'class' => array('description'), 'colspan' => 3),
      );
    }
  }
  $output .= theme('table', array('header' => $header, 'rows' => $rows));

  $output .= '<p>' . t('Most unusual characters can be directly entered without any problems.') . '</p>';
  $output .= '<p>' . t('If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="@html-entities">entities</a> page. Some of the available characters include:', array('@html-entities' => 'http://www.w3.org/TR/html4/sgml/entities.html')) . '</p>';

  $entities = array(
    array(t('Ampersand'), '&amp;'),
    array(t('Greater than'), '&gt;'),
    array(t('Less than'), '&lt;'),
    array(t('Quotation mark'), '&quot;'),
  );
  $header = array(t('Character Description'), t('You Type'), t('You Get'));
  // Reset the row list for the second table.
  $rows = array();
  foreach ($entities as $entity) {
    $rows[] = array(
      array('data' => $entity[0], 'class' => array('description')),
      array('data' => '<code>' . check_plain($entity[1]) . '</code>', 'class' => array('type')),
      array('data' => $entity[1], 'class' => array('get'))
    );
  }
  $output .= theme('table', array('header' => $header, 'rows' => $rows));
  return $output;
}

Steven Wittens's avatar