filter.module 36 KB
Newer Older
1 2
<?php

3 4
/**
 * @file
5
 * Framework for handling the filtering of content.
6
 */
7

8
use Drupal\Component\Utility\Html;
9
use Drupal\Component\Utility\Unicode;
10
use Drupal\Component\Utility\String;
11
use Drupal\Component\Utility\Xss;
12
use Drupal\Core\Cache\Cache;
13
use Drupal\Core\Extension\Extension;
14
use Drupal\Core\Render\Element;
15
use Drupal\Core\Routing\RouteMatchInterface;
16
use Drupal\Core\Session\AccountInterface;
17
use Drupal\Core\Template\Attribute;
18
use Drupal\filter\Entity\FilterFormat;
19
use Drupal\filter\FilterFormatInterface;
20

21
/**
 * Implements hook_help().
 *
 * Provides help text for three routes: the Filter module help page, the text
 * formats overview page and the text format edit form. For any other route
 * nothing is returned.
 *
 * @param string $route_name
 *   The name of the route for which help is requested.
 * @param \Drupal\Core\Routing\RouteMatchInterface $route_match
 *   The current route match. Not used by this implementation.
 *
 * @return string|null
 *   The help markup for the route, or NULL if the route is not handled here.
 */
function filter_help($route_name, RouteMatchInterface $route_match) {
  switch ($route_name) {
    case 'help.page.filter':
      $output = '';
      $output .= '<h3>' . t('About') . '</h3>';
      $output .= '<p>' . t('The Filter module allows administrators to configure text formats. Text formats define the HTML tags, codes, and other input allowed in text entered in the site and they defend your web site against potentially damaging input from malicious users. A visual text editor can be associated with the text formats by using the <a href="!editor_help">Text Editor module</a>. For more information, see <a href="!filter_do">the online documentation for the Filter module</a>.', array('!filter_do' => 'https://drupal.org/documentation/modules/filter/', '!editor_help' => \Drupal::url('help.page', array('name' => 'editor')))) . '</p>';
      $output .= '<h3>' . t('Uses') . '</h3>';
      $output .= '<dl>';
      $output .= '<dt>' . t('Managing text formats') . '</dt>';
      $output .= '<dd>' . t('You can create and edit text formats on the <a href="!formats">Text formats page</a> (if the Text Editor module is enabled, this page is named Text formats and editors). One text format is included by default: Plain text (which removes all HTML tags). Additional text formats may be created during installation. You can create a text format by clicking "<a href="!add_format">Add text format</a>".', array('!formats' => \Drupal::url('filter.admin_overview'), '!add_format' => \Drupal::url('filter.format_add'))) . '</dd>';
      $output .= '<dt>' . t('Assigning roles to text formats') . '</dt>';
      $output .= '<dd>' . t('You can define which users will be able to use each text format by selecting roles. To ensure security, anonymous and untrusted users should only have access to text formats that restrict them to either plain text or a safe set of HTML tags. This is because HTML tags can allow embedding malicious links or scripts in text. More trusted registered users may be granted permission to use less restrictive text formats in order to create rich text. <strong>Improper text format configuration is a security risk</strong>.') . '</dd>';
      $output .= '<dt>' . t('Selecting filters') . '</dt>';
      // The first paragraph must be closed before the second one is opened;
      // both live inside the same <dd>.
      $output .= '<dd><p>' . t('Each text format uses filters that add, remove, or transform elements within user-entered text. For example, one filter removes unapproved HTML tags, while another transforms URLs into clickable links. Filters are applied in a specific order and do not change the actual content, but instead, modify it temporarily before it is displayed.') . '</p>';
      $output .= '<p>' . t('Each filter can have additional configuration options. For example, for the "Limit allowed HTML tags" filter you need to define the list of HTML tags that the filter leaves in the text.') . '</p></dd>';
      $output .= '<dt>' . t('Enabling text formats for field editing') . '</dt>';
      $output .= '<dd>' . t('In the field settings for a field that supports text formats (such as Long text), you can enable the use of text formats by choosing "Filtered text (user selects text format)"  under "Text processing". See the <a href="!field_help">Field module help</a> and the <a href="!field_ui_help">Field UI help</a> pages for general information on fields and how to create and manage them.', array('!field_help' => \Drupal::url('help.page', array('name' => 'field')), '!field_ui_help' => \Drupal::url('help.page', array('name' => 'field_ui')))) . '</dd>';
      $output .= '<dt>' . t('Choosing a text format') . '</dt>';
      $output .= '<dd>' . t('When creating or editing data in a field that has text formats enabled, users can select the format under the field from the Text format select list.') . '</dd>';
      $output .= '</dl>';
      return $output;

    case 'filter.admin_overview':
      $output = '<p>' . t('Text formats define the HTML tags, code, and other formatting that can be used when entering text. <strong>Improper text format configuration is a security risk</strong>. Learn more on the <a href="@filterhelp">Filter module help page</a>.', array('@filterhelp' => \Drupal::url('help.page', array('name' => 'filter')))) . '</p>';
      $output .= '<p>' . t('Text formats are presented on content editing pages in the order defined on this page. The first format available to a user will be selected by default.') . '</p>';
      return $output;

    case 'entity.filter_format.edit_form':
      $output = '<p>' . t('A text format contains filters that change the user input, for example stripping out malicious HTML or making URLs clickable. Filters are executed from top to bottom and the order is important, since one filter may prevent another filter from doing its job. For example, when URLs are converted into links before disallowed HTML tags are removed, all links may be removed. When this happens, the order of filters may need to be re-arranged.') . '</p>';
      return $output;
  }
}

57
/**
 * Implements hook_theme().
 *
 * Registers the theme hooks provided by the Filter module together with the
 * default values of their variables.
 *
 * @return array
 *   An associative array of theme hook definitions, keyed by theme hook name.
 */
function filter_theme() {
  return array(
    'filter_tips' => array(
      'variables' => array('tips' => NULL, 'long' => FALSE),
    ),
    'text_format_wrapper' => array(
      'variables' => array(
        'children' => NULL,
        'description' => NULL,
        'attributes' => array(),
      ),
    ),
    'filter_guidelines' => array(
      'variables' => array('format' => NULL),
    ),
    'filter_caption' => array(
      'variables' => array(
        'node' => NULL,
        'tag' => NULL,
        'caption' => NULL,
        'classes' => NULL,
      ),
    ),
  );
}

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/**
 * Implements hook_system_info_alter().
 *
 * Prevents uninstallation of modules that provide filter plugins that are being
 * used in a filter format.
 *
 * When at least one enabled filter of the module is found in a text format,
 * $info['required'] is set to TRUE and $info['explanation'] lists the labels
 * of the formats that use it.
 *
 * @param array $info
 *   The extension's info array; altered by reference.
 * @param \Drupal\Core\Extension\Extension $file
 *   The extension whose info is being altered.
 * @param string $type
 *   The extension type; only 'module' is handled.
 */
function filter_system_info_alter(&$info, Extension $file, $type) {
  // It is not safe to call filter_formats() during maintenance mode.
  if ($type != 'module' || defined('MAINTENANCE_MODE')) {
    return;
  }

  // Get filter plugins supplied by this module.
  $filter_plugins = array_filter(\Drupal::service('plugin.manager.filter')->getDefinitions(), function ($definition) use ($file) {
    return $definition['provider'] == $file->getName();
  });
  if (empty($filter_plugins)) {
    return;
  }

  $used_in = [];
  // Find out if any filter formats have the plugin enabled.
  foreach (filter_formats() as $filter_format) {
    $filters = $filter_format->filters();
    // Typically, all formats will contain settings for all filter plugins,
    // even if they are disabled. However, if a module which provides filter
    // plugins is being enabled right now, that won't be the case, so we
    // still check to see if this format has this filter before we check
    // the filter status.
    foreach ($filter_plugins as $filter_plugin) {
      if ($filters->has($filter_plugin['id']) && $filters->get($filter_plugin['id'])->status) {
        $used_in[] = $filter_format->label();
        $info['required'] = TRUE;
        // One enabled filter is enough to list this format; move on to the
        // next format.
        break;
      }
    }
  }

  if (!empty($used_in)) {
    $info['explanation'] = t('Provides a filter plugin that is in use in the following filter formats: %formats', array('%formats' => implode(', ', $used_in)));
  }
}

124
/**
 * Retrieves a list of enabled text formats, ordered by weight.
 *
 * The full list is kept in a static variable and in the cache, keyed by the
 * ID of the current interface language. Per-account lists are kept in the
 * static variable only.
 *
 * @param \Drupal\Core\Session\AccountInterface|null $account
 *   (optional) If provided, only those formats that are allowed for this user
 *   account will be returned. All enabled formats will be returned otherwise.
 *   Defaults to NULL.
 *
 * @return \Drupal\filter\FilterFormatInterface[]
 *   An array of text format objects, keyed by the format ID and ordered by
 *   weight.
 *
 * @see filter_formats_reset()
 */
function filter_formats(AccountInterface $account = NULL) {
  $formats = &drupal_static(__FUNCTION__, array());

  // All available formats are cached for performance.
  if (!isset($formats['all'])) {
    $language_interface = \Drupal::languageManager()->getCurrentLanguage();
    // Build the cache ID once; it is used for both the lookup and the write.
    $cid = "filter_formats:{$language_interface->getId()}";
    if ($cache = \Drupal::cache()->get($cid)) {
      $formats['all'] = $cache->data;
    }
    else {
      $formats['all'] = \Drupal::entityManager()->getStorage('filter_format')->loadByProperties(array('status' => TRUE));
      uasort($formats['all'], 'Drupal\Core\Config\Entity\ConfigEntityBase::sort');
      \Drupal::cache()->set($cid, $formats['all'], Cache::PERMANENT, \Drupal::entityManager()->getDefinition('filter_format')->getListCacheTags());
    }
  }

  // If no user was specified, return all formats.
  if (!isset($account)) {
    return $formats['all'];
  }

  // Build a list of user-specific formats.
  $account_id = $account->id();
  if (!isset($formats['user'][$account_id])) {
    $formats['user'][$account_id] = array();
    foreach ($formats['all'] as $format) {
      if ($format->access('use', $account)) {
        $formats['user'][$account_id][$format->id()] = $format;
      }
    }
  }

  return $formats['user'][$account_id];
}
172

173
/**
 * Resets the text format caches.
 *
 * Clears the static variable that filter_formats() stores its lists in.
 *
 * @see filter_formats()
 */
function filter_formats_reset() {
  // Name of the static variable owned by filter_formats().
  $static_name = 'filter_formats';
  drupal_static_reset($static_name);
}

/**
 * Retrieves a list of roles that are allowed to use a given text format.
 *
 * @param \Drupal\filter\FilterFormatInterface $format
 *   An object representing the text format.
 *
 * @return array
 *   An array of role names, keyed by role ID.
 */
function filter_get_roles_by_format(FilterFormatInterface $format) {
  // Handle the fallback format upfront (all roles have access to this format).
  if ($format->isFallbackFormat()) {
    return user_role_names();
  }
  // Do not list any roles if the permission does not exist.
  $permission = $format->getPermissionName();
  return !empty($permission) ? user_role_names(FALSE, $permission) : array();
}

/**
 * Retrieves a list of text formats that are allowed for a given role.
 *
 * @param string $rid
 *   The user role ID to retrieve text formats for.
 *
 * @return \Drupal\filter\FilterFormatInterface[]
 *   An array of text format objects that are allowed for the role, keyed by
 *   the text format ID and ordered by weight.
 */
function filter_get_formats_by_role($rid) {
  $formats = array();
  // filter_formats() is already ordered by weight, so that order carries
  // over to the filtered list.
  foreach (filter_formats() as $format) {
    $roles = filter_get_roles_by_format($format);
    if (isset($roles[$rid])) {
      $formats[$format->id()] = $format;
    }
  }
  return $formats;
}

/**
 * Returns the ID of the default text format for a particular user.
 *
 * The default text format is the first available format that the user is
 * allowed to access, when the formats are ordered by weight. It should
 * generally be used as a default choice when presenting the user with a list
 * of possible text formats (for example, in a node creation form).
 *
 * Conversely, when existing content that does not have an assigned text format
 * needs to be filtered for display, the default text format is the wrong
 * choice, because it is not guaranteed to be consistent from user to user, and
 * some trusted users may have an unsafe text format set by default, which
 * should not be used on text of unknown origin. Instead, the fallback format
 * returned by filter_fallback_format() should be used, since that is intended
 * to be a safe, consistent format that is always available to all users.
 *
 * @param \Drupal\Core\Session\AccountInterface|null $account
 *   (optional) The user account to check. When NULL (the default), the
 *   currently logged-in user is used.
 *
 * @return string
 *   The ID of the user's default text format.
 *
 * @see filter_fallback_format()
 */
function filter_default_format(AccountInterface $account = NULL) {
  if (!isset($account)) {
    $account = \Drupal::currentUser();
  }
  // Get a list of formats for this user, ordered by weight. The first one
  // available is the user's default format.
  $formats = filter_formats($account);
  $format = reset($formats);
  return $format->id();
}

/**
 * Returns the ID of the fallback text format that all users have access to.
 *
 * The fallback text format is a regular text format in every respect, except
 * it does not participate in the filter permission system and cannot be
 * disabled. It needs to exist because any user who has permission to create
 * formatted content must always have at least one text format they can use.
 *
 * Because the fallback format is available to all users, it should always be
 * configured securely. For example, when the Filter module is installed, this
 * format is initialized to output plain text. Installation profiles and site
 * administrators have the freedom to configure it further.
 *
 * Note that the fallback format is completely distinct from the default format,
 * which differs per user and is simply the first format which that user has
 * access to. The default and fallback formats are only guaranteed to be the
 * same for users who do not have access to any other format; otherwise, the
 * fallback format's weight determines its placement with respect to the user's
 * other formats.
 *
 * Any modules implementing a format deletion functionality must not delete this
 * format.
 *
 * @return string|null
 *   The ID of the fallback text format, as stored under 'fallback_format' in
 *   the 'filter.settings' configuration.
 *
 * @see hook_filter_format_disable()
 * @see filter_default_format()
 */
function filter_fallback_format() {
  // This variable is automatically set in the database for all installations
  // of Drupal. In the event that it gets disabled or deleted somehow, there
  // is no safe default to return, since we do not want to risk making an
  // existing (and potentially unsafe) text format on the site automatically
  // available to all users. Returning NULL at least guarantees that this
  // cannot happen.
  return \Drupal::config('filter.settings')->get('fallback_format');
}

297
/**
 * Runs all the enabled filters on a piece of text.
 *
 * Note: Because filters can inject JavaScript or execute PHP code, security is
 * vital here. When a user supplies a text format, you should validate it using
 * $format->access() before accepting/using it. This is normally done in the
 * validation stage of the Form API. You should for example never make a
 * preview of content in a disallowed format.
 *
 * Note: this function should only be used when filtering text for use elsewhere
 * than on a rendered HTML page. If this is part of a HTML page, then a
 * renderable array with a #type 'processed_text' element should be used instead
 * of this, because that will allow cache tags to be set and bubbled up, assets
 * to be loaded and #post_render_cache callbacks to be associated. In other
 * words: if you are presenting the filtered text in a HTML page, the only way
 * this will be presented correctly, is by using the 'processed_text' element.
 *
 * @param string $text
 *   The text to be filtered.
 * @param string|null $format_id
 *   (optional) The machine name of the filter format to be used to filter the
 *   text. Defaults to the fallback format. See filter_fallback_format().
 * @param string $langcode
 *   (optional) The language code of the text to be filtered, e.g. 'en' for
 *   English. This allows filters to be language-aware so language-specific
 *   text replacement can be implemented. Defaults to an empty string.
 * @param array $filter_types_to_skip
 *   (optional) An array of filter types to skip, or an empty array (default)
 *   to skip no filter types. All of the format's filters will be applied,
 *   except for filters of the types that are marked to be skipped.
 *   FilterInterface::TYPE_HTML_RESTRICTOR is the only type that cannot be
 *   skipped.
 *
 * @return string
 *   The filtered text.
 *
 * @see filter_process_text()
 *
 * @ingroup sanitization
 */
function check_markup($text, $format_id = NULL, $langcode = '', $filter_types_to_skip = array()) {
  // Delegate to the 'processed_text' element and render it on its own.
  $build = array(
    '#type' => 'processed_text',
    '#text' => $text,
    '#format' => $format_id,
    '#filter_types_to_skip' => $filter_types_to_skip,
    '#langcode' => $langcode,
  );
  return \Drupal::service('renderer')->renderPlain($build);
}

348
/**
 * Render API callback: Hides the field value of 'text_format' elements.
 *
 * To not break form processing and previews if a user does not have access to
 * a stored text format, the expanded form elements in filter_process_format()
 * are forced to take over the stored #default_values for 'value' and 'format'.
 * However, to prevent the unfiltered, original #value from being displayed to
 * the user, we replace it with a friendly notice here.
 *
 * @param array $element
 *   The form element whose #value is replaced.
 *
 * @return array
 *   The element with the notice as its #value.
 *
 * @see filter_process_format()
 */
function filter_form_access_denied($element) {
  $notice = t('This field has been disabled because you do not have sufficient permissions to edit it.');
  $element['#value'] = $notice;

  return $element;
}

Dries's avatar
Dries committed
364
/**
 * Retrieves the filter tips.
 *
 * Only text formats that the current user may use are considered. Tips are
 * collected from enabled filters only, and filters that return no tip are
 * left out.
 *
 * @param string $format_id
 *   The ID of the text format for which to retrieve tips, or -1 to return tips
 *   for all formats accessible to the current user.
 * @param bool $long
 *   (optional) Boolean indicating whether the long form of tips should be
 *   returned. Defaults to FALSE.
 *
 * @return array
 *   An associative array of filtering tips, keyed by text format label and
 *   then by filter name. Each filtering tip is an associative array with
 *   elements:
 *   - tip: Tip text.
 *   - id: Filter ID.
 *   An empty array is returned when $format_id names a format that is not
 *   available to the current user.
 */
function _filter_tips($format_id, $long = FALSE) {
  $formats = filter_formats(\Drupal::currentUser());

  $tips = array();

  // If only listing one format, extract it from the $formats array.
  if ($format_id != -1) {
    // The requested format may not exist or may not be usable by the current
    // user; in that case there is nothing to list.
    if (!isset($formats[$format_id])) {
      return $tips;
    }
    $formats = array($formats[$format_id]);
  }

  foreach ($formats as $format) {
    foreach ($format->filters() as $name => $filter) {
      if ($filter->status) {
        $tip = $filter->tips($long);
        if (isset($tip)) {
          $tips[$format->label()][$name] = array('tip' => $tip, 'id' => $name);
        }
      }
    }
  }

  return $tips;
}

404
/**
 * Prepares variables for text format guideline templates.
 *
 * Default template: filter-guidelines.html.twig.
 *
 * Adds a 'tips' render array that themes the short-form tips of the format
 * with the 'filter_tips' theme hook.
 *
 * @param array $variables
 *   An associative array containing:
 *   - format: An object representing a text format.
 */
function template_preprocess_filter_guidelines(&$variables) {
  $format = $variables['format'];
  $variables['tips'] = array(
    '#theme' => 'filter_tips',
    '#tips' => _filter_tips($format->id(), FALSE),
  );
}

421 422 423 424 425 426 427 428 429 430
/**
 * Prepares variables for text format wrapper templates.
 *
 * Default template: text-format-wrapper.html.twig.
 *
 * Sets 'aria_description' to TRUE when the attributes carry an
 * 'aria-describedby' value, and moves that value to the 'id' attribute.
 *
 * @param array $variables
 *   An associative array containing:
 *   - attributes: An associative array containing properties of the element.
 */
function template_preprocess_text_format_wrapper(&$variables) {
  $variables['aria_description'] = FALSE;
  // Add element class and id for screen readers.
  if (isset($variables['attributes']['aria-describedby'])) {
    $variables['aria_description'] = TRUE;
    $variables['attributes']['id'] = $variables['attributes']['aria-describedby'];
    // Remove aria-describedby attribute as it shouldn't be visible here.
    unset($variables['attributes']['aria-describedby']);
  }
}

441 442 443 444 445 446 447 448 449 450 451 452 453 454
/**
 * Prepares variables for filter tips templates.
 *
 * Default template: filter-tips.html.twig.
 *
 * Each entry of 'tips' is replaced by an array with the keys 'attributes',
 * 'name' (the escaped original key) and 'list' (the original tips, each with
 * an added 'attributes' object). 'multiple' is set to TRUE when more than one
 * entry was passed in.
 *
 * @param array $variables
 *   An associative array containing:
 *   - tips: An array containing descriptions and a CSS ID in the form of
 *     'module-name/filter-id' (only used when $long is TRUE) for each
 *     filter in one or more text formats. Example:
 *     @code
 *       array(
 *         'Full HTML' => array(
 *           0 => array(
 *             'tip' => 'Web page addresses and email addresses turn into links automatically.',
 *             'id' => 'filter/2',
 *           ),
 *         ),
 *       );
 *     @endcode
 *   - long: (optional) Whether the passed-in filter tips contain extended
 *     explanations, i.e. intended to be output on the path 'filter/tips'
 *     (TRUE), or are in a short format, i.e. suitable to be displayed below a
 *     form element. Defaults to FALSE.
 */
function template_preprocess_filter_tips(&$variables) {
  // The theme hook declares 'tips' with a NULL default, so treat a missing
  // value as an empty list rather than iterating over and counting NULL.
  $tips = isset($variables['tips']) ? $variables['tips'] : array();

  foreach ($tips as $name => $tiplist) {
    foreach (array_keys($tiplist) as $tip_key) {
      $tiplist[$tip_key]['attributes'] = new Attribute();
    }

    $variables['tips'][$name] = array(
      'attributes' => new Attribute(),
      'name' => String::checkPlain($name),
      'list' => $tiplist,
    );
  }

  $variables['multiple'] = count($tips) > 1;
}

Dries's avatar
Dries committed
484
/**
 * @defgroup standard_filters Standard filters
 * @{
 * Filters implemented by the Filter module.
 */

/**
 * Provides filtering of input into accepted HTML.
 *
 * @param string $text
 *   The text to filter.
 * @param object $filter
 *   The filter plugin instance; its 'allowed_html' and 'filter_html_nofollow'
 *   settings are read.
 *
 * @return string
 *   The filtered text, trimmed of surrounding whitespace.
 */
function _filter_html($text, $filter) {
  // Reduce the 'allowed_html' setting to a plain list of tag names by
  // splitting on whitespace and angle brackets.
  $allowed_tags = preg_split('/\s+|<|>/', $filter->settings['allowed_html'], -1, PREG_SPLIT_NO_EMPTY);
  $text = Xss::filter($text, $allowed_tags);

  if ($filter->settings['filter_html_nofollow']) {
    // Set rel="nofollow" on every remaining link.
    $html_dom = Html::load($text);
    $links = $html_dom->getElementsByTagName('a');
    foreach ($links as $link) {
      $link->setAttribute('rel', 'nofollow');
    }
    $text = Html::serialize($html_dom);
  }

  return trim($text);
}

509
/**
 * Converts text into hyperlinks automatically.
 *
 * This filter identifies and makes clickable three types of "links".
 * - URLs like http://example.com.
 * - Email addresses like name@example.com.
 * - Web addresses without the "http://" protocol defined, like
 *   www.example.com.
 * Each type must be processed separately, as there is no one regular
 * expression that could possibly match all of the cases in one pass.
 *
 * @param string $text
 *   The text to process.
 * @param object $filter
 *   The filter plugin instance; its 'filter_url_length' setting is read.
 *
 * @return string
 *   The text with matching URLs and email addresses wrapped in links.
 */
function _filter_url($text, $filter) {
  // Tags to skip and not recurse into.
  $ignore_tags = 'a|script|style|code|pre';

  // Pass length to regexp callback.
  _filter_url_trim(NULL, $filter->settings['filter_url_length']);

  // Create an array which contains the regexps for each type of link.
  // The key to the regexp is the name of a function that is used as
  // callback function to process matches of the regexp. The callback function
  // is to return the replacement for the match. The array is used and
  // matching/replacement done below inside some loops.
  $tasks = array();

  // Prepare protocols pattern for absolute URLs.
  // check_url() will replace any bad protocols with HTTP, so we need to support
  // the identical list. While '//' is technically optional for MAILTO only,
  // we cannot cleanly differ between protocols here without hard-coding MAILTO,
  // so '//' is optional for all protocols.
  // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
  $protocols = \Drupal::config('system.filter')->get('protocols');
  $protocols = implode(':(?://)?|', $protocols) . ':(?://)?';

  $valid_url_path_characters = "[\p{L}\p{M}\p{N}!\*\';:=\+,\.\$\/%#\[\]\-_~@&]";

  // Allow URL paths to contain balanced parens
  // 1. Used in Wikipedia URLs like /Primer_(film)
  // 2. Used in IIS sessions like /S(dfd346)/
  $valid_url_balanced_parens = '\('. $valid_url_path_characters . '+\)';

  // Valid end-of-path characters (so /foo. does not gobble the period).
  // 1. Allow =&# for empty URL parameters and other URL-join artifacts
  $valid_url_ending_characters = '[\p{L}\p{M}\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')';

  $valid_url_query_chars = '[a-z0-9!?\*\'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]';
  $valid_url_query_ending_chars = '[a-z0-9_&=#\/]';

  // Full path, and allow @ in a url, but only in the middle. Catch things like
  // http://example.com/@user/
  $valid_url_path = '(?:(?:'.$valid_url_path_characters . '*(?:'.$valid_url_balanced_parens .$valid_url_path_characters . '*)*'. $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\/))';

  // Prepare domain name pattern.
  // The ICANN seems to be on track towards accepting more diverse top level
  // domains, so this pattern has been "future-proofed" to allow for TLDs
  // of length 2-64.
  $domain = '(?:[\p{L}\p{M}\p{N}._+-]+\.)?[\p{L}\p{M}]{2,64}\b';
  $ip = '(?:[0-9]{1,3}\.){3}[0-9]{1,3}';
  $auth = '[\p{L}\p{M}\p{N}:%_+*~#?&=.,/;-]+@';
  $trail = '('.$valid_url_path.'*)?(\\?'.$valid_url_query_chars .'*'.$valid_url_query_ending_chars.')?';

  // Match absolute URLs.
  $url_pattern = "(?:$auth)?(?:$domain|$ip)/?(?:$trail)?";
  $pattern = "`((?:$protocols)(?:$url_pattern))`u";
  $tasks['_filter_url_parse_full_links'] = $pattern;

  // Match email addresses.
  $url_pattern = "[\p{L}\p{M}\p{N}._-]{1,254}@(?:$domain)";
  $pattern = "`($url_pattern)`u";
  $tasks['_filter_url_parse_email_links'] = $pattern;

  // Match www domains.
  $url_pattern = "www\.(?:$domain)/?(?:$trail)?";
  $pattern = "`($url_pattern)`u";
  $tasks['_filter_url_parse_partial_links'] = $pattern;

  // Each type of URL needs to be processed separately. The text is joined and
  // re-split after each task, since all injected HTML tags must be correctly
  // protected before the next task.
  foreach ($tasks as $task => $pattern) {
    // HTML comments need to be handled separately, as they may contain HTML
    // markup, especially a '>'. Therefore, remove all comment contents and add
    // them back later.
    _filter_url_escape_comments('', TRUE);
    $text = preg_replace_callback('`<!--(.*?)-->`s', '_filter_url_escape_comments', $text);

    // Split at all tags; ensures that no tags or attributes are processed.
    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    // PHP ensures that the array consists of alternating delimiters and
    // literals, and begins and ends with a literal (inserting NULL as
    // required). Therefore, the first chunk is always text:
    $chunk_type = 'text';
    // If a tag of $ignore_tags is found, it is stored in $open_tag and only
    // removed when the closing tag is found. Until the closing tag is found,
    // no replacements are made.
    $open_tag = '';

    // The number of chunks does not change inside the loop, so count once.
    $chunk_count = count($chunks);
    for ($i = 0; $i < $chunk_count; $i++) {
      if ($chunk_type == 'text') {
        // Only process this text if there are no unclosed $ignore_tags.
        if ($open_tag == '') {
          // If there is a match, inject a link into this chunk via the callback
          // function contained in $task.
          $chunks[$i] = preg_replace_callback($pattern, $task, $chunks[$i]);
        }
        // Text chunk is done, so next chunk must be a tag.
        $chunk_type = 'tag';
      }
      else {
        // Only process this tag if there are no unclosed $ignore_tags.
        if ($open_tag == '') {
          // Check whether this tag is contained in $ignore_tags.
          if (preg_match("`<($ignore_tags)(?:\s|>)`i", $chunks[$i], $matches)) {
            $open_tag = $matches[1];
          }
        }
        // Otherwise, check whether this is the closing tag for $open_tag.
        else {
          if (preg_match("`<\/$open_tag>`i", $chunks[$i])) {
            $open_tag = '';
          }
        }
        // Tag chunk is done, so next chunk must be text.
        $chunk_type = 'text';
      }
    }

    $text = implode($chunks);
    // Revert back to the original comment contents
    _filter_url_escape_comments('', FALSE);
    $text = preg_replace_callback('`<!--(.*?)-->`', '_filter_url_escape_comments', $text);
  }

  return $text;
}

/**
 * Makes links out of absolute URLs.
 *
 * Callback for preg_replace_callback() within _filter_url().
 *
 * @param array $match
 *   The regular expression matches; $match[1] holds the URL.
 *
 * @return string
 *   An anchor tag whose href is the escaped URL and whose caption is the
 *   escaped URL after it has been passed through _filter_url_trim().
 */
function _filter_url_parse_full_links($match) {
  // The $i:th parenthesis in the regexp contains the URL.
  $i = 1;

  // Decode first so that the escaping below does not double-encode entities.
  $match[$i] = String::decodeEntities($match[$i]);
  $caption = String::checkPlain(_filter_url_trim($match[$i]));
  $match[$i] = String::checkPlain($match[$i]);
  return '<a href="' . $match[$i] . '">' . $caption . '</a>';
}

/**
 * Makes links out of email addresses.
 *
 * Callback for preg_replace_callback() within _filter_url().
 *
 * @param array $match
 *   The regular expression matches; $match[0] (the whole match) holds the
 *   email address.
 *
 * @return string
 *   An anchor tag with a "mailto:" href for the escaped address and the
 *   escaped address, passed through _filter_url_trim(), as caption.
 */
function _filter_url_parse_email_links($match) {
  // The whole match (index 0) is the email address.
  $i = 0;

  // Decode first so that the escaping below does not double-encode entities.
  $match[$i] = String::decodeEntities($match[$i]);
  $caption = String::checkPlain(_filter_url_trim($match[$i]));
  $match[$i] = String::checkPlain($match[$i]);
  return '<a href="mailto:' . $match[$i] . '">' . $caption . '</a>';
}

/**
 * Makes links out of domain names starting with "www."
 *
 * Callback for preg_replace_callback() within _filter_url().
 *
 * @param array $match
 *   The regular expression matches; $match[1] holds the web address.
 *
 * @return string
 *   An anchor tag whose href is the escaped address prefixed with "http://"
 *   and whose caption is the escaped address after it has been passed through
 *   _filter_url_trim().
 */
function _filter_url_parse_partial_links($match) {
  // The $i:th parenthesis in the regexp contains the URL.
  $i = 1;

  // Decode first so that the escaping below does not double-encode entities.
  $match[$i] = String::decodeEntities($match[$i]);
  $caption = String::checkPlain(_filter_url_trim($match[$i]));
  $match[$i] = String::checkPlain($match[$i]);
  return '<a href="http://' . $match[$i] . '">' . $caption . '</a>';
}

/**
 * Escapes or restores the contents of HTML comments.
 *
 * Callback for preg_replace_callback() within _filter_url().
 *
 * The function keeps its mode and the escaped comment contents in static
 * variables. It is first called with $escape set to select the mode, and is
 * then used as a replacement callback for every matched comment.
 *
 * @param $match
 *   An array containing matches to replace from preg_replace_callback(),
 *   whereas $match[1] is expected to contain the content to be filtered.
 *   Ignored when $escape is not NULL.
 * @param $escape
 *   (optional) A Boolean indicating whether to escape (TRUE) or unescape
 *   comments (FALSE). Defaults to NULL, indicating neither. If TRUE, statically
 *   cached $comments are reset.
 *
 * @return
 *   NULL when $escape is given. Otherwise the replacement for the matched
 *   comment: a '<!-- [hash] -->' placeholder in escape mode, or the restored
 *   comment in unescape mode. A comment that is not a known placeholder is
 *   returned unchanged in unescape mode.
 */
function _filter_url_escape_comments($match, $escape = NULL) {
  static $mode, $comments = array();

  // A non-NULL $escape only switches the mode; nothing is replaced.
  if (isset($escape)) {
    $mode = $escape;
    if ($escape) {
      $comments = array();
    }
    return;
  }

  // Replace all HTML comments with a '<!-- [hash] -->' placeholder.
  if ($mode) {
    $content = $match[1];
    $hash = hash('sha256', $content);
    $comments[$hash] = $content;
    return "<!-- $hash -->";
  }

  // Or replace placeholders with actual comment contents.
  $hash = trim($match[1]);
  if (!isset($comments[$hash])) {
    // This comment is not one of our placeholders. Hand it back untouched
    // instead of reading an undefined index and emitting an empty comment.
    return '<!--' . $match[1] . '-->';
  }
  return '<!--' . $comments[$hash] . '-->';
}

/**
 * Shortens long URLs to http://www.example.com/long/url…
 *
 * The maximum length is remembered in a static variable: passing $length
 * stores it for this and all later calls. While no length has been stored,
 * the text is returned unchanged.
 *
 * @param $text
 *   The text to shorten.
 * @param $length
 *   (optional) The maximum length to store and apply. Defaults to NULL, which
 *   leaves the stored length as it is.
 *
 * @return
 *   The text, truncated with Unicode::truncate() if a length is stored.
 */
function _filter_url_trim($text, $length = NULL) {
  static $max_length;

  if ($length !== NULL) {
    $max_length = $length;
  }

  // Nothing has been configured yet, so there is nothing to shorten.
  if (!isset($max_length)) {
    return $text;
  }

  return Unicode::truncate($text, $max_length, FALSE, TRUE);
}

Dries's avatar
Dries committed
746
/**
 * Converts line breaks into <p> and <br> in an intelligent fashion.
 *
 * Based on: http://photomatt.net/scripts/autop
 *
 * The text is split at PRE, SCRIPT, STYLE, OBJECT and IFRAME tags and at HTML
 * comments. Everything between an opening tag of that set and its matching
 * closing tag is passed through untouched; all other text gets paragraph and
 * line break markup.
 *
 * @param $text
 *   The text to process.
 *
 * @return
 *   The text with <p> and <br /> tags inserted.
 */
function _filter_autop($text) {
  // All block level tags
  $block = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|input|p|h[1-6]|fieldset|legend|hr|article|aside|details|figcaption|figure|footer|header|hgroup|menu|nav|section|summary)';

  // Split at opening and closing PRE, SCRIPT, STYLE, OBJECT, IFRAME tags
  // and comments. We don't apply any processing to the contents of these tags
  // to avoid messing up code. We look for matched pairs and allow basic
  // nesting. For example:
  // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
  $chunks = preg_split('@(<!--.*?-->|</?(?:pre|script|style|object|iframe|!--)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting NULL as required).
  $ignore = FALSE;
  $ignoretag = '';
  $output = '';
  foreach ($chunks as $i => $chunk) {
    if ($i % 2) {
      // Odd chunks are the delimiters: a comment or one of the split tags.
      if (substr($chunk, 0, 4) == '<!--') {
        // Nothing to do, this is a comment.
        $output .= $chunk;
        continue;
      }
      // Opening or closing tag?
      $open = ($chunk[1] != '/');
      // Skip '<' or '</' and cut the tag name off at the first whitespace or
      // '>'. Any whitespace may follow the name, not just a space.
      list($tag) = preg_split('/[\s>]/', substr($chunk, $open ? 1 : 2), 2);
      // The split above is case-insensitive, so the name comparison has to be
      // too; otherwise "<PRE>" is never closed by "</pre>".
      $tag = strtolower($tag);
      if (!$ignore) {
        if ($open) {
          $ignore = TRUE;
          $ignoretag = $tag;
        }
      }
      // Only allow a matching tag to close it.
      elseif (!$open && $ignoretag === $tag) {
        $ignore = FALSE;
        $ignoretag = '';
      }
    }
    elseif (!$ignore) {
      // Just to make things a little easier, pad the end.
      $chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n";
      $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
      // Space things out a little.
      $chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk);
      $chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk);
      // Take care of duplicates.
      $chunk = preg_replace("/\n\n+/", "\n\n", $chunk);
      $chunk = preg_replace('/^\n|\n\s*\n$/', '', $chunk);
      // Make paragraphs, including one at the end.
      $chunk = '<p>' . preg_replace('/\n\s*\n\n?(.)/', "</p>\n<p>$1", $chunk) . "</p>\n";
      // Problem with nested lists.
      $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk);
      $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
      $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
      // Under certain strange conditions it could create a P of entirely
      // whitespace.
      $chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk);
      $chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1", $chunk);
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1", $chunk);
      // Make line breaks.
      $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk);
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1", $chunk);
      $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)>)!', '$1', $chunk);
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
    }
    $output .= $chunk;
  }
  return $output;
}

813 814 815 816
/**
 * Escapes all HTML tags, so they will be visible instead of being effective.
 *
 * @param $text
 *   The text to escape.
 *
 * @return
 *   The text passed through String::checkPlain(), with leading and trailing
 *   whitespace removed.
 */
function _filter_html_escape($text) {
  $escaped = String::checkPlain($text);
  return trim($escaped);
}

820 821 822 823 824 825
/**
 * Process callback for local image filter.
 *
 * Rewrites the src of every <img> element to a relative URL and hands each
 * image whose source is not a readable local image file to
 * hook_filter_secure_image_alter().
 *
 * @param $text
 *   The HTML text to process.
 *
 * @return
 *   The serialized HTML after processing.
 */
function _filter_html_image_secure_process($text) {
  // Find the path (e.g. '/') to Drupal root.
  $base_path = base_path();
  $base_path_length = Unicode::strlen($base_path);

  // Find the directory on the server where index.php resides.
  $local_dir = \Drupal::root() . '/';

  $html_dom = Html::load($text);
  foreach ($html_dom->getElementsByTagName('img') as $image) {
    // Transform absolute image URLs to relative image URLs: prevent problems on
    // multisite set-ups and prevent mixed content errors.
    $original_src = $image->getAttribute('src');
    $image->setAttribute('src', file_url_transform_relative($original_src));

    // Verify that $src starts with $base_path.
    // This also ensures that external images cannot be referenced.
    $src = $image->getAttribute('src');
    $is_below_base_path = Unicode::substr($src, 0, $base_path_length) === $base_path;

    // Remove the $base_path to get the path relative to the Drupal root.
    // Ensure the path refers to an actual image by prefixing the image source
    // with the Drupal root and running getimagesize() on it. The short-circuit
    // keeps getimagesize() from running for sources outside the base path.
    if ($is_below_base_path && @getimagesize(rawurldecode($local_dir . Unicode::substr($src, $base_path_length)))) {
      // The image has the right path. Erroneous images are dealt with below.
      continue;
    }

    // Allow modules and themes to replace an invalid image with an error
    // indicator. See filter_filter_secure_image_alter().
    \Drupal::moduleHandler()->alter('filter_secure_image', $image);
  }

  return Html::serialize($html_dom);
}

/**
 * Implements hook_filter_secure_image_alter().
 *
 * Formats an image DOM element that has an invalid source.
 *
 * @param DOMElement $image
 *   An IMG node to format, parsed from the filtered text.
 *
 * @see _filter_html_image_secure_process()
 */
function filter_filter_secure_image_alter(&$image) {
  // Turn an invalid image into an error indicator.
  $image->setAttribute('src', base_path() . 'core/misc/icons/ea2800/error.svg');
  $image->setAttribute('alt', t('Image removed.'));
  $image->setAttribute('title', t('This image has been removed. For security reasons, only images from the local domain are allowed.'));
  $image->setAttribute('height', '16');
  $image->setAttribute('width', '16');

  // Add a CSS class to aid in styling, keeping any classes already present.
  $existing_class = $image->getAttribute('class');
  $prefix = '';
  if ($existing_class) {
    $prefix = trim($existing_class) . ' ';
  }
  $image->setAttribute('class', $prefix . 'filter-image-invalid');
}

Dries's avatar
Dries committed
885
/**
 * @} End of "defgroup standard_filters".
 */