FilterUnitTest.php 50.3 KB
Newer Older
1 2 3 4
<?php

/**
 * @file
 * Contains \Drupal\filter\Tests\FilterUnitTest.
 */

namespace Drupal\filter\Tests;

10
use Drupal\Component\Utility\Html;
11
use Drupal\Core\Language\Language;
12
use Drupal\Core\Render\RenderContext;
13 14
use Drupal\editor\EditorXssFilter\Standard;
use Drupal\filter\Entity\FilterFormat;
15
use Drupal\filter\FilterPluginCollection;
16
use Drupal\simpletest\KernelTestBase;
17 18

/**
 * Tests Filter module filters individually.
 *
 * @group filter
 */
class FilterUnitTest extends KernelTestBase {
24 25 26 27 28 29

  /**
   * Modules to enable.
   *
   * @var array
   */
  public static $modules = array('system', 'filter');
31

32 33 34 35 36
  /**
   * Filter plugin instances exercised by the tests.
   *
   * Assigned in setUp() and looked up by filter plugin ID in the test methods
   * (for example 'filter_html' or 'filter_caption').
   *
   * @var \Drupal\filter\Plugin\FilterInterface[]
   */
  protected $filters;

37 38
  /**
   * {@inheritdoc}
   *
   * Installs the system module's configuration and builds the collection of
   * filter plugins that the individual test methods pull from $this->filters.
   */
  protected function setUp() {
    parent::setUp();
    $this->installConfig(array('system'));

    // Build the plugin collection with an empty configuration array; tests
    // that need specific settings call setConfiguration() on the plugin.
    $manager = $this->container->get('plugin.manager.filter');
    $bag = new FilterPluginCollection($manager, array());
    $this->filters = $bag->getAll();
  }

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
  /**
   * Tests the align filter.
   *
   * Runs markup through the 'filter_align' plugin and compares the processed
   * text with the expected markup: the three allowed data-align values are
   * expected to become an align-* class, while a missing, empty or disallowed
   * value must not produce any class.
   */
  public function testAlignFilter() {
    $filter = $this->filters['filter_align'];

    // Each case is array(input, expected processed text). The cases are
    // evaluated in the order listed.
    $cases = array(
      // No data-align attribute: the markup is returned untouched.
      array('<img src="llama.jpg" />', '<img src="llama.jpg" />'),

      // Data-align attribute: all 3 allowed values.
      array('<img src="llama.jpg" data-align="left" />', '<img src="llama.jpg" class="align-left" />'),
      array('<img src="llama.jpg" data-align="center" />', '<img src="llama.jpg" class="align-center" />'),
      array('<img src="llama.jpg" data-align="right" />', '<img src="llama.jpg" class="align-right" />'),

      // Data-align attribute: a disallowed value.
      array('<img src="llama.jpg" data-align="left foobar" />', '<img src="llama.jpg" />'),

      // Empty data-align attribute.
      array('<img src="llama.jpg" data-align="" />', '<img src="llama.jpg" />'),

      // Ensure the filter also works with uncommon yet valid attribute
      // quoting.
      array('<img src=llama.jpg data-align=right />', '<img src="llama.jpg" class="align-right" />'),

      // Security test: attempt to inject an additional class.
      array('<img src="llama.jpg" data-align="center another-class-here" />', '<img src="llama.jpg" />'),

      // Security test: attempt an XSS.
      array('<img src="llama.jpg" data-align="center \'onclick=\'alert(foo);" />', '<img src="llama.jpg" />'),
    );

    foreach ($cases as $case) {
      list($input, $expected) = $case;
      $this->assertIdentical($expected, $filter->process($input, 'und')->getProcessedText());
    }
  }

101 102 103 104
  /**
   * Tests the caption filter.
   *
   * Covers three things:
   * - the 'filter_caption' plugin on its own (processed text plus the
   *   'filter/caption' library attachment),
   * - the caption filter applied after the 'filter_html' plugin,
   * - the editor XSS filter (Standard::filterXss()) on the same inputs.
   */
  public function testCaptionFilter() {
    /** @var \Drupal\Core\Render\RendererInterface $renderer */
    $renderer = \Drupal::service('renderer');
    $filter = $this->filters['filter_caption'];

    // Process the input with the caption filter inside a render context.
    $test = function($input) use ($filter, $renderer) {
      return $renderer->executeInRenderContext(new RenderContext(), function () use ($input, $filter) {
        return $filter->process($input, 'und');
      });
    };

    // Attachments expected whenever a caption is actually rendered.
    $attached_library = array(
      'library' => array(
        'filter/caption',
      ),
    );

    // No data-caption attribute.
    $input = '<img src="llama.jpg" />';
    $expected = $input;
    $this->assertIdentical($expected, $test($input)->getProcessedText());

    // Data-caption attribute.
    $input = '<img src="llama.jpg" data-caption="Loquacious llama!" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // Empty data-caption attribute.
    $input = '<img src="llama.jpg" data-caption="" />';
    $expected = '<img src="llama.jpg" />';
    $this->assertIdentical($expected, $test($input)->getProcessedText());

    // HTML entities in the caption.
    $input = '<img src="llama.jpg" data-caption="&ldquo;Loquacious llama!&rdquo;" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>“Loquacious llama!”</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // HTML encoded as HTML entities in data-caption attribute.
    $input = '<img src="llama.jpg" data-caption="&lt;em&gt;Loquacious llama!&lt;/em&gt;" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption><em>Loquacious llama!</em></figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // HTML (not encoded as HTML entities) in data-caption attribute, which is
    // not allowed by the HTML spec, but may happen when people manually write
    // HTML, so we explicitly support it.
    $input = '<img src="llama.jpg" data-caption="<em>Loquacious llama!</em>" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption><em>Loquacious llama!</em></figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // Security test: attempt an XSS.
    $input = '<img src="llama.jpg" data-caption="<script>alert(\'Loquacious llama!\')</script>" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>alert(\'Loquacious llama!\')</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // Ensure the filter also works with uncommon yet valid attribute quoting.
    $input = '<img src=llama.jpg data-caption=\'Loquacious llama!\' />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // Finally, ensure that this also works on any other tag.
    $input = '<video src="llama.jpg" data-caption="Loquacious llama!" />';
    $expected = '<figure role="group"><video src="llama.jpg"></video><figcaption>Loquacious llama!</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());
    $input = '<foobar data-caption="Loquacious llama!">baz</foobar>';
    $expected = '<figure role="group"><foobar>baz</foobar><figcaption>Loquacious llama!</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());

    // So far we've tested that the caption filter works correctly. But we also
    // want to make sure that it works well in tandem with the "Limit allowed
    // HTML tags" filter, which it is typically used with.
    $html_filter = $this->filters['filter_html'];
    $html_filter->setConfiguration(array(
      'settings' => array(
        'allowed_html' => '<img src data-align data-caption>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 0,
      )
    ));
    $test_with_html_filter = function ($input) use ($filter, $html_filter, $renderer) {
      return $renderer->executeInRenderContext(new RenderContext(), function () use ($input, $filter, $html_filter) {
        // 1. Apply HTML filter's processing step.
        $output = $html_filter->process($input, 'und');
        // 2. Apply caption filter's processing step.
        $output = $filter->process($output, 'und');
        return $output->getProcessedText();
      });
    };
    // Editor XSS filter.
    $test_editor_xss_filter = function ($input) {
      $dummy_filter_format = FilterFormat::create();
      return Standard::filterXss($input, $dummy_filter_format);
    };

    // All the tricky cases encountered at https://www.drupal.org/node/2105841.
    // Each case is array(input, expected output of HTML + caption filters);
    // the editor XSS filter must leave every one of these inputs unchanged.
    $tricky_cases = array(
      // A plain URL preceded by text.
      array(
        '<img data-caption="See https://www.drupal.org" src="llama.jpg" />',
        '<figure role="group"><img src="llama.jpg" /><figcaption>See https://www.drupal.org</figcaption></figure>',
      ),
      // An anchor.
      array(
        '<img data-caption="This is a &lt;a href=&quot;https://www.drupal.org&quot;&gt;quick&lt;/a&gt; test…" src="llama.jpg" />',
        '<figure role="group"><img src="llama.jpg" /><figcaption>This is a <a href="https://www.drupal.org">quick</a> test…</figcaption></figure>',
      ),
      // A plain URL surrounded by parentheses.
      array(
        '<img data-caption="(https://www.drupal.org)" src="llama.jpg" />',
        '<figure role="group"><img src="llama.jpg" /><figcaption>(https://www.drupal.org)</figcaption></figure>',
      ),
      // A source being credited.
      array(
        '<img data-caption="Source: Wikipedia" src="llama.jpg" />',
        '<figure role="group"><img src="llama.jpg" /><figcaption>Source: Wikipedia</figcaption></figure>',
      ),
      // A source being credited, without a space after the colon.
      array(
        '<img data-caption="Source:Wikipedia" src="llama.jpg" />',
        '<figure role="group"><img src="llama.jpg" /><figcaption>Source:Wikipedia</figcaption></figure>',
      ),
      // A pretty crazy edge case where we have two colons.
      array(
        '<img data-caption="Interesting (Scope resolution operator ::)" src="llama.jpg" />',
        '<figure role="group"><img src="llama.jpg" /><figcaption>Interesting (Scope resolution operator ::)</figcaption></figure>',
      ),
    );
    foreach ($tricky_cases as $case) {
      list($input, $expected) = $case;
      $this->assertIdentical($expected, $test_with_html_filter($input));
      $this->assertIdentical($input, $test_editor_xss_filter($input));
    }

    // An evil anchor (to ensure XSS filtering is applied to the caption also).
    $input = '<img data-caption="This is an &lt;a href=&quot;javascript:alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>This is an <a href="alert();">evil</a> test…</figcaption></figure>';
    $this->assertIdentical($expected, $test_with_html_filter($input));
    $expected_xss_filtered = '<img data-caption="This is an &lt;a href=&quot;alert();&quot;&gt;evil&lt;/a&gt; test…" src="llama.jpg" />';
    $this->assertIdentical($expected_xss_filtered, $test_editor_xss_filter($input));
  }
257

258 259 260 261
  /**
   * Tests the combination of the align and caption filters.
   *
   * The align filter runs first and its result is passed to the caption
   * filter; the expected markup carries the alignment class on the <figure>
   * wrapper rather than on the image.
   */
  public function testAlignAndCaptionFilters() {
    /** @var \Drupal\Core\Render\RendererInterface $renderer */
    $renderer = \Drupal::service('renderer');
    $align_filter = $this->filters['filter_align'];
    $caption_filter = $this->filters['filter_caption'];

    // Apply align, then caption, inside a render context.
    $test = function($input) use ($align_filter, $caption_filter, $renderer) {
      return $renderer->executeInRenderContext(new RenderContext(), function () use ($input, $align_filter, $caption_filter) {
        return $caption_filter->process($align_filter->process($input, 'und'), 'und');
      });
    };

    $attached_library = array(
      'library' => array(
        'filter/caption',
      ),
    );

    // Both data-caption and data-align attributes: all 3 allowed values for the
    // data-align attribute.
    foreach (array('left', 'center', 'right') as $alignment) {
      $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="' . $alignment . '" />';
      $expected = '<figure role="group" class="align-' . $alignment . '"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
      $output = $test($input);
      $this->assertIdentical($expected, $output->getProcessedText());
      $this->assertIdentical($attached_library, $output->getAttachments());
    }

    // Both data-caption and data-align attributes, but a disallowed data-align
    // attribute value.
    $input = '<img src="llama.jpg" data-caption="Loquacious llama!" data-align="left foobar" />';
    $expected = '<figure role="group"><img src="llama.jpg" /><figcaption>Loquacious llama!</figcaption></figure>';
    $output = $test($input);
    $this->assertIdentical($expected, $output->getProcessedText());
    $this->assertIdentical($attached_library, $output->getAttachments());
  }

306
  /**
   * Tests the line break filter.
   *
   * The $tests array maps an input string to a list of output fragments, each
   * flagged TRUE or FALSE, and is handed to assertFilteredString().
   * NOTE(review): that helper is defined outside this excerpt; presumably TRUE
   * means the fragment must occur in the filtered output and FALSE that it
   * must not. Confirm against the helper.
   *
   * Afterwards a very long single-word string is passed straight to
   * _filter_autop() to check that it is still wrapped in one paragraph.
   */
  public function testLineBreakFilter() {
    // Get FilterAutoP object.
    $filter = $this->filters['filter_autop'];

    // Since the line break filter naturally needs plenty of newlines in test
    // strings and expectations, we're using "\n" instead of regular newlines
    // here.
    $tests = array(
      // Single line breaks should be changed to <br /> tags, while paragraphs
      // separated with double line breaks should be enclosed with <p></p> tags.
      "aaa\nbbb\n\nccc" => array(
        "<p>aaa<br />\nbbb</p>\n<p>ccc</p>" => TRUE,
      ),
      // Skip contents of certain block tags entirely.
      "<script>aaa\nbbb\n\nccc</script>
<style>aaa\nbbb\n\nccc</style>
<pre>aaa\nbbb\n\nccc</pre>
<object>aaa\nbbb\n\nccc</object>
<iframe>aaa\nbbb\n\nccc</iframe>
" => array(
        "<script>aaa\nbbb\n\nccc</script>" => TRUE,
        "<style>aaa\nbbb\n\nccc</style>" => TRUE,
        "<pre>aaa\nbbb\n\nccc</pre>" => TRUE,
        "<object>aaa\nbbb\n\nccc</object>" => TRUE,
        "<iframe>aaa\nbbb\n\nccc</iframe>" => TRUE,
      ),
      // Skip comments entirely.
      "One. <!-- comment --> Two.\n<!--\nThree.\n-->\n" => array(
        '<!-- comment -->' => TRUE,
        "<!--\nThree.\n-->" => TRUE,
      ),
      // Resulting HTML should produce matching paragraph tags.
      '<p><div>  </div></p>' => array(
        "<p>\n<div>  </div>\n</p>" => TRUE,
      ),
      '<div><p>  </p></div>' => array(
        "<div>\n</div>" => TRUE,
      ),
      '<blockquote><pre>aaa</pre></blockquote>' => array(
        "<blockquote><pre>aaa</pre></blockquote>" => TRUE,
      ),
      "<pre>aaa\nbbb\nccc</pre>\nddd\neee" => array(
        "<pre>aaa\nbbb\nccc</pre>" => TRUE,
        "<p>ddd<br />\neee</p>" => TRUE,
      ),
      // Comments remain unchanged and subsequent lines/paragraphs are
      // transformed normally.
      "aaa<!--comment-->\n\nbbb\n\nccc\n\nddd<!--comment\nwith linebreak-->\n\neee\n\nfff" => array(
        "<p>aaa</p>\n<!--comment--><p>\nbbb</p>\n<p>ccc</p>\n<p>ddd</p>" => TRUE,
        "<!--comment\nwith linebreak--><p>\neee</p>\n<p>fff</p>" => TRUE,
      ),
      // Check that a comment in a PRE will result that the text after
      // the comment, but still in PRE, is not transformed.
      "<pre>aaa\nbbb<!-- comment -->\n\nccc</pre>\nddd" => array(
        "<pre>aaa\nbbb<!-- comment -->\n\nccc</pre>" => TRUE,
      ),
      // Bug 810824, paragraphs were appearing around iframe tags.
      "<iframe>aaa</iframe>\n\n" => array(
        "<p><iframe>aaa</iframe></p>" => FALSE,
      ),
    );
    $this->assertFilteredString($filter, $tests);

    // Very long string hitting PCRE limits. ini_get() returns strings, so cast
    // to integers to get a numeric maximum and an integer length.
    $limit = max((int) ini_get('pcre.backtrack_limit'), (int) ini_get('pcre.recursion_limit'));
    $source = $this->randomMachineName($limit);
    $result = _filter_autop($source);
    $success = $this->assertEqual($result, '<p>' . $source . "</p>\n", 'Line break filter can process very long strings.');
    if (!$success) {
      // Dump both strings to the verbose output to help debugging.
      $this->verbose("\n" . $source . "\n<hr />\n" . $result);
    }
  }


  /**
   * Tests filter settings, defaults, access restrictions and similar.
   *
   * Configures the 'filter_html' plugin with several 'allowed_html' settings
   * and checks which tags, attributes and attribute values survive
   * processing.
   *
   * @todo This is for functions like filter_filter and check_markup, whose
   *   functionality is not completely focused on filtering. Some ideas:
   *   restricting formats according to user permissions, proper cache
   *   handling, defaults -- allowed tags/attributes/protocols.
   *
   * @todo It is possible to add script, iframe etc. to allowed tags, but this
   *   makes HTML filter completely ineffective.
   *
   * @todo Class, id, name and xmlns should be added to disallowed attributes,
   *   or better a whitelist approach should be used for that too.
   */
  public function testHtmlFilter() {
    // Get FilterHtml object.
    $filter = $this->filters['filter_html'];
    $filter->setConfiguration(array(
      'settings' => array(
        'allowed_html' => '<a> <p> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 0,
      )
    ));

    // Tags that are not in the allowed list must be stripped entirely. The
    // first four cannot be secured by the HTML filter and should never be
    // allowed; img and input make CSRF attacks easier, so allowing them is
    // left as an explicit decision for the site builder.
    $stripped_tags = array(
      'script' => 'HTML filter should remove script tags.',
      'iframe' => 'HTML filter should remove iframe tags.',
      'object' => 'HTML filter should remove object tags.',
      'style' => 'HTML filter should remove style tags.',
      'img' => 'HTML filter should remove img tags by default.',
      'input' => 'HTML filter should remove input tags by default.',
    );
    foreach ($stripped_tags as $tag => $message) {
      $f = (string) $filter->process('<' . $tag . ' />', Language::LANGCODE_NOT_SPECIFIED);
      $this->assertIdentical($f, '', $message);
    }

    // Filtering the content of some attributes is infeasible, so these should
    // not be allowed either.
    $f = (string) $filter->process('<p style="display: none;" />', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNoNormalized($f, 'style', 'HTML filter should remove style attributes.');
    $this->assertIdentical($f, '<p></p>');

    $f = (string) $filter->process('<p onerror="alert(0);"></p>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove on* attributes.');
    $this->assertIdentical($f, '<p></p>');

    $f = (string) $filter->process('<code onerror>&nbsp;</code>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNoNormalized($f, 'onerror', 'HTML filter should remove empty on* attributes.');
    // Note - the expected string contains a decoded &nbsp; character. It is
    // written as its UTF-8 byte sequence so that the otherwise invisible
    // character cannot be silently replaced by a regular space.
    $this->assertIdentical($f, "<code>\xc2\xa0</code>");

    $f = (string) $filter->process('<br>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNormalized($f, '<br />', 'HTML filter should allow line breaks.');

    $f = (string) $filter->process('<br />', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNormalized($f, '<br />', 'HTML filter should allow self-closing line breaks.');

    // All attributes of whitelisted tags are stripped by default.
    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNormalized($f, '<a>link</a>', 'HTML filter should remove attributes that are not explicitly allowed.');

    // Now whitelist the "llama" attribute on <a>.
    $filter->setConfiguration(array(
      'settings' => array(
        'allowed_html' => '<a href llama> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 0,
      )
    ));
    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNormalized($f, '<a llama="awesome">link</a>', 'HTML filter keeps explicitly allowed attributes, and removes attributes that are not explicitly allowed.');

    // Restrict the whitelisted "llama" attribute on <a> to only allow the value
    // "majestical", or "epic".
    $filter->setConfiguration(array(
      'settings' => array(
        'allowed_html' => '<a href llama="majestical epic"> <em> <strong> <cite> <blockquote> <code> <ul> <ol> <li> <dl> <dt> <dd> <br>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 0,
      )
    ));
    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertIdentical($f, '<a>link</a>', 'HTML filter removes allowed attributes that do not have an explicitly allowed value.');
    $f = (string) $filter->process('<a kitten="cute" llama="majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertIdentical($f, '<a llama="majestical">link</a>', 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.');
    $f = (string) $filter->process('<a kitten="cute" llama="awesome">link</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNormalized($f, '<a>link</a>', 'HTML filter removes allowed attributes that have a not explicitly allowed value.');
    $f = (string) $filter->process('<a href="/beautiful-animals" kitten="cute" llama="epic majestical">link</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertIdentical($f, '<a href="/beautiful-animals" llama="epic majestical">link</a>', 'HTML filter keeps explicitly allowed attributes with an attribute value that is also explicitly allowed.');
  }

  /**
   * Tests the spam deterrent.
   *
   * Enables 'filter_html_nofollow' on the 'filter_html' plugin and checks that
   * rel="nofollow" ends up on links, including for inputs crafted to evade
   * it.
   */
  public function testNoFollowFilter() {
    // Get FilterHtml object.
    $filter = $this->filters['filter_html'];
    $filter->setConfiguration(array(
      'settings' => array(
        'allowed_html' => '<a href>',
        'filter_html_help' => 1,
        'filter_html_nofollow' => 1,
      )
    ));

    // Test if the rel="nofollow" attribute is added, even if we try to prevent
    // it. Each case is array(input, assertion message).
    $cases = array(
      array('<a href="http://www.example.com/">text</a>', 'Spam deterrent -- no evasion.'),
      array('<A href="http://www.example.com/">text</a>', 'Spam deterrent evasion -- capital A.'),
      array("<a/href=\"http://www.example.com/\">text</a>", 'Spam deterrent evasion -- non whitespace character after tag name.'),
      // Double quotes are required here so that \0 is a real NUL byte.
      array("<\0a\0 href=\"http://www.example.com/\">text</a>", 'Spam deterrent evasion -- some nulls.'),
    );
    foreach ($cases as $case) {
      list($input, $message) = $case;
      $f = (string) $filter->process($input, Language::LANGCODE_NOT_SPECIFIED);
      $this->assertNormalized($f, 'rel="nofollow"', $message);
    }

    // An explicit rel="follow" must be replaced rather than kept.
    $f = (string) $filter->process('<a href="http://www.example.com/" rel="follow">text</a>', Language::LANGCODE_NOT_SPECIFIED);
    $this->assertNoNormalized($f, 'rel="follow"', 'Spam deterrent evasion -- with rel set - rel="follow" removed.');
    $this->assertNormalized($f, 'rel="nofollow"', 'Spam deterrent evasion -- with rel set - rel="nofollow" added.');
  }

  /**
   * Tests the HTML escaping filter.
   *
   * Passes one input with leading/trailing whitespace, a comment, quotes and
   * markup to assertFilteredString() together with fragments flagged TRUE or
   * FALSE.
   * NOTE(review): the helper is defined outside this excerpt; presumably TRUE
   * fragments must be present in the output and FALSE fragments absent.
   */
  public function testHtmlEscapeFilter() {
    // Get FilterHtmlEscape object.
    $filter = $this->filters['filter_html_escape'];

    $tests = array(
      "   One. <!-- \"comment\" --> Two'.\n<p>Three.</p>\n    " => array(
        "One. &lt;!-- &quot;comment&quot; --&gt; Two&#039;.\n&lt;p&gt;Three.&lt;/p&gt;" => TRUE,
        '   One.' => FALSE,
        "</p>\n    " => FALSE,
      ),
    );
    $this->assertFilteredString($filter, $tests);
  }

  /**
   * Tests the URL filter.
   */
  function testUrlFilter() {
538 539
    // Get FilterUrl object.
    $filter = $this->filters['filter_url'];
540
    $filter->setConfiguration(array(
541 542 543 544 545
      'settings' => array(
        'filter_url_length' => 496,
      )
    ));

546 547 548 549
    // @todo Possible categories:
    // - absolute, mail, partial
    // - characters/encoding, surrounding markup, security

550
    // Create a email that is too long.
551 552
    $long_email = str_repeat('a', 254) . '@example.com';
    $too_long_email = str_repeat('b', 255) . '@example.com';
553
    $email_with_plus_sign = 'one+two@example.com';
554 555 556 557 558 559 560 561 562 563 564 565

    // Filter selection/pattern matching.
    $tests = array(
      // HTTP URLs.
      '
http://example.com or www.example.com
' => array(
        '<a href="http://example.com">http://example.com</a>' => TRUE,
        '<a href="http://www.example.com">www.example.com</a>' => TRUE,
      ),
      // MAILTO URLs.
      '
566
person@example.com or mailto:person2@example.com or ' . $email_with_plus_sign . ' or ' . $long_email . ' but not ' . $too_long_email . '
567 568 569 570 571
' => array(
        '<a href="mailto:person@example.com">person@example.com</a>' => TRUE,
        '<a href="mailto:person2@example.com">mailto:person2@example.com</a>' => TRUE,
        '<a href="mailto:' . $long_email . '">' . $long_email . '</a>' => TRUE,
        '<a href="mailto:' . $too_long_email . '">' . $too_long_email . '</a>' => FALSE,
572
        '<a href="mailto:' . $email_with_plus_sign . '">' . $email_with_plus_sign . '</a>' => TRUE,
573 574 575 576 577 578
      ),
      // URI parts and special characters.
      '
http://trailingslash.com/ or www.trailingslash.com/
http://host.com/some/path?query=foo&bar[baz]=beer#fragment or www.host.com/some/path?query=foo&bar[baz]=beer#fragment
http://twitter.com/#!/example/status/22376963142324226
579
http://example.com/@user/
580 581 582 583 584 585 586 587 588
ftp://user:pass@ftp.example.com/~home/dir1
sftp://user@nonstandardport:222/dir
ssh://192.168.0.100/srv/git/drupal.git
' => array(
        '<a href="http://trailingslash.com/">http://trailingslash.com/</a>' => TRUE,
        '<a href="http://www.trailingslash.com/">www.trailingslash.com/</a>' => TRUE,
        '<a href="http://host.com/some/path?query=foo&amp;bar[baz]=beer#fragment">http://host.com/some/path?query=foo&amp;bar[baz]=beer#fragment</a>' => TRUE,
        '<a href="http://www.host.com/some/path?query=foo&amp;bar[baz]=beer#fragment">www.host.com/some/path?query=foo&amp;bar[baz]=beer#fragment</a>' => TRUE,
        '<a href="http://twitter.com/#!/example/status/22376963142324226">http://twitter.com/#!/example/status/22376963142324226</a>' => TRUE,
589
        '<a href="http://example.com/@user/">http://example.com/@user/</a>' => TRUE,
590 591 592 593
        '<a href="ftp://user:pass@ftp.example.com/~home/dir1">ftp://user:pass@ftp.example.com/~home/dir1</a>' => TRUE,
        '<a href="sftp://user@nonstandardport:222/dir">sftp://user@nonstandardport:222/dir</a>' => TRUE,
        '<a href="ssh://192.168.0.100/srv/git/drupal.git">ssh://192.168.0.100/srv/git/drupal.git</a>' => TRUE,
      ),
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
      // International Unicode characters.
      '
http://пример.испытание/
http://مثال.إختبار/
http://例子.測試/
http://12345.中国/
http://例え.テスト/
http://dréißig-bücher.de/
http://méxico-mañana.es/
' => array(
        '<a href="http://пример.испытание/">http://пример.испытание/</a>' => TRUE,
        '<a href="http://مثال.إختبار/">http://مثال.إختبار/</a>' => TRUE,
        '<a href="http://例子.測試/">http://例子.測試/</a>' => TRUE,
        '<a href="http://12345.中国/">http://12345.中国/</a>' => TRUE,
        '<a href="http://例え.テスト/">http://例え.テスト/</a>' => TRUE,
        '<a href="http://dréißig-bücher.de/">http://dréißig-bücher.de/</a>' => TRUE,
        '<a href="http://méxico-mañana.es/">http://méxico-mañana.es/</a>' => TRUE,
      ),
612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631
      // Encoding.
      '
http://ampersand.com/?a=1&b=2
http://encoded.com/?a=1&amp;b=2
' => array(
        '<a href="http://ampersand.com/?a=1&amp;b=2">http://ampersand.com/?a=1&amp;b=2</a>' => TRUE,
        '<a href="http://encoded.com/?a=1&amp;b=2">http://encoded.com/?a=1&amp;b=2</a>' => TRUE,
      ),
      // Domain name length.
      '
www.ex.ex or www.example.example or www.toolongdomainexampledomainexampledomainexampledomainexampledomain or
me@me.tv
' => array(
        '<a href="http://www.ex.ex">www.ex.ex</a>' => TRUE,
        '<a href="http://www.example.example">www.example.example</a>' => TRUE,
        'http://www.toolong' => FALSE,
        '<a href="mailto:me@me.tv">me@me.tv</a>' => TRUE,
      ),
      // Absolute URL protocols.
      // The list to test is found in the beginning of _filter_url() at
632
      // $protocols = \Drupal::getContainer()->getParameter('filter_protocols').
633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663
      '
https://example.com,
ftp://ftp.example.com,
news://example.net,
telnet://example,
irc://example.host,
ssh://odd.geek,
sftp://secure.host?,
webcal://calendar,
rtsp://127.0.0.1,
not foo://disallowed.com.
' => array(
        'href="https://example.com"' => TRUE,
        'href="ftp://ftp.example.com"' => TRUE,
        'href="news://example.net"' => TRUE,
        'href="telnet://example"' => TRUE,
        'href="irc://example.host"' => TRUE,
        'href="ssh://odd.geek"' => TRUE,
        'href="sftp://secure.host"' => TRUE,
        'href="webcal://calendar"' => TRUE,
        'href="rtsp://127.0.0.1"' => TRUE,
        'href="foo://disallowed.com"' => FALSE,
        'not foo://disallowed.com.' => TRUE,
      ),
    );
    $this->assertFilteredString($filter, $tests);

    // Surrounding text/punctuation.
    $tests = array(
      '
Partial URL with trailing period www.partial.com.
664
Email with trailing comma person@example.com,
665 666 667
Absolute URL with trailing question http://www.absolute.com?
Query string with trailing exclamation www.query.com/index.php?a=!
Partial URL with 3 trailing www.partial.periods...
668
Email with 3 trailing exclamations@example.com!!!
669
Absolute URL and query string with 2 different punctuation characters (http://www.example.com/q=abc).
670
Partial URL with brackets in the URL as well as surrounded brackets (www.foo.com/more_(than)_one_(parens)).
671 672
Absolute URL with square brackets in the URL as well as surrounded brackets [https://www.drupal.org/?class[]=1]
Absolute URL with quotes "https://www.drupal.org/sample"
673

674 675 676 677 678 679 680 681
' => array(
        'period <a href="http://www.partial.com">www.partial.com</a>.' => TRUE,
        'comma <a href="mailto:person@example.com">person@example.com</a>,' => TRUE,
        'question <a href="http://www.absolute.com">http://www.absolute.com</a>?' => TRUE,
        'exclamation <a href="http://www.query.com/index.php?a=">www.query.com/index.php?a=</a>!' => TRUE,
        'trailing <a href="http://www.partial.periods">www.partial.periods</a>...' => TRUE,
        'trailing <a href="mailto:exclamations@example.com">exclamations@example.com</a>!!!' => TRUE,
        'characters (<a href="http://www.example.com/q=abc">http://www.example.com/q=abc</a>).' => TRUE,
682
        'brackets (<a href="http://www.foo.com/more_(than)_one_(parens)">www.foo.com/more_(than)_one_(parens)</a>).' => TRUE,
683 684
        'brackets [<a href="https://www.drupal.org/?class[]=1">https://www.drupal.org/?class[]=1</a>]' => TRUE,
        'quotes "<a href="https://www.drupal.org/sample">https://www.drupal.org/sample</a>"' => TRUE,
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
      ),
      '
(www.parenthesis.com/dir?a=1&b=2#a)
' => array(
        '(<a href="http://www.parenthesis.com/dir?a=1&amp;b=2#a">www.parenthesis.com/dir?a=1&amp;b=2#a</a>)' => TRUE,
      ),
    );
    $this->assertFilteredString($filter, $tests);

    // Surrounding markup.
    $tests = array(
      '
<p xmlns="www.namespace.com" />
<p xmlns="http://namespace.com">
An <a href="http://example.com" title="Read more at www.example.info...">anchor</a>.
</p>
' => array(
        '<p xmlns="www.namespace.com" />' => TRUE,
        '<p xmlns="http://namespace.com">' => TRUE,
        'href="http://www.namespace.com"' => FALSE,
        'href="http://namespace.com"' => FALSE,
        'An <a href="http://example.com" title="Read more at www.example.info...">anchor</a>.' => TRUE,
      ),
      '
Not <a href="foo">www.relative.com</a> or <a href="http://absolute.com">www.absolute.com</a>
but <strong>http://www.strong.net</strong> or <em>www.emphasis.info</em>
' => array(
        '<a href="foo">www.relative.com</a>' => TRUE,
        'href="http://www.relative.com"' => FALSE,
        '<a href="http://absolute.com">www.absolute.com</a>' => TRUE,
        '<strong><a href="http://www.strong.net">http://www.strong.net</a></strong>' => TRUE,
        '<em><a href="http://www.emphasis.info">www.emphasis.info</a></em>' => TRUE,
      ),
      '
Test <code>using www.example.com the code tag</code>.
' => array(
        'href' => FALSE,
        'http' => FALSE,
      ),
      '
Intro.
<blockquote>
Quoted text linking to www.example.com, written by person@example.com, originating from http://origin.example.com. <code>@see www.usage.example.com or <em>www.example.info</em> bla bla</code>.
</blockquote>

Outro.
' => array(
        'href="http://www.example.com"' => TRUE,
        'href="mailto:person@example.com"' => TRUE,
        'href="http://origin.example.com"' => TRUE,
        'http://www.usage.example.com' => FALSE,
        'http://www.example.info' => FALSE,
        'Intro.' => TRUE,
        'Outro.' => TRUE,
      ),
      '
Unknown tag <x>containing x and www.example.com</x>? And a tag <pooh>beginning with p and containing www.example.pooh with p?</pooh>
' => array(
        'href="http://www.example.com"' => TRUE,
        'href="http://www.example.pooh"' => TRUE,
      ),
      '
<p>Test &lt;br/&gt;: This is a www.example17.com example <strong>with</strong> various http://www.example18.com tags. *<br/>
 It is important www.example19.com to *<br/>test different URLs and http://www.example20.com in the same paragraph. *<br>
HTML www.example21.com soup by person@example22.com can litererally http://www.example23.com contain *img*<img> anything. Just a www.example24.com with http://www.example25.com thrown in. www.example26.com from person@example27.com with extra http://www.example28.com.
' => array(
        'href="http://www.example17.com"' => TRUE,
        'href="http://www.example18.com"' => TRUE,
        'href="http://www.example19.com"' => TRUE,
        'href="http://www.example20.com"' => TRUE,
        'href="http://www.example21.com"' => TRUE,
        'href="mailto:person@example22.com"' => TRUE,
        'href="http://www.example23.com"' => TRUE,
        'href="http://www.example24.com"' => TRUE,
        'href="http://www.example25.com"' => TRUE,
        'href="http://www.example26.com"' => TRUE,
        'href="mailto:person@example27.com"' => TRUE,
        'href="http://www.example28.com"' => TRUE,
      ),
      '
<script>
<!--
  // @see www.example.com
  var exampleurl = "http://example.net";
-->
<!--//--><![CDATA[//><!--
  // @see www.example.com
  var exampleurl = "http://example.net";
//--><!]]>
</script>
' => array(
        'href="http://www.example.com"' => FALSE,
        'href="http://example.net"' => FALSE,
      ),
      '
<style>body {
  background: url(http://example.com/pixel.gif);
}</style>
' => array(
        'href' => FALSE,
      ),
      '
<!-- Skip any URLs like www.example.com in comments -->
' => array(
        'href' => FALSE,
      ),
      '
<!-- Skip any URLs like
www.example.com with a newline in comments -->
' => array(
        'href' => FALSE,
      ),
      '
<!-- Skip any URLs like www.comment.com in comments. <p>Also ignore http://commented.out/markup.</p> -->
' => array(
        'href' => FALSE,
      ),
      '
<dl>
<dt>www.example.com</dt>
<dd>http://example.com</dd>
<dd>person@example.com</dd>
<dt>Check www.example.net</dt>
<dd>Some text around http://www.example.info by person@example.info?</dd>
</dl>
' => array(
        'href="http://www.example.com"' => TRUE,
        'href="http://example.com"' => TRUE,
        'href="mailto:person@example.com"' => TRUE,
        'href="http://www.example.net"' => TRUE,
        'href="http://www.example.info"' => TRUE,
        'href="mailto:person@example.info"' => TRUE,
      ),
      '
<div>www.div.com</div>
<ul>
<li>http://listitem.com</li>
<li class="odd">www.class.listitem.com</li>
</ul>
' => array(
        '<div><a href="http://www.div.com">www.div.com</a></div>' => TRUE,
        '<li><a href="http://listitem.com">http://listitem.com</a></li>' => TRUE,
        '<li class="odd"><a href="http://www.class.listitem.com">www.class.listitem.com</a></li>' => TRUE,
      ),
    );
    $this->assertFilteredString($filter, $tests);

    // URL trimming.
833
    $filter->setConfiguration(array(
834 835 836 837
      'settings' => array(
        'filter_url_length' => 20,
      )
    ));
838 839
    $tests = array(
      'www.trimmed.com/d/ff.ext?a=1&b=2#a1' => array(
840
        '<a href="http://www.trimmed.com/d/ff.ext?a=1&amp;b=2#a1">www.trimmed.com/d/f…</a>' => TRUE,
841 842 843 844 845 846 847 848
      ),
    );
    $this->assertFilteredString($filter, $tests);
  }

  /**
   * Asserts multiple filter output expectations for multiple input strings.
   *
   * Every input string is run through the filter once; each expectation for
   * that input is then checked as a substring match against the processed
   * text. On a failed expectation the source, the filtered result and the
   * (un)expected fragment are additionally written to the verbose output.
   *
   * @param \Drupal\filter\Plugin\FilterInterface $filter
   *   An input filter object.
   * @param array $tests
   *   An associative array, where each key is an arbitrary input string and
   *   each value is again an associative array whose keys are filter output
   *   strings and whose values are Booleans indicating whether the output is
   *   expected or not.
   *
   * For example:
   * @code
   * $tests = array(
   *   'Input string' => array(
   *     '<p>Input string</p>' => TRUE,
   *     'Input string<br' => FALSE,
   *   ),
   * );
   * @endcode
   */
  function assertFilteredString($filter, $tests) {
    foreach ($tests as $source => $tasks) {
      // The second argument of process() is a language code, not the filter
      // itself; 'und' is what the other tests in this class pass.
      $result = $filter->process($source, 'und')->getProcessedText();
      foreach ($tasks as $value => $is_expected) {
        // Placeholders shared by both the "found" and "not found" messages.
        $args = array(
          '@source' => var_export($source, TRUE),
          '@value' => var_export($value, TRUE),
          '@result' => var_export($result, TRUE),
        );
        // Array keys that look like integers are cast to int by PHP; force a
        // string so strpos() always performs a substring search.
        $found = strpos($result, (string) $value) !== FALSE;
        // Not using assertIdentical, since combination with strpos() is hard
        // to grok.
        if ($is_expected) {
          $success = $this->assertTrue($found, format_string('@source: @value found. Filtered result: @result.', $args));
        }
        else {
          $success = $this->assertTrue(!$found, format_string('@source: @value not found. Filtered result: @result.', $args));
        }
        if (!$success) {
          $this->verbose('Source:<pre>' . Html::escape(var_export($source, TRUE)) . '</pre>'
            . '<hr />' . 'Result:<pre>' . Html::escape(var_export($result, TRUE)) . '</pre>'
            . '<hr />' . ($is_expected ? 'Expected:' : 'Not expected:')
            . '<pre>' . Html::escape(var_export($value, TRUE)) . '</pre>'
          );
        }
      }
    }
  }

  /**
   * Tests URL filter on longer content.
   *
   * Filters based on regular expressions should also be tested with a more
   * complex content than just isolated test lines.
   * The most common errors are:
   * - accidental '*' (greedy) match instead of '*?' (minimal) match.
   * - only matching first occurrence instead of all.
   * - newlines not matching '.*'.
   *
   * This test covers:
   * - Document with multiple newlines and paragraphs (two newlines).
   * - Mix of several HTML tags, invalid non-HTML tags, tags to ignore and HTML
   *   comments.
   * - Empty HTML tags (BR, IMG).
   * - Mix of absolute and partial URLs, and email addresses in one content.
   */
  function testUrlFilterContent() {
    // Get FilterUrl object and configure its URL length setting for this
    // document.
    $filter = $this->filters['filter_url'];
    $filter->setConfiguration(array(
      'settings' => array(
        'filter_url_length' => 496,
      ),
    ));

    // The input document and its expected filtered form are stored as fixture
    // files in the filter module's tests directory.
    $path = drupal_get_path('module', 'filter') . '/tests';
    $input = file_get_contents($path . '/filter.url-input.txt');
    $expected = file_get_contents($path . '/filter.url-output.txt');

    $result = _filter_url($input, $filter);
    $this->assertIdentical($result, $expected, 'Complex HTML document was correctly processed.');
  }

  /**
931
   * Tests the HTML corrector filter.
932 933 934 935 936
   *
   * @todo This test could really use some validity checking function.
   */
  function testHtmlCorrectorFilter() {
    // Tag closing.
937
    $f = Html::normalize('<p>text');
938
    $this->assertEqual($f, '<p>text</p>', 'HTML corrector -- tag closing at the end of input.');
939

940
    $f = Html::normalize('<p>text<p><p>text');
941
    $this->assertEqual($f, '<p>text</p><p></p><p>text</p>', 'HTML corrector -- tag closing.');
942

943
    $f = Html::normalize("<ul><li>e1<li>e2");
944
    $this->assertEqual($f, "<ul><li>e1</li><li>e2</li></ul>", 'HTML corrector -- unclosed list tags.');
945

946
    $f = Html::normalize('<div id="d">content');
947
    $this->assertEqual($f, '<div id="d">content</div>', 'HTML corrector -- unclosed tag with attribute.');
948 949

    // XHTML slash for empty elements.
950
    $f = Html::normalize('<hr><br>');
951
    $this->assertEqual($f, '<hr /><br />', 'HTML corrector -- XHTML closing slash.');
952

953
    $f = Html::normalize('<P>test</P>');
954
    $this->assertEqual($f, '<p>test</p>', 'HTML corrector -- Convert uppercased tags to proper lowercased ones.');
955

956
    $f = Html::normalize('<P>test</p>');
957
    $this->assertEqual($f, '<p>test</p>', 'HTML corrector -- Convert uppercased tags to proper lowercased ones.');
958

959
    $f = Html::normalize('test<hr />');
960
    $this->assertEqual($f, 'test<hr />', 'HTML corrector -- Let proper XHTML pass through.');
961

962
    $f = Html::normalize('test<hr/>');
963
    $this->assertEqual($f, 'test<hr />', 'HTML corrector -- Let proper XHTML pass through, but ensure there is a single space before the closing slash.');
964

965
    $f = Html::normalize('test<hr    />');
966
    $this->assertEqual($f, 'test<hr />', 'HTML corrector -- Let proper XHTML pass through, but ensure there are not too many spaces before the closing slash.');
967

968
    $f = Html::normalize('<span class="test" />');
969
    $this->assertEqual($f, '<span class="test"></span>', 'HTML corrector -- Convert XHTML that is properly formed but that would not be compatible with typical HTML user agents.');
970

971
    $f = Html::normalize('test1<br class="test">test2');
972
    $this->assertEqual($f, 'test1<br class="test" />test2', 'HTML corrector -- Automatically close single tags.');
973

974
    $f = Html::normalize('line1<hr>line2');
975
    $this->assertEqual($f, 'line1<hr />line2', 'HTML corrector -- Automatically close single tags.');
976

977
    $f = Html::normalize('line1<HR>line2');
978
    $this->assertEqual($f, 'line1<hr />line2', 'HTML corrector -- Automatically close single tags.');
979

980
    $f = Html::normalize('<img src="http://example.com/test.jpg">test</img>');
981
    $this->assertEqual($f, '<img src="http://example.com/test.jpg" />test', 'HTML corrector -- Automatically close single tags.');
982

983
    $f = Html::normalize('<br></br>');
984
    $this->assertEqual($f, '<br />', "HTML corrector -- Transform empty tags to a single closed tag if the tag's content model is EMPTY.");
985

986
    $f = Html::normalize('<div></div>');
987
    $this->assertEqual($f, '<div></div>', "HTML corrector -- Do not transform empty tags to a single closed tag if the tag's content model is not EMPTY.");
988

989
    $f = Html::normalize('<p>line1<br/><hr/>line2</p>');
990
    $this->assertEqual($f, '<p>line1<br /></p><hr />line2', 'HTML corrector -- Move non-inline elements outside of inline containers.');
991

992
    $f = Html::normalize('<p>line1<div>line2</div></p>');
993
    $this->assertEqual($f, '<p>line1</p><div>line2</div>', 'HTML corrector -- Move non-inline elements outside of inline containers.');
994

995
    $f = Html::normalize('<p>test<p>test</p>\n');
996
    $this->assertEqual($f, '<p>test</p><p>test</p>\n', 'HTML corrector -- Auto-close improperly nested tags.');
997

998
    $f = Html::normalize('<p>Line1<br><STRONG>bold stuff</b>');
999
    $this->assertEqual($f, '<p>Line1<br /><strong>bold stuff</strong></p>', 'HTML corrector -- Properly close unclosed tags, and remove useless closing tags.');
1000

1001
    $f = Html::normalize('test <!-- this is a comment -->');
1002
    $this->assertEqual($f, 'test <!-- this is a comment -->', 'HTML corrector -- Do not touch HTML comments.');
1003

1004
    $f = Html::normalize('test <!--this is a comment-->');
1005
    $this->assertEqual($f, 'test <!--this is a comment-->', 'HTML corrector -- Do not touch HTML comments.');
1006

1007
    $f = Html::normalize('test <!-- comment <p>another
1008 1009 1010 1011
    <strong>multiple</strong> line
    comment</p> -->');
    $this->assertEqual($f, 'test <!-- comment <p>another
    <strong>multiple</strong> line
1012
    comment</p> -->', 'HTML corrector -- Do not touch HTML comments.');
1013

1014
    $f = Html::normalize('test <!-- comment <p>another comment</p> -->');
1015
    $this->assertEqual($f, 'test <!-- comment <p>another comment</p> -->', 'HTML corrector -- Do not touch HTML comments.');
1016

1017
    $f = Html::normalize('test <!--break-->');
1018
    $this->assertEqual($f, 'test <!--break-->', 'HTML corrector -- Do not touch HTML comments.');
1019

1020
    $f = Html::normalize('<p>test\n</p>\n');
1021
    $this->assertEqual($f, '<p>test\n</p>\n', 'HTML corrector -- New-lines are accepted and kept as-is.');
1022

1023
    $f = Html::normalize('<p>دروبال');
1024
    $this->assertEqual($f, '<p>دروبال</p>', 'HTML corrector -- Encoding is correctly kept.');
1025

1026
    $f = Html::normalize('<script>alert("test")</script>');
1027
    $this->assertEqual($f, '<script>
1028 1029 1030
<!--//--><![CDATA[// ><!--
alert("test")
//--><!]]>
1031
</script>', 'HTML corrector -- CDATA added to script element');
1032

1033
    $f = Html::normalize('<p><script>alert("test")</script></p>');
1034
    $this->assertEqual($f, '<p><script>
1035 1036 1037
<!--//--><![CDATA[// ><!--
alert("test")
//--><!]]>
1038
</script></p>', 'HTML corrector -- CDATA added to a nested script element');
1039

1040
    $f = Html::normalize('<p><style> /* Styling */ body {color:red}</style></p>');
1041 1042 1043 1044
    $this->assertEqual($f, '<p><style>
<!--/*--><![CDATA[/* ><!--*/
 /* Styling */ body {color:red}
/*--><!]]>*/
1045
</style></p>', 'HTML corrector -- CDATA added to a style element.');
1046

1047
    $filtered_data = Html::normalize('<p><style>
1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
/*<![CDATA[*/
/* Styling */
body {color:red}
/*]]>*/
</style></p>');
    $this->assertEqual($filtered_data, '<p><style>
<!--/*--><![CDATA[/* ><!--*/

/*<![CDATA[*/
/* Styling */
body {color:red}
/*]]]]><![CDATA[>*/

/*--><!]]>*/
</style></p>',
1063
      format_string('HTML corrector -- Existing cdata section @pattern_name properly escaped', array('@pattern_name' => '/*<![CDATA[*/'))
1064 1065
    );

1066
    $filtered_data = Html::normalize('<p><style>
1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081
  <!--/*--><![CDATA[/* ><!--*/
  /* Styling */
  body {color:red}
  /*--><!]]>*/
</style></p>');
    $this->assertEqual($filtered_data, '<p><style>
<!--/*--><![CDATA[/* ><!--*/

  <!--/*--><![CDATA[/* ><!--*/
  /* Styling */
  body {color:red}
  /*--><!]]]]><![CDATA[>*/

/*--><!]]>*/
</style></p>',
1082
      format_string('HTML corrector -- Existing cdata section @pattern_name properly escaped', array('@pattern_name' => '<!--/*--><![CDATA[/* ><!--*/'))
1083 1084
    );

1085
    $filtered_data = Html::normalize('<p><script>
1086 1087 1088 1089
<!--//--><![CDATA[// ><!--
  alert("test");
//--><!]]>
</script></p>');
1090
    $this->assertEqual($filtered_data, '<p><script>
1091 1092 1093 1094 1095 1096 1097 1098
<!--//--><![CDATA[// ><!--

<!--//--><![CDATA[// ><!--
  alert("test");
//--><!]]]]><![CDATA[>

//--><!]]>
</script></p>',
1099
      format_string('HTML corrector -- Existing cdata section @pattern_name properly escaped', array('@pattern_name' => '<!--//--><![CDATA[// ><!--'))
1100 1101
    );

1102
    $filtered_data = Html::normalize('<p><script>
1103 1104 1105 1106
// <![CDATA[
  alert("test");
// ]]>
</script></p>');
1107
    $this->assertEqual($filtered_data, '<p><script>
1108 1109 1110 1111 1112 1113 1114 1115
<!--//--><![CDATA[// ><!--

// <![CDATA[
  alert("test");
// ]]]]><![CDATA[>

//--><!]]>
</script></p>',
1116
      format_string('HTML corrector -- Existing cdata section @pattern_name properly escaped', array('@pattern_name' => '// <![CDATA['))
1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132