diff --git a/src/Plugin/migrate/process/Dom.php b/src/Plugin/migrate/process/Dom.php
index ca538a61b11f64f87cc901a5b956fad71aa3fee4..aa3051d887b301bab6b2c6ac68214b295f7bc0f0 100644
--- a/src/Plugin/migrate/process/Dom.php
+++ b/src/Plugin/migrate/process/Dom.php
@@ -10,6 +10,7 @@ use Drupal\migrate\MigrateExecutableInterface;
 use Drupal\migrate\Plugin\MigrationInterface;
 use Drupal\migrate\ProcessPluginBase;
 use Drupal\migrate\Row;
+use Masterminds\HTML5;
 
 /**
  * Handles string to DOM and back conversions.
@@ -30,6 +31,9 @@ use Drupal\migrate\Row;
  *   declaration. Defaults to '1.0'.
  * - encoding: (optional) The encoding of the document as part of the XML
  *   declaration. Defaults to 'UTF-8'.
+ * - import_method: (optional) Which parser to use on import. Possible values:
+ *   - 'html': (default) Parse with the DOM extension.
+ *   - 'html5': Parse with the HTML5 parser.
  *
  * @codingStandardsIgnoreStart
  *
@@ -95,6 +99,12 @@ class Dom extends ProcessPluginBase {
     if (!in_array($configuration['method'], ['import', 'export'])) {
       throw new \InvalidArgumentException('The "method" must be "import" or "export".');
     }
+    // Fall back to the DOM extension parser when no import method is given.
+    $configuration['import_method'] = $configuration['import_method'] ?? 'html';
+    // Compare strictly so that loosely-equal values such as TRUE are rejected.
+    if (!in_array($configuration['import_method'], ['html', 'html5'], TRUE)) {
+      throw new \InvalidArgumentException('The "import_method" must be "html" or "html5".');
+    }
     parent::__construct($configuration, $plugin_id, $plugin_definition);
     $this->configuration += $this->defaultValues();
     $this->logMessages = (bool) $this->configuration['log_messages'];
@@ -159,7 +169,20 @@ class Dom extends ProcessPluginBase {
     }
 
     $document = new \DOMDocument($this->configuration['version'], $this->configuration['encoding']);
-    $document->loadHTML($html);
+    switch ($this->configuration['import_method']) {
+      case 'html5':
+        // Pass the document created above as the parser's target document.
+        $html5 = new HTML5([
+          'target_document' => $document,
+          'disable_html_ns' => TRUE,
+        ]);
+        $html5->loadHTML($html);
+        break;
+
+      case 'html':
+      default:
+        $document->loadHTML($html);
+    }
 
     if ($this->logMessages) {
       restore_error_handler();
diff --git a/tests/src/Unit/process/DomTest.php b/tests/src/Unit/process/DomTest.php
index d87be09373c8cb00f7221c08cf4fd6d198b4e2bd..6fb9f38389c5824c4189a8898424cc7090463492 100644
--- a/tests/src/Unit/process/DomTest.php
+++ b/tests/src/Unit/process/DomTest.php
@@ -42,6 +42,19 @@ final class DomTest extends MigrateProcessTestCase {
       ->transform($value, $this->migrateExecutable, $this->row, 'destinationproperty');
   }
 
+  /**
+   * Tests that an unsupported import method is rejected.
+   *
+   * @covers ::__construct
+   */
+  public function testInvalidImportMethod(): void {
+    $configuration['method'] = 'import';
+    $configuration['import_method'] = 'invalid';
+    $this->expectException(\InvalidArgumentException::class);
+    $this->expectExceptionMessage('The "import_method" must be "html" or "html5".');
+    new Dom($configuration, 'dom', []);
+  }
+
   /**
    * @covers ::import
    */
@@ -54,6 +67,20 @@ final class DomTest extends MigrateProcessTestCase {
     $this->assertTrue($document instanceof \DOMDocument);
   }
 
+  /**
+   * Tests importing markup with the 'html5' import method.
+   *
+   * @covers ::import
+   */
+  public function testImportMethodHtml5(): void {
+    $configuration['method'] = 'import';
+    $configuration['import_method'] = 'html5';
+    $value = '<p>A simple paragraph.</p>';
+    $document = (new Dom($configuration, 'dom', []))
+      ->transform($value, $this->migrateExecutable, $this->row, 'destinationproperty');
+    $this->assertInstanceOf(\DOMDocument::class, $document);
+  }
+
   /**
    * @covers ::import
    */