diff --git a/custom_elements.services.yml b/custom_elements.services.yml index 8d4369172712b1ecd293553d24dbdc65b0f3bccb..b72094f22e1024efe9c6bb1cdfd2b67e6b6181c0 100644 --- a/custom_elements.services.yml +++ b/custom_elements.services.yml @@ -39,3 +39,5 @@ services: class: Drupal\custom_elements\Processor\TextFieldItemProcessor tags: - { name: custom_elements_processor, priority: -50 } + custom_elements.html_parser: + class: Drupal\custom_elements\HtmlParser diff --git a/src/CustomElementNormalizer.php b/src/CustomElementNormalizer.php index 219fa5b25832b4d120eb84593ab41c823678d723..7666005a96877dbf208cdb472355582ecfee4cce 100644 --- a/src/CustomElementNormalizer.php +++ b/src/CustomElementNormalizer.php @@ -7,21 +7,36 @@ use Drupal\Core\Render\BubbleableMetadata; use Symfony\Component\Serializer\Normalizer\NormalizerInterface; /** - * Formats a custom element structure into an array. + * Formats a custom element structure into an array or HTML string. */ class CustomElementNormalizer implements NormalizerInterface { + /** + * List of boolean attributes that do not need a value. + * + * @var array + */ + protected const BOOLEAN_HTML_ATTRIBUTES = [ + 'checked', 'selected', 'disabled', 'readonly', + 'multiple', 'required', 'autofocus', 'formnovalidate', 'novalidate', + ]; + /** * {@inheritdoc} */ public function normalize(mixed $object, ?string $format = NULL, array $context = []): array|string|int|float|bool|\ArrayObject|null { $cache_metadata = $context['cache_metadata'] ?? new BubbleableMetadata(); + + if ($format === 'html') { + return $this->normalizeToHtml($object, $cache_metadata); + } + $result = $this->normalizeCustomElement($object, $cache_metadata); // By default, convert keys in the outer result array to be valid JS // identifiers. (Actually, // https://vuejs.org/guide/components/registration.html indicates that // PascalCase names, not camelCase, are valid identifiers - but camelCase - // was used since the noram was introduced in v2.) 
'key_casing' context + // was used since the norm was introduced in v2.) 'key_casing' context // parameter can override this. if (!isset($context['key_casing']) || $context['key_casing'] !== 'ignore') { $result = $this->convertKeysToCamelCase($result); @@ -47,11 +62,11 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized custom element. */ - protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata) { + protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata): array { $result = ['element' => $element->getPrefixedTag()]; $result = array_merge($result, $this->normalizeAttributes($element->getAttributes(), $cache_metadata)); - // Remove dumb default html wrapping elements. + // Remove dumb default HTML wrapping elements. if ($result['element'] == 'div' || $result['element'] == 'span') { unset($result['element']); } @@ -76,7 +91,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized custom element attributes. */ - protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata) { + protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata): array { $result = []; foreach ($attributes as $key => $value) { if ($key == 'slot') { @@ -89,7 +104,7 @@ class CustomElementNormalizer implements NormalizerInterface { } /** - * Normalize slots. + * Normalize slots for non-HTML format. * * @param \Drupal\custom_elements\CustomElement $element * The element for which to normalize slots. @@ -99,7 +114,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized slots. 
*/ - protected function normalizeSlots(CustomElement $element, BubbleableMetadata $cache_metadata) { + protected function normalizeSlots(CustomElement $element, BubbleableMetadata $cache_metadata): array { $data = []; foreach ($element->getSortedSlotsByName() as $slot_key => $slot_entries) { $slot_data = []; @@ -138,7 +153,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Converted keys. */ - protected function convertKeysToCamelCase(array $array) { + protected function convertKeysToCamelCase(array $array): array { $keys = array_map(function ($key) use (&$array) { if (is_array($array[$key])) { $array[$key] = $this->convertKeysToCamelCase($array[$key]); @@ -160,4 +175,134 @@ class CustomElementNormalizer implements NormalizerInterface { ]; } + /** + * Normalize custom element to HTML string. + * + * @param \Drupal\custom_elements\CustomElement $element + * The custom element. + * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata + * The cache metadata. + * + * @return string + * HTML string representation of the custom element. + */ + protected function normalizeToHtml(CustomElement $element, BubbleableMetadata $cache_metadata): string { + $tag = $element->getPrefixedTag(); + $attributes = $this->normalizeAttributes($element->getAttributes(), $cache_metadata); + $slots = $this->normalizeSlotsHtml($element, $cache_metadata); + + if ($tag === 'text') { + // We assume there's only content for a text element. + return implode('', $slots); + } + + $attributeString = $this->normalizeAttributesHtml($attributes); + $content = implode('', $slots); + + // Define self-closing tags. + $selfClosingTags = CustomElement::getNoEndTags(); + + // Check if the tag is self-closing. + if (in_array(strtolower($tag), $selfClosingTags)) { + return "<{$tag}{$attributeString} />"; + } + + return "<{$tag}{$attributeString}>{$content}</{$tag}>"; + } + + /** + * Convert attributes array to string format for HTML. 
+ * + * @param array $attributes + * The attributes array. + * + * @return string + * Attributes as a string suitable for HTML. + */ + protected function normalizeAttributesHtml(array $attributes): string { + $attributeString = ''; + foreach ($attributes as $key => $value) { + if ($key == 'slot') { + continue; + } + + if (in_array(strtolower($key), self::BOOLEAN_HTML_ATTRIBUTES)) { + // For boolean attributes, if the value is truthy (not 'false' or 0), + // we just add the attribute name. + if ($value !== FALSE && $value !== 'false' && $value !== 0) { + $attributeString .= " {$key}"; + } + // If the value is explicitly false or 0, we skip adding this attribute. + } + else { + // For non-boolean attributes, proceed with key-value. + $attributeString .= " {$key}=\"" . htmlspecialchars($value) . "\""; + } + } + return $attributeString; + } + + /** + * Normalize slots to HTML strings. + * + * @param \Drupal\custom_elements\CustomElement $element + * The element for which to normalize slots. + * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata + * The cache metadata. + * + * @return array + * An array of HTML strings for each slot. + */ + protected function normalizeSlotsHtml(CustomElement $element, BubbleableMetadata $cache_metadata): array { + $html = []; + foreach ($element->getSortedSlotsByName() as $slot_key => $slot_entries) { + foreach ($slot_entries as $slot) { + if (isset($slot['content'])) { + // Treat content as an array for consistent processing. + $contentItems = is_array($slot['content']) ? $slot['content'] : [$slot['content']]; + foreach ($contentItems as $item) { + if ($item instanceof CustomElement) { + $html[] = $this->normalizeToHtml($item, $cache_metadata); + } + elseif ($item instanceof MarkupInterface) { + $html[] = (string) $item; + } + elseif (is_array($item)) { + // Recursively process nested arrays. 
+ $html = array_merge($html, $this->normalizeSlotsHtmlItem($item, $cache_metadata)); + } + else { + // Handle scalars (strings, numbers, etc.). + $html[] = htmlspecialchars((string) $item); + } + } + } + } + } + return $html; + } + + /** + * Helper to process nested arrays in slot content. + */ + private function normalizeSlotsHtmlItem($item, BubbleableMetadata $cache_metadata): array { + $result = []; + if (is_array($item)) { + foreach ($item as $subItem) { + $result = array_merge($result, $this->normalizeSlotsHtmlItem($subItem, $cache_metadata)); + } + } + else { + if ($item instanceof CustomElement) { + $result[] = $this->normalizeToHtml($item, $cache_metadata); + } + elseif ($item instanceof MarkupInterface) { + $result[] = (string) $item; + } + else { + $result[] = htmlspecialchars((string) $item); + } + } + return $result; + } } diff --git a/src/HtmlParser.php b/src/HtmlParser.php new file mode 100644 index 0000000000000000000000000000000000000000..f9c1825091d1f251339e2a7064123ea47dd1d0e3 --- /dev/null +++ b/src/HtmlParser.php @@ -0,0 +1,88 @@ +<?php + +namespace Drupal\custom_elements; + +use Drupal\custom_elements\CustomElement; +use Masterminds\HTML5; + +/** + * Class HtmlParser. + * + * Converts HTML content into a tree of custom elements. + * This class skips the top-level <html> element if present. + */ +class HtmlParser { + + /** + * The HTML5 parser for processing HTML content. + * + * @var \Masterminds\HTML5 + */ + protected HTML5 $html5Parser; + + /** + * Constructs a new HtmlParser object. + */ + public function __construct() { + $this->html5Parser = new HTML5(); + } + + /** + * Converts HTML content to a CustomElement tree structure. + * + * @param string $htmlContent + * The HTML content to convert. + * + * @return \Drupal\custom_elements\CustomElement|null + * Returns the root CustomElement or NULL if conversion fails. 
+   */
+  public function convertHtmlToCustomElement(string $htmlContent): ?CustomElement {
+    $dom = $this->html5Parser->loadHTML($htmlContent);
+    return $dom->documentElement ? $this->convertNode($dom->documentElement) : NULL;
+  }
+
+  /**
+   * Recursively converts a DOM node to a CustomElement.
+   *
+   * @param \DOMNode $node
+   *   The DOM node to convert.
+   *
+   * @return \Drupal\custom_elements\CustomElement|null
+   *   Returns CustomElement or NULL if the node should be skipped.
+   */
+  protected function convertNode(\DOMNode $node): ?CustomElement {
+    if ($node->nodeType === XML_TEXT_NODE) {
+      $collapsedText = trim(preg_replace('/\s+/', ' ', $node->nodeValue));
+      if ($collapsedText !== '') {
+        $customElement = CustomElement::create('text');
+        $customElement->addSlot('text', $node->nodeValue);
+        return $customElement;
+      }
+      // Skip whitespace-only text nodes ('0' is content and is kept).
+      return NULL;
+    }
+
+    if ($node->nodeType !== XML_ELEMENT_NODE) {
+      // Skip any non-element nodes other than text.
+      return NULL;
+    }
+
+    $tagName = $node->nodeName;
+    $element = CustomElement::create($tagName);
+
+    // Add attributes.
+    foreach ($node->attributes as $attr) {
+      $element->setAttribute($attr->nodeName, $attr->nodeValue);
+    }
+
+    foreach ($node->childNodes as $childNode) {
+      $childElement = $this->convertNode($childNode);
+      if ($childElement) {
+        // Ensure all children are added to 'content' slot for consistency.
+        $element->addSlot('content', $childElement);
+      }
+    }
+
+    return $element;
+  }
+}
diff --git a/tests/src/Kernel/HtmlParserTest.php b/tests/src/Kernel/HtmlParserTest.php
new file mode 100644
index 0000000000000000000000000000000000000000..2ee7adc6c72c6aa09e9ce91087d556ea69ad4d32
--- /dev/null
+++ b/tests/src/Kernel/HtmlParserTest.php
@@ -0,0 +1,80 @@
+<?php
+
+namespace Drupal\Tests\custom_elements\Kernel;
+
+use Drupal\KernelTests\KernelTestBase;
+
+/**
+ * Tests the HtmlParser class.
+ *
+ * @group custom_elements
+ */
+class HtmlParserTest extends KernelTestBase {
+
+  /**
+   * The HtmlParser service.
+ * + * @var \Drupal\custom_elements\HtmlParser + */ + protected $htmlParser; + + /** + * The CustomElementNormalizer service. + * + * @var \Drupal\custom_elements\CustomElementNormalizer + */ + protected $normalizer; + + /** + * {@inheritdoc} + */ + protected static $modules = ['custom_elements']; + + /** + * {@inheritdoc} + */ + protected function setUp(): void { + parent::setUp(); + $this->htmlParser = $this->container->get('custom_elements.html_parser'); + $this->normalizer = $this->container->get('custom_elements.normalizer'); + } + + /** + * Tests converting HTML content to custom elements and back. + */ + public function testHtmlToCustomElementAndBack() { + // Load HTML content from a fixture file for better readability. + $htmlFilePath = __DIR__ . '/fixtures/testMarkup.html'; + $html = file_get_contents($htmlFilePath); + + // Convert HTML to custom elements. + $customElement = $this->htmlParser->convertHtmlToCustomElement($html); + + // Normalize custom elements back to HTML. + $normalizedHtml = $this->normalizer->normalize($customElement, 'html'); + + // Assert that the normalized HTML matches the original HTML. + // Html tags are added as a root element by htmlParser conversion. + $this->assertHtmlEquals('<html>' . $html . '</html>', $normalizedHtml); + } + + /** + * Asserts two HTML strings are equivalent after normalization. + */ + protected function assertHtmlEquals(string $expected, string $actual, string $message = ''): void { + $this->assertSame( + $this->normalizeHtmlWhitespace($expected), + $this->normalizeHtmlWhitespace($actual), + $message ?: 'Rendered HTML should match expected output' + ); + } + + /** + * Normalizes HTML whitespace for consistent comparisons. 
+ */ + protected function normalizeHtmlWhitespace(string $html): string { + $html = preg_replace("/ *\n */m", "", $html); + return preg_replace("/> +</", "><", $html); + } + +} diff --git a/tests/src/Kernel/fixtures/testMarkup.html b/tests/src/Kernel/fixtures/testMarkup.html new file mode 100644 index 0000000000000000000000000000000000000000..c811075b794c48c7d7c481567d61a7be8a4e6b05 --- /dev/null +++ b/tests/src/Kernel/fixtures/testMarkup.html @@ -0,0 +1,146 @@ +<body> + <header> + <h1>Comprehensive HTML Example</h1> + <nav> + <ul> + <li><a href="#section1">Section 1</a></li> + <li><a href="#section2">Section 2</a></li> + <li><a href="#section3">Section 3</a></li> + </ul> + </nav> + </header> + <main> + <article> + <header> + <h2>Article Header</h2> + </header> + <p>This is a paragraph within an article. Here is some <strong>strong text</strong> and <em>emphasized + text</em>with an <a href="#">example link</a>.</p> + <figure> + <img src="example.jpg" alt="Example Image" /> + <figcaption>Figure caption for the example image.</figcaption> + </figure> + <section id="section1"> + <header> + <h3>Section 1: Content and Lists</h3> + </header> + <p>Content in Section 1 includes various elements:</p> + <ul> + <li>Unordered list item 1</li> + <li>Unordered list item 2</li> + <li>Unordered list item 3</li> + </ul> + <ol> + <li>Ordered list item 1</li> + <li>Ordered list item 2</li> + <li>Ordered list item 3</li> + </ol> + <blockquote cite="https://example.com"> + <p>This is a blockquote with a citation.</p> + </blockquote> + <aside> + <p>This aside provides additional context or a sidebar note within Section 1.</p> + </aside> + </section> + <section id="section2"> + <header> + <h3>Section 2: Multimedia and Forms</h3> + </header> + <p>Section 2 includes multimedia content and a sample form.</p> + <video width="320" height="240" controls=""> + <source src="movie.mp4" type="video/mp4" /> + Your browser does not support the video tag. 
+ </video> + <audio controls=""> + <source src="audio.mp3" type="audio/mpeg" /> + Your browser does not support the audio element. + </audio> + <canvas id="myCanvas" width="200" height="100" style="border:1px solid #000;"></canvas> + <form action="#" method="post"> + <fieldset> + <legend>Contact Form</legend> + <label for="name">Name:</label> + <input type="text" id="name" name="name" placeholder="Your Name" /><br /> + <label for="email">Email:</label> + <input type="email" id="email" name="email" placeholder="you@example.com" /><br /> + <label for="message">Message:</label> + <textarea id="message" name="message" rows="4" cols="50">Enter your message here...</textarea><br /> + <button type="submit">Submit</button> + </fieldset> + </form> + </section> + <section id="section3"> + <header> + <h3>Section 3: Miscellaneous Elements</h3> + </header> + <p>This section demonstrates a variety of HTML elements:</p> + <dl> + <dt>Definition Term 1</dt> + <dd>Definition for term 1.</dd> + <dt>Definition Term 2</dt> + <dd>Definition for term 2.</dd> + </dl> + <table border="1"> + <caption>Sample Data Table</caption> + <thead> + <tr> + <th>Header 1</th> + <th>Header 2</th> + <th>Header 3</th> + </tr> + </thead> + <tbody> + <tr> + <td>Data 1</td> + <td>Data 2</td> + <td>Data 3</td> + </tr> + <tr> + <td>Data A</td> + <td>Data B</td> + <td>Data C</td> + </tr> + </tbody> + <tfoot> + <tr> + <td colspan="3">Footer information</td> + </tr> + </tfoot> + </table> + <details> + <summary>Additional Details</summary> + <p>Here is some extra information that can be toggled.</p> + </details> + <mark>Highlighted text</mark> + <time datetime="2025-02-11">February 11, 2025</time> + <progress value="70" max="100">70%</progress> + <meter value="0.7">70%</meter> + <output id="result">Result output here</output> + <code>console.log('Inline code snippet');</code> + <var>variableName</var> + <samp>Sample output text</samp> + </section> + </article> + <aside> + <h2>Sidebar</h2> + <p>This sidebar 
contains additional navigation and context.</p> + <nav> + <ul> + <li><a href="#">Sidebar Link 1</a></li> + <li><a href="#">Sidebar Link 2</a></li> + <li><a href="#">Sidebar Link 3</a></li> + </ul> + </nav> + </aside> + </main> + <footer> + <p>2025 Comprehensive HTML Example. All rights reserved.</p> + <address>Contact us: <a href="mailto:info@example.com">info@example.com</a></address> + </footer> + <template id="myTemplate"> + <div>This is template content that is not rendered until instantiated.</div> + </template> + <noscript> + <p>Your browser does not support JavaScript or it is disabled.</p> + </noscript> +</body> \ No newline at end of file