From d1ce1ea26dcbf9e6077f4150847b28d73b32a5f5 Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Mon, 20 Jan 2025 21:54:34 +0100 Subject: [PATCH 1/8] Add html to custom element Sanitizer and custom element to html normalizer --- src/CustomElementNormalizer.php | 179 ++++++++++++++++++++++++++++---- src/HtmlToCustomElement.php | 134 ++++++++++++++++++++++++ 2 files changed, 294 insertions(+), 19 deletions(-) create mode 100644 src/HtmlToCustomElement.php diff --git a/src/CustomElementNormalizer.php b/src/CustomElementNormalizer.php index 219fa5b..bebfb8d 100644 --- a/src/CustomElementNormalizer.php +++ b/src/CustomElementNormalizer.php @@ -7,7 +7,7 @@ use Drupal\Core\Render\BubbleableMetadata; use Symfony\Component\Serializer\Normalizer\NormalizerInterface; /** - * Formats a custom element structure into an array. + * Formats a custom element structure into an array or HTML string. */ class CustomElementNormalizer implements NormalizerInterface { @@ -16,6 +16,11 @@ class CustomElementNormalizer implements NormalizerInterface { */ public function normalize(mixed $object, ?string $format = NULL, array $context = []): array|string|int|float|bool|\ArrayObject|null { $cache_metadata = $context['cache_metadata'] ?? new BubbleableMetadata(); + + if ($format === 'html') { + return $this->normalizeToHtml($object, $cache_metadata); + } + $result = $this->normalizeCustomElement($object, $cache_metadata); // By default, convert keys in the outer result array to be valid JS // identifiers. (Actually, @@ -37,32 +42,75 @@ class CustomElementNormalizer implements NormalizerInterface { } /** - * Normalize custom element. + * Normalize custom element to HTML string. * * @param \Drupal\custom_elements\CustomElement $element * The custom element. * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata * The cache metadata. * - * @return array - * Normalized custom element. + * @return string + * HTML string representation of the custom element. 
*/ - protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata) { - $result = ['element' => $element->getPrefixedTag()]; - $result = array_merge($result, $this->normalizeAttributes($element->getAttributes(), $cache_metadata)); + protected function normalizeToHtml(CustomElement $element, BubbleableMetadata $cache_metadata): string { + $tag = $element->getPrefixedTag(); + $attributes = $this->normalizeAttributes($element->getAttributes(), $cache_metadata); + $slots = $this->normalizeSlotsHtml($element, $cache_metadata); - // Remove dumb default html wrapping elements. - if ($result['element'] == 'div' || $result['element'] == 'span') { - unset($result['element']); + if ($tag === 'text') { + // We assume there's only content for a text element. + return implode('', $slots); } - // Collect cache metadata. Since the cache metadata object is passed down - // to slots, custom elements of slots will add their metadata as well. - $cache_metadata->addCacheableDependency($element); + $attributeString = $this->attributesToString($attributes); + $content = implode('', $slots); - $normalized_slots = $this->normalizeSlots($element, $cache_metadata); - $result = array_merge($result, $normalized_slots); - return $result; + // Define self-closing tags. + $selfClosingTags = CustomElement::getNoEndTags(); + + // Check if the tag is self-closing. + if (in_array(strtolower($tag), $selfClosingTags)) { + return "<{$tag}{$attributeString} />"; + } + + return "<{$tag}{$attributeString}>{$content}</{$tag}>"; + } + + /** + * Convert attributes array to string format for HTML. + * + * @param array $attributes + * The attributes array. + * + * @return string + * Attributes as a string suitable for HTML. + */ + protected function attributesToString(array $attributes): string { + $attributeString = ''; + foreach ($attributes as $key => $value) { + if ($key == 'slot') { + continue; + } + + // List of boolean attributes that do not need a value. 
+ $booleanAttributes = ['checked', 'selected', 'disabled', 'readonly', + 'multiple', 'required', 'autofocus', 'formnovalidate', 'novalidate', + ]; + + if (in_array(strtolower($key), $booleanAttributes)) { + // For boolean attributes, if the value is truthy (not 'false' or 0), + // we just add the attribute name. + if ($value !== FALSE && $value !== 'false' && $value !== 0) { + $attributeString .= " {$key}"; + } + // If the value is explicitly false or 0, we skip adding this attribute. + } + else { + // For non-boolean attributes, proceed with key-value. + $attributeString .= " {$key}=\"" . htmlspecialchars($value) . "\""; + } + } + return $attributeString; } /** @@ -76,12 +124,13 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized custom element attributes. */ - protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata) { + protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata): array { $result = []; foreach ($attributes as $key => $value) { if ($key == 'slot') { continue; } + // Remove leading colon from keys. $result_key = strpos($key, ':') === 0 ? substr($key, 1) : $key; $result[$result_key] = $value; } @@ -89,7 +138,75 @@ class CustomElementNormalizer implements NormalizerInterface { } /** - * Normalize slots. + * Normalize slots to HTML strings. + * + * @param \Drupal\custom_elements\CustomElement $element + * The element for which to normalize slots. + * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata + * The cache metadata. + * + * @return array + * An array of HTML strings for each slot. 
+ */ + protected function normalizeSlotsHtml(CustomElement $element, BubbleableMetadata $cache_metadata): array { + $html = []; + foreach ($element->getSortedSlotsByName() as $slot_key => $slot_entries) { + foreach ($slot_entries as $slot) { + if (isset($slot['content'])) { + if ($slot['content'] instanceof CustomElement) { + // Recursively normalize nested custom elements. + $html[] = $this->normalizeToHtml($slot['content'], $cache_metadata); + } + elseif ($slot['content'] instanceof MarkupInterface) { + // If it's MarkupInterface, directly convert to string. + $html[] = (string) $slot['content']; + } + elseif (is_string($slot['content'])) { + // Handle plain text content. + // Escape for security. + $html[] = htmlspecialchars($slot['content']); + } + else { + // If content is an array, + // we assume it's a structure that needs to be stringified. + $html[] = $this->stringifyArray($slot['content']); + } + } + } + } + return $html; + } + + /** + * Normalize custom element. + * + * @param \Drupal\custom_elements\CustomElement $element + * The custom element. + * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata + * The cache metadata. + * + * @return array + * Normalized custom element. + */ + protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata) { + $result = ['element' => $element->getPrefixedTag()]; + $result = array_merge($result, $this->normalizeAttributes($element->getAttributes(), $cache_metadata)); + + // Remove default html wrapping elements. + if ($result['element'] == 'div' || $result['element'] == 'span') { + unset($result['element']); + } + + // Collect cache metadata. + $cache_metadata->addCacheableDependency($element); + + $normalized_slots = $this->normalizeSlots($element, $cache_metadata); + $result = array_merge($result, $normalized_slots); + return $result; + } + + /** + * Normalize slots for non-HTML format. 
* * @param \Drupal\custom_elements\CustomElement $element * The element for which to normalize slots. @@ -121,7 +238,6 @@ class CustomElementNormalizer implements NormalizerInterface { $slot_data = reset($slot_data); } - // Default to 'content' key for default slots. $data_key = $slot_key == 'default' ? 'content' : $slot_key; $data[$data_key] = $slot_data; } @@ -151,6 +267,31 @@ class CustomElementNormalizer implements NormalizerInterface { return array_combine($keys, $array); } + /** + * Helper method to convert an array to a string representation. + * + * @param array $array + * The array to convert to string. + * + * @return string + * String representation of the array. + */ + protected function stringifyArray(array $array): string { + $strings = []; + foreach ($array as $item) { + if ($item instanceof CustomElement) { + $strings[] = $this->normalizeToHtml($item, new BubbleableMetadata()); + } + elseif (is_array($item)) { + $strings[] = $this->stringifyArray($item); + } + else { + $strings[] = htmlspecialchars((string) $item); + } + } + return implode('', $strings); + } + /** * {@inheritDoc} */ diff --git a/src/HtmlToCustomElement.php b/src/HtmlToCustomElement.php new file mode 100644 index 0000000..ed818ac --- /dev/null +++ b/src/HtmlToCustomElement.php @@ -0,0 +1,134 @@ +<?php + +namespace Drupal\custom_elements; + +use Drupal\custom_elements\CustomElement; +use Masterminds\HTML5; +use Symfony\Component\DependencyInjection\ContainerInterface; +use DOMNode; + +/** + * Class HtmlToCustomElement + * + * Converts HTML content into a tree of custom elements for use within Drupal systems. + * This class skips the top-level <html> element if present. + */ +class HtmlToCustomElement { + + /** + * The HTML5 parser for processing HTML content. + * + * @var \Masterminds\HTML5 + */ + protected HTML5 $html5Parser; + + /** + * Constructs a new HtmlToCustomElement object. + * + * @param \Masterminds\HTML5 $html5_parser + * An instance of the HTML5 parser. 
+ */ + public function __construct(HTML5 $html5_parser) { + $this->html5Parser = $html5_parser; + } + + /** + * Creates an instance of the HtmlToCustomElement using dependency injection. + * + * @param \Symfony\Component\DependencyInjection\ContainerInterface $container + * The container to retrieve services from. + * + * @return static + */ + public static function create(ContainerInterface $container): static { + $html5_parser = $container->get('html5_parser'); + return new static($html5_parser); + } + + /** + * Converts HTML content to a CustomElement tree structure, skipping the top-level <html> tag. + * + * @param string $htmlContent + * The HTML content to convert. + * + * @return \Drupal\custom_elements\CustomElement|null + * Returns the root CustomElement or NULL if conversion fails or only <html> is found. + */ + /** + * Converts HTML content to a CustomElement tree structure. + * + * @param string $htmlContent + * The HTML content to convert. + * + * @return \Drupal\custom_elements\CustomElement|null + * Returns the root CustomElement or NULL if conversion fails. + */ + public function convertHtmlToCustomElement(string $htmlContent): ?CustomElement { + $dom = $this->html5Parser->loadHTML($htmlContent); + return $this->convertNode($dom->documentElement); + } + + /** + * Recursively converts a DOM node to a CustomElement. + * + * @param \DOMNode $node + * The DOM node to convert. + * + * @return \Drupal\custom_elements\CustomElement|null + * Returns the converted CustomElement or NULL if the node should be skipped. 
+ */ + protected function convertNode(DOMNode $node): ?CustomElement { + if ($node->nodeType === XML_TEXT_NODE) { + $emptyText = trim(preg_replace('/\s+/', ' ', $node->nodeValue)); + if (!empty($emptyText)) { + $customElement = CustomElement::create('text'); + $customElement->addSlot('text', $node->nodeValue); + return $customElement; + } + return NULL; // Skip empty text nodes + } + + if ($node->nodeType !== XML_ELEMENT_NODE) { + return NULL; // Skip any non-element nodes other than text + } + + $tagName = $node->nodeName; + $element = CustomElement::create($tagName); + + // Add attributes + foreach ($node->attributes as $attr) { + $element->setAttribute($attr->nodeName, $attr->nodeValue); + } + + $hasContent = FALSE; + foreach ($node->childNodes as $childNode) { + $childElement = $this->convertNode($childNode); + if ($childElement) { + // Ensure all children are added to 'content' slot for consistency unless specified otherwise + $element->addSlot('content', $childElement); + $hasContent = TRUE; + } + } + + // Skip elements without content or attributes unless they're self-closing + if (!$hasContent && empty($element->getAttributes()) && !$this->isSelfClosing($tagName)) { + return NULL; + } + + return $element; + } + + /** + * Checks if a tag name represents a self-closing HTML element. + * + * @param string $tagName + * The tag name to check. + * + * @return bool + * True if the tag is self-closing, false otherwise. 
+ */ + protected function isSelfClosing(string $tagName): bool { + return in_array(strtolower($tagName), CustomElement::getNoEndTags()); + } + +} -- GitLab From ed9cfb3b87b7bdd34e6b79feaae1c53046852246 Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Tue, 21 Jan 2025 10:23:31 +0100 Subject: [PATCH 2/8] Html to CE class fix PHPCS --- src/HtmlToCustomElement.php | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/HtmlToCustomElement.php b/src/HtmlToCustomElement.php index ed818ac..50290c4 100644 --- a/src/HtmlToCustomElement.php +++ b/src/HtmlToCustomElement.php @@ -8,9 +8,9 @@ use Symfony\Component\DependencyInjection\ContainerInterface; use DOMNode; /** - * Class HtmlToCustomElement + * Class HtmlToCustomElement. * - * Converts HTML content into a tree of custom elements for use within Drupal systems. + * Converts HTML content into a tree of custom elements. * This class skips the top-level <html> element if present. */ class HtmlToCustomElement { @@ -45,15 +45,6 @@ class HtmlToCustomElement { return new static($html5_parser); } - /** - * Converts HTML content to a CustomElement tree structure, skipping the top-level <html> tag. - * - * @param string $htmlContent - * The HTML content to convert. - * - * @return \Drupal\custom_elements\CustomElement|null - * Returns the root CustomElement or NULL if conversion fails or only <html> is found. - */ /** * Converts HTML content to a CustomElement tree structure. * @@ -75,9 +66,9 @@ class HtmlToCustomElement { * The DOM node to convert. * * @return \Drupal\custom_elements\CustomElement|null - * Returns the converted CustomElement or NULL if the node should be skipped. + * Returns CustomElement or NULL if the node should be skipped. 
*/ - protected function convertNode(DOMNode $node): ?CustomElement { + protected function convertNode(\DOMNode $node): ?CustomElement { if ($node->nodeType === XML_TEXT_NODE) { $emptyText = trim(preg_replace('/\s+/', ' ', $node->nodeValue)); if (!empty($emptyText)) { @@ -85,17 +76,19 @@ class HtmlToCustomElement { $customElement->addSlot('text', $node->nodeValue); return $customElement; } - return NULL; // Skip empty text nodes + // Skip empty text nodes. + return NULL; } if ($node->nodeType !== XML_ELEMENT_NODE) { - return NULL; // Skip any non-element nodes other than text + // Skip any non-element nodes other than text. + return NULL; } $tagName = $node->nodeName; $element = CustomElement::create($tagName); - // Add attributes + // Add attributes. foreach ($node->attributes as $attr) { $element->setAttribute($attr->nodeName, $attr->nodeValue); } @@ -104,13 +97,13 @@ class HtmlToCustomElement { foreach ($node->childNodes as $childNode) { $childElement = $this->convertNode($childNode); if ($childElement) { - // Ensure all children are added to 'content' slot for consistency unless specified otherwise + // Ensure all children are added to 'content' slot for consistency. $element->addSlot('content', $childElement); $hasContent = TRUE; } } - // Skip elements without content or attributes unless they're self-closing + // Skip elements without content or attributes unless they're self-closing. 
if (!$hasContent && empty($element->getAttributes()) && !$this->isSelfClosing($tagName)) { return NULL; } -- GitLab From 268d4ea4162df5f6309b1c9c3fe693da6cfbb37b Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Wed, 12 Feb 2025 09:33:00 +0100 Subject: [PATCH 3/8] Add HTML to Ce and back test --- tests/src/Kernel/HtmlToCustomElementTest.php | 80 ++++++++++ tests/src/Kernel/fixtures/testMarkup.html | 145 +++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 tests/src/Kernel/HtmlToCustomElementTest.php create mode 100644 tests/src/Kernel/fixtures/testMarkup.html diff --git a/tests/src/Kernel/HtmlToCustomElementTest.php b/tests/src/Kernel/HtmlToCustomElementTest.php new file mode 100644 index 0000000..325e5df --- /dev/null +++ b/tests/src/Kernel/HtmlToCustomElementTest.php @@ -0,0 +1,80 @@ +<?php + +namespace Drupal\Tests\custom_elements\Kernel; + +use Drupal\KernelTests\KernelTestBase; + +/** + * Tests the HtmlToCustomElement class. + * + * @group custom_elements + */ +class HtmlToCustomElementTest extends KernelTestBase { + + /** + * The HtmlToCustomElement service. + * + * @var \Drupal\custom_elements\HtmlToCustomElement + */ + protected $htmlToCustomElement; + + /** + * The CustomElementNormalizer service. + * + * @var \Drupal\custom_elements\CustomElementNormalizer + */ + protected $normalizer; + + /** + * {@inheritdoc} + */ + protected static $modules = ['custom_elements']; + + /** + * {@inheritdoc} + */ + protected function setUp(): void { + parent::setUp(); + $this->htmlToCustomElement = $this->container->get('custom_elements.html_to_custom_element'); + $this->normalizer = $this->container->get('custom_elements.normalizer'); + } + + /** + * Tests converting HTML content to custom elements and back. + */ + public function testHtmlToCustomElementAndBack() { + // Load HTML content from a fixture file for better readability. + $htmlFilePath = __DIR__ . 
'/fixtures/testMarkup.html'; + $html = file_get_contents($htmlFilePath); + + // Convert HTML to custom elements. + $customElement = $this->htmlToCustomElement->convertHtmlToCustomElement($html); + + // Normalize custom elements back to HTML. + $normalizedHtml = $this->normalizer->normalize($customElement, 'html'); + + // Assert that the normalized HTML matches the original HTML. + // Html tags are added as a root element by htmlToCustomElement conversion. + $this->assertHtmlEquals('<html>' . $html . '</html>', $normalizedHtml); + } + + /** + * Asserts two HTML strings are equivalent after normalization. + */ + protected function assertHtmlEquals(string $expected, string $actual, string $message = ''): void { + $this->assertSame( + $this->normalizeHtmlWhitespace($expected), + $this->normalizeHtmlWhitespace($actual), + $message ?: 'Rendered HTML should match expected output' + ); + } + + /** + * Normalizes HTML whitespace for consistent comparisons. + */ + protected function normalizeHtmlWhitespace(string $html): string { + $html = preg_replace("/ *\n */m", "", $html); + return preg_replace("/> +</", "><", $html); + } + +} diff --git a/tests/src/Kernel/fixtures/testMarkup.html b/tests/src/Kernel/fixtures/testMarkup.html new file mode 100644 index 0000000..9486ffa --- /dev/null +++ b/tests/src/Kernel/fixtures/testMarkup.html @@ -0,0 +1,145 @@ +<body> +<header> + <h1>Comprehensive HTML Example</h1> + <nav> + <ul> + <li><a href="#section1">Section 1</a></li> + <li><a href="#section2">Section 2</a></li> + <li><a href="#section3">Section 3</a></li> + </ul> + </nav> +</header> +<main> + <article> + <header> + <h2>Article Header</h2> + </header> + <p>This is a paragraph within an article. 
Here is some <strong>strong text</strong> and <em>emphasized text</em>with an <a href="#">example link</a>.</p> + <figure> + <img src="example.jpg" alt="Example Image" /> + <figcaption>Figure caption for the example image.</figcaption> + </figure> + <section id="section1"> + <header> + <h3>Section 1: Content and Lists</h3> + </header> + <p>Content in Section 1 includes various elements:</p> + <ul> + <li>Unordered list item 1</li> + <li>Unordered list item 2</li> + <li>Unordered list item 3</li> + </ul> + <ol> + <li>Ordered list item 1</li> + <li>Ordered list item 2</li> + <li>Ordered list item 3</li> + </ol> + <blockquote cite="https://example.com"> + <p>This is a blockquote with a citation.</p> + </blockquote> + <aside> + <p>This aside provides additional context or a sidebar note within Section 1.</p> + </aside> + </section> + <section id="section2"> + <header> + <h3>Section 2: Multimedia and Forms</h3> + </header> + <p>Section 2 includes multimedia content and a sample form.</p> + <video width="320" height="240" controls=""> + <source src="movie.mp4" type="video/mp4" /> + Your browser does not support the video tag. + </video> + <audio controls=""> + <source src="audio.mp3" type="audio/mpeg" /> + Your browser does not support the audio element. 
+ </audio> + <canvas id="myCanvas" width="200" height="100" style="border:1px solid #000;"></canvas> + <form action="#" method="post"> + <fieldset> + <legend>Contact Form</legend> + <label for="name">Name:</label> + <input type="text" id="name" name="name" placeholder="Your Name" /><br /> + <label for="email">Email:</label> + <input type="email" id="email" name="email" placeholder="you@example.com" /><br /> + <label for="message">Message:</label> + <textarea id="message" name="message" rows="4" cols="50">Enter your message here...</textarea><br /> + <button type="submit">Submit</button> + </fieldset> + </form> + </section> + <section id="section3"> + <header> + <h3>Section 3: Miscellaneous Elements</h3> + </header> + <p>This section demonstrates a variety of HTML elements:</p> + <dl> + <dt>Definition Term 1</dt> + <dd>Definition for term 1.</dd> + <dt>Definition Term 2</dt> + <dd>Definition for term 2.</dd> + </dl> + <table border="1"> + <caption>Sample Data Table</caption> + <thead> + <tr> + <th>Header 1</th> + <th>Header 2</th> + <th>Header 3</th> + </tr> + </thead> + <tbody> + <tr> + <td>Data 1</td> + <td>Data 2</td> + <td>Data 3</td> + </tr> + <tr> + <td>Data A</td> + <td>Data B</td> + <td>Data C</td> + </tr> + </tbody> + <tfoot> + <tr> + <td colspan="3">Footer information</td> + </tr> + </tfoot> + </table> + <details> + <summary>Additional Details</summary> + <p>Here is some extra information that can be toggled.</p> + </details> + <mark>Highlighted text</mark> + <time datetime="2025-02-11">February 11, 2025</time> + <progress value="70" max="100">70%</progress> + <meter value="0.7">70%</meter> + <output id="result">Result output here</output> + <code>console.log('Inline code snippet');</code> + <var>variableName</var> + <samp>Sample output text</samp> + </section> + </article> + <aside> + <h2>Sidebar</h2> + <p>This sidebar contains additional navigation and context.</p> + <nav> + <ul> + <li><a href="#">Sidebar Link 1</a></li> + <li><a href="#">Sidebar Link 
2</a></li> + <li><a href="#">Sidebar Link 3</a></li> + </ul> + </nav> + </aside> +</main> +<footer> + <p>2025 Comprehensive HTML Example. All rights reserved.</p> + <address>Contact us: <a href="mailto:info@example.com">info@example.com</a></address> +</footer> +<template id="myTemplate"> + <div>This is template content that is not rendered until instantiated.</div> +</template> +<noscript> + <p>Your browser does not support JavaScript or it is disabled.</p> +</noscript> +</body> -- GitLab From 98bd3de056f90199919ef7b3829fc7b0055a9003 Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Wed, 12 Feb 2025 09:38:39 +0100 Subject: [PATCH 4/8] rollback unnecessary changes --- src/CustomElementNormalizer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CustomElementNormalizer.php b/src/CustomElementNormalizer.php index bebfb8d..ce63e3d 100644 --- a/src/CustomElementNormalizer.php +++ b/src/CustomElementNormalizer.php @@ -130,7 +130,6 @@ class CustomElementNormalizer implements NormalizerInterface { if ($key == 'slot') { continue; } - // Remove leading colon from keys. $result_key = strpos($key, ':') === 0 ? substr($key, 1) : $key; $result[$result_key] = $value; } @@ -238,6 +237,7 @@ class CustomElementNormalizer implements NormalizerInterface { $slot_data = reset($slot_data); } + // Default to 'content' key for default slots. $data_key = $slot_key == 'default' ? 
'content' : $slot_key; $data[$data_key] = $slot_data; } -- GitLab From 446dbe388257b9cdddf47607cecf64c161431196 Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Mon, 24 Feb 2025 10:06:22 +0100 Subject: [PATCH 5/8] rename service --- custom_elements.services.yml | 5 + src/CustomElementNormalizer.php | 177 ++---------------- ...HtmlToCustomElement.php => HtmlParser.php} | 29 +-- 3 files changed, 27 insertions(+), 184 deletions(-) rename src/{HtmlToCustomElement.php => HtmlParser.php} (77%) diff --git a/custom_elements.services.yml b/custom_elements.services.yml index 8d43691..676b32d 100644 --- a/custom_elements.services.yml +++ b/custom_elements.services.yml @@ -39,3 +39,8 @@ services: class: Drupal\custom_elements\Processor\TextFieldItemProcessor tags: - { name: custom_elements_processor, priority: -50 } + custom_elements.html_parser: + class: Drupal\custom_elements\HtmlParser + arguments: ['@html5_parser'] + html5_parser: + class: Masterminds\HTML5 diff --git a/src/CustomElementNormalizer.php b/src/CustomElementNormalizer.php index ce63e3d..219fa5b 100644 --- a/src/CustomElementNormalizer.php +++ b/src/CustomElementNormalizer.php @@ -7,7 +7,7 @@ use Drupal\Core\Render\BubbleableMetadata; use Symfony\Component\Serializer\Normalizer\NormalizerInterface; /** - * Formats a custom element structure into an array or HTML string. + * Formats a custom element structure into an array. */ class CustomElementNormalizer implements NormalizerInterface { @@ -16,11 +16,6 @@ class CustomElementNormalizer implements NormalizerInterface { */ public function normalize(mixed $object, ?string $format = NULL, array $context = []): array|string|int|float|bool|\ArrayObject|null { $cache_metadata = $context['cache_metadata'] ?? 
new BubbleableMetadata(); - - if ($format === 'html') { - return $this->normalizeToHtml($object, $cache_metadata); - } - $result = $this->normalizeCustomElement($object, $cache_metadata); // By default, convert keys in the outer result array to be valid JS // identifiers. (Actually, @@ -42,75 +37,32 @@ class CustomElementNormalizer implements NormalizerInterface { } /** - * Normalize custom element to HTML string. + * Normalize custom element. * * @param \Drupal\custom_elements\CustomElement $element * The custom element. * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata * The cache metadata. * - * @return string - * HTML string representation of the custom element. + * @return array + * Normalized custom element. */ - protected function normalizeToHtml(CustomElement $element, BubbleableMetadata $cache_metadata): string { - $tag = $element->getPrefixedTag(); - $attributes = $this->normalizeAttributes($element->getAttributes(), $cache_metadata); - $slots = $this->normalizeSlotsHtml($element, $cache_metadata); - - if ($tag === 'text') { - // We assume there's only content for a text element. - return implode('', $slots); - } - - $attributeString = $this->attributesToString($attributes); - $content = implode('', $slots); - - // Define self-closing tags. - $selfClosingTags = CustomElement::getNoEndTags(); + protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata) { + $result = ['element' => $element->getPrefixedTag()]; + $result = array_merge($result, $this->normalizeAttributes($element->getAttributes(), $cache_metadata)); - // Check if the tag is self-closing. - if (in_array(strtolower($tag), $selfClosingTags)) { - return "<{$tag}{$attributeString} />"; + // Remove dumb default html wrapping elements. 
+ if ($result['element'] == 'div' || $result['element'] == 'span') { + unset($result['element']); } - return "<{$tag}{$attributeString}>{$content}</{$tag}>"; - } - - /** - * Convert attributes array to string format for HTML. - * - * @param array $attributes - * The attributes array. - * - * @return string - * Attributes as a string suitable for HTML. - */ - protected function attributesToString(array $attributes): string { - $attributeString = ''; - foreach ($attributes as $key => $value) { - if ($key == 'slot') { - continue; - } - - // List of boolean attributes that do not need a value. - $booleanAttributes = ['checked', 'selected', 'disabled', 'readonly', - 'multiple', 'required', 'autofocus', 'formnovalidate', 'novalidate', - ]; + // Collect cache metadata. Since the cache metadata object is passed down + // to slots, custom elements of slots will add their metadata as well. + $cache_metadata->addCacheableDependency($element); - if (in_array(strtolower($key), $booleanAttributes)) { - // For boolean attributes, if the value is truthy (not 'false' or 0), - // we just add the attribute name. - if ($value !== FALSE && $value !== 'false' && $value !== 0) { - $attributeString .= " {$key}"; - } - // If the value is explicitly false or 0, we skip adding this attribute. - } - else { - // For non-boolean attributes, proceed with key-value. - $attributeString .= " {$key}=\"" . htmlspecialchars($value) . "\""; - } - } - return $attributeString; + $normalized_slots = $this->normalizeSlots($element, $cache_metadata); + $result = array_merge($result, $normalized_slots); + return $result; } /** @@ -124,7 +76,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized custom element attributes. 
*/ - protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata): array { + protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata) { $result = []; foreach ($attributes as $key => $value) { if ($key == 'slot') { @@ -137,75 +89,7 @@ class CustomElementNormalizer implements NormalizerInterface { } /** - * Normalize slots to HTML strings. - * - * @param \Drupal\custom_elements\CustomElement $element - * The element for which to normalize slots. - * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata - * The cache metadata. - * - * @return array - * An array of HTML strings for each slot. - */ - protected function normalizeSlotsHtml(CustomElement $element, BubbleableMetadata $cache_metadata): array { - $html = []; - foreach ($element->getSortedSlotsByName() as $slot_key => $slot_entries) { - foreach ($slot_entries as $slot) { - if (isset($slot['content'])) { - if ($slot['content'] instanceof CustomElement) { - // Recursively normalize nested custom elements. - $html[] = $this->normalizeToHtml($slot['content'], $cache_metadata); - } - elseif ($slot['content'] instanceof MarkupInterface) { - // If it's MarkupInterface, directly convert to string. - $html[] = (string) $slot['content']; - } - elseif (is_string($slot['content'])) { - // Handle plain text content. - // Escape for security. - $html[] = htmlspecialchars($slot['content']); - } - else { - // If content is an array, - // we assume it's a structure that needs to be stringified. - $html[] = $this->stringifyArray($slot['content']); - } - } - } - } - return $html; - } - - /** - * Normalize custom element. - * - * @param \Drupal\custom_elements\CustomElement $element - * The custom element. - * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata - * The cache metadata. - * - * @return array - * Normalized custom element. 
- */ - protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata) { - $result = ['element' => $element->getPrefixedTag()]; - $result = array_merge($result, $this->normalizeAttributes($element->getAttributes(), $cache_metadata)); - - // Remove default html wrapping elements. - if ($result['element'] == 'div' || $result['element'] == 'span') { - unset($result['element']); - } - - // Collect cache metadata. - $cache_metadata->addCacheableDependency($element); - - $normalized_slots = $this->normalizeSlots($element, $cache_metadata); - $result = array_merge($result, $normalized_slots); - return $result; - } - - /** - * Normalize slots for non-HTML format. + * Normalize slots. * * @param \Drupal\custom_elements\CustomElement $element * The element for which to normalize slots. @@ -267,31 +151,6 @@ class CustomElementNormalizer implements NormalizerInterface { return array_combine($keys, $array); } - /** - * Helper method to convert an array to a string representation. - * - * @param array $array - * The array to convert to string. - * - * @return string - * String representation of the array. 
- */ - protected function stringifyArray(array $array): string { - $strings = []; - foreach ($array as $item) { - if ($item instanceof CustomElement) { - $strings[] = $this->normalizeToHtml($item, new BubbleableMetadata()); - } - elseif (is_array($item)) { - $strings[] = $this->stringifyArray($item); - } - else { - $strings[] = htmlspecialchars((string) $item); - } - } - return implode('', $strings); - } - /** * {@inheritDoc} */ diff --git a/src/HtmlToCustomElement.php b/src/HtmlParser.php similarity index 77% rename from src/HtmlToCustomElement.php rename to src/HtmlParser.php index 50290c4..b5236ca 100644 --- a/src/HtmlToCustomElement.php +++ b/src/HtmlParser.php @@ -5,15 +5,14 @@ namespace Drupal\custom_elements; use Drupal\custom_elements\CustomElement; use Masterminds\HTML5; use Symfony\Component\DependencyInjection\ContainerInterface; -use DOMNode; /** - * Class HtmlToCustomElement. + * Class HtmlParser. * * Converts HTML content into a tree of custom elements. * This class skips the top-level <html> element if present. */ -class HtmlToCustomElement { +class HtmlParser { /** * The HTML5 parser for processing HTML content. @@ -23,7 +22,7 @@ class HtmlToCustomElement { protected HTML5 $html5Parser; /** - * Constructs a new HtmlToCustomElement object. + * Constructs a new HtmlParser object. * * @param \Masterminds\HTML5 $html5_parser * An instance of the HTML5 parser. @@ -33,7 +32,7 @@ class HtmlToCustomElement { } /** - * Creates an instance of the HtmlToCustomElement using dependency injection. + * Creates an instance of the HtmlParser using dependency injection. * * @param \Symfony\Component\DependencyInjection\ContainerInterface $container * The container to retrieve services from. 
@@ -93,35 +92,15 @@ class HtmlToCustomElement { $element->setAttribute($attr->nodeName, $attr->nodeValue); } - $hasContent = FALSE; foreach ($node->childNodes as $childNode) { $childElement = $this->convertNode($childNode); if ($childElement) { // Ensure all children are added to 'content' slot for consistency. $element->addSlot('content', $childElement); - $hasContent = TRUE; } } - // Skip elements without content or attributes unless they're self-closing. - if (!$hasContent && empty($element->getAttributes()) && !$this->isSelfClosing($tagName)) { - return NULL; - } - return $element; } - /** - * Checks if a tag name represents a self-closing HTML element. - * - * @param string $tagName - * The tag name to check. - * - * @return bool - * True if the tag is self-closing, false otherwise. - */ - protected function isSelfClosing(string $tagName): bool { - return in_array(strtolower($tagName), CustomElement::getNoEndTags()); - } - } -- GitLab From a577aceb0c19fc7a8575407861d143f890d1e775 Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Mon, 24 Feb 2025 12:02:52 +0100 Subject: [PATCH 6/8] rename tests --- src/CustomElementNormalizer.php | 161 ++++++++++++- tests/src/Kernel/HtmlParserTest.php | 80 ++++++ tests/src/Kernel/fixtures/testMarkup.html | 281 +++++++++++----------- 3 files changed, 374 insertions(+), 148 deletions(-) create mode 100644 tests/src/Kernel/HtmlParserTest.php diff --git a/src/CustomElementNormalizer.php b/src/CustomElementNormalizer.php index 219fa5b..7666005 100644 --- a/src/CustomElementNormalizer.php +++ b/src/CustomElementNormalizer.php @@ -7,21 +7,36 @@ use Drupal\Core\Render\BubbleableMetadata; use Symfony\Component\Serializer\Normalizer\NormalizerInterface; /** - * Formats a custom element structure into an array. + * Formats a custom element structure into an array or HTML string. */ class CustomElementNormalizer implements NormalizerInterface { + /** + * List of boolean attributes that do not need a value. 
+ * + * @var array + */ + protected const BOOLEAN_HTML_ATTRIBUTES = [ + 'checked', 'selected', 'disabled', 'readonly', + 'multiple', 'required', 'autofocus', 'formnovalidate', 'novalidate', + ]; + /** * {@inheritdoc} */ public function normalize(mixed $object, ?string $format = NULL, array $context = []): array|string|int|float|bool|\ArrayObject|null { $cache_metadata = $context['cache_metadata'] ?? new BubbleableMetadata(); + + if ($format === 'html') { + return $this->normalizeToHtml($object, $cache_metadata); + } + $result = $this->normalizeCustomElement($object, $cache_metadata); // By default, convert keys in the outer result array to be valid JS // identifiers. (Actually, // https://vuejs.org/guide/components/registration.html indicates that // PascalCase names, not camelCase, are valid identifiers - but camelCase - // was used since the noram was introduced in v2.) 'key_casing' context + // was used since the norm was introduced in v2.) 'key_casing' context // parameter can override this. if (!isset($context['key_casing']) || $context['key_casing'] !== 'ignore') { $result = $this->convertKeysToCamelCase($result); @@ -47,11 +62,11 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized custom element. */ - protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata) { + protected function normalizeCustomElement(CustomElement $element, BubbleableMetadata $cache_metadata): array { $result = ['element' => $element->getPrefixedTag()]; $result = array_merge($result, $this->normalizeAttributes($element->getAttributes(), $cache_metadata)); - // Remove dumb default html wrapping elements. + // Remove dumb default HTML wrapping elements. if ($result['element'] == 'div' || $result['element'] == 'span') { unset($result['element']); } @@ -76,7 +91,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized custom element attributes. 
*/ - protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata) { + protected function normalizeAttributes(array $attributes, BubbleableMetadata $cache_metadata): array { $result = []; foreach ($attributes as $key => $value) { if ($key == 'slot') { @@ -89,7 +104,7 @@ class CustomElementNormalizer implements NormalizerInterface { } /** - * Normalize slots. + * Normalize slots for non-HTML format. * * @param \Drupal\custom_elements\CustomElement $element * The element for which to normalize slots. @@ -99,7 +114,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Normalized slots. */ - protected function normalizeSlots(CustomElement $element, BubbleableMetadata $cache_metadata) { + protected function normalizeSlots(CustomElement $element, BubbleableMetadata $cache_metadata): array { $data = []; foreach ($element->getSortedSlotsByName() as $slot_key => $slot_entries) { $slot_data = []; @@ -138,7 +153,7 @@ class CustomElementNormalizer implements NormalizerInterface { * @return array * Converted keys. */ - protected function convertKeysToCamelCase(array $array) { + protected function convertKeysToCamelCase(array $array): array { $keys = array_map(function ($key) use (&$array) { if (is_array($array[$key])) { $array[$key] = $this->convertKeysToCamelCase($array[$key]); @@ -160,4 +175,134 @@ class CustomElementNormalizer implements NormalizerInterface { ]; } + /** + * Normalize custom element to HTML string. + * + * @param \Drupal\custom_elements\CustomElement $element + * The custom element. + * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata + * The cache metadata. + * + * @return string + * HTML string representation of the custom element. 
+   */
+  protected function normalizeToHtml(CustomElement $element, BubbleableMetadata $cache_metadata): string {
+    // Collect the element's cache metadata, like normalizeCustomElement().
+    $cache_metadata->addCacheableDependency($element);
+
+    $tag = $element->getPrefixedTag();
+    $attributes = $this->normalizeAttributes($element->getAttributes(), $cache_metadata);
+    $slots = $this->normalizeSlotsHtml($element, $cache_metadata);
+
+    if ($tag === 'text') {
+      // We assume there's only content for a text element.
+      return implode('', $slots);
+    }
+
+    $attributeString = $this->normalizeAttributesHtml($attributes);
+
+    // Elements without an end tag are self-closed and carry no content.
+    if (in_array(strtolower($tag), CustomElement::getNoEndTags(), TRUE)) {
+      return "<{$tag}{$attributeString} />";
+    }
+
+    $content = implode('', $slots);
+    return "<{$tag}{$attributeString}>{$content}</{$tag}>";
+  }
+
+  /**
+   * Convert attributes array to string format for HTML.
+   *
+   * @param array $attributes
+   *   The attributes array.
+   *
+   * @return string
+   *   Attributes as a string suitable for HTML.
+   */
+  protected function normalizeAttributesHtml(array $attributes): string {
+    $attributeString = '';
+    foreach ($attributes as $key => $value) {
+      if ($key === 'slot') {
+        continue;
+      }
+
+      if (in_array(strtolower((string) $key), self::BOOLEAN_HTML_ATTRIBUTES, TRUE)) {
+        // For boolean attributes, if the value is truthy (not 'false' or 0),
+        // we just add the attribute name.
+        if ($value !== FALSE && $value !== 'false' && $value !== 0) {
+          $attributeString .= " {$key}";
+        }
+        // If the value is explicitly false or 0, we skip adding this attribute.
+      }
+      else {
+        // Cast first: values may be non-strings (slot content is cast as well).
+        $attributeString .= " {$key}=\"" . htmlspecialchars((string) $value) . "\"";
+      }
+    }
+    return $attributeString;
+  }
+
+  /**
+   * Normalize slots to HTML strings.
+   *
+   * @param \Drupal\custom_elements\CustomElement $element
+   *   The element for which to normalize slots.
+   * @param \Drupal\Core\Render\BubbleableMetadata $cache_metadata
+   *   The cache metadata.
+ * + * @return array + * An array of HTML strings for each slot. + */ + protected function normalizeSlotsHtml(CustomElement $element, BubbleableMetadata $cache_metadata): array { + $html = []; + foreach ($element->getSortedSlotsByName() as $slot_key => $slot_entries) { + foreach ($slot_entries as $slot) { + if (isset($slot['content'])) { + // Treat content as an array for consistent processing. + $contentItems = is_array($slot['content']) ? $slot['content'] : [$slot['content']]; + foreach ($contentItems as $item) { + if ($item instanceof CustomElement) { + $html[] = $this->normalizeToHtml($item, $cache_metadata); + } + elseif ($item instanceof MarkupInterface) { + $html[] = (string) $item; + } + elseif (is_array($item)) { + // Recursively process nested arrays. + $html = array_merge($html, $this->normalizeSlotsHtmlItem($item, $cache_metadata)); + } + else { + // Handle scalars (strings, numbers, etc.). + $html[] = htmlspecialchars((string) $item); + } + } + } + } + } + return $html; + } + + /** + * Flattens a slot content item or nested array of items to HTML strings. + */ + private function normalizeSlotsHtmlItem($item, BubbleableMetadata $cache_metadata): array { + $result = []; + if (is_array($item)) { + foreach ($item as $subItem) { + $result = array_merge($result, $this->normalizeSlotsHtmlItem($subItem, $cache_metadata)); + } + } + else { + if ($item instanceof CustomElement) { + $result[] = $this->normalizeToHtml($item, $cache_metadata); + } + elseif ($item instanceof MarkupInterface) { + $result[] = (string) $item; + } + else { + $result[] = htmlspecialchars((string) $item); + } + } + return $result; + } } diff --git a/tests/src/Kernel/HtmlParserTest.php b/tests/src/Kernel/HtmlParserTest.php new file mode 100644 index 0000000..2ee7adc --- /dev/null +++ b/tests/src/Kernel/HtmlParserTest.php @@ -0,0 +1,80 @@ +<?php + +namespace Drupal\Tests\custom_elements\Kernel; + +use Drupal\KernelTests\KernelTestBase; + +/** + * Tests the HtmlParser class. 
+ * + * @group custom_elements + */ +class HtmlParserTest extends KernelTestBase { + + /** + * The HtmlParser service. + * + * @var \Drupal\custom_elements\HtmlParser + */ + protected $htmlParser; + + /** + * The CustomElementNormalizer service. + * + * @var \Drupal\custom_elements\CustomElementNormalizer + */ + protected $normalizer; + + /** + * {@inheritdoc} + */ + protected static $modules = ['custom_elements']; + + /** + * {@inheritdoc} + */ + protected function setUp(): void { + parent::setUp(); + $this->htmlParser = $this->container->get('custom_elements.html_parser'); + $this->normalizer = $this->container->get('custom_elements.normalizer'); + } + + /** + * Tests converting HTML content to custom elements and back. + */ + public function testHtmlToCustomElementAndBack() { + // Load HTML content from a fixture file for better readability. + $htmlFilePath = __DIR__ . '/fixtures/testMarkup.html'; + $html = file_get_contents($htmlFilePath); + + // Convert HTML to custom elements. + $customElement = $this->htmlParser->convertHtmlToCustomElement($html); + + // Normalize custom elements back to HTML. + $normalizedHtml = $this->normalizer->normalize($customElement, 'html'); + + // Assert that the normalized HTML matches the original HTML. + // Html tags are added as a root element by htmlParser conversion. + $this->assertHtmlEquals('<html>' . $html . '</html>', $normalizedHtml); + } + + /** + * Asserts two HTML strings are equivalent after normalization. + */ + protected function assertHtmlEquals(string $expected, string $actual, string $message = ''): void { + $this->assertSame( + $this->normalizeHtmlWhitespace($expected), + $this->normalizeHtmlWhitespace($actual), + $message ?: 'Rendered HTML should match expected output' + ); + } + + /** + * Normalizes HTML whitespace for consistent comparisons. 
+ */ + protected function normalizeHtmlWhitespace(string $html): string { + $html = preg_replace("/ *\n */m", "", $html); + return preg_replace("/> +</", "><", $html); + } + +} diff --git a/tests/src/Kernel/fixtures/testMarkup.html b/tests/src/Kernel/fixtures/testMarkup.html index 9486ffa..c811075 100644 --- a/tests/src/Kernel/fixtures/testMarkup.html +++ b/tests/src/Kernel/fixtures/testMarkup.html @@ -1,145 +1,146 @@ <body> -<header> - <h1>Comprehensive HTML Example</h1> - <nav> - <ul> - <li><a href="#section1">Section 1</a></li> - <li><a href="#section2">Section 2</a></li> - <li><a href="#section3">Section 3</a></li> - </ul> - </nav> -</header> -<main> - <article> - <header> - <h2>Article Header</h2> - </header> - <p>This is a paragraph within an article. Here is some <strong>strong text</strong> and <em>emphasized text</em>with an <a href="#">example link</a>.</p> - <figure> - <img src="example.jpg" alt="Example Image" /> - <figcaption>Figure caption for the example image.</figcaption> - </figure> - <section id="section1"> - <header> - <h3>Section 1: Content and Lists</h3> - </header> - <p>Content in Section 1 includes various elements:</p> - <ul> - <li>Unordered list item 1</li> - <li>Unordered list item 2</li> - <li>Unordered list item 3</li> - </ul> - <ol> - <li>Ordered list item 1</li> - <li>Ordered list item 2</li> - <li>Ordered list item 3</li> - </ol> - <blockquote cite="https://example.com"> - <p>This is a blockquote with a citation.</p> - </blockquote> - <aside> - <p>This aside provides additional context or a sidebar note within Section 1.</p> - </aside> - </section> - <section id="section2"> - <header> - <h3>Section 2: Multimedia and Forms</h3> - </header> - <p>Section 2 includes multimedia content and a sample form.</p> - <video width="320" height="240" controls=""> - <source src="movie.mp4" type="video/mp4" /> - Your browser does not support the video tag. 
- </video> - <audio controls=""> - <source src="audio.mp3" type="audio/mpeg" /> - Your browser does not support the audio element. - </audio> - <canvas id="myCanvas" width="200" height="100" style="border:1px solid #000;"></canvas> - <form action="#" method="post"> - <fieldset> - <legend>Contact Form</legend> - <label for="name">Name:</label> - <input type="text" id="name" name="name" placeholder="Your Name" /><br /> - <label for="email">Email:</label> - <input type="email" id="email" name="email" placeholder="you@example.com" /><br /> - <label for="message">Message:</label> - <textarea id="message" name="message" rows="4" cols="50">Enter your message here...</textarea><br /> - <button type="submit">Submit</button> - </fieldset> - </form> - </section> - <section id="section3"> - <header> - <h3>Section 3: Miscellaneous Elements</h3> - </header> - <p>This section demonstrates a variety of HTML elements:</p> - <dl> - <dt>Definition Term 1</dt> - <dd>Definition for term 1.</dd> - <dt>Definition Term 2</dt> - <dd>Definition for term 2.</dd> - </dl> - <table border="1"> - <caption>Sample Data Table</caption> - <thead> - <tr> - <th>Header 1</th> - <th>Header 2</th> - <th>Header 3</th> - </tr> - </thead> - <tbody> - <tr> - <td>Data 1</td> - <td>Data 2</td> - <td>Data 3</td> - </tr> - <tr> - <td>Data A</td> - <td>Data B</td> - <td>Data C</td> - </tr> - </tbody> - <tfoot> - <tr> - <td colspan="3">Footer information</td> - </tr> - </tfoot> - </table> - <details> - <summary>Additional Details</summary> - <p>Here is some extra information that can be toggled.</p> - </details> - <mark>Highlighted text</mark> - <time datetime="2025-02-11">February 11, 2025</time> - <progress value="70" max="100">70%</progress> - <meter value="0.7">70%</meter> - <output id="result">Result output here</output> - <code>console.log('Inline code snippet');</code> - <var>variableName</var> - <samp>Sample output text</samp> - </section> - </article> - <aside> - <h2>Sidebar</h2> - <p>This sidebar 
contains additional navigation and context.</p> + <header> + <h1>Comprehensive HTML Example</h1> <nav> <ul> - <li><a href="#">Sidebar Link 1</a></li> - <li><a href="#">Sidebar Link 2</a></li> - <li><a href="#">Sidebar Link 3</a></li> + <li><a href="#section1">Section 1</a></li> + <li><a href="#section2">Section 2</a></li> + <li><a href="#section3">Section 3</a></li> </ul> </nav> - </aside> -</main> -<footer> - <p>2025 Comprehensive HTML Example. All rights reserved.</p> - <address>Contact us: <a href="mailto:info@example.com">info@example.com</a></address> -</footer> -<template id="myTemplate"> - <div>This is template content that is not rendered until instantiated.</div> -</template> -<noscript> - <p>Your browser does not support JavaScript or it is disabled.</p> -</noscript> -</body> + </header> + <main> + <article> + <header> + <h2>Article Header</h2> + </header> + <p>This is a paragraph within an article. Here is some <strong>strong text</strong> and <em>emphasized + text</em>with an <a href="#">example link</a>.</p> + <figure> + <img src="example.jpg" alt="Example Image" /> + <figcaption>Figure caption for the example image.</figcaption> + </figure> + <section id="section1"> + <header> + <h3>Section 1: Content and Lists</h3> + </header> + <p>Content in Section 1 includes various elements:</p> + <ul> + <li>Unordered list item 1</li> + <li>Unordered list item 2</li> + <li>Unordered list item 3</li> + </ul> + <ol> + <li>Ordered list item 1</li> + <li>Ordered list item 2</li> + <li>Ordered list item 3</li> + </ol> + <blockquote cite="https://example.com"> + <p>This is a blockquote with a citation.</p> + </blockquote> + <aside> + <p>This aside provides additional context or a sidebar note within Section 1.</p> + </aside> + </section> + <section id="section2"> + <header> + <h3>Section 2: Multimedia and Forms</h3> + </header> + <p>Section 2 includes multimedia content and a sample form.</p> + <video width="320" height="240" controls=""> + <source src="movie.mp4" 
type="video/mp4" /> + Your browser does not support the video tag. + </video> + <audio controls=""> + <source src="audio.mp3" type="audio/mpeg" /> + Your browser does not support the audio element. + </audio> + <canvas id="myCanvas" width="200" height="100" style="border:1px solid #000;"></canvas> + <form action="#" method="post"> + <fieldset> + <legend>Contact Form</legend> + <label for="name">Name:</label> + <input type="text" id="name" name="name" placeholder="Your Name" /><br /> + <label for="email">Email:</label> + <input type="email" id="email" name="email" placeholder="you@example.com" /><br /> + <label for="message">Message:</label> + <textarea id="message" name="message" rows="4" cols="50">Enter your message here...</textarea><br /> + <button type="submit">Submit</button> + </fieldset> + </form> + </section> + <section id="section3"> + <header> + <h3>Section 3: Miscellaneous Elements</h3> + </header> + <p>This section demonstrates a variety of HTML elements:</p> + <dl> + <dt>Definition Term 1</dt> + <dd>Definition for term 1.</dd> + <dt>Definition Term 2</dt> + <dd>Definition for term 2.</dd> + </dl> + <table border="1"> + <caption>Sample Data Table</caption> + <thead> + <tr> + <th>Header 1</th> + <th>Header 2</th> + <th>Header 3</th> + </tr> + </thead> + <tbody> + <tr> + <td>Data 1</td> + <td>Data 2</td> + <td>Data 3</td> + </tr> + <tr> + <td>Data A</td> + <td>Data B</td> + <td>Data C</td> + </tr> + </tbody> + <tfoot> + <tr> + <td colspan="3">Footer information</td> + </tr> + </tfoot> + </table> + <details> + <summary>Additional Details</summary> + <p>Here is some extra information that can be toggled.</p> + </details> + <mark>Highlighted text</mark> + <time datetime="2025-02-11">February 11, 2025</time> + <progress value="70" max="100">70%</progress> + <meter value="0.7">70%</meter> + <output id="result">Result output here</output> + <code>console.log('Inline code snippet');</code> + <var>variableName</var> + <samp>Sample output text</samp> + </section> 
+ </article> + <aside> + <h2>Sidebar</h2> + <p>This sidebar contains additional navigation and context.</p> + <nav> + <ul> + <li><a href="#">Sidebar Link 1</a></li> + <li><a href="#">Sidebar Link 2</a></li> + <li><a href="#">Sidebar Link 3</a></li> + </ul> + </nav> + </aside> + </main> + <footer> + <p>2025 Comprehensive HTML Example. All rights reserved.</p> + <address>Contact us: <a href="mailto:info@example.com">info@example.com</a></address> + </footer> + <template id="myTemplate"> + <div>This is template content that is not rendered until instantiated.</div> + </template> + <noscript> + <p>Your browser does not support JavaScript or it is disabled.</p> + </noscript> +</body> \ No newline at end of file -- GitLab From 0f65771d72628cf2c76d23f97e2c94d56ff848dd Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Mon, 24 Feb 2025 12:55:45 +0100 Subject: [PATCH 7/8] remove old test --- tests/src/Kernel/HtmlToCustomElementTest.php | 80 -------------------- 1 file changed, 80 deletions(-) delete mode 100644 tests/src/Kernel/HtmlToCustomElementTest.php diff --git a/tests/src/Kernel/HtmlToCustomElementTest.php b/tests/src/Kernel/HtmlToCustomElementTest.php deleted file mode 100644 index 325e5df..0000000 --- a/tests/src/Kernel/HtmlToCustomElementTest.php +++ /dev/null @@ -1,80 +0,0 @@ -<?php - -namespace Drupal\Tests\custom_elements\Kernel; - -use Drupal\KernelTests\KernelTestBase; - -/** - * Tests the HtmlToCustomElement class. - * - * @group custom_elements - */ -class HtmlToCustomElementTest extends KernelTestBase { - - /** - * The HtmlToCustomElement service. - * - * @var \Drupal\custom_elements\HtmlToCustomElement - */ - protected $htmlToCustomElement; - - /** - * The CustomElementNormalizer service. 
- * - * @var \Drupal\custom_elements\CustomElementNormalizer - */ - protected $normalizer; - - /** - * {@inheritdoc} - */ - protected static $modules = ['custom_elements']; - - /** - * {@inheritdoc} - */ - protected function setUp(): void { - parent::setUp(); - $this->htmlToCustomElement = $this->container->get('custom_elements.html_to_custom_element'); - $this->normalizer = $this->container->get('custom_elements.normalizer'); - } - - /** - * Tests converting HTML content to custom elements and back. - */ - public function testHtmlToCustomElementAndBack() { - // Load HTML content from a fixture file for better readability. - $htmlFilePath = __DIR__ . '/fixtures/testMarkup.html'; - $html = file_get_contents($htmlFilePath); - - // Convert HTML to custom elements. - $customElement = $this->htmlToCustomElement->convertHtmlToCustomElement($html); - - // Normalize custom elements back to HTML. - $normalizedHtml = $this->normalizer->normalize($customElement, 'html'); - - // Assert that the normalized HTML matches the original HTML. - // Html tags are added as a root element by htmlToCustomElement conversion. - $this->assertHtmlEquals('<html>' . $html . '</html>', $normalizedHtml); - } - - /** - * Asserts two HTML strings are equivalent after normalization. - */ - protected function assertHtmlEquals(string $expected, string $actual, string $message = ''): void { - $this->assertSame( - $this->normalizeHtmlWhitespace($expected), - $this->normalizeHtmlWhitespace($actual), - $message ?: 'Rendered HTML should match expected output' - ); - } - - /** - * Normalizes HTML whitespace for consistent comparisons. 
- */ - protected function normalizeHtmlWhitespace(string $html): string { - $html = preg_replace("/ *\n */m", "", $html); - return preg_replace("/> +</", "><", $html); - } - -} -- GitLab From 7482dce2d10369eb90ef22abe33392870743a5e1 Mon Sep 17 00:00:00 2001 From: benellefimostfa <benellefimostfa@gmail.com> Date: Mon, 24 Feb 2025 13:49:45 +0100 Subject: [PATCH 8/8] remove unnecessary html5 DI --- custom_elements.services.yml | 3 --- src/HtmlParser.php | 22 ++-------------------- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/custom_elements.services.yml b/custom_elements.services.yml index 676b32d..b72094f 100644 --- a/custom_elements.services.yml +++ b/custom_elements.services.yml @@ -41,6 +41,3 @@ services: - { name: custom_elements_processor, priority: -50 } custom_elements.html_parser: class: Drupal\custom_elements\HtmlParser - arguments: ['@html5_parser'] - html5_parser: - class: Masterminds\HTML5 diff --git a/src/HtmlParser.php b/src/HtmlParser.php index b5236ca..f9c1825 100644 --- a/src/HtmlParser.php +++ b/src/HtmlParser.php @@ -4,7 +4,6 @@ namespace Drupal\custom_elements; use Drupal\custom_elements\CustomElement; use Masterminds\HTML5; -use Symfony\Component\DependencyInjection\ContainerInterface; /** * Class HtmlParser. @@ -23,25 +22,9 @@ class HtmlParser { /** * Constructs a new HtmlParser object. - * - * @param \Masterminds\HTML5 $html5_parser - * An instance of the HTML5 parser. - */ - public function __construct(HTML5 $html5_parser) { - $this->html5Parser = $html5_parser; - } - - /** - * Creates an instance of the HtmlParser using dependency injection. - * - * @param \Symfony\Component\DependencyInjection\ContainerInterface $container - * The container to retrieve services from. 
- * - * @return static */ - public static function create(ContainerInterface $container): static { - $html5_parser = $container->get('html5_parser'); - return new static($html5_parser); + public function __construct() { + $this->html5Parser = new HTML5(); } /** @@ -102,5 +85,4 @@ class HtmlParser { return $element; } - } -- GitLab