From 13ce78a86fc33df5fec4745818528d2fa4806334 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dezs=C5=91=20BICZ=C3=93?= <dezso.biczo@pronovix.com>
Date: Wed, 19 Feb 2025 15:41:19 +0100
Subject: [PATCH 1/2] Add failing test

---
 .../UniqueValuesConstraintValidatorTest.php   | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/core/tests/Drupal/KernelTests/Core/Validation/UniqueValuesConstraintValidatorTest.php b/core/tests/Drupal/KernelTests/Core/Validation/UniqueValuesConstraintValidatorTest.php
index 1171baf76500..7beaec547546 100644
--- a/core/tests/Drupal/KernelTests/Core/Validation/UniqueValuesConstraintValidatorTest.php
+++ b/core/tests/Drupal/KernelTests/Core/Validation/UniqueValuesConstraintValidatorTest.php
@@ -334,4 +334,38 @@ public function testValidationCaseInsensitive(): void {
     $this->assertEquals('field_test_text.0', $violations[0]->getPropertyPath());
   }
 
+  /**
+   * Tests that the unique field validator ignores differences in accents.
+   *
+   * Case 6: Save an entity with an unaccented unique field value, then
+   * validate a second, unsaved entity whose value is the accented spelling of
+   * the same word. The second entity is expected to fail validation.
+   *
+   * @throws \Drupal\Core\Entity\EntityStorageException
+   *
+   * @covers ::validate
+   */
+  public function testValidationAccentSensitive(): void {
+    // Persist a first entity that holds the unaccented spelling.
+    $existing = EntityTestUniqueConstraint::create([
+      'user_id' => 0,
+      'field_test_text' => ['cafe'],
+    ]);
+    $existing->save();
+
+    // Build a second entity with the accented spelling, without saving it.
+    $duplicate = EntityTestUniqueConstraint::create([
+      'user_id' => 0,
+      'field_test_text' => ['café'],
+    ]);
+
+    // Run validation on the unsaved duplicate.
+    $violations = $duplicate->validate();
+
+    // Exactly one violation is expected, reported on the first field item.
+    $this->assertCount(1, $violations, 'Validation error expected for accent-insensitive uniqueness.');
+    $path = $violations[0]->getPropertyPath();
+    $this->assertEquals('field_test_text.0', $path, 'Violation occurred on the expected field.');
+  }
+
 }
-- 
GitLab


From 5b80c85428a62cc51579255164b7d5f3019eb0d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dezs=C5=91=20BICZ=C3=93?= <dezso.biczo@pronovix.com>
Date: Thu, 20 Feb 2025 12:56:20 +0100
Subject: [PATCH 2/2] Add a potential fix

---
 .../Constraint/UniqueFieldValueValidator.php  | 43 ++++++++++++++-----
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/core/lib/Drupal/Core/Validation/Plugin/Validation/Constraint/UniqueFieldValueValidator.php b/core/lib/Drupal/Core/Validation/Plugin/Validation/Constraint/UniqueFieldValueValidator.php
index 3bae5c3a5bce..e0d58259cfe1 100644
--- a/core/lib/Drupal/Core/Validation/Plugin/Validation/Constraint/UniqueFieldValueValidator.php
+++ b/core/lib/Drupal/Core/Validation/Plugin/Validation/Constraint/UniqueFieldValueValidator.php
@@ -123,25 +123,46 @@ public function validate($items, Constraint $constraint): void {
     }
   }
 
+  // cspell:ignore théâtre TRANSLIT
+
   /**
-   * Perform a case-insensitive array intersection, but keep original capitalization.
+   * Intersects arrays ignoring case and accents, keeping original values.
+   *
+   * Values are compared after Unicode decomposition, removal of combining
+   * marks and lowercasing; the returned elements are left untouched.
+   *
+   * @code
+   *   $this->caseInsensitiveArrayIntersect(['café', 'théâtre'], ['CAFE']);
+   *   // Returns: ['café'].
+   * @endcode
    *
    * @param array $orig_values
-   *   The original values to be returned.
+   *   The original values to be returned. Matching elements keep their keys,
+   *   capitalization and accents in the result.
    * @param array $comp_values
-   *   The values to intersect $orig_values with.
+   *   The values to intersect $orig_values with. The comparison is done in a
+   *   case and accent-insensitive manner.
    *
    * @return array
-   *   Elements of $orig_values contained in $comp_values when ignoring
-   *   capitalization.
+   *   Elements of $orig_values that match an element of $comp_values when
+   *   capitalization and diacritical marks are ignored. The returned values
+   *   preserve their original keys and formatting.
    */
   private function caseInsensitiveArrayIntersect(array $orig_values, array $comp_values): array {
-    $lowercase_comp_values = array_map('strtolower', $comp_values);
-    $intersect_map = array_map(fn (string $x) => in_array(strtolower($x), $lowercase_comp_values, TRUE) ? $x : NULL, $orig_values);
-
-    return array_filter($intersect_map, function ($x) {
-      return $x !== NULL;
-    });
+    $normalize = static function (string $value): string {
+      // Decompose to NFD so accents become \p{Mn} marks that can be stripped.
+      // iconv() //TRANSLIT is avoided: its output depends on platform and
+      // locale and can turn distinct non-Latin values into identical '?' runs.
+      $nfd = class_exists(\Normalizer::class) ? \Normalizer::normalize($value, \Normalizer::FORM_D) : FALSE;
+      // Without intl, or for invalid UTF-8, only letter case is ignored.
+      $stripped = is_string($nfd) ? preg_replace('/\p{Mn}+/u', '', $nfd) : NULL;
+      return mb_strtolower($stripped ?? $value);
+    };
+    $normalized_comp_values = array_map($normalize, $comp_values);
+
+    // Filtering instead of mapping keeps the original values and their keys.
+    $matches = static fn ($value): bool => in_array($normalize($value), $normalized_comp_values, TRUE);
+    return array_filter($orig_values, $matches);
   }
 
   /**
-- 
GitLab