From a61ad6764e940ef63444528732fd77537ddcc58f Mon Sep 17 00:00:00 2001
From: Alex Pott <alex.a.pott@googlemail.com>
Date: Wed, 25 Sep 2024 16:02:56 +0200
Subject: [PATCH] Issue #1630568 by sokru, Laureatus, quietone, smustgrave, ravi.shankar: Validate that uploaded .po files are UTF8

---
 .../Constraint/FileEncodingConstraint.php     |  34 ++++++
 .../FileEncodingConstraintValidator.php       |  66 +++++++++++
 .../FileEncodingConstraintValidatorTest.php   |  93 ++++++++++++++++
 core/modules/locale/src/Form/ImportForm.php   |   1 +
 .../Functional/LocaleImportFunctionalTest.php |  30 ++++-
 5 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraint.php
 create mode 100644 core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraintValidator.php
 create mode 100644 core/modules/file/tests/src/Kernel/Plugin/Validation/Constraint/FileEncodingConstraintValidatorTest.php

diff --git a/core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraint.php b/core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraint.php
new file mode 100644
index 000000000000..5416fd760423
--- /dev/null
+++ b/core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraint.php
@@ -0,0 +1,34 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Drupal\file\Plugin\Validation\Constraint;
+
+use Drupal\Core\StringTranslation\TranslatableMarkup;
+use Drupal\Core\Validation\Attribute\Constraint;
+use Symfony\Component\Validator\Constraint as SymfonyConstraint;
+
+/**
+ * Defines an encoding constraint for files.
+ */
+#[Constraint(
+  id: 'FileEncoding',
+  label: new TranslatableMarkup('File encoding', [], ['context' => 'Validation'])
+)]
+class FileEncodingConstraint extends SymfonyConstraint {
+
+  /**
+   * The error message, using the %detected and %encoding placeholders.
+   *
+   * @var string
+   */
+  public string $message = "The file is encoded with %detected. It must be encoded with %encoding";
+
+  /**
+   * The allowed file encodings; a file must be valid in at least one.
+   *
+   * @var string[]
+   */
+  public array $encodings;
+
+}
diff --git a/core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraintValidator.php b/core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraintValidator.php
new file mode 100644
index 000000000000..e6b580c80d69
--- /dev/null
+++ b/core/modules/file/src/Plugin/Validation/Constraint/FileEncodingConstraintValidator.php
@@ -0,0 +1,66 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Drupal\file\Plugin\Validation\Constraint;
+
+use Symfony\Component\Validator\Constraint;
+use Symfony\Component\Validator\Exception\UnexpectedTypeException;
+
+/**
+ * Validates the file encoding constraint.
+ */
+class FileEncodingConstraintValidator extends BaseFileConstraintValidator {
+
+  /**
+   * {@inheritdoc}
+   */
+  public function validate(mixed $value, Constraint $constraint): void {
+    /** @var \Drupal\file\Entity\FileInterface $file */
+    $file = $this->assertValueIsFile($value);
+    if (!$constraint instanceof FileEncodingConstraint) {
+      throw new UnexpectedTypeException($constraint, FileEncodingConstraint::class);
+    }
+
+    $encodings = $constraint->encodings;
+    // Without any allowed encoding there is nothing to validate against.
+    if ($encodings === []) {
+      return;
+    }
+
+    $data = file_get_contents($file->getFileUri());
+    if ($data !== FALSE && $this->matchesAnyEncoding($data, $encodings)) {
+      return;
+    }
+
+    // Report a single violation listing every allowed encoding. A file that
+    // cannot be read, or whose encoding cannot be detected, is reported as
+    // 'unknown' instead of passing FALSE on as a placeholder value.
+    $detected = $data === FALSE ? FALSE : mb_detect_encoding($data);
+    $this->context->addViolation($constraint->message, [
+      '%encoding' => implode(', ', $encodings),
+      '%detected' => $detected ?: 'unknown',
+    ]);
+  }
+
+  /**
+   * Checks whether the data is valid in at least one of the given encodings.
+   *
+   * @param string $data
+   *   The file data.
+   * @param string[] $encodings
+   *   The allowed encodings.
+   *
+   * @return bool
+   *   TRUE if the data is valid in at least one encoding, FALSE otherwise.
+   */
+  protected function matchesAnyEncoding(string $data, array $encodings): bool {
+    foreach ($encodings as $encoding) {
+      if (mb_check_encoding($data, $encoding)) {
+        return TRUE;
+      }
+    }
+    return FALSE;
+  }
+
+}
diff --git a/core/modules/file/tests/src/Kernel/Plugin/Validation/Constraint/FileEncodingConstraintValidatorTest.php b/core/modules/file/tests/src/Kernel/Plugin/Validation/Constraint/FileEncodingConstraintValidatorTest.php
new file mode 100644
index 000000000000..1dd03fa84346
--- /dev/null
+++ b/core/modules/file/tests/src/Kernel/Plugin/Validation/Constraint/FileEncodingConstraintValidatorTest.php
@@ -0,0 +1,93 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Drupal\Tests\file\Kernel\Plugin\Validation\Constraint;
+
+use Drupal\file\Entity\File;
+use Drupal\Tests\file\Kernel\Validation\FileValidatorTestBase;
+
+// cspell:ignore räme
+
+/**
+ * Tests the FileEncodingConstraintValidator.
+ *
+ * @group file
+ * @coversDefaultClass \Drupal\file\Plugin\Validation\Constraint\FileEncodingConstraintValidator
+ */
+class FileEncodingConstraintValidatorTest extends FileValidatorTestBase {
+
+  /**
+   * Tests the FileEncodingConstraintValidator.
+   *
+   * @param array $file_properties
+   *   The properties of the file being validated.
+   * @param string[] $encodings
+   *   An array of the allowed file encodings.
+   * @param string[] $expected_errors
+   *   The expected error messages as string.
+   *
+   * @dataProvider providerTestFileValidateEncodings
+   * @covers ::validate
+   */
+  public function testFileEncodings(array $file_properties, array $encodings, array $expected_errors): void {
+    $data = 'Räme';
+    $data = mb_convert_encoding($data, $file_properties['encoding']);
+    file_put_contents($file_properties['uri'], $data);
+    $file = File::create($file_properties);
+    // Validate the file against the allowed encodings.
+    $validators = [
+      'FileEncoding' => [
+        'encodings' => $encodings,
+      ],
+    ];
+
+    $violations = $this->validator->validate($file, $validators);
+    $actual_errors = [];
+    foreach ($violations as $violation) {
+      $actual_errors[] = $violation->getMessage();
+    }
+    $this->assertEquals($expected_errors, $actual_errors);
+  }
+
+  /**
+   * Data provider for ::testFileEncodings.
+   *
+   * @return array[][]
+   *   The test cases.
+   */
+  public static function providerTestFileValidateEncodings(): array {
+    $utf8_encoded_txt_file_properties = [
+      'filename' => 'druplicon.txt',
+      'uri' => 'public://druplicon.txt',
+      'status' => 0,
+      'encoding' => 'UTF-8',
+    ];
+    $windows1252_encoded_txt_file = [
+      'filename' => 'druplicon-win.txt',
+      'uri' => 'public://druplicon-win.txt',
+      'status' => 1,
+      'encoding' => 'windows-1252',
+    ];
+    return [
+      'UTF-8 encoded file validated with "UTF-8" encoding' => [
+        'file_properties' => $utf8_encoded_txt_file_properties,
+        'encodings' => ['UTF-8'],
+        'expected_errors' => [],
+      ],
+      'Windows-1252 encoded file validated with "UTF-8" encoding' => [
+        'file_properties' => $windows1252_encoded_txt_file,
+        'encodings' => ['UTF-8'],
+        'expected_errors' => [
+          'The file is encoded with ASCII. It must be encoded with UTF-8',
+        ],
+      ],
+      'Windows-1252 encoded file validated with "UTF-8" and "Windows-1252" encodings' => [
+        'file_properties' => $windows1252_encoded_txt_file,
+        'encodings' => ['UTF-8', 'Windows-1252'],
+        'expected_errors' => [],
+      ],
+    ];
+  }
+
+}
diff --git a/core/modules/locale/src/Form/ImportForm.php b/core/modules/locale/src/Form/ImportForm.php
index 132adb194ed7..544332fde100 100644
--- a/core/modules/locale/src/Form/ImportForm.php
+++ b/core/modules/locale/src/Form/ImportForm.php
@@ -101,6 +101,7 @@ public function buildForm(array $form, FormStateInterface $form_state) {
     $validators = [
       'FileExtension' => ['extensions' => 'po'],
       'FileSizeLimit' => ['fileLimit' => Environment::getUploadMaxSize()],
+      'FileEncoding' => ['encodings' => ['UTF-8']],
     ];
     $form['file'] = [
       '#type' => 'file',
diff --git a/core/modules/locale/tests/src/Functional/LocaleImportFunctionalTest.php b/core/modules/locale/tests/src/Functional/LocaleImportFunctionalTest.php
index af93c0e99c25..db50cef06719 100644
--- a/core/modules/locale/tests/src/Functional/LocaleImportFunctionalTest.php
+++ b/core/modules/locale/tests/src/Functional/LocaleImportFunctionalTest.php
@@ -11,7 +11,7 @@
 use Drupal\Tests\BrowserTestBase;
 
 // cspell:ignore chien chiens deutsch januari lundi montag moutons műveletek
-// cspell:ignore svibanj
+// cspell:ignore svibanj räme
 
 /**
  * Tests the import of locale files.
@@ -250,6 +250,9 @@ public function testStandalonePoFile(): void {
     $this->submitForm($search, 'Filter');
     $this->assertSession()->pageTextNotContains('No strings available.');
 
+    // Try importing a .po file with invalid encoding.
+    $this->importPoFile($this->getInvalidEncodedPoFile(), [], ['Windows-1252']);
+    $this->assertSession()->pageTextContains('The file is encoded with ASCII. It must be encoded with UTF-8');
   }
 
   /**
@@ -402,10 +405,15 @@ public function testCreatedLanguageTranslation(): void {
    *   Contents of the .po file to import.
    * @param array $options
    *   (optional) Additional options to pass to the translation import form.
+   * @param string[] $encodings
+   *   (optional) Encodings the contents are converted to before saving.
    */
-  public function importPoFile($contents, array $options = []) {
+  public function importPoFile($contents, array $options = [], array $encodings = []) {
     $file_system = \Drupal::service('file_system');
     $name = $file_system->tempnam('temporary://', "po_") . '.po';
+    foreach ($encodings as $encoding) {
+      $contents = mb_convert_encoding($contents, $encoding);
+    }
     file_put_contents($name, $contents);
     $options['files[file]'] = $name;
     $this->drupalGet('admin/config/regional/translate/import');
@@ -672,6 +680,24 @@ public function getPoFileWithConfigDe() {
 
 msgid "German"
 msgstr "Deutsch"
+EOF;
+  }
+
+  /**
+   * Helper function that returns a .po file with invalid encoding.
+   */
+  public function getInvalidEncodedPoFile() {
+    return <<< EOF
+msgid ""
+msgstr ""
+"Project-Id-Version: Drupal 8\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=Windows-1252\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\\n"
+
+msgid "Swamp"
+msgstr "Räme"
 EOF;
   }
 
-- 
GitLab