diff --git a/core/modules/file/config/install/file.settings.yml b/core/modules/file/config/install/file.settings.yml index 9191a0b8004e61f6174aa7f935f79d7007470530..a3809aaa9d2f99b52b9c4d8299057009e1f27449 100644 --- a/core/modules/file/config/install/file.settings.yml +++ b/core/modules/file/config/install/file.settings.yml @@ -4,3 +4,10 @@ description: icon: directory: 'core/modules/file/icons' make_unused_managed_files_temporary: false +filename_sanitization: + transliterate: false + replace_whitespace: false + replace_non_alphanumeric: false + deduplicate_separators: false + lowercase: false + replacement_character: '-' diff --git a/core/modules/file/config/schema/file.schema.yml b/core/modules/file/config/schema/file.schema.yml index f94fbec950f14c28d23bd71a84058f985ae759aa..aaa74af750c4fa9a7a4abc49d3b615d585fd8637 100644 --- a/core/modules/file/config/schema/file.schema.yml +++ b/core/modules/file/config/schema/file.schema.yml @@ -24,6 +24,28 @@ file.settings: make_unused_managed_files_temporary: type: boolean label: 'Controls if unused files should be marked temporary' + filename_sanitization: + type: mapping + label: 'Uploaded filename sanitization options' + mapping: + transliterate: + type: boolean + label: 'Transliterate' + replace_whitespace: + type: boolean + label: 'Replace whitespace' + replace_non_alphanumeric: + type: boolean + label: 'Replace non-alphanumeric characters except dot, underscore and dash' + deduplicate_separators: + type: boolean + label: 'Replace sequences of dots, underscores and/or dashes with the replacement character' + lowercase: + type: boolean + label: 'Convert to lowercase' + replacement_character: + type: string + label: 'Character to use in replacements' field.storage_settings.file: type: base_entity_reference_field_settings diff --git a/core/modules/file/file.module b/core/modules/file/file.module index 34b00de5c68027bdd7a342b17b839a6b0a8e79ed..840850806999ad811a051cf52c0e90f958c9fb53 100644 --- 
a/core/modules/file/file.module
+++ b/core/modules/file/file.module
@@ -1552,3 +1552,86 @@ function file_field_find_file_reference_column(FieldDefinitionInterface $field)
   }
   return FALSE;
 }
+
+/**
+ * Implements hook_form_FORM_ID_alter().
+ *
+ * Injects the file sanitization options into /admin/config/media/file-system.
+ *
+ * These settings are enforced during upload by the FileEventSubscriber that
+ * listens to the FileUploadSanitizeNameEvent event.
+ *
+ * @see \Drupal\system\Form\FileSystemForm
+ * @see \Drupal\Core\File\Event\FileUploadSanitizeNameEvent
+ * @see \Drupal\file\EventSubscriber\FileEventSubscriber
+ */
+function file_form_system_file_system_settings_alter(array &$form, FormStateInterface $form_state) {
+  $config = \Drupal::config('file.settings');
+  // '#tree' keeps the submitted child values nested under the
+  // 'filename_sanitization' key, which is what the submit handler reads.
+  $form['filename_sanitization'] = [
+    '#type' => 'details',
+    '#title' => t('Sanitize filenames'),
+    '#description' => t('These settings only apply to new files as they are uploaded. Changes here do not affect existing file names.'),
+    '#open' => TRUE,
+    '#tree' => TRUE,
+  ];
+
+  $form['filename_sanitization']['replacement_character'] = [
+    '#type' => 'select',
+    '#title' => t('Replacement character'),
+    '#default_value' => $config->get('filename_sanitization.replacement_character'),
+    '#options' => [
+      '-' => t('Dash (-)'),
+      '_' => t('Underscore (_)'),
+    ],
+    '#description' => t('Used when replacing whitespace, replacing non-alphanumeric characters or transliterating unknown characters.'),
+  ];
+
+  $form['filename_sanitization']['transliterate'] = [
+    '#type' => 'checkbox',
+    '#title' => t('Transliterate'),
+    '#default_value' => $config->get('filename_sanitization.transliterate'),
+    '#description' => t('Transliteration replaces any characters that are not alphanumeric, underscores, periods or hyphens with the replacement character. It ensures filenames only contain ASCII characters. It is recommended to keep transliteration enabled.'),
+  ];
+
+  $form['filename_sanitization']['replace_whitespace'] = [
+    '#type' => 'checkbox',
+    '#title' => t('Replace whitespace with the replacement character'),
+    '#default_value' => $config->get('filename_sanitization.replace_whitespace'),
+  ];
+
+  $form['filename_sanitization']['replace_non_alphanumeric'] = [
+    '#type' => 'checkbox',
+    '#title' => t('Replace non-alphanumeric characters with the replacement character'),
+    '#default_value' => $config->get('filename_sanitization.replace_non_alphanumeric'),
+    '#description' => t('Alphanumeric characters, dots <span aria-hidden="true">(.)</span>, underscores <span aria-hidden="true">(_)</span> and dashes <span aria-hidden="true">(-)</span> are preserved.'),
+  ];
+
+  $form['filename_sanitization']['deduplicate_separators'] = [
+    '#type' => 'checkbox',
+    '#title' => t('Replace sequences of dots, underscores and/or dashes with the replacement character'),
+    '#default_value' => $config->get('filename_sanitization.deduplicate_separators'),
+  ];
+
+  $form['filename_sanitization']['lowercase'] = [
+    '#type' => 'checkbox',
+    '#title' => t('Convert to lowercase'),
+    '#default_value' => $config->get('filename_sanitization.lowercase'),
+  ];
+
+  $form['#submit'][] = 'file_system_settings_submit';
+}
+
+/**
+ * Form submission handler for file system settings form.
+ *
+ * Saves the submitted 'filename_sanitization' values to file.settings.
+ *
+ * @see file_form_system_file_system_settings_alter()
+ */
+function file_system_settings_submit(array &$form, FormStateInterface $form_state) {
+  $config = \Drupal::configFactory()->getEditable('file.settings')
+    ->set('filename_sanitization', $form_state->getValue('filename_sanitization'));
+  $config->save();
+}
diff --git a/core/modules/file/file.post_update.php b/core/modules/file/file.post_update.php
index 22bbdd512ed42bf6ecde0cb839fc40fb1624045e..73c815b1cc0233c03d950cf278ff024527b06d79 100644
--- a/core/modules/file/file.post_update.php
+++ b/core/modules/file/file.post_update.php
@@ -29,3 +29,19 @@ function file_post_update_add_permissions_to_roles(?array &$sandbox = NULL): voi
     return TRUE;
   });
 }
+
+/**
+ * Add default filename sanitization configuration.
+ */
+function file_post_update_add_default_filename_sanitization_configuration(): void {
+  // Every option defaults to off, so FileEventSubscriber leaves uploaded
+  // filenames untouched until a site builder enables an option.
+  $config = \Drupal::configFactory()->getEditable('file.settings');
+  $config->set('filename_sanitization.transliterate', FALSE);
+  $config->set('filename_sanitization.replace_whitespace', FALSE);
+  $config->set('filename_sanitization.replace_non_alphanumeric', FALSE);
+  $config->set('filename_sanitization.deduplicate_separators', FALSE);
+  $config->set('filename_sanitization.lowercase', FALSE);
+  $config->set('filename_sanitization.replacement_character', '-');
+  $config->save();
+}
diff --git a/core/modules/file/file.services.yml b/core/modules/file/file.services.yml
index 64c612cfbbca3fae76b8b1a035e3a6e7a98e1073..370972c5ff0b754b8db32717c180b808258c6675 100644
--- a/core/modules/file/file.services.yml
+++ b/core/modules/file/file.services.yml
@@ -1,4 +1,9 @@
 services:
+  file.event.subscriber:
+    class: Drupal\file\EventSubscriber\FileEventSubscriber
+    arguments: ['@config.factory', '@transliteration', '@language_manager']
+    tags:
+      - { name: event_subscriber }
   file.usage:
     class: Drupal\file\FileUsage\DatabaseFileUsageBackend
     arguments: ['@config.factory', '@database', 'file_usage']
diff --git a/core/modules/file/src/EventSubscriber/FileEventSubscriber.php
b/core/modules/file/src/EventSubscriber/FileEventSubscriber.php
new file mode 100644
index 0000000000000000000000000000000000000000..17608c583fb48589fa5824c8c66da3fbe9fb9cb4
--- /dev/null
+++ b/core/modules/file/src/EventSubscriber/FileEventSubscriber.php
@@ -0,0 +1,141 @@
+<?php
+
+namespace Drupal\file\EventSubscriber;
+
+use Drupal\Component\Transliteration\TransliterationInterface;
+use Drupal\Core\Config\ConfigFactoryInterface;
+use Drupal\Core\Language\LanguageInterface;
+use Drupal\Core\Language\LanguageManagerInterface;
+use Drupal\Core\File\Event\FileUploadSanitizeNameEvent;
+use Symfony\Component\EventDispatcher\EventSubscriberInterface;
+
+/**
+ * Sanitizes uploaded filenames.
+ *
+ * Listens for FileUploadSanitizeNameEvent and rewrites the event's filename
+ * according to the 'filename_sanitization' options stored in file.settings.
+ *
+ * @package Drupal\file\EventSubscriber
+ */
+class FileEventSubscriber implements EventSubscriberInterface {
+
+  /**
+   * Constructs a new file event listener.
+   *
+   * @param \Drupal\Core\Config\ConfigFactoryInterface $configFactory
+   *   The config factory.
+   * @param \Drupal\Component\Transliteration\TransliterationInterface $transliteration
+   *   The transliteration service.
+   * @param \Drupal\Core\Language\LanguageManagerInterface $languageManager
+   *   The language manager.
+   */
+  public function __construct(
+    protected ConfigFactoryInterface $configFactory,
+    protected TransliterationInterface $transliteration,
+    protected LanguageManagerInterface $languageManager,
+  ) {}
+
+  /**
+   * {@inheritdoc}
+   */
+  public static function getSubscribedEvents(): array {
+    return [
+      FileUploadSanitizeNameEvent::class => 'sanitizeFilename',
+    ];
+  }
+
+  /**
+   * Sanitizes the filename of a file being uploaded.
+   *
+   * The extension reported by pathinfo() is split off first and re-attached
+   * unchanged; only the base name is sanitized. Enabled options are applied in
+   * this order: transliterate, replace whitespace, replace non-alphanumeric
+   * characters, de-duplicate separators, lowercase.
+   *
+   * @param \Drupal\Core\File\Event\FileUploadSanitizeNameEvent $event
+   *   File upload sanitize name event.
+   *
+   * @see file_form_system_file_system_settings_alter()
+   */
+  public function sanitizeFilename(FileUploadSanitizeNameEvent $event) {
+    $fileSettings = $this->configFactory->get('file.settings');
+    $transliterate = $fileSettings->get('filename_sanitization.transliterate');
+
+    $filename = $event->getFilename();
+    $extension = pathinfo($filename, PATHINFO_EXTENSION);
+    if ($extension !== '') {
+      $extension = '.' . $extension;
+      $filename = pathinfo($filename, PATHINFO_FILENAME);
+    }
+
+    // Sanitize the filename according to configuration.
+    $alphanumeric = $fileSettings->get('filename_sanitization.replace_non_alphanumeric');
+    $replacement = $fileSettings->get('filename_sanitization.replacement_character');
+    if ($transliterate) {
+      $transliterated_filename = $this->transliteration->transliterate(
+        $filename,
+        $this->languageManager->getCurrentLanguage(LanguageInterface::TYPE_CONTENT)->getId(),
+        $replacement
+      );
+      if (mb_strlen($transliterated_filename) > 0) {
+        $filename = $transliterated_filename;
+      }
+      else {
+        // If transliteration has resulted in a zero length string enable the
+        // 'replace_non_alphanumeric' option and ignore the result of
+        // transliteration.
+        $alphanumeric = TRUE;
+      }
+    }
+    if ($fileSettings->get('filename_sanitization.replace_whitespace')) {
+      $filename = $this->replacePattern('/\s/u', $replacement, trim($filename));
+    }
+    // Only honor replace_non_alphanumeric if transliterate is enabled.
+    if ($transliterate && $alphanumeric) {
+      $filename = $this->replacePattern('/[^0-9A-Za-z_.-]/u', $replacement, $filename);
+    }
+    if ($fileSettings->get('filename_sanitization.deduplicate_separators')) {
+      // Collapse any run of two or more dots, underscores and/or dashes into a
+      // single replacement character. A character class is used on purpose: a
+      // bracketed group such as [(_|\.|\-)] would also match '(', '|' and ')'.
+      $filename = $this->replacePattern('/[_.\-]{2,}/u', $replacement, $filename);
+      // Collapse repeats of the replacement character itself.
+      $filename = $this->replacePattern('/(?:' . preg_quote($replacement, '/') . ')+/u', $replacement, $filename);
+      // Remove replacement character from the end of the filename.
+      $filename = rtrim($filename, $replacement);
+
+      // If there is an extension remove dots from the end of the filename to
+      // prevent duplicate dots.
+      if (!empty($extension)) {
+        $filename = rtrim($filename, '.');
+      }
+    }
+    if ($fileSettings->get('filename_sanitization.lowercase')) {
+      // Force lowercase to prevent issues on case-insensitive file systems.
+      $filename = mb_strtolower($filename);
+    }
+    $event->setFilename($filename . $extension);
+  }
+
+  /**
+   * Runs preg_replace() and keeps the subject if the replacement fails.
+   *
+   * preg_replace() returns NULL on error, for example when a pattern using the
+   * 'u' modifier meets a subject that is not valid UTF-8. Falling back to the
+   * untouched subject avoids silently emptying the filename.
+   *
+   * @param string $pattern
+   *   The regular expression.
+   * @param string|null $replacement
+   *   The replacement string.
+   * @param string $subject
+   *   The string to search and replace in.
+   *
+   * @return string
+   *   The replaced string, or $subject unchanged if preg_replace() failed.
+   */
+  private function replacePattern(string $pattern, ?string $replacement, string $subject): string {
+    return preg_replace($pattern, (string) $replacement, $subject) ?? $subject;
+  }
+
+}
diff --git a/core/modules/file/tests/src/Functional/SaveUploadTest.php b/core/modules/file/tests/src/Functional/SaveUploadTest.php
index d7ffe5a252e7ed062f25c0e46e1a71a738b076b5..863ea5ebec3683e98df0e8b888ae2d9d3268d217 100644
--- a/core/modules/file/tests/src/Functional/SaveUploadTest.php
+++ b/core/modules/file/tests/src/Functional/SaveUploadTest.php
@@ -8,6 +8,8 @@
 use Drupal\file\Entity\File;
 use Drupal\Tests\TestFileCreationTrait;
 
+// cSpell:ignore TÉXT Pácê
+
 /**
  * Tests the file_save_upload() function.
  *
@@ -59,13 +61,20 @@ class SaveUploadTest extends FileManagedTestBase {
    */
   protected $imageExtension;
 
+  /**
+   * The user used by the test.
+   *
+   * @var \Drupal\user\Entity\User
+   */
+  protected $account;
+
   /**
    * {@inheritdoc}
    */
   protected function setUp(): void {
     parent::setUp();
-    $account = $this->drupalCreateUser(['access site reports']);
-    $this->drupalLogin($account);
+    $this->account = $this->drupalCreateUser(['access site reports']);
+    $this->drupalLogin($this->account);
 
     $image_files = $this->drupalGetTestFiles('image');
     $this->image = File::create((array) current($image_files));
@@ -756,4 +765,135 @@ public function testRequired() {
     $this->assertSession()->responseContains('You WIN!');
   }
 
+  /**
+   * Tests filename sanitization.
+ */ + public function testSanitization() { + $file = $this->generateFile('TÉXT-œ', 64, 5, 'text'); + + $this->drupalGet('file-test/upload'); + // Upload a file with a name with uppercase and unicode characters. + $edit = [ + 'files[file_test_upload]' => \Drupal::service('file_system')->realpath($file), + 'extensions' => 'txt', + 'is_image_file' => FALSE, + ]; + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + // Test that the file name has not been sanitized. + $this->assertSession()->responseContains('File name is TÉXT-œ.txt.'); + + // Enable sanitization via the UI. + $admin = $this->createUser(['administer site configuration']); + $this->drupalLogin($admin); + + // For now, just transliterate, with no other transformations. + $options = [ + 'filename_sanitization[transliterate]' => TRUE, + 'filename_sanitization[replace_whitespace]' => FALSE, + 'filename_sanitization[replace_non_alphanumeric]' => FALSE, + 'filename_sanitization[deduplicate_separators]' => FALSE, + 'filename_sanitization[lowercase]' => FALSE, + 'filename_sanitization[replacement_character]' => '-', + ]; + $this->drupalGet('admin/config/media/file-system'); + $this->submitForm($options, 'Save configuration'); + + $this->drupalLogin($this->account); + + // Upload a file with a name with uppercase and unicode characters. + $this->drupalGet('file-test/upload'); + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + // Test that the file name has been transliterated. + $this->assertSession()->responseContains('File name is TEXT-oe.txt.'); + // Make sure we got a message about the rename. + $message = 'Your upload has been renamed to <em class="placeholder">TEXT-oe.txt</em>'; + $this->assertSession()->responseContains($message); + + // Generate another file with a name with All The Things(tm) we care about. 
+ $file = $this->generateFile('S Pácê--táb# #--🙈', 64, 5, 'text'); + $edit = [ + 'files[file_test_upload]' => \Drupal::service('file_system')->realpath($file), + 'extensions' => 'txt', + 'is_image_file' => FALSE, + ]; + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + // Test that the file name has only been transliterated. + $this->assertSession()->responseContains('File name is S Pace--tab# #---.txt.'); + + // Leave transliteration on and enable whitespace replacement. + $this->drupalLogin($admin); + $options['filename_sanitization[replace_whitespace]'] = TRUE; + $this->drupalGet('admin/config/media/file-system'); + $this->submitForm($options, 'Save configuration'); + $this->drupalLogin($this->account); + + // Try again with the monster filename. + $this->drupalGet('file-test/upload'); + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + // Test that the file name has been transliterated and whitespace replaced. + $this->assertSession()->responseContains('File name is S--Pace--tab#-#---.txt.'); + + // Leave transliteration and whitespace replacement on, replace non-alpha. + $this->drupalLogin($admin); + $options['filename_sanitization[replace_non_alphanumeric]'] = TRUE; + $options['filename_sanitization[replacement_character]'] = '_'; + $this->drupalGet('admin/config/media/file-system'); + $this->submitForm($options, 'Save configuration'); + $this->drupalLogin($this->account); + + // Try again with the monster filename. + $this->drupalGet('file-test/upload'); + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + + // Test that the file name has been transliterated, whitespace replaced with + // '_', and non-alphanumeric characters replaced with '_'. + $this->assertSession()->responseContains('File name is S__Pace--tab___--_.txt.'); + + // Now turn on the setting to remove duplicate separators. 
+ $this->drupalLogin($admin); + $options['filename_sanitization[deduplicate_separators]'] = TRUE; + $options['filename_sanitization[replacement_character]'] = '-'; + $this->drupalGet('admin/config/media/file-system'); + $this->submitForm($options, 'Save configuration'); + $this->drupalLogin($this->account); + + // Try again with the monster filename. + $this->drupalGet('file-test/upload'); + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + + // Test that the file name has been transliterated, whitespace replaced, + // non-alphanumeric characters replaced, and duplicate separators removed. + $this->assertSession()->responseContains('File name is S-Pace-tab.txt.'); + + // Finally, check the lowercase setting. + $this->drupalLogin($admin); + $options['filename_sanitization[lowercase]'] = TRUE; + $this->drupalGet('admin/config/media/file-system'); + $this->submitForm($options, 'Save configuration'); + $this->drupalLogin($this->account); + + // Generate another file since we're going to start getting collisions with + // previously uploaded and renamed copies. + $file = $this->generateFile('S Pácê--táb# #--🙈-2', 64, 5, 'text'); + $edit = [ + 'files[file_test_upload]' => \Drupal::service('file_system')->realpath($file), + 'extensions' => 'txt', + 'is_image_file' => FALSE, + ]; + $this->drupalGet('file-test/upload'); + $this->submitForm($edit, 'Submit'); + $this->assertSession()->statusCodeEquals(200); + // Make sure all the sanitization options work as intended. + $this->assertSession()->responseContains('File name is s-pace-tab-2.txt.'); + // Make sure we got a message about the rename. 
+    $message = 'Your upload has been renamed to <em class="placeholder">s-pace-tab-2.txt</em>';
+    $this->assertSession()->responseContains($message);
+  }
+
 }
diff --git a/core/modules/file/tests/src/Unit/SanitizeNameTest.php b/core/modules/file/tests/src/Unit/SanitizeNameTest.php
new file mode 100644
index 0000000000000000000000000000000000000000..dfe3b4675c2162511bc617694726660555662bc9
--- /dev/null
+++ b/core/modules/file/tests/src/Unit/SanitizeNameTest.php
@@ -0,0 +1,248 @@
+<?php
+
+namespace Drupal\Tests\file\Unit;
+
+use Drupal\Component\Transliteration\PhpTransliteration;
+use Drupal\Core\File\Event\FileUploadSanitizeNameEvent;
+use Drupal\Core\Language\Language;
+use Drupal\Core\Language\LanguageInterface;
+use Drupal\Core\Language\LanguageManagerInterface;
+use Drupal\file\EventSubscriber\FileEventSubscriber;
+use Drupal\Tests\UnitTestCase;
+
+// cSpell:ignore TÉXT äöüåøhello aouaohello aeoeueaohello Pácê
+
+/**
+ * Filename sanitization tests.
+ *
+ * @group file
+ */
+class SanitizeNameTest extends UnitTestCase {
+
+  /**
+   * Test file name sanitization.
+   *
+   * @param string $original
+   *   The original filename.
+   * @param string $expected
+   *   The expected filename.
+   * @param array $options
+   *   Array of filename sanitization options, in this order:
+   *   0: boolean Transliterate.
+   *   1: string Replacement character.
+   *   2: boolean Replace whitespace.
+   *   3: boolean Replace non-alphanumeric characters.
+   *   4: boolean De-duplicate separators.
+   *   5: boolean Convert to lowercase.
+   * @param string $language_id
+   *   Optional language code for transliteration. Defaults to 'en'.
+ * + * @dataProvider provideFilenames + * + * @covers \Drupal\file\EventSubscriber\FileEventSubscriber::sanitizeFilename + * @covers \Drupal\Core\File\Event\FileUploadSanitizeNameEvent::__construct + */ + public function testFileNameTransliteration($original, $expected, array $options, $language_id = 'en') { + $sanitization_options = [ + 'transliterate' => $options[0], + 'replacement_character' => $options[1], + 'replace_whitespace' => $options[2], + 'replace_non_alphanumeric' => $options[3], + 'deduplicate_separators' => $options[4], + 'lowercase' => $options[5], + ]; + $config_factory = $this->getConfigFactoryStub([ + 'file.settings' => [ + 'filename_sanitization' => $sanitization_options, + ], + ]); + + $language = new Language(['id' => $language_id]); + $language_manager = $this->prophesize(LanguageManagerInterface::class); + $language_manager->getCurrentLanguage(LanguageInterface::TYPE_CONTENT)->willReturn($language); + + $event = new FileUploadSanitizeNameEvent($original, $language_id); + $subscriber = new FileEventSubscriber($config_factory, new PhpTransliteration(), $language_manager->reveal()); + $subscriber->sanitizeFilename($event); + + // Check the results of the configured sanitization. + $this->assertEquals($expected, $event->getFilename()); + } + + /** + * Provides data for testFileNameTransliteration(). + * + * @return array + * Arrays with original name, expected name, and sanitization options. 
+ */ + public function provideFilenames() { + return [ + 'Test default options' => [ + 'TÉXT-œ.txt', + 'TÉXT-œ.txt', + [FALSE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Test raw file without extension' => [ + 'TÉXT-œ', + 'TÉXT-œ', + [FALSE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Test only transliteration: simple' => [ + 'Á-TÉXT-œ.txt', + 'A-TEXT-oe.txt', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Test only transliteration: raw file without extension' => [ + 'Á-TÉXT-œ', + 'A-TEXT-oe', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Test only transliteration: complex and replace (-)' => [ + 'S Pácê--táb# #--🙈.jpg', + 'S Pace--tab# #---.jpg', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Test only transliteration: complex and replace (_)' => [ + 'S Pácê--táb# #--🙈.jpg', + 'S Pace--tab# #--_.jpg', + [TRUE, '_', FALSE, FALSE, FALSE, FALSE], + ], + 'Test transliteration, replace (-) and replace whitespace (trim front)' => [ + ' S Pácê--táb# #--🙈.png', + 'S--Pace--tab#-#---.png', + [TRUE, '-', TRUE, FALSE, FALSE, FALSE], + ], + 'Test transliteration, replace (-) and replace whitespace (trim both sides)' => [ + ' S Pácê--táb# #--🙈 .jpg', + 'S--Pace--tab#-#---.jpg', + [TRUE, '-', TRUE, FALSE, FALSE, FALSE], + ], + 'Test transliteration, replace (_) and replace whitespace (trim both sides)' => [ + ' S Pácê--táb# #--🙈 .jpg', + 'S__Pace--tab#_#--_.jpg', + [TRUE, '_', TRUE, FALSE, FALSE, FALSE], + ], + 'Test transliteration, replace (_), replace whitespace and replace non-alphanumeric' => [ + ' S Pácê--táb# #--🙈.txt', + 'S__Pace--tab___--_.txt', + [TRUE, '_', TRUE, TRUE, FALSE, FALSE], + ], + 'Test transliteration, replace (-), replace whitespace and replace non-alphanumeric' => [ + ' S Pácê--táb# #--🙈.txt', + 'S--Pace--tab------.txt', + [TRUE, '-', TRUE, TRUE, FALSE, FALSE], + ], + 'Test transliteration, replace (-), replace whitespace, replace non-alphanumeric and removing duplicate separators' => [ + 'S Pácê--táb# #--🙈.txt', + 'S-Pace-tab.txt', + [TRUE, 
'-', TRUE, TRUE, TRUE, FALSE], + ], + 'Test transliteration, replace (-), replace whitespace and deduplicate separators' => [ + ' S Pácê--táb# #--🙈.txt', + 'S-Pace-tab#-#.txt', + [TRUE, '-', TRUE, FALSE, TRUE, FALSE], + ], + 'Test transliteration, replace (_), replace whitespace, replace non-alphanumeric and deduplicate separators' => [ + 'S Pácê--táb# #--🙈.txt', + 'S_Pace_tab.txt', + [TRUE, '_', TRUE, TRUE, TRUE, FALSE], + ], + 'Test transliteration, replace (-), replace whitespace, replace non-alphanumeric, deduplicate separators and lowercase conversion' => [ + 'S Pácê--táb# #--🙈.jpg', + 's-pace-tab.jpg', + [TRUE, '-', TRUE, TRUE, TRUE, TRUE], + ], + 'Test transliteration, replace (_), replace whitespace, replace non-alphanumeric, deduplicate separators and lowercase conversion' => [ + 'S Pácê--táb# #--🙈.txt', + 's_pace_tab.txt', + [TRUE, '_', TRUE, TRUE, TRUE, TRUE], + ], + 'Ignore non-alphanumeric replacement if transliteration is not set, but still replace whitespace, deduplicate separators, and lowercase' => [ + ' 2S Pácê--táb# #--🙈.txt', + '2s-pácê-táb#-#-🙈.txt', + [FALSE, '-', TRUE, TRUE, TRUE, TRUE], + ], + 'Only lowercase, simple' => [ + 'TEXT.txt', + 'text.txt', + [TRUE, '-', FALSE, FALSE, FALSE, TRUE], + ], + 'Only lowercase, with unicode' => [ + 'TÉXT.txt', + 'text.txt', + [TRUE, '-', FALSE, FALSE, FALSE, TRUE], + ], + 'No transformations' => [ + 'Ä Ö Ü Å Ø äöüåøhello.txt', + 'Ä Ö Ü Å Ø äöüåøhello.txt', + [FALSE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Transliterate via en (not de), no other transformations' => [ + 'Ä Ö Ü Å Ø äöüåøhello.txt', + 'A O U A O aouaohello.txt', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Transliterate via de (not en), no other transformations' => [ + 'Ä Ö Ü Å Ø äöüåøhello.txt', + 'Ae Oe Ue A O aeoeueaohello.txt', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], 'de', + ], + 'Transliterate via de not en, plus whitespace + lowercase' => [ + 'Ä Ö Ü Å Ø äöüåøhello.txt', + 'ae-oe-ue-a-o-aeoeueaohello.txt', + [TRUE, '-', TRUE, 
FALSE, FALSE, TRUE], 'de', + ], + 'Remove duplicate separators with falsey extension' => [ + 'foo.....0', + 'foo.0', + [TRUE, '-', FALSE, FALSE, TRUE, FALSE], + ], + 'Remove duplicate separators with extension and ending in dot' => [ + 'foo.....txt', + 'foo.txt', + [TRUE, '-', FALSE, FALSE, TRUE, FALSE], + ], + 'Remove duplicate separators without extension and ending in dot' => [ + 'foo.....', + 'foo', + [TRUE, '-', FALSE, FALSE, TRUE, FALSE], + ], + 'All unknown unicode' => [ + '🙈🙈🙈.txt', + '---.txt', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + '✓ unicode' => [ + '✓.txt', + '-.txt', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Multiple ✓ unicode' => [ + '✓✓✓.txt', + '---.txt', + [TRUE, '-', FALSE, FALSE, FALSE, FALSE], + ], + 'Test transliteration, replace (-), replace whitespace and removing multiple duplicate separators #1' => [ + 'Test_-_file.png', + 'test-file.png', + [TRUE, '-', TRUE, TRUE, TRUE, TRUE], + ], + 'Test transliteration, replace (-), replace whitespace and removing multiple duplicate separators #2' => [ + 'Test .. File.png', + 'test-file.png', + [TRUE, '-', TRUE, TRUE, TRUE, TRUE], + ], + 'Test transliteration, replace (-), replace whitespace and removing multiple duplicate separators #3' => [ + 'Test -..__-- file.png', + 'test-file.png', + [TRUE, '-', TRUE, TRUE, TRUE, TRUE], + ], + 'Test transliteration, replace (-), replace sequences of dots, underscores and/or dashes with the replacement character' => [ + 'abc. --_._-- .abc.jpeg', + 'abc. - .abc.jpeg', + [TRUE, '-', FALSE, FALSE, TRUE, FALSE], + ], + ]; + } + +}