Skip to content
Snippets Groups Projects
Commit 1fef4ecc authored by Jonathan Smith's avatar Jonathan Smith Committed by Fran Garcia-Linares
Browse files

Issue #3439240 by jonathan1055, fjgarlin, grimreaper: Cspell: sanitize...

Issue #3439240 by jonathan1055, fjgarlin, grimreaper: Cspell: sanitize suggested words for dictionary
parent 4df6d2cd
No related branches found
No related tags found
1 merge request!303#3439240 Lower-case deduplicated words file
Pipeline #379512 passed
{ {
"description": "This default cspell configuration for contrib projects is based on core/.cspell.json. See https://project.pages.drupalcode.org/gitlab_templates/jobs/cspell/ for more details.", "description": "This default cspell configuration for contrib projects is based on core/.cspell.json. Some of the arrays are expanded in scripts/prepare-cspell.php. See https://project.pages.drupalcode.org/gitlab_templates/jobs/cspell/ for more details.",
"language": "en-US", "language": "en-US",
"allowCompoundWords": false, "allowCompoundWords": false,
"globRoot": ".", "globRoot": ".",
......
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
".git", ".git",
"vendor", "vendor",
"node_modules", "node_modules",
"assets" "assets/.cspell.json",
".cspell.json"
], ],
"dictionaryDefinitions": [ "dictionaryDefinitions": [
{ {
......
...@@ -19,7 +19,11 @@ variables: ...@@ -19,7 +19,11 @@ variables:
The words should be comma-separated but each word _does not_ need to be quoted individually. The list is not case-sensitive. The words should be comma-separated but each word _does not_ need to be quoted individually. The list is not case-sensitive.
### Custom project dictionary ### Custom project dictionary
If there are many words in your project that are invented or that are not included in the default dictionaries you can add a `.cspell-project-words.txt` file to your project. Each word should be on a separate line, and blank lines and comments starting with `#` are ignored. [CSpell's Words List Syntax](https://cspell.org/docs/dictionaries-custom/#words-list-syntax) has more details. If there are many words in your project that are invented or that are not included in the default dictionaries you can add a custom dictionary text file to your project. Each word should be on a separate line, and blank lines and comments starting with `#` are ignored. [CSpell's Words List Syntax](https://cspell.org/docs/dictionaries-custom/#words-list-syntax) has more details. The default name of the project dictionary file is `.cspell-project-words.txt`, but you can use a custom name by defining a `_CSPELL_DICTIONARY` variable:
```
variables:
_CSPELL_DICTIONARY: 'my-project-dictionary.txt'
```
### Ignore words specific to one file ### Ignore words specific to one file
If a file contains some reported words that are only used in that file, instead of adding them to the project dictionary they can be listed at the top of the file. This is done by adding a special style of comment that CSpell will interpret. The format for a list of words is `cspell:ignore mycustomthing madeupword` If a file contains some reported words that are only used in that file, instead of adding them to the project dictionary they can be listed at the top of the file. This is done by adding a special style of comment that CSpell will interpret. The format for a list of words is `cspell:ignore mycustomthing madeupword`
......
...@@ -880,6 +880,7 @@ cspell: ...@@ -880,6 +880,7 @@ cspell:
name: artifacts-$CI_PIPELINE_ID-$CI_JOB_NAME_SLUG name: artifacts-$CI_PIPELINE_ID-$CI_JOB_NAME_SLUG
paths: paths:
- _cspell_unrecognized_words.txt - _cspell_unrecognized_words.txt
- _cspell_updated_project_words.txt
- _cspell_json.txt - _cspell_json.txt
script: script:
- echo "Executing curl -OL https://git.drupalcode.org/$_CURL_TEMPLATES_REPO/-/raw/$_CURL_TEMPLATES_REF/scripts/prepare-cspell.php" - echo "Executing curl -OL https://git.drupalcode.org/$_CURL_TEMPLATES_REPO/-/raw/$_CURL_TEMPLATES_REF/scripts/prepare-cspell.php"
...@@ -901,15 +902,21 @@ cspell: ...@@ -901,15 +902,21 @@ cspell:
- echo "Executing $CI_PROJECT_DIR/$_WEB_ROOT/core/node_modules/.bin/cspell -c .cspell.json --show-suggestions --show-context --no-progress $_CSPELL_EXTRA $CSPELL_SEARCH" - echo "Executing $CI_PROJECT_DIR/$_WEB_ROOT/core/node_modules/.bin/cspell -c .cspell.json --show-suggestions --show-context --no-progress $_CSPELL_EXTRA $CSPELL_SEARCH"
- $CI_PROJECT_DIR/$_WEB_ROOT/core/node_modules/.bin/cspell -c .cspell.json --show-suggestions --show-context --no-progress $_CSPELL_EXTRA $CSPELL_SEARCH || EXIT_CODE=$? - $CI_PROJECT_DIR/$_WEB_ROOT/core/node_modules/.bin/cspell -c .cspell.json --show-suggestions --show-context --no-progress $_CSPELL_EXTRA $CSPELL_SEARCH || EXIT_CODE=$?
- WORDS_FILE=_cspell_unrecognized_words.txt - WORDS_FILE=_cspell_unrecognized_words.txt
- touch $WORDS_FILE - UPDATED_PROJECT_DICTIONARY=_cspell_updated_project_words.txt
- touch $_CSPELL_DICTIONARY $WORDS_FILE $UPDATED_PROJECT_DICTIONARY
- | - |
if [ "$EXIT_CODE" != "" ]; then if [ "$EXIT_CODE" != "" ]; then
# There are some unrecognized words so create an artifact file containing the unique list. # There are some unrecognized words so create an artifact file containing the unique list.
$CI_PROJECT_DIR/$_WEB_ROOT/core/node_modules/.bin/cspell -c .cspell.json --words-only --unique --no-progress $_CSPELL_EXTRA $CSPELL_SEARCH | sort --ignore-case >> $WORDS_FILE || true $CI_PROJECT_DIR/$_WEB_ROOT/core/node_modules/.bin/cspell -c .cspell.json --words-only --unique --no-progress $_CSPELL_EXTRA $CSPELL_SEARCH | sort --ignore-case >> $WORDS_FILE || true
echo "The number of unrecognised/misspelled words is $(wc -l < $WORDS_FILE)" # Convert all words to lower-case and de-duplicate the list.
echo "An artifact file has been created containing a list of these unrecognized words, for you to browse or download." tr '[:upper:]' '[:lower:]' < $WORDS_FILE | LC_ALL=C sort -u -o $WORDS_FILE
echo "The number of distinct unrecognised/misspelled words is $(wc -l < $WORDS_FILE)"
echo "------------" && cat $WORDS_FILE && echo "------------"
echo "An artifact $WORDS_FILE has been created containing these unrecognized words, for you to browse or download."
# Read the project dictionary and the new unrecognized words and create a new complete project dictionary artifact file.
cat $_CSPELL_DICTIONARY $WORDS_FILE | tr '[:upper:]' '[:lower:]' | LC_ALL=C sort -u -o $UPDATED_PROJECT_DICTIONARY
echo "An artifact $UPDATED_PROJECT_DICTIONARY has been created containing the complete list of words in your project dictionary (if you have one) plus any newly reported words."
echo "For hints on getting this CSpell job to pass see https://project.pages.drupalcode.org/gitlab_templates/jobs/cspell/" echo "For hints on getting this CSpell job to pass see https://project.pages.drupalcode.org/gitlab_templates/jobs/cspell/"
echo "=== This is $WORDS_FILE ===" && cat $WORDS_FILE
fi fi
- cp .cspell.json _cspell_json.txt - cp .cspell.json _cspell_json.txt
- echo "Exiting with EXIT_CODE=$EXIT_CODE" - echo "Exiting with EXIT_CODE=$EXIT_CODE"
......
...@@ -116,6 +116,10 @@ variables: ...@@ -116,6 +116,10 @@ variables:
value: '' value: ''
description: 'A comma-separated list of words to add to the CSpell dictionary. For example `mycustomthing, madeupword`. Quotes are not required.' description: 'A comma-separated list of words to add to the CSpell dictionary. For example `mycustomthing, madeupword`. Quotes are not required.'
_CSPELL_DICTIONARY:
value: '.cspell-project-words.txt'
description: 'The name of the project dictionary of custom words. The default is `.cspell-project-words.txt` but a project can have a custom name if required.'
_CSPELL_FLAGWORDS: _CSPELL_FLAGWORDS:
value: '' value: ''
description: 'A comma-separated list of real words in other directories that should not be used. For example Drupal Core has `please` as a Flag Word. Quotes are not required.' description: 'A comma-separated list of real words in other directories that should not be used. For example Drupal Core has `please` as a Flag Word. Quotes are not required.'
......
...@@ -5,11 +5,21 @@ ...@@ -5,11 +5,21 @@
* @file * @file
* Prepares a .cspell.json file customized for the gitlab templates environment. * Prepares a .cspell.json file customized for the gitlab templates environment.
* *
* Param 1 = test_suffix (optional) - Additional suffix to append to the input * Arguments:
* filename, before writing out. This is used when running the script locally *
* during development, to avoid overwriting the input .cspell.json file. * -s --suffix Optional suffix to append to the input filename before
* writing out. Useful when running locally during development
* to avoid overwriting the input .cspell.json file.
*
* -v --verbose Show verbose debug output.
*/ */
// Get the arguments. In getopt() a trailing colon marks an option that
// requires a value, so both the short 's:' and the long 'suffix:' forms must
// carry one. Without the colon, --suffix is treated as a value-less flag and
// the requested suffix is never applied to the output filename.
// getopt() returns FALSE on failure, so fall back to an empty array to keep
// the array_key_exists() checks below safe.
$options = getopt('s:v', ['suffix:', 'verbose']) ?: [];
// Verbose debug output is off unless -v or --verbose was given.
$quiet = !array_key_exists('v', $options) && !array_key_exists('verbose', $options);
// The short form -s takes precedence if both -s and --suffix are supplied.
$suffix = $options['s'] ?? $options['suffix'] ?? '';
$quiet ?: print '$suffix=' . $suffix . PHP_EOL;
// Get the contents of .cspell.json into an array. This file will be either the // Get the contents of .cspell.json into an array. This file will be either the
// project's own .cspell.json or the default copied from /assets. // project's own .cspell.json or the default copied from /assets.
$cspell_filename = '.cspell.json'; $cspell_filename = '.cspell.json';
...@@ -17,14 +27,10 @@ $cspell_json = json_decode(file_get_contents($cspell_filename), TRUE); ...@@ -17,14 +27,10 @@ $cspell_json = json_decode(file_get_contents($cspell_filename), TRUE);
if (empty($cspell_json)) { if (empty($cspell_json)) {
throw new RuntimeException("Unable to read $cspell_filename"); throw new RuntimeException("Unable to read $cspell_filename");
} }
$quiet ?: print 'At start cspell_json=' . print_r($cspell_json, TRUE) . PHP_EOL;
// Allow for easy testing by avoiding overwriting the input file.
$test_suffix = $argv[1] ?? '';
$cspell_filename .= $test_suffix;
$webRoot = getenv('_WEB_ROOT') ?: 'web';
// Some directories in the project root are not part of the project. // Some directories in the project root are not part of the project.
$webRoot = getenv('_WEB_ROOT') ?: 'web';
$non_project_directories = ["$webRoot", 'vendor', 'node_modules', '.git']; $non_project_directories = ["$webRoot", 'vendor', 'node_modules', '.git'];
// Specify the files that are always ignored. // Specify the files that are always ignored.
...@@ -60,10 +66,12 @@ $filenames_to_find = [ ...@@ -60,10 +66,12 @@ $filenames_to_find = [
// //
// Get the words from $_CSPELL_WORDS. // Get the words from $_CSPELL_WORDS.
if ($cspell_words = getenv('_CSPELL_WORDS')) { if ($cspell_words = getenv('_CSPELL_WORDS')) {
$quiet ?: print 'Initial $cspell_words=' . $cspell_words . PHP_EOL;
// Remove all double quotes and spaces. // Remove all double quotes and spaces.
$cspell_words = str_replace(['"', ' '], ['', ''], $cspell_words); $cspell_words = str_replace(['"', ' '], ['', ''], $cspell_words);
// Remove single quotes from start and end of words, but not from the middle. // Remove single quotes from start and end of words, but not from the middle.
$words = str_replace([",'", "',"], [',', ','], ',' . $cspell_words . ','); $words = str_replace([",'", "',"], [',', ','], ',' . $cspell_words . ',');
$quiet ?: print '$words=' . $words . PHP_EOL;
} }
// The module's machine name might not be a real word, so add this. The value of // The module's machine name might not be a real word, so add this. The value of
...@@ -94,10 +102,11 @@ foreach (new RecursiveIteratorIterator(new RecursiveDirectoryIterator('.', Recur ...@@ -94,10 +102,11 @@ foreach (new RecursiveIteratorIterator(new RecursiveDirectoryIterator('.', Recur
$ignore_standard_files[] = $file->getPathname(); $ignore_standard_files[] = $file->getPathname();
} }
} }
$quiet ?: print '$module_name_parts=' . print_r($module_name_parts, TRUE) . PHP_EOL;
// Merge into the existing json 'words' value, but cater for that being empty. // Merge into the existing json 'words' value, but cater for that being empty.
// array_values() is needed after array_unique() to restore the keys to numeric. // array_values() is needed after array_unique() to restore the keys to numeric.
$cspell_json['words'] = array_values(array_unique(array_merge( $cspell_json['words'] = array_values(array_filter(array_unique(array_merge(
$cspell_json['words'] ?? [], $cspell_json['words'] ?? [],
array_filter(explode(',', $words ?? '')), array_filter(explode(',', $words ?? '')),
$module_name_parts, $module_name_parts,
...@@ -107,7 +116,8 @@ $cspell_json['words'] = array_values(array_unique(array_merge( ...@@ -107,7 +116,8 @@ $cspell_json['words'] = array_values(array_unique(array_merge(
// Add some common words that were dropped from core dictionary in Drupal 11.1 // Add some common words that were dropped from core dictionary in Drupal 11.1
// See https://www.drupal.org/project/gitlab_templates/issues/3494834 // See https://www.drupal.org/project/gitlab_templates/issues/3494834
['endapply', 'nightwatchjs', 'testgroups'], ['endapply', 'nightwatchjs', 'testgroups'],
))); ))));
$quiet ?: print '$cspell_json[\'words\']=' . print_r($cspell_json['words'], TRUE) . PHP_EOL;
// ---------- // ----------
// Flag Words // Flag Words
...@@ -115,12 +125,14 @@ $cspell_json['words'] = array_values(array_unique(array_merge( ...@@ -115,12 +125,14 @@ $cspell_json['words'] = array_values(array_unique(array_merge(
// //
// Get any flagged words from $_CSPELL_FLAGWORDS. // Get any flagged words from $_CSPELL_FLAGWORDS.
if ($cspell_flagwords = getenv('_CSPELL_FLAGWORDS')) { if ($cspell_flagwords = getenv('_CSPELL_FLAGWORDS')) {
$quiet ?: print 'Input $cspell_flagwords=' . $cspell_flagwords . PHP_EOL;
// Remove any quotes and spaces. Double quotes are added in json_encode. // Remove any quotes and spaces. Double quotes are added in json_encode.
$cspell_flagwords = str_replace(["'", '"', ' '], ['', '', ''], $cspell_flagwords); $cspell_flagwords = str_replace(["'", '"', ' '], ['', '', ''], $cspell_flagwords);
$cspell_json['flagWords'] = array_values(array_unique(array_merge( $cspell_json['flagWords'] = array_values(array_unique(array_merge(
$cspell_json['flagWords'] ?? [], $cspell_json['flagWords'] ?? [],
array_filter(explode(',', $cspell_flagwords)), array_filter(explode(',', $cspell_flagwords)),
))); )));
$quiet ?: print '$cspell_json[\'flagWords\']=' . print_r($cspell_json['flagWords'], TRUE) . PHP_EOL;
} }
// ------------ // ------------
...@@ -163,12 +175,17 @@ $dictionary_definitions = [ ...@@ -163,12 +175,17 @@ $dictionary_definitions = [
'name' => 'dictionary', 'name' => 'dictionary',
'path' => $webRoot . '/core/misc/cspell/dictionary.txt', 'path' => $webRoot . '/core/misc/cspell/dictionary.txt',
], ],
[ ];
if ($project_dictionary = getenv('_CSPELL_DICTIONARY')) {
$quiet ?: print '$project_dictionary=' . $project_dictionary . PHP_EOL;
$dictionary_definitions[] = [
'name' => 'project-words', 'name' => 'project-words',
'path' => './.cspell-project-words.txt', 'path' => './' . $project_dictionary,
'description' => "The project's own custom dictionary (optional)", 'description' => "The project's own custom dictionary (optional)",
], ];
]; }
$quiet ?: print 'Initial $dictionary_definitions=' . print_r($dictionary_definitions, TRUE) . PHP_EOL;
$dictionary_names = []; $dictionary_names = [];
foreach ($dictionary_definitions as $key => $data) { foreach ($dictionary_definitions as $key => $data) {
// Add the 'name' if the file exists. Remove the array entry if it does not. // Add the 'name' if the file exists. Remove the array entry if it does not.
...@@ -179,6 +196,8 @@ foreach ($dictionary_definitions as $key => $data) { ...@@ -179,6 +196,8 @@ foreach ($dictionary_definitions as $key => $data) {
unset($dictionary_definitions[$key]); unset($dictionary_definitions[$key]);
} }
} }
$quiet ?: print 'After checking files, $dictionary_definitions=' . print_r($dictionary_definitions, TRUE) . PHP_EOL;
// These dictionaries are provided by CSpell. // These dictionaries are provided by CSpell.
$built_in_dictionaries = [ $built_in_dictionaries = [
'companies', 'companies',
...@@ -208,10 +227,14 @@ foreach ($cspell_json['dictionaryDefinitions'] ?? [] as $key => $dic) { ...@@ -208,10 +227,14 @@ foreach ($cspell_json['dictionaryDefinitions'] ?? [] as $key => $dic) {
} }
} }
$cspell_json['dictionaryDefinitions'] = merge_deep($dictionary_definitions, $cspell_json['dictionaryDefinitions'] ?? []); $cspell_json['dictionaryDefinitions'] = merge_deep($dictionary_definitions, $cspell_json['dictionaryDefinitions'] ?? []);
$quiet ?: print '$cspell_json[\'dictionaryDefinitions\']=' . print_r($cspell_json['dictionaryDefinitions'], TRUE) . PHP_EOL;
// --------------------------- // ---------------------------
// Write out the modified file // Write out the modified file
// --------------------------- // ---------------------------
// Allow for easy testing by avoiding overwriting the input file.
$cspell_filename .= $suffix;
$quiet ?: print 'At end $cspell_json=' . print_r($cspell_json, TRUE) . PHP_EOL;
print "Writing json array to {$cspell_filename}" . PHP_EOL; print "Writing json array to {$cspell_filename}" . PHP_EOL;
file_put_contents($cspell_filename, json_encode($cspell_json, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES)); file_put_contents($cspell_filename, json_encode($cspell_json, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES));
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment