diff --git a/core/lib/Drupal/Component/Utility/Unicode.php b/core/lib/Drupal/Component/Utility/Unicode.php
index 934b7450fade03bd8fb7d26706852a2ef1099a52..7691e00abedf23e3f9060cf9f31d61a43efff077 100644
--- a/core/lib/Drupal/Component/Utility/Unicode.php
+++ b/core/lib/Drupal/Component/Utility/Unicode.php
@@ -183,6 +183,40 @@ public static function check() {
     return '';
   }
 
+  /**
+   * Decodes UTF byte-order mark (BOM) into the encoding's name.
+   *
+   * @param string $data
+   *   The data possibly containing a BOM. This can be the entire contents of
+   *   a file, or just a fragment containing at least the first five bytes.
+   *
+   * @return string|bool
+   *   The name of the encoding, or FALSE if no byte order mark was present.
+   */
+  public static function encodingFromBOM($data) {
+    // Longer BOMs are listed before shorter BOMs that are a prefix of them
+    // (UTF-32LE starts with the UTF-16LE BOM), because the first match wins.
+    static $bomMap = array(
+      "\xEF\xBB\xBF" => 'UTF-8',
+      "\x00\x00\xFE\xFF" => 'UTF-32BE',
+      "\xFF\xFE\x00\x00" => 'UTF-32LE',
+      "\xFE\xFF" => 'UTF-16BE',
+      "\xFF\xFE" => 'UTF-16LE',
+      "\x2B\x2F\x76\x38\x2D" => 'UTF-7',
+      "\x2B\x2F\x76\x38" => 'UTF-7',
+      "\x2B\x2F\x76\x39" => 'UTF-7',
+      "\x2B\x2F\x76\x2B" => 'UTF-7',
+      "\x2B\x2F\x76\x2F" => 'UTF-7',
+    );
+
+    foreach ($bomMap as $bom => $encoding) {
+      if (strpos($data, $bom) === 0) {
+        return $encoding;
+      }
+    }
+    return FALSE;
+  }
+
   /**
    * Converts data to UTF-8.
    *
diff --git a/core/lib/Drupal/Core/Asset/CssOptimizer.php b/core/lib/Drupal/Core/Asset/CssOptimizer.php
index 673012b87f467aba54b1e9a6718bfa5ff3fb1b8c..4b968885ae9a23b1390ef92501492bb35552b844 100644
--- a/core/lib/Drupal/Core/Asset/CssOptimizer.php
+++ b/core/lib/Drupal/Core/Asset/CssOptimizer.php
@@ -7,6 +7,7 @@
 namespace Drupal\Core\Asset;
 
 use Drupal\Core\Asset\AssetOptimizerInterface;
+use Drupal\Component\Utility\Unicode;
 
 /**
  * Optimizes a CSS asset.
@@ -125,6 +126,19 @@ public function loadFile($file, $optimize = NULL, $reset_basepath = TRUE) {
     // but are merely there to disable certain module CSS files.
     $content = '';
     if ($contents = @file_get_contents($file)) {
+      // If a BOM is found, convert the file to UTF-8, then use substr() to
+      // remove the BOM from the result.
+      if ($encoding = (Unicode::encodingFromBOM($contents))) {
+        $contents = Unicode::substr(Unicode::convertToUtf8($contents, $encoding), 1);
+      }
+      // If no BOM, check for fallback encoding. Per CSS spec the regex is very strict.
+      elseif (preg_match('/^@charset "([^"]+)";/', $contents, $matches)) {
+        if ($matches[1] !== 'utf-8' && $matches[1] !== 'UTF-8') {
+          $contents = substr($contents, strlen($matches[0]));
+          $contents = Unicode::convertToUtf8($contents, $matches[1]);
+        }
+      }
+
       // Return the processed stylesheet.
       $content = $this->processCss($contents, $_optimize);
     }
diff --git a/core/lib/Drupal/Core/Asset/JsOptimizer.php b/core/lib/Drupal/Core/Asset/JsOptimizer.php
index bd004a512a3d103f53cd055658757bb0a2394712..33aca695d8b9026bd5287ae3b26f2e661ea501f5 100644
--- a/core/lib/Drupal/Core/Asset/JsOptimizer.php
+++ b/core/lib/Drupal/Core/Asset/JsOptimizer.php
@@ -7,6 +7,7 @@
 namespace Drupal\Core\Asset;
 
 use Drupal\Core\Asset\AssetOptimizerInterface;
+use Drupal\Component\Utility\Unicode;
 
 /**
  * Optimizes a JavaScript asset.
@@ -24,8 +25,19 @@ public function optimize(array $js_asset) {
       throw new \Exception('Only file JavaScript assets with preprocessing enabled can be optimized.');
     }
 
+    // If a BOM is found, convert the file to UTF-8, then use substr() to
+    // remove the BOM from the result.
+    $data = file_get_contents($js_asset['data']);
+    if ($encoding = (Unicode::encodingFromBOM($data))) {
+      $data = Unicode::substr(Unicode::convertToUtf8($data, $encoding), 1);
+    }
+    // If no BOM is found, check for the charset attribute.
+    elseif (isset($js_asset['attributes']['charset'])) {
+      $data = Unicode::convertToUtf8($data, $js_asset['attributes']['charset']);
+    }
+
     // No-op optimizer: no optimizations are applied to JavaScript assets.
-    return file_get_contents($js_asset['data']);
+    return $data;
   }
 
   /**
diff --git a/core/tests/Drupal/Tests/Core/Asset/CssOptimizerUnitTest.php b/core/tests/Drupal/Tests/Core/Asset/CssOptimizerUnitTest.php
index f4d083cddb0527dc96a12f20fa9f7d2ca5cab125..a805e857700773c79a2f31f150197f3c7c5647b1 100644
--- a/core/tests/Drupal/Tests/Core/Asset/CssOptimizerUnitTest.php
+++ b/core/tests/Drupal/Tests/Core/Asset/CssOptimizerUnitTest.php
@@ -170,6 +170,62 @@ function providerTestOptimize() {
         ),
         file_get_contents($path . 'charset.css.optimized.css'),
       ),
+      6 => array(
+        array(
+          'group' => -100,
+          'every_page' => TRUE,
+          'type' => 'file',
+          'weight' => 0.013,
+          'media' => 'all',
+          'preprocess' => TRUE,
+          'data' => $path . 'css_input_with_bom.css',
+          'browsers' => array('IE' => TRUE, '!IE' => TRUE),
+          'basename' => 'css_input_with_bom.css',
+        ),
+        '.byte-order-mark-test{content:"☃";}' . "\n",
+      ),
+      7 => array(
+        array(
+          'group' => -100,
+          'every_page' => TRUE,
+          'type' => 'file',
+          'weight' => 0.013,
+          'media' => 'all',
+          'preprocess' => TRUE,
+          'data' => $path . 'css_input_with_charset.css',
+          'browsers' => array('IE' => TRUE, '!IE' => TRUE),
+          'basename' => 'css_input_with_charset.css',
+        ),
+        '.charset-test{content:"€";}' . "\n",
+      ),
+      8 => array(
+        array(
+          'group' => -100,
+          'every_page' => TRUE,
+          'type' => 'file',
+          'weight' => 0.013,
+          'media' => 'all',
+          'preprocess' => TRUE,
+          'data' => $path . 'css_input_with_bom_and_charset.css',
+          'browsers' => array('IE' => TRUE, '!IE' => TRUE),
+          'basename' => 'css_input_with_bom_and_charset.css',
+        ),
+        '.byte-order-mark-charset-test{content:"☃";}' . "\n",
+      ),
+      9 => array(
+        array(
+          'group' => -100,
+          'every_page' => TRUE,
+          'type' => 'file',
+          'weight' => 0.013,
+          'media' => 'all',
+          'preprocess' => TRUE,
+          'data' => $path . 'css_input_with_utf16_bom.css',
+          'browsers' => array('IE' => TRUE, '!IE' => TRUE),
+          'basename' => 'css_input_with_utf16_bom.css',
+        ),
+        '.utf16-byte-order-mark-test{content:"☃";}' . "\n",
+      ),
     );
   }
 
diff --git a/core/tests/Drupal/Tests/Core/Asset/JsOptimizerUnitTest.php b/core/tests/Drupal/Tests/Core/Asset/JsOptimizerUnitTest.php
index 13bd77cbbc68e9b0b7b9eb3570968e02329fc6d4..c7490b91b0fe3a685f31b46cf47cf46775ab18a6 100644
--- a/core/tests/Drupal/Tests/Core/Asset/JsOptimizerUnitTest.php
+++ b/core/tests/Drupal/Tests/Core/Asset/JsOptimizerUnitTest.php
@@ -80,4 +80,52 @@ function testClean($js_asset, $expected) {
     $this->assertEquals($expected, $this->optimizer->clean($js_asset));
   }
 
+  /**
+   * Provides data for the JS asset optimize test.
+   *
+   * @see \Drupal\Core\Asset\JsOptimizer::optimize()
+   *
+   * @return array
+   *   An array of test data.
+   */
+  function providerTestOptimize() {
+    $path = dirname(__FILE__) . '/js_test_files/';
+    return array(
+      0 => array(
+        array(
+          'type' => 'file',
+          'preprocess' => TRUE,
+          'data' => $path . 'utf8_bom.js',
+        ),
+        file_get_contents($path . 'utf8_bom.js.optimized.js'),
+      ),
+      1 => array(
+        array(
+          'type' => 'file',
+          'preprocess' => TRUE,
+          'data' => $path . 'utf16_bom.js',
+        ),
+        file_get_contents($path . 'utf16_bom.js.optimized.js'),
+      ),
+      2 => array(
+        array(
+          'type' => 'file',
+          'preprocess' => TRUE,
+          'data' => $path . 'latin_9.js',
+          'attributes' => array('charset' => 'ISO-8859-15'),
+        ),
+        file_get_contents($path . 'latin_9.js.optimized.js'),
+      ),
+    );
+  }
+
+  /**
+   * Tests optimizing a JS asset containing 'type' => 'file'.
+   *
+   * @dataProvider providerTestOptimize
+   */
+  function testOptimize($js_asset, $expected) {
+    $this->assertEquals($expected, $this->optimizer->optimize($js_asset));
+  }
+
 }
diff --git a/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_bom.css b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_bom.css
new file mode 100644
index 0000000000000000000000000000000000000000..bc821851e5c88e82ce3ce29cfa9ce1c870a76d85
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_bom.css
@@ -0,0 +1,3 @@
+.byte-order-mark-test {
+  content: "☃";
+}
diff --git a/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_bom_and_charset.css b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_bom_and_charset.css
new file mode 100644
index 0000000000000000000000000000000000000000..9ffae69d242ba53fc8bb36b424bec869a4714eed
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_bom_and_charset.css
@@ -0,0 +1,4 @@
+@charset "utf-8";
+.byte-order-mark-charset-test {
+  content: "☃";
+}
diff --git a/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_charset.css b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_charset.css
new file mode 100644
index 0000000000000000000000000000000000000000..949f6ca569b20b1c05853a630335f8c7c59415be
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_charset.css
@@ -0,0 +1,4 @@
+@charset "iso-8859-15";
+.charset-test {
+  content: "¤";
+}
diff --git a/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_utf16_bom.css b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_utf16_bom.css
new file mode 100644
index 0000000000000000000000000000000000000000..dcbd5c89237fdf3ba13ed9fb7b3f0624fdb1cb0b
Binary files /dev/null and b/core/tests/Drupal/Tests/Core/Asset/css_test_files/css_input_with_utf16_bom.css differ
diff --git a/core/tests/Drupal/Tests/Core/Asset/js_test_files/latin_9.js b/core/tests/Drupal/Tests/Core/Asset/js_test_files/latin_9.js
new file mode 100644
index 0000000000000000000000000000000000000000..a488429fe5c9ada71e2b8dcaaac821fe488c0d44
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/js_test_files/latin_9.js
@@ -0,0 +1 @@
+var latin9Char = '¤';
diff --git a/core/tests/Drupal/Tests/Core/Asset/js_test_files/latin_9.js.optimized.js b/core/tests/Drupal/Tests/Core/Asset/js_test_files/latin_9.js.optimized.js
new file mode 100644
index 0000000000000000000000000000000000000000..f1583822702b67b4f4ff231b148eae814db11475
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/js_test_files/latin_9.js.optimized.js
@@ -0,0 +1 @@
+var latin9Char = '€';
diff --git a/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf16_bom.js b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf16_bom.js
new file mode 100644
index 0000000000000000000000000000000000000000..ad50d5f38e4ceec8b6a2d2448fbd50d861a5cf4d
Binary files /dev/null and b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf16_bom.js differ
diff --git a/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf16_bom.js.optimized.js b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf16_bom.js.optimized.js
new file mode 100644
index 0000000000000000000000000000000000000000..6a0d6a625ea4c1642f082dbb26e48a91f78e09ad
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf16_bom.js.optimized.js
@@ -0,0 +1 @@
+var utf8BOM = '☃';
diff --git a/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf8_bom.js b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf8_bom.js
new file mode 100644
index 0000000000000000000000000000000000000000..f4eb31a796663d5c6ef30c6319de05635b0b5a4d
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf8_bom.js
@@ -0,0 +1 @@
+var utf8BOM = '☃';
diff --git a/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf8_bom.js.optimized.js b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf8_bom.js.optimized.js
new file mode 100644
index 0000000000000000000000000000000000000000..6a0d6a625ea4c1642f082dbb26e48a91f78e09ad
--- /dev/null
+++ b/core/tests/Drupal/Tests/Core/Asset/js_test_files/utf8_bom.js.optimized.js
@@ -0,0 +1 @@
+var utf8BOM = '☃';