Commit 10c6e1da authored by alexpott's avatar alexpott

Issue #1833356 by grendzy: CSS files encoded in UTF-8 with BOM break the...

Issue #1833356 by grendzy: CSS files encoded in UTF-8 with BOM break the design when enabling CSS aggregation
parent e0505159
......@@ -183,6 +183,38 @@ public static function check() {
return '';
}
/**
 * Decodes UTF byte-order mark (BOM) into the encoding's name.
 *
 * Only the leading bytes of $data are inspected. When one signature is a
 * prefix of another, the longer one is preferred, so data starting with
 * FF FE 00 00 is reported as UTF-32LE rather than UTF-16LE.
 *
 * @param string $data
 *   The data possibly containing a BOM. This can be the entire contents of
 *   a file, or just a fragment containing at least the first five bytes.
 *
 * @return string|bool
 *   The name of the encoding, or FALSE if no byte order mark was present.
 */
public static function encodingFromBOM($data) {
  // The map is scanned in order and the first matching signature wins, so a
  // longer signature must be listed before any shorter signature it begins
  // with. The UTF-32LE BOM (FF FE 00 00) starts with the UTF-16LE BOM (FF FE)
  // and the five-byte UTF-7 form starts with a four-byte UTF-7 form.
  static $bomMap = array(
    "\x2B\x2F\x76\x38\x2D" => 'UTF-7',
    "\x00\x00\xFE\xFF" => 'UTF-32BE',
    "\xFF\xFE\x00\x00" => 'UTF-32LE',
    "\x2B\x2F\x76\x38" => 'UTF-7',
    "\x2B\x2F\x76\x39" => 'UTF-7',
    "\x2B\x2F\x76\x2B" => 'UTF-7',
    "\x2B\x2F\x76\x2F" => 'UTF-7',
    "\xEF\xBB\xBF" => 'UTF-8',
    "\xFE\xFF" => 'UTF-16BE',
    "\xFF\xFE" => 'UTF-16LE',
  );
  foreach ($bomMap as $bom => $encoding) {
    // Compare only the first strlen($bom) bytes. This is binary safe and
    // avoids searching the whole of $data when the signature is absent.
    if (strncmp($data, $bom, strlen($bom)) === 0) {
      return $encoding;
    }
  }
  return FALSE;
}
/**
* Converts data to UTF-8.
*
......
......@@ -7,6 +7,7 @@
namespace Drupal\Core\Asset;
use Drupal\Core\Asset\AssetOptimizerInterface;
use Drupal\Component\Utility\Unicode;
/**
* Optimizes a CSS asset.
......@@ -125,6 +126,19 @@ public function loadFile($file, $optimize = NULL, $reset_basepath = TRUE) {
// but are merely there to disable certain module CSS files.
$content = '';
if ($contents = @file_get_contents($file)) {
// If a BOM is found, convert the file to UTF-8, then use substr() to
// remove the BOM from the result.
if ($encoding = (Unicode::encodingFromBOM($contents))) {
$contents = Unicode::substr(Unicode::convertToUtf8($contents, $encoding), 1);
}
// If no BOM, check for fallback encoding. Per CSS spec the regex is very strict.
elseif (preg_match('/^@charset "([^"]+)";/', $contents, $matches)) {
if ($matches[1] !== 'utf-8' && $matches[1] !== 'UTF-8') {
$contents = substr($contents, strlen($matches[0]));
$contents = Unicode::convertToUtf8($contents, $matches[1]);
}
}
// Return the processed stylesheet.
$content = $this->processCss($contents, $_optimize);
}
......
......@@ -7,6 +7,7 @@
namespace Drupal\Core\Asset;
use Drupal\Core\Asset\AssetOptimizerInterface;
use Drupal\Component\Utility\Unicode;
/**
* Optimizes a JavaScript asset.
......@@ -24,8 +25,19 @@ public function optimize(array $js_asset) {
throw new \Exception('Only file JavaScript assets with preprocessing enabled can be optimized.');
}
// If a BOM is found, convert the file to UTF-8, then use substr() to
// remove the BOM from the result.
$data = file_get_contents($js_asset['data']);
if ($encoding = (Unicode::encodingFromBOM($data))) {
$data = Unicode::substr(Unicode::convertToUtf8($data, $encoding), 1);
}
// If no BOM is found, check for the charset attribute.
elseif (isset($js_asset['attributes']['charset'])) {
$data = Unicode::convertToUtf8($data, $js_asset['attributes']['charset']);
}
// No-op optimizer: no optimizations are applied to JavaScript assets.
return file_get_contents($js_asset['data']);
return $data;
}
/**
......
......@@ -170,6 +170,62 @@ function providerTestOptimize() {
),
file_get_contents($path . 'charset.css.optimized.css'),
),
6 => array(
array(
'group' => -100,
'every_page' => TRUE,
'type' => 'file',
'weight' => 0.013,
'media' => 'all',
'preprocess' => TRUE,
'data' => $path . 'css_input_with_bom.css',
'browsers' => array('IE' => TRUE, '!IE' => TRUE),
'basename' => 'css_input_with_bom.css',
),
'.byte-order-mark-test{content:"☃";}'. "\n",
),
7 => array(
array(
'group' => -100,
'every_page' => TRUE,
'type' => 'file',
'weight' => 0.013,
'media' => 'all',
'preprocess' => TRUE,
'data' => $path . 'css_input_with_charset.css',
'browsers' => array('IE' => TRUE, '!IE' => TRUE),
'basename' => 'css_input_with_charset.css',
),
'.charset-test{content:"€";}' . "\n",
),
8 => array(
array(
'group' => -100,
'every_page' => TRUE,
'type' => 'file',
'weight' => 0.013,
'media' => 'all',
'preprocess' => TRUE,
'data' => $path . 'css_input_with_bom_and_charset.css',
'browsers' => array('IE' => TRUE, '!IE' => TRUE),
'basename' => 'css_input_with_bom_and_charset.css',
),
'.byte-order-mark-charset-test{content:"☃";}' . "\n",
),
9 => array(
array(
'group' => -100,
'every_page' => TRUE,
'type' => 'file',
'weight' => 0.013,
'media' => 'all',
'preprocess' => TRUE,
'data' => $path . 'css_input_with_utf16_bom.css',
'browsers' => array('IE' => TRUE, '!IE' => TRUE),
'basename' => 'css_input_with_utf16_bom.css',
),
'.utf16-byte-order-mark-test{content:"☃";}' . "\n",
),
);
}
......
......@@ -80,4 +80,52 @@ function testClean($js_asset, $expected) {
$this->assertEquals($expected, $this->optimizer->clean($js_asset));
}
/**
 * Data provider for testOptimize().
 *
 * Each data set pairs a JavaScript file asset definition with the expected
 * result, which is read from the matching '.optimized.js' fixture in the
 * js_test_files directory next to this file.
 *
 * @see \Drupal\Core\Asset\JsOptimizer::optimize()
 *
 * @return array
 *   A list of data sets, each holding the asset array followed by the
 *   expected contents.
 */
function providerTestOptimize() {
  $fixture_dir = __DIR__ . '/js_test_files/';
  $data_sets = array();

  // File starting with a UTF-8 byte order mark.
  $data_sets[] = array(
    array(
      'type' => 'file',
      'preprocess' => TRUE,
      'data' => $fixture_dir . 'utf8_bom.js',
    ),
    file_get_contents($fixture_dir . 'utf8_bom.js.optimized.js'),
  );

  // File starting with a UTF-16 byte order mark.
  $data_sets[] = array(
    array(
      'type' => 'file',
      'preprocess' => TRUE,
      'data' => $fixture_dir . 'utf16_bom.js',
    ),
    file_get_contents($fixture_dir . 'utf16_bom.js.optimized.js'),
  );

  // File whose encoding is declared through the 'charset' attribute.
  $data_sets[] = array(
    array(
      'type' => 'file',
      'preprocess' => TRUE,
      'data' => $fixture_dir . 'latin_9.js',
      'attributes' => array('charset' => 'ISO-8859-15'),
    ),
    file_get_contents($fixture_dir . 'latin_9.js.optimized.js'),
  );

  return $data_sets;
}
/**
 * Tests optimizing a JS asset of 'type' => 'file'.
 *
 * @param array $js_asset
 *   The JavaScript asset definition passed to the optimizer.
 * @param mixed $expected
 *   The value the optimizer is expected to return for the asset.
 *
 * @dataProvider providerTestOptimize
 */
function testOptimize($js_asset, $expected) {
  $optimized = $this->optimizer->optimize($js_asset);
  $this->assertEquals($expected, $optimized);
}
}
@charset "utf-8";
.byte-order-mark-charset-test {
content: "☃";
}
@charset "iso-8859-15";
.charset-test {
content: "¤";
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment