Commit 526401c4 authored by Dries's avatar Dries
Browse files

- Patch #147310 by c960657 et al: better cache headers for reverse proxies.

parent 2bc19555
......@@ -48,8 +48,10 @@ DirectoryIndex index.php
# Cache all files for 2 weeks after access (A).
ExpiresDefault A1209600
# Do not cache dynamically generated pages.
ExpiresByType text/html A1
<Files index.php>
# Caching headers for dynamically generated pages are set from PHP.
ExpiresActive Off
</Files>
</IfModule>
# Various rewrite rules.
......
......@@ -42,6 +42,8 @@ Drupal 7.0, xxxx-xx-xx (development version)
- Performance:
* Improved performance on uncached page views by loading multiple core
objects in a single database query.
* Improved support for HTTP proxies (including reverse proxies), allowing
anonymous pageviews to be served entirely from the proxy.
- Documentation:
* Hook API documentation now included in Drupal core.
- News aggregator:
......
......@@ -743,34 +743,212 @@ function drupal_load($type, $name) {
return FALSE;
}
/**
 * Set an HTTP response header for the current page.
 *
 * Note: When sending a Content-Type header, always include a 'charset' type,
 * too. This is necessary to avoid security bugs (e.g. UTF-7 XSS).
 *
 * @param $name
 *   The HTTP header name, or a status code followed by a reason phrase, e.g.
 *   "404 Not Found". If omitted, nothing is modified and the stored headers
 *   are returned instead.
 * @param $value
 *   The HTTP header value; if omitted, the specified header is unset.
 * @param $append
 *   Whether to append the value to an existing header or to replace it.
 * @return
 *   When called without $name, the array of stored headers keyed by
 *   lower-case header name (the status line is stored under ":status"). A
 *   header that has been unset is stored as FALSE. Otherwise nothing is
 *   returned.
 */
function drupal_set_header($name = NULL, $value = NULL, $append = FALSE) {
  // The headers as name/value pairs.
  $headers = &drupal_static(__FUNCTION__, array());

  if (!isset($name)) {
    return $headers;
  }

  // Save status codes using the special key ":status".
  if (preg_match('/^\d{3} /', $name)) {
    $value = $name;
    $name = ':status';
  }
  else {
    // Remember the caller's spelling, but index the header by lower-case name.
    _drupal_set_preferred_header_name($name);
    $name = strtolower($name);
  }

  if (!isset($value)) {
    // FALSE marks the header as explicitly unset.
    $headers[$name] = FALSE;
  }
  elseif ($append && isset($headers[$name]) && $headers[$name] !== FALSE) {
    // Multiple headers with identical names may be combined using comma (RFC
    // 2616, section 4.2). A header that was previously unset (FALSE) is not
    // appended to, because that would yield a value with a leading comma.
    $headers[$name] .= ',' . $value;
  }
  else {
    $headers[$name] = $value;
  }

  drupal_send_headers(array($name => $headers[$name]), TRUE);
}
/**
 * Get the HTTP response headers for the current page.
 *
 * @param $name
 *   An HTTP header name (case-insensitive). If omitted, all headers are
 *   returned as name/value pairs keyed by lower-case name. If an array value
 *   is FALSE, the header has been unset.
 * @return
 *   A string containing the header value, or FALSE if the header has been
 *   unset, or NULL if the header has not been set.
 */
function drupal_get_header($name = NULL) {
  // Calling the setter without arguments returns everything stored so far.
  $all_headers = drupal_set_header();

  if (!isset($name)) {
    return $all_headers;
  }

  // Stored keys are lower-case, so normalize the requested name.
  $key = strtolower($name);
  if (isset($all_headers[$key])) {
    return $all_headers[$key];
  }
  return NULL;
}
/**
 * Remember the preferred spelling of an HTTP header name.
 *
 * Header names are case-insensitive, but for maximum compatibility they should
 * follow "common form" (see RFC 2616, section 4.2).
 *
 * @param $name
 *   The header name as it should be spelled when sent. If omitted, nothing is
 *   stored and the recorded names are returned.
 * @return
 *   When called without $name, an array mapping lower-case header names to
 *   their preferred spelling. Otherwise nothing is returned.
 */
function _drupal_set_preferred_header_name($name = NULL) {
  static $header_names = array();

  if (isset($name)) {
    // The most recent spelling wins for a given lower-case name.
    $header_names[strtolower($name)] = $name;
    return;
  }

  return $header_names;
}
/**
 * Send the HTTP response headers previously set using drupal_set_header().
 * Add default headers, unless they have been replaced or unset using
 * drupal_set_header().
 *
 * @param $default_headers
 *   An array of headers as name/value pairs.
 * @param $only_default
 *   If TRUE and headers have already been sent, send only the headers given
 *   in $default_headers.
 */
function drupal_send_headers($default_headers = array(), $only_default = FALSE) {
  $headers_sent = &drupal_static(__FUNCTION__, FALSE);
  $headers = drupal_get_header();
  if ($only_default && $headers_sent) {
    // The stored headers went out on an earlier call; send only the new ones.
    $headers = array();
  }
  $headers_sent = TRUE;

  $header_names = _drupal_set_preferred_header_name();
  foreach ($default_headers as $name => $value) {
    $name_lower = strtolower($name);
    if (!isset($headers[$name_lower])) {
      $headers[$name_lower] = $value;
      // Keep a spelling already recorded through drupal_set_header(); that
      // function passes the lower-cased name here, which must not replace the
      // preferred mixed-case form.
      if (!isset($header_names[$name_lower])) {
        $header_names[$name_lower] = $name;
      }
    }
  }

  foreach ($headers as $name_lower => $value) {
    if ($name_lower === ':status') {
      header($_SERVER['SERVER_PROTOCOL'] . ' ' . $value);
    }
    // Skip headers that have been unset (FALSE) or carry no value, but do send
    // legitimate values such as "0".
    elseif (isset($value) && $value !== FALSE && $value !== '') {
      header($header_names[$name_lower] . ': ' . $value);
    }
  }
}
/**
* Set HTTP headers in preparation for a page response.
*
* Authenticated users are always given a 'no-cache' header, and will
* fetch a fresh page on every request. This prevents authenticated
* users seeing locally cached pages that show them as logged out.
* Authenticated users are always given a 'no-cache' header, and will fetch a
* fresh page on every request. This prevents authenticated users from seeing
* locally cached pages.
*
* Also give each page a unique ETag. This will force clients to include both
* an If-Modified-Since header and an If-None-Match header when doing
* conditional requests for the page (required by RFC 2616, section 13.3.4),
* making the validation more robust. This is a workaround for a bug in Mozilla
* Firefox that is triggered when Drupal's caching is enabled and the user
* accesses Drupal via an HTTP proxy (see
* https://bugzilla.mozilla.org/show_bug.cgi?id=269303): When an authenticated
* user requests a page, and then logs out and requests the same page again,
* Firefox may send a conditional request based on the page that was cached
* locally when the user was logged in. If this page did not have an ETag
* header, the request only contains an If-Modified-Since header. The date will
* be recent, because with authenticated users the Last-Modified header always
* refers to the time of the request. If the user accesses Drupal via a proxy
* server, and the proxy already has a cached copy of the anonymous page with an
* older Last-Modified date, the proxy may respond with 304 Not Modified, making
* the client think that the anonymous and authenticated pageviews are
* identical.
*
* @see page_set_cache()
*/
function drupal_page_header() {
header("Expires: Sun, 19 Nov 1978 05:00:00 GMT");
header("Last-Modified: " . gmdate("D, d M Y H:i:s") . " GMT");
header("Cache-Control: store, no-cache, must-revalidate");
header("Cache-Control: post-check=0, pre-check=0", FALSE);
$headers_sent = &drupal_static(__FUNCTION__, FALSE);
if ($headers_sent) {
return TRUE;
}
$headers_sent = TRUE;
$default_headers = array(
'Expires' => 'Sun, 19 Nov 1978 05:00:00 GMT',
'Last-Modified' => gmdate(DATE_RFC1123, REQUEST_TIME),
'Cache-Control' => 'no-cache, must-revalidate, post-check=0, pre-check=0',
'ETag' => '"' . REQUEST_TIME . '"',
);
drupal_send_headers($default_headers);
}
/**
* Set HTTP headers in preparation for a cached page response.
*
* The general approach here is that anonymous users can keep a local
* cache of the page, but must revalidate it on every request. Then,
* they are given a '304 Not Modified' response as long as they stay
* logged out and the page has not been modified.
*
*/
function drupal_page_cache_header($cache) {
// Create entity tag based on cache update time.
$etag = '"' . md5($cache->created) . '"';
* The headers allow as much as possible in proxies and browsers without any
* particular knowledge about the pages. Modules can override these headers
* using drupal_set_header().
*
* If the request is conditional (using If-Modified-Since and If-None-Match),
* and the conditions match those currently in the cache, a 304 Not Modified
* response is sent.
*/
function drupal_page_cache_header(stdClass $cache) {
// Negotiate whether to use compression.
$page_compression = variable_get('page_compression', TRUE) && extension_loaded('zlib');
$return_compressed = $page_compression && isset($_SERVER['HTTP_ACCEPT_ENCODING']) && strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') !== FALSE;
// Get headers set in hook_boot(). Keys are lower-case.
$hook_boot_headers = drupal_get_header();
// Headers generated in this function, that may be replaced or unset using
// drupal_set_header(). Keys are mixed-case.
$default_headers = array();
foreach ($cache->headers as $name => $value) {
// In the case of a 304 response, certain headers must be sent, and the
// remaining may not (see RFC 2616, section 10.3.5). Do not override
// headers set in hook_boot().
$name_lower = strtolower($name);
if (in_array($name_lower, array('content-location', 'expires', 'cache-control', 'vary')) && !isset($hook_boot_headers[$name_lower])) {
drupal_set_header($name, $value);
unset($cache->headers[$name]);
}
}
// If a cache is served from a HTTP proxy without hitting the web server,
// the boot and exit hooks cannot be fired, so only allow caching in
// proxies with aggressive caching. If the client sends a session cookie, do
// not bother caching the page in a public proxy, because the cached copy
// will only be served to that particular user due to Vary: Cookie, unless
// the Vary header has been replaced or unset in hook_boot() (see below).
$max_age = variable_get('cache') == CACHE_AGGRESSIVE && (!isset($_COOKIE[session_name()]) || isset($hook_boot_headers['vary'])) ? variable_get('cache_lifetime', 0) : 0;
$default_headers['Cache-Control'] = 'public, max-age=' . $max_age;
// Entity tag should change if the output changes.
$etag = '"' . $cache->created . '-' . intval($return_compressed) . '"';
header('Etag: ' . $etag);
// See if the client has provided the required HTTP headers.
$if_modified_since = isset($_SERVER['HTTP_IF_MODIFIED_SINCE']) ? strtotime($_SERVER['HTTP_IF_MODIFIED_SINCE']) : FALSE;
......@@ -780,36 +958,46 @@ function drupal_page_cache_header($cache) {
&& $if_none_match == $etag // etag must match
&& $if_modified_since == $cache->created) { // if-modified-since must match
header($_SERVER['SERVER_PROTOCOL'] . ' 304 Not Modified');
// All 304 responses must send an etag if the 200 response for the same object contained an etag
header("Etag: $etag");
drupal_send_headers($default_headers);
return;
}
// Send appropriate response:
header("Last-Modified: " . gmdate(DATE_RFC1123, $cache->created));
header("ETag: $etag");
// Send the remaining headers.
foreach ($cache->headers as $name => $value) {
drupal_set_header($name, $value);
}
// The following headers force validation of cache:
header("Expires: Sun, 19 Nov 1978 05:00:00 GMT");
header("Cache-Control: must-revalidate");
$default_headers['Last-Modified'] = gmdate(DATE_RFC1123, $cache->created);
if (variable_get('page_compression', TRUE)) {
// Determine if the browser accepts gzipped data.
if (@strpos($_SERVER['HTTP_ACCEPT_ENCODING'], 'gzip') === FALSE && function_exists('gzencode')) {
// Strip the gzip header and run uncompress.
$cache->data = gzinflate(substr(substr($cache->data, 10), 0, -8));
}
elseif (function_exists('gzencode')) {
header('Content-Encoding: gzip');
}
// HTTP/1.0 proxies do not support the Vary header, so prevent any caching
// by sending an Expires date in the past. HTTP/1.1 clients ignore the
// Expires header if a Cache-Control: max-age= directive is specified (see RFC
// 2616, section 14.9.3).
$default_headers['Expires'] = 'Sun, 19 Nov 1978 05:00:00 GMT';
drupal_send_headers($default_headers);
// Allow HTTP proxies to cache pages for anonymous users without a session
// cookie. The Vary header is used to indicate the set of request-header
// fields that fully determines whether a cache is permitted to use the
// response to reply to a subsequent request for a given URL without
// revalidation. If a Vary header has been set in hook_boot(), it is assumed
// that the module knows how to cache the page.
if (!isset($hook_boot_headers['vary']) && !variable_get('omit_vary_cookie')) {
header('Vary: Cookie');
}
// Send the original request's headers. We send them one after
// another so PHP's header() function can deal with duplicate
// headers.
$headers = explode("\n", $cache->headers);
foreach ($headers as $header) {
header($header);
if ($page_compression) {
header('Vary: Accept-Encoding', FALSE);
// If page_compression is enabled, the cache contains gzipped data.
if ($return_compressed) {
header('Content-Encoding: gzip');
}
else {
// The client does not support compression, so unzip the data in the
// cache. Strip the gzip header and run uncompress.
$cache->data = gzinflate(substr(substr($cache->data, 10), 0, -8));
}
}
print $cache->data;
......@@ -1206,6 +1394,7 @@ function _drupal_bootstrap($phase) {
if (drupal_session_is_started() && empty($_SESSION)) {
session_destroy();
}
header('X-Drupal-Cache: HIT');
drupal_page_cache_header($cache);
// If the skipping of the bootstrap hooks is not enforced, call hook_exit.
if ($cache_mode != CACHE_AGGRESSIVE) {
......@@ -1214,15 +1403,20 @@ function _drupal_bootstrap($phase) {
// We are done.
exit;
}
// Prepare for non-cached page workflow.
drupal_page_header();
// If the session has not already been started and output buffering is
// not enabled, the session must be started now before the HTTP headers
// are sent. If output buffering is enabled, the session may be started
// not enabled, the HTTP headers must be sent now, including the session
// cookie. If output buffering is enabled, the session may be started
// at any time using drupal_session_start().
if ($cache === FALSE) {
drupal_page_header();
drupal_session_start();
}
else {
header('X-Drupal-Cache: MISS');
}
break;
case DRUPAL_BOOTSTRAP_LANGUAGE:
......
......@@ -30,33 +30,30 @@ function cache_get($cid, $table = 'cache') {
}
$cache = db_query("SELECT data, created, headers, expire, serialized FROM {" . $table . "} WHERE cid = :cid", array(':cid' => $cid))->fetchObject();
if (isset($cache->data)) {
// If the data is permanent or we're not enforcing a minimum cache lifetime
// always return the cached data.
if ($cache->expire == CACHE_PERMANENT || !variable_get('cache_lifetime', 0)) {
if ($cache->serialized) {
$cache->data = unserialize($cache->data);
}
}
// If enforcing a minimum cache lifetime, validate that the data is
// currently valid for this user before we return it by making sure the
// cache entry was created before the timestamp in the current session's
// cache timer. The cache variable is loaded into the $user object by
// _sess_read() in session.inc.
else {
if ($user->cache > $cache->created) {
// This cache data is too old and thus not valid for us, ignore it.
return FALSE;
}
else {
if ($cache->serialized) {
$cache->data = unserialize($cache->data);
}
}
}
return $cache;
if (!isset($cache->data)) {
return FALSE;
}
// If enforcing a minimum cache lifetime, validate that the data is
// currently valid for this user before we return it by making sure the cache
// entry was created before the timestamp in the current session's cache
// timer. The cache variable is loaded into the $user object by _sess_read()
// in session.inc. If the data is permanent or we're not enforcing a minimum
// cache lifetime always return the cached data.
if ($cache->expire != CACHE_PERMANENT && variable_get('cache_lifetime', 0) && $user->cache > $cache->created) {
// This cache data is too old and thus not valid for us, ignore it.
return FALSE;
}
return FALSE;
if ($cache->serialized) {
$cache->data = unserialize($cache->data);
}
if (isset($cache->headers)) {
$cache->headers = unserialize($cache->headers);
}
return $cache;
}
/**
......@@ -104,12 +101,12 @@ function cache_get($cid, $table = 'cache') {
* @param $headers
* An array of HTTP headers (name/value pairs) for cached pages, or NULL.
*/
function cache_set($cid, $data, $table = 'cache', $expire = CACHE_PERMANENT, $headers = NULL) {
function cache_set($cid, $data, $table = 'cache', $expire = CACHE_PERMANENT, array $headers = NULL) {
$fields = array(
'serialized' => 0,
'created' => REQUEST_TIME,
'expire' => $expire,
'headers' => $headers,
'headers' => isset($headers) ? serialize($headers) : NULL,
);
if (!is_string($data)) {
$fields['data'] = serialize($data);
......
......@@ -154,32 +154,6 @@ function drupal_clear_path_cache() {
drupal_lookup_path('wipe');
}
/**
* Set an HTTP response header for the current page.
*
* Note: When sending a Content-Type header, always include a 'charset' type,
* too. This is necessary to avoid security bugs (e.g. UTF-7 XSS).
*/
function drupal_set_header($header = NULL) {
// We use an array to guarantee there are no leading or trailing delimiters.
// Otherwise, header('') could get called when serving the page later, which
// ends HTTP headers prematurely on some PHP versions.
static $stored_headers = array();
if (strlen($header)) {
header($header);
$stored_headers[] = $header;
}
return implode("\n", $stored_headers);
}
/**
* Get the HTTP response headers for the current page.
*/
function drupal_get_headers() {
return drupal_set_header();
}
/**
* Add a feed URL for the current page.
*
......@@ -357,7 +331,7 @@ function drupal_goto($path = '', $query = NULL, $fragment = NULL, $http_response
*/
function drupal_site_offline() {
drupal_maintenance_theme();
drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 503 Service unavailable');
drupal_set_header('503 Service unavailable');
drupal_set_title(t('Site offline'));
print theme('maintenance_page', filter_xss_admin(variable_get('site_offline_message',
t('@site is currently under maintenance. We should be back shortly. Thank you for your patience.', array('@site' => variable_get('site_name', 'Drupal'))))));
......@@ -367,7 +341,7 @@ function drupal_site_offline() {
* Generates a 404 error if the request can not be handled.
*/
function drupal_not_found() {
drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 404 Not Found');
drupal_set_header('404 Not Found');
watchdog('page not found', check_plain($_GET['q']), NULL, WATCHDOG_WARNING);
......@@ -401,7 +375,7 @@ function drupal_not_found() {
* Generates a 403 error if the request is not allowed.
*/
function drupal_access_denied() {
drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 403 Forbidden');
drupal_set_header('403 Forbidden');
watchdog('access denied', check_plain($_GET['q']), NULL, WATCHDOG_WARNING);
// Keep old path for reference.
......@@ -818,7 +792,7 @@ function _drupal_log_error($error, $fatal = FALSE) {
}
if ($fatal) {
drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' Service unavailable');
drupal_set_header('503 Service unavailable');
drupal_set_title(t('Error'));
if (!defined('MAINTENANCE_MODE') && drupal_get_bootstrap_phase() == DRUPAL_BOOTSTRAP_FULL) {
// To conserve CPU and bandwidth, omit the blocks.
......@@ -2847,7 +2821,7 @@ function drupal_to_js($var) {
*/
function drupal_json($var = NULL) {
// We are returning JavaScript, so tell the browser.
drupal_set_header('Content-Type: text/javascript; charset=utf-8');
drupal_set_header('Content-Type', 'text/javascript; charset=utf-8');
if (isset($var)) {
echo drupal_to_js($var);
......@@ -3015,7 +2989,7 @@ function _drupal_bootstrap_full() {
set_exception_handler('_drupal_exception_handler');
// Emit the correct charset HTTP header.
drupal_set_header('Content-Type: text/html; charset=utf-8');
drupal_set_header('Content-Type', 'text/html; charset=utf-8');
// Detect string handling method
unicode_check();
// Undo magic quotes
......@@ -3047,24 +3021,35 @@ function page_set_cache() {
global $user, $base_root;
if (page_get_cache(FALSE)) {
$cache = TRUE;
$data = ob_get_contents();
$cache_page = TRUE;
$cache = (object) array(
'cid' => $base_root . request_uri(),
'data' => ob_get_clean(),
'expire' => CACHE_TEMPORARY,
'created' => REQUEST_TIME,
'headers' => drupal_get_header(),
);
if (variable_get('page_compression', TRUE) && function_exists('gzencode')) {
// We do not store the data in case the zlib mode is deflate. This should
// be rarely happening.
if (zlib_get_coding_type() == 'deflate') {
$cache = FALSE;
$cache_page = FALSE;
}
elseif (zlib_get_coding_type() == FALSE) {
$data = gzencode($data, 9, FORCE_GZIP);
$cache->data = gzencode($cache->data, 9, FORCE_GZIP);
}
// The remaining case is 'gzip' which means the data is already
// compressed and nothing left to do but to store it.
}
ob_end_flush();
if ($cache && $data) {
cache_set($base_root . request_uri(), $data, 'cache_page', CACHE_TEMPORARY, drupal_get_headers());
if ($cache_page && $cache->data) {
cache_set($cache->cid, $cache->data, 'cache_page', $cache->expire, $cache->headers);
}
drupal_page_cache_header($cache);
}
else {
// If output buffering was enabled during bootstrap, and the headers were
// not sent in the DRUPAL_BOOTSTRAP_LATE_PAGE_CACHE phase, send them now.
drupal_page_header();
}
}
......
......@@ -1311,13 +1311,10 @@ function file_transfer($source, $headers) {
ob_end_clean();
}
foreach ($headers as $header) {
// To prevent HTTP header injection, we delete new lines that are
// not followed by a space or a tab.
// See http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
$header = preg_replace('/\r?\n(?!\t| )/', '', $header);
drupal_set_header($header);
foreach ($headers as $name => $value) {
drupal_set_header($name, $value);
}
drupal_send_headers();
$source = file_create_path($source);
......
......@@ -108,7 +108,7 @@ function theme_task_list($items, $active = NULL) {
* The page content to show.
*/
function theme_install_page($content) {
drupal_set_header('Content-Type: text/html; charset=utf-8');
drupal_set_header('Content-Type', 'text/html; charset=utf-8');
// Assign content.
$variables['content'] = $content;
......@@ -162,7 +162,7 @@ function theme_install_page($content) {
*/
function theme_update_page($content, $show_messages = TRUE) {
// Set required headers.
drupal_set_header('Content-Type: text/html; charset=utf-8');
drupal_set_header('Content-Type', 'text/html; charset=utf-8');
// Assign content and show message flag.
$variables['content'] = $content;
......
......@@ -371,7 +371,7 @@ function aggregator_page_rss() {
* @ingroup themeable
*/
function theme_aggregator_page_rss($feeds, $category = NULL) {
drupal_set_header('Content-Type: application/rss+xml; charset=utf-8');
drupal_set_header('Content-Type', 'application/rss+xml; charset=utf-8');
$items = '';
$feed_length = variable_get('feed_item_length', 'teaser');
......@@ -431,7 +431,7 @@ function aggregator_page_opml($cid = NULL) {
* @ingroup themeable
*/
function theme_aggregator_page_opml($feeds) {
drupal_set_header('Content-Type: text/xml; charset=utf-8');
drupal_set_header('Content-Type', 'text/xml; charset=utf-8');
$output = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
$output .= "<opml version=\"1.1\">\n";
......
......@@ -33,21 +33,21 @@ function aggregator_test_feed($use_last_modified = FALSE, $use_etag = FALSE) {
// Send appropriate response. We respond with a 304 not modified on either
// etag or on last modified.
if ($use_last_modified) {
drupal_set_header("Last-Modified: " . gmdate(DATE_RFC1123, $last_modified));
drupal_set_header('Last-Modified', gmdate(DATE_RFC1123, $last_modified));
}
if ($use_etag) {
drupal_set_header("ETag: " .$etag);
drupal_set_header('ETag', $etag);
}
// Return 304 not modified if either last modified or etag match.
if ($last_modified == $if_modified_since || $etag == $if_none_match) {
drupal_set_header($_SERVER['SERVER_PROTOCOL'] . ' 304 Not Modified');
drupal_set_header('304 Not Modified');
return;