Commit 9058a898 authored by Dries
Browse files

- Patch #597484 by dww: use the Queue API to fetch available update data.

parent cad226e6
......@@ -406,7 +406,7 @@ function update_calculate_project_data($available) {
break;
case 'not-fetched':
$projects[$project]['status'] = UPDATE_NOT_FETCHED;
$projects[$project]['reason'] = t('Failed to fetch available update data');
$projects[$project]['reason'] = t('Failed to get available update data.');
break;
default:
......@@ -469,6 +469,17 @@ function update_calculate_project_data($available) {
$version_patch_changed = '';
$patch = '';
// If the project is marked as UPDATE_FETCH_PENDING, it means that the
// data we currently have (if any) is stale, and we've got a task queued
// up to (re)fetch the data. In that case, we mark it as such, merge in
// whatever data we have (e.g. project title and link), and move on.
if (!empty($available[$project]['fetch_status']) && $available[$project]['fetch_status'] == UPDATE_FETCH_PENDING) {
$projects[$project]['status'] = UPDATE_FETCH_PENDING;
$projects[$project]['reason'] = t('No available update data');
$projects[$project] += $available[$project];
continue;
}
// Defend ourselves from XML history files that contain no releases.
if (empty($available[$project]['releases'])) {
$projects[$project]['status'] = UPDATE_UNKNOWN;
......
......@@ -10,21 +10,184 @@
* Callback to manually check the update status without cron.
*/
function update_manual_status() {
if (_update_refresh()) {
drupal_set_message(t('Attempted to fetch information about all available new releases and updates.'));
_update_refresh();
$batch = array(
'operations' => array(
array('update_fetch_data_batch', array()),
),
'finished' => 'update_fetch_data_finished',
'title' => t('Checking available update data'),
'progress_message' => t('Trying to check available update data ...'),
'error_message' => t('Error checking available update data.'),
'file' => drupal_get_path('module', 'update') . '/update.fetch.inc',
);
batch_set($batch);
batch_process('admin/reports/updates');
}
/**
 * Process a step in the batch for fetching available update data.
 *
 * Each invocation claims up to five tasks from the 'update_fetch_tasks' queue,
 * processes each one with _update_process_fetch_task(), records the outcome in
 * the batch results, and deletes the task from the queue whether or not the
 * fetch succeeded.
 *
 * @param $context
 *   The batch context array, passed by reference. This function maintains:
 *   - $context['sandbox']['max']: queue size reported when the batch started.
 *   - $context['sandbox']['progress']: number of tasks handled so far.
 *   - $context['results']['updated'], ['failures'], ['processed']: counters
 *     later read by update_fetch_data_finished().
 *   - $context['message'] and $context['finished'].
 */
function update_fetch_data_batch(&$context) {
  $queue = DrupalQueue::get('update_fetch_tasks');

  // Initialize only once. isset() is used instead of empty() on purpose: a
  // legitimate initial count of 0 must not look like "not initialized yet",
  // otherwise every later pass would re-run this block and wipe the result
  // counters accumulated so far.
  if (!isset($context['sandbox']['max'])) {
    $context['finished'] = 0;
    $context['sandbox']['max'] = $queue->numberOfItems();
    $context['sandbox']['progress'] = 0;
    $context['message'] = t('Checking available update data ...');
    $context['results']['updated'] = 0;
    $context['results']['failures'] = 0;
    $context['results']['processed'] = 0;
  }

  // Handle at most five fetch tasks per invocation.
  for ($claimed = 0; $claimed < 5; $claimed++) {
    $item = $queue->claimItem();
    if (!$item) {
      // If the queue is currently empty, we're done. It's possible that
      // another thread might have added new fetch tasks while we were
      // processing this batch. In that case, the usual 'finished' math could
      // get confused, since we'd end up processing more tasks than we thought
      // we had when we started and initialized 'max' with numberOfItems(). By
      // forcing 'finished' to be exactly 1 here, we ensure that batch
      // processing is terminated.
      $context['finished'] = 1;
      return;
    }

    $title = array('%title' => $item->data['info']['name']);
    if (_update_process_fetch_task($item->data)) {
      $context['results']['updated']++;
      $context['message'] = t('Checked available update data for %title.', $title);
    }
    else {
      $context['message'] = t('Failed to check available update data for %title.', $title);
      $context['results']['failures']++;
    }
    $context['sandbox']['progress']++;
    $context['results']['processed']++;

    // 'max' can be 0 when the queue reported no items at start-up but a task
    // showed up afterwards. Never divide by zero in that case; report "not
    // finished" and let the empty-queue branch above end the batch.
    $max = $context['sandbox']['max'];
    $context['finished'] = ($max > 0) ? $context['sandbox']['progress'] / $max : 0;

    $queue->deleteItem($item);
  }
}
/**
* Batch API callback when all fetch tasks have been completed.
*
* Reports the outcome to the user through drupal_set_message() and then
* redirects to admin/reports/updates.
*
* @param $success
* Boolean indicating the success of the batch.
* @param $results
* Associative array holding the results of the batch, including the key
* 'updated' which holds the total number of projects we fetched available
* update data for, and the key 'failures' which holds the number of projects
* whose fetch task did not succeed.
*/
function update_fetch_data_finished($success, $results) {
if ($success) {
// Only counters that are non-zero produce a message, so a run that handled
// no tasks stays silent.
if (!empty($results)) {
if (!empty($results['updated'])) {
drupal_set_message(format_plural($results['updated'], 'Checked available update data for one project.', 'Checked available update data for @count projects.'));
}
if (!empty($results['failures'])) {
// Failures are shown with the 'error' message type.
drupal_set_message(format_plural($results['failures'], 'Failed to get available update data for one project.', 'Failed to get available update data for @count projects.'), 'error');
}
}
}
else {
// The batch itself did not complete successfully.
// NOTE(review): the two error messages below overlap in meaning; confirm
// that both are intended to be shown.
drupal_set_message(t('Unable to fetch any information about available new releases and updates.'), 'error');
drupal_set_message(t('An error occurred trying to get available update data.'), 'error');
}
// Redirect to admin/reports/updates whether or not the batch succeeded.
drupal_goto('admin/reports/updates');
}
/**
* Fetch project info via XML from a central server.
* Attempt to drain the queue of tasks for release history data to fetch.
*/
function _update_refresh() {
function _update_fetch_data() {
  $queue = DrupalQueue::get('update_fetch_tasks');
  // Time budget for this run, in seconds, configurable through the
  // 'update_max_fetch_time' variable.
  $deadline = time() + variable_get('update_max_fetch_time', UPDATE_MAX_FETCH_TIME);

  // Keep processing tasks until the budget is spent or the queue has nothing
  // left to hand out, whichever comes first.
  while (time() < $deadline) {
    $task = $queue->claimItem();
    if (!$task) {
      break;
    }
    // The task is removed from the queue regardless of the fetch result.
    _update_process_fetch_task($task->data);
    $queue->deleteItem($task);
  }
}
/**
 * Fetch and store the available update data for one project.
 *
 * The release history XML is requested for the project, parsed with
 * update_parse_xml(), and written to its own 'available_releases::' entry via
 * _update_cache_set(). Failed attempts are counted per fetch URL base so that
 * a server which keeps failing is no longer contacted once the configured
 * number of attempts has been reached.
 *
 * @param $project
 *   Associative array of information about the project to fetch data for.
 *
 * @return
 *   TRUE if we fetched parsable XML, otherwise FALSE.
 */
function _update_process_fetch_task($project) {
  global $base_url;
  $fail = &drupal_static(__FUNCTION__, array());

  // Read the clock directly: this may run deep inside a long batch, where
  // REQUEST_TIME is no longer a trustworthy notion of "now".
  $now = time();

  if (empty($fail)) {
    // Nothing known in this request yet, so fall back to the failure counts
    // persisted by an earlier run, provided they have not expired.
    $cached_failures = _update_cache_get('fetch_failures');
    if ($cached_failures && $cached_failures->expire > $now) {
      $fail = $cached_failures->data;
    }
  }

  $attempt_limit = variable_get('update_max_fetch_attempts', UPDATE_MAX_FETCH_ATTEMPTS);
  $site_key = md5($base_url . drupal_get_private_key());
  $url = _update_build_fetch_url($project, $site_key);
  $fetch_url_base = _update_get_fetch_url_base($project);
  $project_name = $project['name'];

  // Only contact the server while it is below the failure limit.
  $raw_xml = NULL;
  if (empty($fail[$fetch_url_base]) || $fail[$fetch_url_base] < $attempt_limit) {
    $response = drupal_http_request($url);
    if (isset($response->data)) {
      $raw_xml = $response->data;
    }
  }

  $success = FALSE;
  if (empty($raw_xml)) {
    // No payload (request skipped, failed, or empty): flag the project and
    // bump the failure counter for this server.
    $available = array('project_status' => 'not-fetched');
    if (!empty($fail[$fetch_url_base])) {
      $fail[$fetch_url_base]++;
    }
    else {
      $fail[$fetch_url_base] = 1;
    }
  }
  else {
    $available = update_parse_xml($raw_xml);
    // @todo: Purge release data we don't need (http://drupal.org/node/238950).
    // Success means we both fetched something and parsed it into real data.
    $success = !empty($available);
  }

  // Store the per-project result for one check interval.
  $frequency = variable_get('update_check_frequency', 1);
  $cid = 'available_releases::' . $project_name;
  _update_cache_set($cid, $available, $now + (60 * 60 * 24 * $frequency));

  // Remember the failure counts for the next 5 minutes.
  _update_cache_set('fetch_failures', $fail, $now + (60 * 5));

  // Successful or not, an update check was just attempted.
  variable_set('update_last_check', $now);

  // Drop the marker for this task in {cache_update} so that a new fetch task
  // can be created for the project later on.
  _update_cache_clear('fetch_task::' . $project_name);

  return $success;
}
/**
* Clear out all the cached available update data and initiate re-fetching.
*/
function _update_refresh() {
module_load_include('inc', 'update', 'update.compare');
// Since we're fetching new available update data, we want to clear
......@@ -36,57 +199,53 @@ function _update_refresh() {
_update_cache_clear('update_project_projects');
_update_cache_clear('update_project_data');
$available = array();
$data = array();
$site_key = md5($base_url . drupal_get_private_key());
$projects = update_get_projects();
// Now that we have the list of projects, we should also clear our cache of
// available release data, since even if we fail to fetch new data, we need
// to clear out the stale data at this point.
_update_cache_clear('update_available_releases');
$max_fetch_attempts = variable_get('update_max_fetch_attempts', UPDATE_MAX_FETCH_ATTEMPTS);
_update_cache_clear('available_releases::', TRUE);
foreach ($projects as $key => $project) {
$url = _update_build_fetch_url($project, $site_key);
$fetch_url_base = _update_get_fetch_url_base($project);
if (empty($fail[$fetch_url_base]) || count($fail[$fetch_url_base]) < $max_fetch_attempts) {
$xml = drupal_http_request($url);
if (isset($xml->data)) {
$data[] = $xml->data;
}
else {
// Connection likely broken; prepare to give up.
$fail[$fetch_url_base][$key] = 1;
}
}
else {
// Didn't bother trying to fetch.
$fail[$fetch_url_base][$key] = 1;
}
update_create_fetch_task($project);
}
}
if ($data) {
$available = update_parse_xml($data);
}
if (!empty($available) && is_array($available)) {
// Record the projects where we failed to fetch data.
foreach ($fail as $fetch_url_base => $failures) {
foreach ($failures as $key => $value) {
$available[$key]['project_status'] = 'not-fetched';
}
}
$frequency = variable_get('update_check_frequency', 1);
_update_cache_set('update_available_releases', $available, REQUEST_TIME + (60 * 60 * 24 * $frequency));
watchdog('update', 'Attempted to fetch information about all available new releases and updates.', array(), WATCHDOG_NOTICE, l(t('view'), 'admin/reports/updates'));
/**
* Add a task to the queue for fetching release history data for a project.
*
* We only create a new fetch task if there's no task already in the queue for
* this particular project (based on 'fetch_task::' entries in the
* {cache_update} table).
*
* @param $project
* Associative array of information about a project as created by
* update_get_projects(), including keys such as 'name' (short name),
* and the 'info' array with data from a .info file for the project.
*
* @see update_get_projects()
* @see update_get_available()
* @see update_refresh()
* @see update_fetch_data()
* @see _update_process_fetch_task()
*/
function _update_create_fetch_task($project) {
$fetch_tasks = &drupal_static(__FUNCTION__, array());
if (empty($fetch_tasks)) {
$fetch_tasks = _update_get_cache_multiple('fetch_task');
}
else {
watchdog('update', 'Unable to fetch any information about available new releases and updates.', array(), WATCHDOG_ERROR, l(t('view'), 'admin/reports/updates'));
$cid = 'fetch_task::' . $project['name'];
if (empty($fetch_tasks[$cid])) {
$queue = DrupalQueue::get('update_fetch_tasks');
$queue->createItem($project);
db_insert('cache_update')
->fields(array(
'cid' => $cid,
'created' => REQUEST_TIME,
))
->execute();
$fetch_tasks[$cid] = REQUEST_TIME;
}
// Whether this worked or not, we did just (try to) check for updates.
variable_set('update_last_check', REQUEST_TIME);
return $available;
}
/**
......@@ -101,7 +260,8 @@ function _update_refresh() {
* @param $site_key
* The anonymous site key hash (optional).
*
* @see update_refresh()
* @see update_fetch_data()
* @see _update_process_fetch_task()
* @see update_get_projects()
*/
function _update_build_fetch_url($project, $site_key = '') {
......@@ -180,44 +340,42 @@ function _update_cron_notify() {
/**
* Parse the XML of the Drupal release history info files.
*
* @param $raw_xml_list
* Array of raw XML strings, one for each fetched project.
* @param $raw_xml
* A raw XML string of available release data for a given project.
*
* @return
* Nested array of parsed data about projects and releases.
* Array of parsed data about releases for a given project, or NULL if there
* was an error parsing the string.
*/
function update_parse_xml($raw_xml_list) {
function update_parse_xml($raw_xml) {
try {
$xml = new SimpleXMLElement($raw_xml);
}
catch (Exception $e) {
// SimpleXMLElement::__construct produces an E_WARNING error message for
// each error found in the XML data and throws an exception if errors
// were detected. Catch any exception and return failure (NULL).
return;
}
$short_name = (string)$xml->short_name;
$data = array();
foreach ($raw_xml_list as $raw_xml) {
try {
$xml = new SimpleXMLElement($raw_xml);
}
catch (Exception $e) {
// SimpleXMLElement::__construct produces an E_WARNING error message for
// each error found in the XML data and throws an exception if errors
// were detected. Catch any exception and break to the next XML string.
break;
}
$short_name = (string)$xml->short_name;
$data[$short_name] = array();
foreach ($xml as $k => $v) {
$data[$short_name][$k] = (string)$v;
foreach ($xml as $k => $v) {
$data[$k] = (string)$v;
}
$data['releases'] = array();
foreach ($xml->releases->children() as $release) {
$version = (string)$release->version;
$data['releases'][$version] = array();
foreach ($release->children() as $k => $v) {
$data['releases'][$version][$k] = (string)$v;
}
$data[$short_name]['releases'] = array();
foreach ($xml->releases->children() as $release) {
$version = (string)$release->version;
$data[$short_name]['releases'][$version] = array();
foreach ($release->children() as $k => $v) {
$data[$short_name]['releases'][$version][$k] = (string)$v;
}
$data[$short_name]['releases'][$version]['terms'] = array();
if ($release->terms) {
foreach ($release->terms->children() as $term) {
if (!isset($data[$short_name]['releases'][$version]['terms'][(string)$term->name])) {
$data[$short_name]['releases'][$version]['terms'][(string)$term->name] = array();
}
$data[$short_name]['releases'][$version]['terms'][(string)$term->name][] = (string)$term->value;
$data['releases'][$version]['terms'] = array();
if ($release->terms) {
foreach ($release->terms->children() as $term) {
if (!isset($data['releases'][$version]['terms'][(string)$term->name])) {
$data['releases'][$version]['terms'][(string)$term->name] = array();
}
$data['releases'][$version]['terms'][(string)$term->name][] = (string)$term->value;
}
}
}
......
......@@ -6,6 +6,14 @@
* Install, update and uninstall functions for the update module.
*/
/**
 * Implement hook_install().
 *
 * Creates the 'update_fetch_tasks' queue.
 */
function update_install() {
  DrupalQueue::get('update_fetch_tasks')->createQueue();
}
/**
* Implement hook_uninstall().
*/
......@@ -17,11 +25,15 @@ function update_uninstall() {
'update_last_check',
'update_notification_threshold',
'update_notify_emails',
'update_max_fetch_attempts',
'update_max_fetch_time',
);
foreach ($variables as $variable) {
variable_del($variable);
}
menu_rebuild();
$queue = DrupalQueue::get('update_fetch_tasks');
$queue->deleteQueue();
}
/**
......@@ -32,3 +44,13 @@ function update_schema() {
$schema['cache_update']['description'] = 'Cache table for the Update module to store information about available releases, fetched from central server.';
return $schema;
}
/**
 * Create the 'update_fetch_tasks' queue used to fetch available update data.
 */
function update_update_7000() {
  // Load system.queue.inc from the system module before using DrupalQueue.
  module_load_include('inc', 'system', 'system.queue');
  DrupalQueue::get('update_fetch_tasks')->createQueue();
}
......@@ -56,11 +56,21 @@
*/
define('UPDATE_NOT_FETCHED', -3);
/**
* We need to (re)fetch available update data for this project.
*
* Assigned to a project's 'fetch_status' when its cached data is missing
* releases, has expired, or is older than the project's .info file.
*/
define('UPDATE_FETCH_PENDING', -4);
/**
* Maximum number of attempts to fetch available update data from a given host.
*
* Used as the default value of the 'update_max_fetch_attempts' variable.
*/
define('UPDATE_MAX_FETCH_ATTEMPTS', 2);
/**
* Maximum number of seconds to try fetching available update data at a time.
*
* Used as the default value of the 'update_max_fetch_time' variable.
*/
define('UPDATE_MAX_FETCH_TIME', 5);
/**
* Implement hook_help().
*/
......@@ -298,12 +308,18 @@ function _update_requirement_check($project, $type) {
function update_cron() {
$frequency = variable_get('update_check_frequency', 1);
$interval = 60 * 60 * 24 * $frequency;
// Cron should check for updates if there is no update data cached or if the
// configured update interval has elapsed.
if (!_update_cache_get('update_available_releases') || ((REQUEST_TIME - variable_get('update_last_check', 0)) > $interval)) {
if ((REQUEST_TIME - variable_get('update_last_check', 0)) > $interval) {
// If the configured update interval has elapsed, we want to invalidate
// the cached data for all projects, attempt to re-fetch, and trigger any
// configured notifications about the new status.
update_refresh();
_update_cron_notify();
}
else {
// Otherwise, see if any individual projects are now stale or still
// missing data, and if so, try to fetch the data.
update_get_available(TRUE);
}
}
/**
......@@ -370,39 +386,104 @@ function _update_no_data() {
*/
function update_get_available($refresh = FALSE) {
module_load_include('inc', 'update', 'update.compare');
$available = array();
// First, make sure that none of the .info files have a change time
// newer than the last time we checked for available updates.
$needs_refresh = FALSE;
$last_check = variable_get('update_last_check', 0);
// Grab whatever data we currently have cached in the DB.
$available = _update_get_cached_available_releases();
$projects = update_get_projects();
foreach ($projects as $key => $project) {
if ($project['info']['_info_file_ctime'] > $last_check) {
// If there's no data at all, we clearly need to fetch some.
if (empty($available[$key])) {
update_create_fetch_task($project);
$needs_refresh = TRUE;
continue;
}
// See if the .info file is newer than the last time we checked for data,
// and if so, mark this project's data as needing to be re-fetched. Any
// time an admin upgrades their local installation, the .info file will
// be changed, so this is the only way we can be sure we're not showing
// bogus information right after they upgrade.
if ($project['info']['_info_file_ctime'] > $available[$key]['last_fetch']) {
$available[$key]['fetch_status'] = UPDATE_FETCH_PENDING;
}
// If we have project data but no release data, we need to fetch. This
// can be triggered when we fail to contact a release history server.
if (empty($available[$key]['releases'])) {
$available[$key]['fetch_status'] = UPDATE_FETCH_PENDING;
}
// If we think this project needs to fetch, actually create the task now
// and remember that we think we're missing some data.
if (!empty($available[$key]['fetch_status']) && $available[$key]['fetch_status'] == UPDATE_FETCH_PENDING) {
update_create_fetch_task($project);
$needs_refresh = TRUE;
break;
}
}
if (!$needs_refresh && ($cache = _update_cache_get('update_available_releases')) && $cache->expire > REQUEST_TIME) {
$available = $cache->data;
}
elseif ($needs_refresh || $refresh) {
// If we need to refresh due to a newer .info file, ignore the argument
// and force the refresh (e.g., even for update_requirements()) to prevent
// bogus results.
$available = update_refresh();
if ($needs_refresh && $refresh) {
// Attempt to drain the queue of fetch tasks.
update_fetch_data();
// After processing the queue, we've (hopefully) got better data, so pull
// the latest from the cache again and use that directly.
$available = _update_get_cached_available_releases();
}
return $available;
}
/**
 * Create a new fetch task for a project, loading the include file first.
 *
 * @param $project
 *   Project information array, handed unchanged to
 *   _update_create_fetch_task().
 *
 * @return
 *   Whatever _update_create_fetch_task() returns.
 *
 * @see _update_create_fetch_task()
 */
function update_create_fetch_task($project) {
  module_load_include('inc', 'update', 'update.fetch');
  $result = _update_create_fetch_task($project);
  return $result;
}
/**
 * Refresh the release data, loading the include file first.
 *
 * @return
 *   Whatever _update_refresh() returns.
 *
 * @see _update_refresh()
 */
function update_refresh() {
  module_load_include('inc', 'update', 'update.fetch');
  $result = _update_refresh();
  return $result;
}
/**
 * Attempt to fetch update data, loading the include file first.
 *
 * @return
 *   Whatever _update_fetch_data() returns.
 *
 * @see _update_fetch_data()
 */
function update_fetch_data() {
  module_load_include('inc', 'update', 'update.fetch');
  $result = _update_fetch_data();
  return $result;
}
/**
 * Collect every cached 'available_releases' entry, indexed by project.
 *
 * Each entry's data is annotated with 'last_fetch' (the time the cache item
 * was created) and, when the item has expired, with a 'fetch_status' of
 * UPDATE_FETCH_PENDING.
 *
 * @return
 *   Array of data about available releases, keyed by project shortname.
 */
function _update_get_cached_available_releases() {
  $available = array();
  foreach (_update_get_cache_multiple('available_releases') as $cid => $entry) {
    $entry->data['last_fetch'] = $entry->created;
    $is_expired = $entry->expire < REQUEST_TIME;
    if ($is_expired) {
      $entry->data['fetch_status'] = UPDATE_FETCH_PENDING;
    }
    // The cache ID has the form '<prefix>::<shortname>'. The shortname is
    // taken from there because the stored data may not carry it at all.
    list(, $short_name) = explode('::', $cid, 2);
    $available[$short_name] = $entry->data;
  }
  return $available;
}
/**
* Implement hook_mail().
*
......@@ -503,6 +584,7 @@ function _update_message_text($msg_type, $msg_reason, $report_link = FALSE, $lan
case UPDATE_UNKNOWN:
case UPDATE_NOT_CHECKED:
case UPDATE_NOT_FETCHED:
case UPDATE_FETCH_PENDING:
if ($msg_type == 'core') {
$text = t('There was a problem determining the status of available updates for your version of Drupal.', array(), array('langcode' => $langcode));
}
......@@ -610,19 +692,53 @@ function _update_cache_get($cid) {
return $cache;
}
/**
* Return an array of cache items with a given cache ID prefix.
*