Commit 3cbb29b2 authored by webchick's avatar webchick
Browse files

Issue #1930274 by rootatwc, Berdir: Convert aggregator processors and parsers to plugins.

parent 0b306f93
......@@ -306,70 +306,50 @@ function aggregator_admin_form($form, $form_state) {
'#description' => t('A space-separated list of HTML tags allowed in the content of feed items. Disallowed tags are stripped from the content.'),
);
// Make sure configuration is sane.
aggregator_sanitize_configuration();
// Get all available fetchers.
$fetcher_manager = drupal_container()->get('plugin.manager.aggregator.fetcher');
$fetchers = array();
foreach ($fetcher_manager->getDefinitions() as $id => $definition) {
$label = $definition['title'] . ' <span class="description">' . $definition['description'] . '</span>';
$fetchers[$id] = $label;
}
$config = config('aggregator.settings');
// Get all available parsers.
$parsers = module_implements('aggregator_parse');
foreach ($parsers as $k => $module) {
if ($info = module_invoke($module, 'aggregator_parse_info')) {
$label = $info['title'] . ' <span class="description">' . $info['description'] . '</span>';
}
else {
$label = $module;
// Get all available fetchers, parsers and processors.
foreach (array('fetcher', 'parser', 'processor') as $type) {
// Initialize definitions if not set.
$definitions[$type] = isset($definitions[$type]) ? $definitions[$type] : array();
$managers[$type] = Drupal::service("plugin.manager.aggregator.$type");
foreach ($managers[$type]->getDefinitions() as $id => $definition) {
$label = $definition['title'] . ' <span class="description">' . $definition['description'] . '</span>';
$definitions[$type][$id] = $label;
}
unset($parsers[$k]);
$parsers[$module] = $label;
}
// Get all available processors.
$processors = module_implements('aggregator_process');
foreach ($processors as $k => $module) {
if ($info = module_invoke($module, 'aggregator_process_info')) {
$label = $info['title'] . ' <span class="description">' . $info['description'] . '</span>';
}
else {
$label = $module;
}
unset($processors[$k]);
$processors[$module] = $label;
}
// Store definitions and managers so we can access them later.
$form_state['definitions'] = $definitions;
$form_state['managers'] = $managers;
// Only show basic configuration if there are actually options.
$basic_conf = array();
if (count($fetchers) > 1) {
if (count($definitions['fetcher']) > 1) {
$basic_conf['aggregator_fetcher'] = array(
'#type' => 'radios',
'#title' => t('Fetcher'),
'#description' => t('Fetchers download data from an external source. Choose a fetcher suitable for the external source you would like to download from.'),
'#options' => $fetchers,
'#default_value' => config('aggregator.settings')->get('fetcher'),
'#options' => $definitions['fetcher'],
'#default_value' => $config->get('fetcher'),
);
}
if (count($parsers) > 1) {
if (count($definitions['parser']) > 1) {
$basic_conf['aggregator_parser'] = array(
'#type' => 'radios',
'#title' => t('Parser'),
'#description' => t('Parsers transform downloaded data into standard structures. Choose a parser suitable for the type of feeds you would like to aggregate.'),
'#options' => $parsers,
'#default_value' => config('aggregator.settings')->get('parser'),
'#options' => $definitions['parser'],
'#default_value' => $config->get('parser'),
);
}
if (count($processors) > 1) {
if (count($definitions['processor']) > 1) {
$basic_conf['aggregator_processors'] = array(
'#type' => 'checkboxes',
'#title' => t('Processors'),
'#description' => t('Processors act on parsed feed data, for example they store feed items. Choose the processors suitable for your task.'),
'#options' => $processors,
'#default_value' => config('aggregator.settings')->get('processors'),
'#options' => $definitions['processor'],
'#default_value' => $config->get('processors'),
);
}
if (count($basic_conf)) {
......@@ -382,9 +362,14 @@ function aggregator_admin_form($form, $form_state) {
$form['basic_conf'] += $basic_conf;
}
// Implementing modules will expect an array at $form['modules'].
$form['modules'] = array();
// Implementing processor plugins will expect an array at $form['processors'].
$form['processors'] = array();
// Call settingsForm() for each active processor.
foreach ($definitions['processor'] as $id => $definition) {
if (in_array($id, $config->get('processors'))) {
$form = $managers['processor']->createInstance($id)->settingsForm($form, $form_state);
}
}
return system_config_form($form, $form_state);
}
......@@ -393,13 +378,14 @@ function aggregator_admin_form($form, $form_state) {
*/
function aggregator_admin_form_submit($form, &$form_state) {
$config = config('aggregator.settings');
$config
->set('items.allowed_html', $form_state['values']['aggregator_allowed_html_tags'])
->set('items.expire', $form_state['values']['aggregator_clear'])
->set('items.teaser_length', $form_state['values']['aggregator_teaser_length'])
->set('source.list_max', $form_state['values']['aggregator_summary_items'])
->set('source.category_selector', $form_state['values']['aggregator_category_selector']);
// Let active processors save their settings.
foreach ($form_state['definitions']['processor'] as $id => $definition) {
if (in_array($id, $config->get('processors'))) {
$form_state['managers']['processor']->createInstance($id)->settingsSubmit($form, $form_state);
}
}
$config->set('items.allowed_html', $form_state['values']['aggregator_allowed_html_tags']);
if (isset($form_state['values']['aggregator_fetcher'])) {
$config->set('fetcher', $form_state['values']['aggregator_fetcher']);
}
......
<?php
/**
* @file
* Documentation for aggregator API.
*/
/**
* @addtogroup hooks
* @{
*/
/**
 * Declare the fetcher plugins provided by your module.
 *
 * Each fetcher's title and description are displayed on the aggregator
 * configuration page.
 *
 * @return
 *   An associative array keyed by fetcher id. Every value is a fetcher
 *   definition, itself an associative array with these key-value pairs:
 *   - class: (required) The PHP class containing the fetcher implementation.
 *   - title: (required) A human readable name of the fetcher.
 *   - description: (required) A brief (40 to 80 characters) explanation of the
 *     fetcher's functionality.
 *
 * @ingroup aggregator
 */
function hook_aggregator_fetch_info() {
  $fetchers = array();

  // One entry per fetcher, keyed by the fetcher id.
  $fetchers['aggregator'] = array(
    'class' => 'Drupal\aggregator\Plugin\aggregator\fetcher\DefaultFetcher',
    'title' => t('Default fetcher'),
    'description' => t('Downloads data from a URL using Drupal\'s HTTP request handler.'),
  );

  return $fetchers;
}
/**
 * Provide an alternative parser for the aggregator module.
 *
 * Parsing is the second of the three aggregation stages: the active fetcher
 * first downloads the data, the active parser then converts the feed item
 * data to a common format, and finally all active processors manipulate or
 * store the result.
 *
 * A module defining this hook can be selected as the active parser on the
 * configuration page. Only one parser can be active at a time.
 *
 * @param $feed
 *   An object describing the resource to be parsed. The raw feed data is
 *   available in $feed->source_string. The hook implementation should parse
 *   this data and add the following properties to the $feed object:
 *   - description: The human-readable description of the feed.
 *   - link: A full URL that directly relates to the feed.
 *   - image: An image URL used to display an image of the feed.
 *   - etag: An entity tag from the HTTP header used for cache validation to
 *     determine if the content has been changed.
 *   - modified: The UNIX timestamp when the feed was last modified.
 *   - items: An array of feed items. The common format for a single feed item
 *     is an associative array containing:
 *     - title: The human-readable title of the feed item.
 *     - description: The full body text of the item or a summary.
 *     - timestamp: The UNIX timestamp when the feed item was last published.
 *     - author: The author of the feed item.
 *     - guid: The global unique identifier (GUID) string that uniquely
 *       identifies the item. If not available, the link is used to identify
 *       the item.
 *     - link: A full URL to the individual feed item.
 *
 * @return
 *   TRUE if parsing was successful, FALSE otherwise.
 *
 * @see hook_aggregator_parse_info()
 * @see hook_aggregator_fetch()
 * @see hook_aggregator_process()
 *
 * @ingroup aggregator
 */
function hook_aggregator_parse($feed) {
  $parsed_items = mymodule_parse($feed->source_string);

  // An empty (falsy) result means there is nothing to hand to processors.
  if (!$parsed_items) {
    return FALSE;
  }

  $feed->items = $parsed_items;
  return TRUE;
}
/**
 * Provide the title and a short description of your parser.
 *
 * Both strings are displayed on the configuration page. The title should be
 * the human readable name of the parser; the description should be a brief
 * (40 to 80 characters) explanation of the parser's functionality.
 *
 * This hook is only called if your module implements hook_aggregator_parse().
 * When it is not implemented, aggregator falls back to your module's file
 * name as the title and shows no description.
 *
 * @return
 *   An associative array defining a title and a description string.
 *
 * @see hook_aggregator_parse()
 *
 * @ingroup aggregator
 */
function hook_aggregator_parse_info() {
  $info = array();
  $info['title'] = t('Default parser');
  $info['description'] = t('Default parser for RSS, Atom and RDF feeds.');
  return $info;
}
/**
 * Provide a processor for aggregator.module.
 *
 * Processors act on parsed feed data. They run at the third and last
 * aggregation stage: the active fetcher downloads the data, the active parser
 * converts it to a common format, and then every active processor manipulates
 * or stores the data.
 *
 * A module defining this hook can be activated as a processor on the
 * configuration page.
 *
 * @param $feed
 *   A feed object representing the resource to be processed. $feed->items
 *   contains an array of feed items downloaded and parsed at the parsing stage.
 *   See hook_aggregator_parse() for the basic format of a single item in the
 *   $feed->items array. For the exact format refer to the particular parser in
 *   use.
 *
 * @see hook_aggregator_process_info()
 * @see hook_aggregator_fetch()
 * @see hook_aggregator_parse()
 *
 * @ingroup aggregator
 */
function hook_aggregator_process($feed) {
  // Hand every parsed item to the module's own storage routine, in order.
  foreach ($feed->items as $feed_item) {
    mymodule_save($feed_item);
  }
}
/**
 * Specify the title and short description of your processor.
 *
 * The title and the description provided are shown within the configuration
 * page. Use as title the natural name of the processor and as description a
 * brief (40 to 80 characters) explanation of the functionality.
 *
 * This hook is only called if your module implements hook_aggregator_process().
 * If this hook is not implemented aggregator will use your module's file name
 * as title and there will be no description.
 *
 * This hook is invoked without arguments, so implementations must not declare
 * any required parameters.
 *
 * @return
 *   An associative array defining a title and a description string.
 *
 * @see hook_aggregator_process()
 *
 * @ingroup aggregator
 */
function hook_aggregator_process_info() {
  return array(
    'title' => t('Default processor'),
    'description' => t('Creates lightweight records of feed items.'),
  );
}
/**
 * React to the removal of stored feed data.
 *
 * Aggregator calls this hook if either a feed is deleted or a user clicks on
 * "remove items".
 *
 * A module that stores feed items, for example in hook_aggregator_process(),
 * is advised to implement this hook and remove its data related to $feed
 * whenever it is called.
 *
 * @param $feed
 *   The $feed object whose items are being removed.
 *
 * @ingroup aggregator
 */
function hook_aggregator_remove($feed) {
  // The feed id identifies which stored items belong to this feed.
  $feed_id = $feed->fid;
  mymodule_remove_items($feed_id);
}
/**
* @} End of "addtogroup hooks".
*/
......@@ -6,6 +6,7 @@
*/
use Drupal\aggregator\Plugin\Core\Entity\Feed;
use Drupal\Component\Plugin\Exception\PluginException;
/**
* Denotes that a feed's items should never expire.
......@@ -301,7 +302,7 @@ function aggregator_permission() {
* Queues news feeds for updates once their refresh interval has elapsed.
*/
function aggregator_cron() {
$result = db_query('SELECT fid FROM {aggregator_feed} WHERE queued = 0 AND checked + refresh < :time AND refresh <> :never', array(
$result = db_query('SELECT fid FROM {aggregator_feed} WHERE queued = 0 AND checked + refresh < :time AND refresh <> :never', array(
':time' => REQUEST_TIME,
':never' => AGGREGATOR_CLEAR_NEVER
));
......@@ -310,10 +311,8 @@ function aggregator_cron() {
$feed = aggregator_feed_load($fid);
if ($queue->createItem($feed)) {
// Add timestamp to avoid queueing item more than once.
db_update('aggregator_feed')
->fields(array('queued' => REQUEST_TIME))
->condition('fid', $feed->id())
->execute();
$feed->queued->value = REQUEST_TIME;
$feed->save();
}
}
......@@ -395,42 +394,18 @@ function aggregator_save_category($edit) {
* An object describing the feed to be cleared.
*/
function aggregator_remove(Feed $feed) {
_aggregator_get_variables();
// Call hook_aggregator_remove() on all modules.
module_invoke_all('aggregator_remove', $feed);
// Reset feed.
db_update('aggregator_feed')
->condition('fid', $feed->id())
->fields(array(
'checked' => 0,
'hash' => '',
'etag' => '',
'modified' => 0,
))
->execute();
}
/**
* Gets the fetcher, parser, and processors.
*
* @return
* An array containing the fetcher, parser, and processors.
*/
function _aggregator_get_variables() {
$config = config('aggregator.settings');
$fetcher = $config->get('fetcher');
$parser = $config->get('parser');
if ($parser == 'aggregator') {
include_once DRUPAL_ROOT . '/' . drupal_get_path('module', 'aggregator') . '/aggregator.parser.inc';
// Call \Drupal\aggregator\Plugin\ProcessorInterface::remove() on all
// processors.
$manager = Drupal::service('plugin.manager.aggregator.processor');
foreach ($manager->getDefinitions() as $id => $definition) {
$manager->createInstance($id)->remove($feed);
}
$processors = $config->get('processors');
if (in_array('aggregator', $processors)) {
include_once DRUPAL_ROOT . '/' . drupal_get_path('module', 'aggregator') . '/aggregator.processor.inc';
}
return array($fetcher, $parser, $processors);
// Reset feed.
$feed->checked->value = 0;
$feed->hash->value = '';
$feed->etag->value = '';
$feed->modified->value = 0;
$feed->save();
}
/**
......@@ -443,15 +418,28 @@ function aggregator_refresh(Feed $feed) {
// Store feed URL to track changes.
$feed_url = $feed->url->value;
list($fetcher, $parser, $processors) = _aggregator_get_variables();
$config = config('aggregator.settings');
// Fetch the feed.
$fetcher_manager = drupal_container()->get('plugin.manager.aggregator.fetcher');
$fetcher_manager = Drupal::service('plugin.manager.aggregator.fetcher');
try {
$success = $fetcher_manager->createInstance($fetcher)->fetch($feed);
$success = $fetcher_manager->createInstance($config->get('fetcher'))->fetch($feed);
}
catch (PluginException $e) {
$success = FALSE;
watchdog_exception('aggregator', $e);
}
// Retrieve processor manager now.
$processor_manager = Drupal::service('plugin.manager.aggregator.processor');
// Store instances in an array so we don't have to instantiate new objects.
$processor_instances = array();
foreach ($config->get('processors') as $processor) {
try {
$processor_instances[$processor] = $processor_manager->createInstance($processor);
}
catch (PluginException $e) {
watchdog_exception('aggregator', $e);
}
}
// We store the hash of feed data in the database. When refreshing a
......@@ -461,43 +449,48 @@ function aggregator_refresh(Feed $feed) {
if ($success && ($feed->hash->value != $hash)) {
// Parse the feed.
if (module_invoke($parser, 'aggregator_parse', $feed)) {
if (empty($feed->link->value)) {
$feed->link->value = $feed->url->value;
}
$feed->hash->value = $hash;
// Update feed with parsed data.
$feed->save();
$parser_manager = Drupal::service('plugin.manager.aggregator.parser');
try {
if ($parser_manager->createInstance($config->get('parser'))->parse($feed)) {
if (empty($feed->link->value)) {
$feed->link->value = $feed->url->value;
}
$feed->hash->value = $hash;
// Update feed with parsed data.
$feed->save();
// Log if feed URL has changed.
if ($feed->url->value != $feed_url) {
watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed->label(), '%url' => $feed->url->value));
}
// Log if feed URL has changed.
if ($feed->url->value != $feed_url) {
watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed->label(), '%url' => $feed->url->value));
}
watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed->label()));
drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed->label())));
watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed->label()));
drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed->label())));
// If there are items on the feed, let all enabled processors do their work on it.
if (@count($feed->items)) {
foreach ($processors as $processor) {
module_invoke($processor, 'aggregator_process', $feed);
// If there are items on the feed, let enabled processors process them.
if (!empty($feed->items)) {
foreach ($processor_instances as $instance) {
$instance->process($feed);
}
}
}
}
catch (PluginException $e) {
watchdog_exception('aggregator', $e);
}
}
else {
drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed->label())));
}
// Whether or not the refresh succeeded, indicate that this feed has been checked.
db_update('aggregator_feed')
->fields(array('checked' => REQUEST_TIME, 'queued' => 0))
->condition('fid', $feed->id())
->execute();
$feed->checked->value = REQUEST_TIME;
$feed->queued->value = 0;
$feed->save();
// Expire old feed items.
if (function_exists('aggregator_expire')) {
aggregator_expire($feed);
// Processing is done, call postProcess on enabled processors.
foreach ($processor_instances as $instance) {
$instance->postProcess($feed);
}
}
......@@ -560,55 +553,6 @@ function aggregator_filter_xss($value) {
return filter_xss($value, preg_split('/\s+|<|>/', config('aggregator.settings')->get('items.allowed_html'), -1, PREG_SPLIT_NO_EMPTY));
}
/**
 * Checks and sanitizes the aggregator configuration.
 *
 * Verifies that the configured fetcher, parser and each configured processor
 * name an existing module. If any of them is missing, the fetcher, parser and
 * processors settings are reset to the 'aggregator' defaults and saved.
 *
 * @return
 *   TRUE if this function resets the configuration; FALSE if not.
 */
function aggregator_sanitize_configuration() {
  list($fetcher, $parser, $processors) = _aggregator_get_variables();

  $fetcher_missing = !module_exists($fetcher);
  $parser_missing = !module_exists($parser);

  // Stop at the first processor whose module is not available.
  $processor_missing = FALSE;
  foreach ($processors as $processor) {
    if (!module_exists($processor)) {
      $processor_missing = TRUE;
      break;
    }
  }

  // Everything is available: leave the configuration untouched.
  if (!$fetcher_missing && !$parser_missing && !$processor_missing) {
    return FALSE;
  }

  // Reset aggregator config using the module defaults.
  $settings = config('aggregator.settings');
  $settings->set('fetcher', 'aggregator')
    ->set('parser', 'aggregator')
    ->set('processors', array('aggregator' => 'aggregator'))
    ->save();
  return TRUE;
}
/**
 * Formats an item count; intended as a callback for drupal_map_assoc().
 *
 * @param $count
 *   Items count.
 *
 * @return
 *   A string that is plural-formatted as "@count items".
 */
function _aggregator_items($count) {
  $formatted = format_plural($count, '1 item', '@count items');
  return $formatted;
}
/**
* Implements hook_preprocess_HOOK() for block.tpl.php.
*/
......
<?php
/**
* @file
* Parser functions for the aggregator module.
*/
use Drupal\aggregator\Plugin\Core\Entity\Feed;
/**
 * Implements hook_aggregator_parse_info().
 *
 * Supplies the title and description of aggregator's own parser.
 */
function aggregator_aggregator_parse_info() {
  $title = t('Default parser');
  $description = t('Parses RSS, Atom and RDF feeds.');

  return array(
    'title' => $title,
    'description' => $description,
  );
}
/**
* Implements hook_aggregator_parse().
*/
function aggregator_aggregator_parse(Feed $feed) {
global $channel, $image;
// Filter the input data.
if (aggregator_parse_feed($feed->source_string, $feed)) {
// Prepare the channel data.
foreach ($channel as $key => $value) {