aggregator.module 33.2 KB
Newer Older
1
<?php
Dries's avatar
Dries committed
2
// $Id$
Dries's avatar
 
Dries committed
3

Dries's avatar
 
Dries committed
4 5
/**
 * @file
6
 * Used to aggregate syndicated content (RSS, RDF, and Atom).
Dries's avatar
 
Dries committed
7 8
 */

Dries's avatar
 
Dries committed
9 10 11
/**
 * Implementation of hook_help().
 *
 * @param $path
 *   The path for which help text is being requested.
 * @param $arg
 *   Not used by this implementation.
 *
 * @return
 *   Translated HTML help text for the aggregator paths handled below;
 *   nothing is returned for any other path.
 */
function aggregator_help($path, $arg) {
  // Both overview texts link to the same format specifications. Keeping the
  // targets in one place prevents the two copies from drifting apart (the
  // administration page previously pointed at a mistyped RSS URL).
  $format_links = array('@rss' => 'http://blogs.law.harvard.edu/tech/rss', '@rdf' => 'http://www.w3.org/RDF/', '@atom' => 'http://www.atomenabled.org');

  switch ($path) {
    case 'admin/help#aggregator':
      $output = '<p>'. t('The aggregator is a powerful on-site syndicator and news reader that gathers fresh content from RSS-, RDF-, and Atom-based feeds made available across the web. Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', $format_links) .'</p>';
      $output .= '<p>'. t('Feeds contain feed items, or individual posts published by the site providing the feed. Feeds may be grouped in categories, generally by topic. Users view feed items in the <a href="@aggregator">main aggregator display</a> or by <a href="@aggregator-sources">their source</a>. Administrators can <a href="@feededit">add, edit and delete feeds</a> and choose how often to check each feed for newly updated items. The most recent items in either a feed or category can be displayed as a block through the <a href="@admin-block">block administration page</a>. A <a href="@aggregator-opml">machine-readable OPML file</a> of all feeds is available. A correctly configured <a href="@cron">Cron maintenance task</a> is required to update feeds automatically.', array('@aggregator' => url('aggregator'), '@aggregator-sources' => url('aggregator/sources'), '@feededit' => url('admin/content/aggregator'), '@admin-block' => url('admin/build/block'), '@aggregator-opml' => url('aggregator/opml'), '@cron' => url('admin/reports/status'))) .'</p>';
      $output .= '<p>'. t('For more information, see the online handbook entry for <a href="@aggregator">Aggregator module</a>.', array('@aggregator' => 'http://drupal.org/handbook/modules/aggregator/')) .'</p>';
      return $output;
    case 'admin/content/aggregator':
      $output = '<p>'. t('Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', $format_links) .'</p>';
      $output .= '<p>'. t('Current feeds are listed below, and <a href="@addfeed">new feeds may be added</a>. For each feed or feed category, the <em>latest items</em> block may be enabled at the <a href="@block">blocks configuration page</a>.', array('@addfeed' => url('admin/content/aggregator/add/feed'), '@block' => url('admin/build/block'))) .'</p>';
      return $output;
    case 'admin/content/aggregator/add/feed':
      return '<p>'. t('Add a feed in RSS, RDF or Atom format. A feed may only have one entry.') .'</p>';
    case 'admin/content/aggregator/add/category':
      return '<p>'. t('Categories allow feed items from different feeds to be grouped together. For example, several sport-related feeds may belong to a category named <em>Sports</em>. Feed items may be grouped automatically (by selecting a category when creating or editing a feed) or manually (via the <em>Categorize</em> page available from feed item listings). Each category provides its own feed page and block.') .'</p>';
  }
}

30 31 32 33 34
/**
 * Implementation of hook_theme().
 *
 * @return
 *   An array of theme hook definitions keyed by hook name. Each definition
 *   lists the hook's default arguments and, where given, the include file
 *   and template that implement it.
 */
function aggregator_theme() {
  // Every hook that names an include file uses this one.
  $pages_file = 'aggregator.pages.inc';

  $hooks = array();
  $hooks['aggregator_wrapper'] = array(
    'arguments' => array('content' => NULL),
    'file' => $pages_file,
    'template' => 'aggregator-wrapper',
  );
  $hooks['aggregator_categorize_items'] = array(
    'arguments' => array('form' => NULL),
    'file' => $pages_file,
  );
  $hooks['aggregator_feed_source'] = array(
    'arguments' => array('feed' => NULL),
    'file' => $pages_file,
    'template' => 'aggregator-feed-source',
  );
  // The block item hook is the only one declared without an include file.
  $hooks['aggregator_block_item'] = array(
    'arguments' => array('item' => NULL, 'feed' => 0),
  );
  $hooks['aggregator_summary_items'] = array(
    'arguments' => array('summary_items' => NULL, 'source' => NULL),
    'file' => $pages_file,
    'template' => 'aggregator-summary-items',
  );
  $hooks['aggregator_summary_item'] = array(
    'arguments' => array('item' => NULL),
    'file' => $pages_file,
    'template' => 'aggregator-summary-item',
  );
  $hooks['aggregator_item'] = array(
    'arguments' => array('item' => NULL),
    'file' => $pages_file,
    'template' => 'aggregator-item',
  );
  $hooks['aggregator_page_opml'] = array(
    'arguments' => array('feeds' => NULL),
    'file' => $pages_file,
  );
  $hooks['aggregator_page_rss'] = array(
    'arguments' => array('feeds' => NULL, 'category' => NULL),
    'file' => $pages_file,
  );

  return $hooks;
}
77

78 79 80
/**
 * Implementation of hook_menu().
 *
 * @return
 *   An array of menu item definitions keyed by path, covering the feed
 *   administration pages and the public aggregator pages.
 */
function aggregator_menu() {
  // Values shared by many of the items below.
  $admin_access = array('administer news feeds');
  $view_access = array('access news feeds');
  $admin_file = 'aggregator.admin.inc';
  $pages_file = 'aggregator.pages.inc';
  $admin_root = 'admin/content/aggregator';

  $items = array(
    // Administration pages.
    'admin/content/aggregator' => array(
      'title' => 'Feed aggregator',
      'description' => "Configure which content your site aggregates from other sites, how often it polls them, and how they're categorized.",
      'page callback' => 'aggregator_admin_overview',
      'access arguments' => $admin_access,
      'file' => $admin_file,
    ),
    'admin/content/aggregator/add/feed' => array(
      'title' => 'Add feed',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_form_feed'),
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'parent' => $admin_root,
      'file' => $admin_file,
    ),
    'admin/content/aggregator/add/category' => array(
      'title' => 'Add category',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_form_category'),
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'parent' => $admin_root,
      'file' => $admin_file,
    ),
    'admin/content/aggregator/remove/%aggregator_feed' => array(
      'title' => 'Remove items',
      'page callback' => 'aggregator_admin_remove_feed',
      'page arguments' => array(4),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    ),
    'admin/content/aggregator/update/%aggregator_feed' => array(
      'title' => 'Update items',
      'page callback' => 'aggregator_admin_refresh_feed',
      'page arguments' => array(4),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    ),
    'admin/content/aggregator/list' => array(
      'title' => 'List',
      'type' => MENU_DEFAULT_LOCAL_TASK,
      'weight' => -10,
    ),
    'admin/content/aggregator/settings' => array(
      'title' => 'Settings',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_admin_settings'),
      'type' => MENU_LOCAL_TASK,
      'weight' => 10,
      'access arguments' => $admin_access,
      'file' => $admin_file,
    ),

    // Public listing pages and machine-readable exports.
    'aggregator' => array(
      'title' => 'Feed aggregator',
      'page callback' => 'aggregator_page_last',
      'access arguments' => $view_access,
      'weight' => 5,
      'file' => $pages_file,
    ),
    'aggregator/sources' => array(
      'title' => 'Sources',
      'page callback' => 'aggregator_page_sources',
      'access arguments' => $view_access,
      'file' => $pages_file,
    ),
    'aggregator/categories' => array(
      'title' => 'Categories',
      'page callback' => 'aggregator_page_categories',
      'access callback' => '_aggregator_has_categories',
      'file' => $pages_file,
    ),
    'aggregator/rss' => array(
      'title' => 'RSS feed',
      'page callback' => 'aggregator_page_rss',
      'access arguments' => $view_access,
      'type' => MENU_CALLBACK,
      'file' => $pages_file,
    ),
    'aggregator/opml' => array(
      'title' => 'OPML feed',
      'page callback' => 'aggregator_page_opml',
      'access arguments' => $view_access,
      'type' => MENU_CALLBACK,
      'file' => $pages_file,
    ),

    // Per-category pages. The access callback is spelled out here, unlike
    // the 'aggregator/categories' item which uses _aggregator_has_categories.
    'aggregator/categories/%aggregator_category' => array(
      'title callback' => '_aggregator_category_title',
      'title arguments' => array(2),
      'page callback' => 'aggregator_page_category',
      'page arguments' => array(2),
      'access callback' => 'user_access',
      'access arguments' => $view_access,
      'file' => $pages_file,
    ),
    'aggregator/categories/%aggregator_category/view' => array(
      'title' => 'View',
      'type' => MENU_DEFAULT_LOCAL_TASK,
      'weight' => -10,
    ),
    'aggregator/categories/%aggregator_category/categorize' => array(
      'title' => 'Categorize',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_page_category', 2),
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'file' => $pages_file,
    ),
    'aggregator/categories/%aggregator_category/configure' => array(
      'title' => 'Configure',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_form_category', 2),
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'weight' => 1,
      'file' => $admin_file,
    ),

    // Per-feed pages. This first item declares no access keys of its own.
    'aggregator/sources/%aggregator_feed' => array(
      'page callback' => 'aggregator_page_source',
      'page arguments' => array(2),
      'type' => MENU_CALLBACK,
      'file' => $pages_file,
    ),
    'aggregator/sources/%aggregator_feed/view' => array(
      'title' => 'View',
      'type' => MENU_DEFAULT_LOCAL_TASK,
      'weight' => -10,
    ),
    'aggregator/sources/%aggregator_feed/categorize' => array(
      'title' => 'Categorize',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_page_source', 2),
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'file' => $pages_file,
    ),
    'aggregator/sources/%aggregator_feed/configure' => array(
      'title' => 'Configure',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_form_feed', 2),
      'access arguments' => $admin_access,
      'type' => MENU_LOCAL_TASK,
      'weight' => 1,
      'file' => $admin_file,
    ),

    // Edit forms reached from the administration overview.
    'admin/content/aggregator/edit/feed/%aggregator_feed' => array(
      'title' => 'Edit feed',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_form_feed', 5),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    ),
    'admin/content/aggregator/edit/category/%aggregator_category' => array(
      'title' => 'Edit category',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('aggregator_form_category', 5),
      'access arguments' => $admin_access,
      'type' => MENU_CALLBACK,
      'file' => $admin_file,
    ),
  );

  return $items;
}

249 250 251 252
/**
 * Title callback: returns the title of an aggregator category.
 *
 * @param $category
 *   An array with at least a 'title' element.
 *
 * @return
 *   The value of the 'title' element.
 */
function _aggregator_category_title($category) {
  $title = $category['title'];
  return $title;
}

253 254 255
/**
 * Adds the module's aggregator.css stylesheet to the page.
 */
function aggregator_init() {
  $module_path = drupal_get_path('module', 'aggregator');
  drupal_add_css($module_path .'/aggregator.css');
}
256 257 258 259

/**
 * Access callback for the category listing page.
 *
 * @return
 *   TRUE if the current user has the 'access news feeds' permission and at
 *   least one row exists in {aggregator_category}; FALSE otherwise.
 */
function _aggregator_has_categories() {
  // Skip the count query entirely for users without the permission.
  if (!user_access('access news feeds')) {
    return FALSE;
  }
  $category_count = db_result(db_query('SELECT COUNT(*) FROM {aggregator_category}'));
  return (bool) $category_count;
}
260

Dries's avatar
Dries committed
261

Dries's avatar
 
Dries committed
262

Dries's avatar
 
Dries committed
263 264 265
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function aggregator_perm() {
  $permissions = array();
  $permissions[] = 'administer news feeds';
  $permissions[] = 'access news feeds';
  return $permissions;
}

Dries's avatar
 
Dries committed
270 271 272 273 274
/**
 * Implementation of hook_cron().
 *
 * Refreshes every feed whose last check time plus refresh interval lies
 * before the current time.
 */
function aggregator_cron() {
  $now = time();
  $due_feeds = db_query('SELECT * FROM {aggregator_feed} WHERE checked + refresh < %d', $now);
  for ($feed = db_fetch_array($due_feeds); $feed; $feed = db_fetch_array($due_feeds)) {
    aggregator_refresh($feed);
  }
}

Dries's avatar
 
Dries committed
282 283 284 285 286
/**
 * Implementation of hook_block().
 *
 * Generates blocks for the latest news items in each category and feed.
 * Block deltas have the form "category-<cid>" or "feed-<fid>".
 *
 * @param $op
 *   The operation: 'list', 'configure', 'save' or 'view'.
 * @param $delta
 *   The block delta, as described above.
 * @param $edit
 *   For 'save', the submitted values; $edit['block'] holds the number of
 *   items to show.
 *
 * @return
 *   For 'list', an array of block descriptions keyed by delta; for
 *   'configure', a form array; for 'view', a block array with 'subject' and,
 *   when there are items, 'content'. Nothing is returned for 'save', for
 *   users without the 'access news feeds' permission, or when there is
 *   nothing to show.
 */
function aggregator_block($op = 'list', $delta = 0, $edit = array()) {
  if (!user_access('access news feeds')) {
    return;
  }

  switch ($op) {
    case 'list':
      $result = db_query('SELECT cid, title FROM {aggregator_category} ORDER BY title');
      while ($category = db_fetch_object($result)) {
        $block['category-'. $category->cid]['info'] = t('!title category latest items', array('!title' => $category->title));
      }
      $result = db_query('SELECT fid, title FROM {aggregator_feed} ORDER BY fid');
      while ($feed = db_fetch_object($result)) {
        $block['feed-'. $feed->fid]['info'] = t('!title feed latest items', array('!title' => $feed->title));
      }
      break;

    case 'configure':
      // array_pad() keeps list() from reading a missing element when the
      // delta contains no "-".
      list($type, $id) = array_pad(explode('-', $delta), 2, 0);
      // Any type other than 'category' is treated as a feed.
      if ($type == 'category') {
        $value = db_result(db_query('SELECT block FROM {aggregator_category} WHERE cid = %d', $id));
      }
      else {
        $value = db_result(db_query('SELECT block FROM {aggregator_feed} WHERE fid = %d', $id));
      }
      $form['block'] = array('#type' => 'select', '#title' => t('Number of news items in block'), '#default_value' => $value, '#options' => drupal_map_assoc(range(2, 20)));
      return $form;

    case 'save':
      list($type, $id) = array_pad(explode('-', $delta), 2, 0);
      if ($type == 'category') {
        db_query('UPDATE {aggregator_category} SET block = %d WHERE cid = %d', $edit['block'], $id);
      }
      else {
        db_query('UPDATE {aggregator_feed} SET block = %d WHERE fid = %d', $edit['block'], $id);
      }
      break;

    case 'view':
      list($type, $id) = array_pad(explode('-', $delta), 2, 0);
      // Stay empty unless the delta resolves to an existing feed or category,
      // so the item loop below never runs on an undefined result set.
      $result = NULL;
      $read_more = '';
      switch ($type) {
        case 'feed':
          if ($feed = db_fetch_object(db_query('SELECT fid, title, block FROM {aggregator_feed} WHERE fid = %d', $id))) {
            $block['subject'] = check_plain($feed->title);
            $result = db_query_range('SELECT * FROM {aggregator_item} WHERE fid = %d ORDER BY timestamp DESC, iid DESC', $feed->fid, 0, $feed->block);
            $read_more = theme('more_link', url('aggregator/sources/'. $feed->fid), t("View this feed's recent news."));
          }
          break;

        case 'category':
          if ($category = db_fetch_object(db_query('SELECT cid, title, block FROM {aggregator_category} WHERE cid = %d', $id))) {
            $block['subject'] = check_plain($category->title);
            $result = db_query_range('SELECT i.* FROM {aggregator_category_item} ci LEFT JOIN {aggregator_item} i ON ci.iid = i.iid WHERE ci.cid = %d ORDER BY i.timestamp DESC, i.iid DESC', $category->cid, 0, $category->block);
            $read_more = theme('more_link', url('aggregator/categories/'. $category->cid), t("View this category's recent news."));
          }
          break;
      }

      if ($result) {
        $items = array();
        while ($item = db_fetch_object($result)) {
          $items[] = theme('aggregator_block_item', $item);
        }

        // Only display the block if there are items to show.
        if (count($items) > 0) {
          $block['content'] = theme('item_list', $items) . $read_more;
        }
      }
      break;
  }

  if (isset($block)) {
    return $block;
  }
}

354 355 356 357
/**
 * Add/edit/delete aggregator categories.
 *
 * The operation is chosen from the contents of $edit:
 * - 'cid' and 'title' both non-empty: the category is updated.
 * - 'cid' non-empty, 'title' empty: the category is deleted.
 * - 'cid' empty, 'title' non-empty: a new category is inserted.
 * In each of these cases the matching menu link is maintained as well;
 * otherwise nothing happens.
 *
 * @param $edit
 *   An array with the elements 'cid', 'title' and 'description'.
 */
function aggregator_save_category($edit) {
  $link_path = 'aggregator/categories/';
  if (!empty($edit['cid'])) {
    $link_path .= $edit['cid'];
    if (!empty($edit['title'])) {
      db_query("UPDATE {aggregator_category} SET title = '%s', description = '%s' WHERE cid = %d", $edit['title'], $edit['description'], $edit['cid']);
      $op = 'update';
    }
    else {
      db_query('DELETE FROM {aggregator_category} WHERE cid = %d', $edit['cid']);
      $edit['title'] = '';
      $op = 'delete';
    }
  }
  else if (!empty($edit['title'])) {
    db_query("INSERT INTO {aggregator_category} (title, description, block) VALUES ('%s', '%s', 5)", $edit['title'], $edit['description']);
    // The id must be requested for the table that was just inserted into,
    // in the same way aggregator_save_feed() names {aggregator_feed}.
    $link_path .= db_last_insert_id('aggregator_category', 'cid');
    $op = 'insert';
  }
  if (isset($op)) {
    menu_link_maintain('aggregator', $op, $link_path, $edit['title']);
  }
}

/**
 * Add/edit/delete an aggregator feed.
 *
 * The operation is chosen from the contents of $edit:
 * - 'fid' and 'title' both non-empty: the feed is updated.
 * - 'fid' non-empty, 'title' empty: the feed and its items are deleted.
 * - 'fid' empty, 'title' non-empty: a new feed is inserted.
 * Whenever a title is present, the feed's category assignments are
 * rewritten from $edit['category'].
 *
 * @param $edit
 *   An array with the elements 'fid', 'title', 'url', 'refresh' and
 *   optionally 'category' (category id => selected flag).
 */
function aggregator_save_feed($edit) {
  $has_fid = !empty($edit['fid']);
  $has_title = !empty($edit['title']);

  if ($has_fid) {
    // An existing feed is being modified, delete the category listings.
    db_query('DELETE FROM {aggregator_category_feed} WHERE fid = %d', $edit['fid']);

    if ($has_title) {
      db_query("UPDATE {aggregator_feed} SET title = '%s', url = '%s', refresh = %d WHERE fid = %d", $edit['title'], $edit['url'], $edit['refresh'], $edit['fid']);
    }
    else {
      // No title: remove the feed, its items and their category links.
      $conditions = array();
      $rows = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d', $edit['fid']);
      while ($row = db_fetch_object($rows)) {
        $conditions[] = 'iid = '. $row->iid;
      }
      if (count($conditions) > 0) {
        db_query('DELETE FROM {aggregator_category_item} WHERE '. implode(' OR ', $conditions));
      }
      db_query('DELETE FROM {aggregator_feed} WHERE fid = %d', $edit['fid']);
      db_query('DELETE FROM {aggregator_item} WHERE fid = %d', $edit['fid']);
    }
  }
  else if ($has_title) {
    db_query("INSERT INTO {aggregator_feed} (title, url, refresh, block, description, image) VALUES ('%s', '%s', %d, 5, '', '')", $edit['title'], $edit['url'], $edit['refresh']);
    // Remember the new id so the category rows below can refer to it.
    $edit['fid'] = db_last_insert_id('aggregator_feed', 'fid');
  }

  // The feed is being saved, save the categories as well.
  if ($has_title && !empty($edit['category'])) {
    foreach ($edit['category'] as $cid => $selected) {
      if ($selected) {
        db_query('INSERT INTO {aggregator_category_feed} (fid, cid) VALUES (%d, %d)', $edit['fid'], $cid);
      }
    }
  }
}

Dries's avatar
 
Dries committed
422 423 424 425
/**
 * Removes all items of a feed and resets its fetch state.
 *
 * Deletes the feed's items and their category links, then clears the feed's
 * checked time, etag and modified time, and reports the removal with a
 * message.
 *
 * @param $feed
 *   An array with at least the elements 'fid' and 'title'.
 */
function aggregator_remove($feed) {
  $fid = $feed['fid'];

  // Collect one "iid = N" condition per item of this feed.
  $conditions = array();
  $rows = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d', $fid);
  while ($row = db_fetch_object($rows)) {
    $conditions[] = 'iid = '. $row->iid;
  }
  if (count($conditions) > 0) {
    db_query('DELETE FROM {aggregator_category_item} WHERE '. implode(' OR ', $conditions));
  }

  db_query('DELETE FROM {aggregator_item} WHERE fid = %d', $fid);
  db_query("UPDATE {aggregator_feed} SET checked = 0, etag = '', modified = 0 WHERE fid = %d", $fid);
  drupal_set_message(t('The news items from %site have been removed.', array('%site' => $feed['title'])));
}

Dries's avatar
 
Dries committed
435 436 437
/**
 * Call-back function used by the XML parser.
 *
 * Handles an opening tag: records which container element is being parsed
 * in the global $element, advances the global item counter $item for each
 * ITEM/ENTRY, captures link targets given as attributes, and stores the tag
 * name in the global $tag.
 *
 * @param $parser
 *   The XML parser; not used here.
 * @param $name
 *   The element name, compared against upper-case names below.
 * @param $attributes
 *   The element's attributes, keyed by attribute name.
 */
function aggregator_element_start($parser, $name, $attributes) {
  global $item, $element, $tag, $items, $channel;

  switch ($name) {
    case 'IMAGE':
    case 'TEXTINPUT':
    case 'CONTENT':
    case 'SUMMARY':
    case 'TAGLINE':
    case 'SUBTITLE':
    case 'LOGO':
    case 'INFO':
      $element = $name;
      break;
    case 'ID':
      if ($element != 'ITEM') {
        $element = $name;
      }
      // No break: control continues into the LINK case, as it always has.
      // An element without an HREF attribute is left untouched there.
    case 'LINK':
      // A link given as an attribute needs a non-empty HREF. According to
      // RFC 4287, a link without a REL attribute is to be treated as
      // rel="alternate".
      if (!empty($attributes['HREF']) && (empty($attributes['REL']) || $attributes['REL'] == 'alternate')) {
        if ($element == 'ITEM') {
          $items[$item]['LINK'] = $attributes['HREF'];
        }
        else {
          $channel['LINK'] = $attributes['HREF'];
        }
      }
      break;
    case 'ITEM':
      $element = $name;
      $item += 1;
      break;
    case 'ENTRY':
      // ENTRY is handled exactly like ITEM.
      $element = 'ITEM';
      $item += 1;
      break;
  }

  $tag = $name;
}

Dries's avatar
 
Dries committed
479 480 481
/**
 * Call-back function used by the XML parser.
 *
 * Handles a closing tag by clearing the global $element when the container
 * it names has ended.
 *
 * @param $parser
 *   The XML parser; not used here.
 * @param $name
 *   The name of the element being closed.
 */
function aggregator_element_end($parser, $name) {
  global $element;

  // Closing any of these always clears the current container.
  $always_reset = array('IMAGE', 'TEXTINPUT', 'ITEM', 'ENTRY', 'CONTENT', 'INFO');

  if (in_array($name, $always_reset)) {
    $element = '';
  }
  elseif ($name == 'ID' && $element == 'ID') {
    // An ID only clears the container it opened itself.
    $element = '';
  }
}

Dries's avatar
 
Dries committed
501 502 503
/**
 * Call-back function used by the XML parser.
 *
 * Appends character data to the global $items, $image or $channel array,
 * depending on the container recorded in the global $element and the tag
 * recorded in the global $tag.
 *
 * @param $parser
 *   The XML parser; not used here.
 * @param $data
 *   The character data to append.
 */
function aggregator_element_data($parser, $data) {
  global $channel, $element, $items, $item, $image, $tag;

  // Make sure the current item has a slot before anything is appended.
  $items += array($item => array());

  switch ($element) {
    case 'INFO':
    case 'ID':
    case 'TEXTINPUT':
      // The sub-element is not supported. However, we must recognize
      // it or its contents will end up in the item array.
      return;

    case 'LINK':
      // Empty data is skipped for links.
      if (!$data) {
        return;
      }
      // Otherwise a link is stored exactly like item data.
    case 'ITEM':
      $items[$item] += array($tag => '');
      $items[$item][$tag] .= $data;
      return;

    case 'IMAGE':
    case 'LOGO':
      $image += array($tag => '');
      $image[$tag] .= $data;
      return;

    case 'CONTENT':
      $items[$item] += array('CONTENT' => '');
      $items[$item]['CONTENT'] .= $data;
      return;

    case 'SUMMARY':
      $items[$item] += array('SUMMARY' => '');
      $items[$item]['SUMMARY'] .= $data;
      return;

    case 'TAGLINE':
    case 'SUBTITLE':
      // Both are stored as the channel description.
      $channel += array('DESCRIPTION' => '');
      $channel['DESCRIPTION'] .= $data;
      return;
  }

  // Everything else belongs to the channel, keyed by the current tag.
  $channel += array($tag => '');
  $channel[$tag] .= $data;
}

Dries's avatar
 
Dries committed
548 549 550
/**
 * Checks a news feed for new items.
 *
 * Requests the feed with conditional GET headers built from the stored etag
 * and modified time, then acts on the HTTP response code: 304 only updates
 * the checked time; 301 adopts the redirect URL and continues as for a
 * successful fetch; 200, 302 and 307 parse the response and store the
 * channel data; any other code is logged and reported as a broken feed.
 *
 * @param $feed
 *   An array with the elements 'fid', 'title', 'url', 'etag' and
 *   'modified'.
 */
function aggregator_refresh($feed) {
  global $channel, $image;

  // Generate conditional GET headers.
  $headers = array();
  if ($feed['etag']) {
    $headers['If-None-Match'] = $feed['etag'];
  }
  if ($feed['modified']) {
    $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed['modified']) .' GMT';
  }

  // Request feed.
  $result = drupal_http_request($feed['url'], $headers);

  // Process HTTP response code.
  switch ($result->code) {
    case 304:
      db_query('UPDATE {aggregator_feed} SET checked = %d WHERE fid = %d', time(), $feed['fid']);
      drupal_set_message(t('There is no new syndicated content from %site.', array('%site' => $feed['title'])));
      break;
    case 301:
      $feed['url'] = $result->redirect_url;
      watchdog('aggregator', 'Updated URL for feed %title to %url.', array('%title' => $feed['title'], '%url' => $feed['url']));
      // No break: after adopting the new URL the response is processed like
      // any other successful fetch, which also stores the URL below.
    case 200:
    case 302:
    case 307:
      // Filter the input data:
      if (aggregator_parse_feed($result->data, $feed)) {

        $modified = empty($result->headers['Last-Modified']) ? 0 : strtotime($result->headers['Last-Modified']);

        // Prepare the channel data:
        foreach ($channel as $key => $value) {
          $channel[$key] = trim($value);
        }

        // A feed may omit its link or description; store an empty string
        // rather than reading an undefined array element.
        $link = isset($channel['LINK']) ? $channel['LINK'] : '';
        $description = isset($channel['DESCRIPTION']) ? $channel['DESCRIPTION'] : '';

        // Prepare the image data (if any):
        foreach ($image as $key => $value) {
          $image[$key] = trim($value);
        }

        if (!empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) {
          // Note, we should really use theme_image() here but that only works with local images it won't work with images fetched with a URL unless PHP version > 5
          $image = '<a href="'. check_url($image['LINK']) .'" class="feed-image"><img src="'. check_url($image['URL']) .'" alt="'. check_plain($image['TITLE']) .'" /></a>';
        }
        else {
          $image = NULL;
        }

        $etag = empty($result->headers['ETag']) ? '' : $result->headers['ETag'];

        // Update the feed data:
        db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), $link, $description, $image, $etag, $modified, $feed['fid']);

        // Clear the cache:
        cache_clear_all();

        watchdog('aggregator', 'There is new syndicated content from %site.', array('%site' => $feed['title']));
        drupal_set_message(t('There is new syndicated content from %site.', array('%site' => $feed['title'])));
      }
      break;
    default:
      watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array('%site' => $feed['title'], '%error' => $result->code .' '. $result->error), WATCHDOG_WARNING);
      drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array('%site' => $feed['title'], '%error' => $result->code .' '. $result->error)));
  }
}
Dries's avatar
 
Dries committed
630

Dries's avatar
 
Dries committed
631 632 633 634
/**
 * Parse the W3C date/time format, a subset of ISO 8601. PHP date parsing
 * functions do not handle this format.
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * The seconds and the time zone designator are optional. A date without a
 * time zone designator is interpreted as GMT.
 *
 * @param $date_str A string with a potentially W3C DTF date.
 * @return A timestamp if parsed successfully or FALSE if not.
 */
function aggregator_parse_w3cdtf($date_str) {
  if (!preg_match('/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/', $date_str, $match)) {
    return FALSE;
  }

  // preg_match() leaves out trailing groups that did not take part in the
  // match, so give every group (0-11) an empty default.
  $match += array_fill(0, 12, '');

  // Group 6 is ":SS" including the colon; the bare seconds are group 7.
  $seconds = (int) $match[7];

  // calc epoch for current date assuming GMT
  $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds, (int) $match[2], (int) $match[3], (int) $match[1]);

  // Group 11 holds the literal "Z" (zulu time, aka GMT); groups 8-10 hold
  // the sign, hours and minutes of a numeric offset.
  if ($match[11] != 'Z') {
    $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;
    // is timezone ahead of GMT?  then subtract offset
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }

  return $epoch;
}

Dries's avatar
 
Dries committed
668
function aggregator_parse_feed(&$data, $feed) {
Dries's avatar
 
Dries committed
669
  global $items, $image, $channel;
Dries's avatar
 
Dries committed
670

Dries's avatar
 
Dries committed
671
  // Unset the global variables before we use them:
Dries's avatar
 
Dries committed
672
  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
Dries's avatar
 
Dries committed
673
  $items = array();
Dries's avatar
 
Dries committed
674
  $image = array();
Dries's avatar
 
Dries committed
675
  $channel = array();
676

Dries's avatar
 
Dries committed
677 678
  // parse the data:
  $xml_parser = drupal_xml_parser_create($data);
Dries's avatar
 
Dries committed
679 680
  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');
Dries's avatar
 
Dries committed
681

Dries's avatar
 
Dries committed
682
  if (!xml_parse($xml_parser, $data, 1)) {
683
    watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser)), WATCHDOG_WARNING);
684
    drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', array('%site' => $feed['title'], '%error' => xml_error_string(xml_get_error_code($xml_parser)), '%line' => xml_get_current_line_number($xml_parser))), 'error');
Dries's avatar
 
Dries committed
685
    return 0;
Dries's avatar
 
Dries committed
686 687
  }
  xml_parser_free($xml_parser);
Dries's avatar
 
Dries committed
688

Dries's avatar
 
Dries committed
689 690
  /*
  ** We reverse the array such that we store the first item last,
691
  ** and the last item first. In the database, the newest item
Dries's avatar
 
Dries committed
692 693
  ** should be at the top.
  */
Dries's avatar
 
Dries committed
694

Dries's avatar
 
Dries committed
695
  $items = array_reverse($items);
Dries's avatar
 
Dries committed
696

697 698
  // Initialize variables
  $title = $link = $author = $description = $guid = NULL;
Dries's avatar
 
Dries committed
699
  foreach ($items as $item) {
700
    unset($title, $link, $author, $description, $guid);
701

Dries's avatar
 
Dries committed
702 703
    // Prepare the item:
    foreach ($item as $key => $value) {
704
      $item[$key] = trim($value);
Dries's avatar
 
Dries committed
705
    }
Dries's avatar
Dries committed
706

Dries's avatar
 
Dries committed
707
    /*
708
    ** Resolve the item's title. If no title is found, we use
Dries's avatar
 
Dries committed
709 710 711 712
    ** up to 40 characters of the description ending at a word
    ** boundary but not splitting potential entities.
    */

713
    if (!empty($item['TITLE'])) {
Dries's avatar
 
Dries committed
714
      $title = $item['TITLE'];
Dries's avatar
 
Dries committed
715
    }
716
    elseif (!empty($item['DESCRIPTION'])) {
Dries's avatar
 
Dries committed
717
      $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40));
Dries's avatar
 
Dries committed
718
    }
719 720 721
    else {
      $title = '';
    }
Dries's avatar
 
Dries committed
722

Dries's avatar
 
Dries committed
723 724 725 726
    /*
    ** Resolve the items link.
    */

727
    if (!empty($item['LINK'])) {
Dries's avatar
 
Dries committed
728
      $link = $item['LINK'];
Dries's avatar
 
Dries committed
729 730
    }
    else {
Dries's avatar
 
Dries committed
731
      $link = $feed['link'];
Dries's avatar
 
Dries committed
732
    }
733
    $guid = isset($item['GUID']) ? $item['GUID'] : '';
734

735 736 737
    /**
     * Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION tag
     */
738
    if (!empty($item['CONTENT:ENCODED'])) {
739
      $item['DESCRIPTION'] = $item['CONTENT:ENCODED'];
740
    }
741
    else if (!empty($item['SUMMARY'])) {
742 743
      $item['DESCRIPTION'] = $item['SUMMARY'];
    }
744
    else if (!empty($item['CONTENT'])) {
745 746
      $item['DESCRIPTION'] = $item['CONTENT'];
    }
747

Dries's avatar
 
Dries committed
748
    /*
749
    ** Try to resolve and parse the item's publication date. If no
Dries's avatar
 
Dries committed
750 751 752
    ** date is found, we use the current date instead.
    */

753 754 755 756 757 758 759
    $date = 'now';
    foreach (array('PUBDATE', 'DC:DATE', 'DCTERMS:ISSUED', 'DCTERMS:CREATED', 'DCTERMS:MODIFIED', 'ISSUED', 'CREATED', 'MODIFIED', 'PUBLISHED', 'UPDATED') as $key) {
      if (!empty($item[$key])) {
        $date = $item[$key];
        break;
      }
    }
Dries's avatar
 
Dries committed
760

761 762 763 764
    $timestamp = strtotime($date); // As of PHP 5.1.0, strtotime returns FALSE on failure instead of -1.
    if ($timestamp <= 0) {
      $timestamp = aggregator_parse_w3cdtf($date); // Returns FALSE on failure
      if (!$timestamp) {
Dries's avatar
 
Dries committed
765 766
        $timestamp = time(); // better than nothing
      }
767
    }
Dries's avatar
 
Dries committed
768 769

    /*
770 771
    ** Save this item. Try to avoid duplicate entries as much as
    ** possible. If we find a duplicate entry, we resolve it and
772
    ** pass along its ID is such that we can update it if needed.
Dries's avatar
 
Dries committed
773 774
    */

775
    if (!empty($guid)) {
776 777 778
      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND guid = '%s'", $feed['fid'], $guid));
    }
    else if ($link && $link != $feed['link'] && $link != $feed['url']) {
Dries's avatar
 
Dries committed
779
      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND link = '%s'", $feed['fid'], $link));
Dries's avatar
 
Dries committed
780 781
    }
    else {
Dries's avatar
 
Dries committed
782
      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND title = '%s'", $feed['fid'], $title));
Dries's avatar
 
Dries committed
783
    }
784 785
    $item += array('AUTHOR' => '', 'DESCRIPTION' => '');
    aggregator_save_item(array('iid' => (isset($entry->iid) ? $entry->iid:  ''), 'fid' => $feed['fid'], 'timestamp' => $timestamp, 'title' => $title, 'link' => $link, 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION'], 'guid' => $guid));
Dries's avatar
 
Dries committed
786
  }
Dries's avatar
 
Dries committed
787

Dries's avatar
 
Dries committed
788
  /*
789
  ** Remove all items that are older than flush item timer:
Dries's avatar
 
Dries committed
790
  */
Dries's avatar